diff --git a/ompi/mca/bcol/Makefile.am b/ompi/mca/bcol/Makefile.am deleted file mode 100644 index 06c2ef5770..0000000000 --- a/ompi/mca/bcol/Makefile.am +++ /dev/null @@ -1,35 +0,0 @@ -# -# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. -# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# main library setup -noinst_LTLIBRARIES = libmca_bcol.la -libmca_bcol_la_SOURCES = - -# header setup -nobase_ompi_HEADERS = -nobase_nodist_ompi_HEADERS = - -# local files -headers = bcol.h -libmca_bcol_la_SOURCES += $(headers) $(nodist_headers) - -# Conditionally install the header files -if WANT_INSTALL_HEADERS -nobase_ompi_HEADERS += $(headers) -nobase_nodist_ompi_HEADERS += $(nodist_headers) -ompidir = $(ompiincludedir)/ompi/mca/bcol -else -ompidir = $(includedir) -endif - -include base/Makefile.am - -distclean-local: - rm -f base/static-components.h diff --git a/ompi/mca/bcol/base/Makefile.am b/ompi/mca/bcol/base/Makefile.am deleted file mode 100644 index 929bef0f5b..0000000000 --- a/ompi/mca/bcol/base/Makefile.am +++ /dev/null @@ -1,16 +0,0 @@ -# -# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. -# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - - -headers += \ - base/base.h -libmca_bcol_la_SOURCES += \ - base/bcol_base_frame.c \ - base/bcol_base_init.c diff --git a/ompi/mca/bcol/base/base.h b/ompi/mca/bcol/base/base.h deleted file mode 100644 index b95bea398b..0000000000 --- a/ompi/mca/bcol/base/base.h +++ /dev/null @@ -1,49 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_BASE_H -#define MCA_BCOL_BASE_H - -#include "ompi_config.h" - -#include "ompi/mca/mca.h" -#include "opal/class/opal_list.h" -#include "ompi/mca/bcol/bcol.h" - -/* - * Global functions for BCOL - */ - -BEGIN_C_DECLS - -OMPI_DECLSPEC extern opal_list_t mca_bcol_base_components_in_use; -OMPI_DECLSPEC extern char *ompi_bcol_bcols_string; - -OMPI_DECLSPEC extern mca_base_framework_t ompi_bcol_base_framework; - -OMPI_DECLSPEC int mca_bcol_base_init(bool enable_progress_threads, bool enable_mpi_threads); - -struct mca_bcol_base_module_t; -OMPI_DECLSPEC int mca_bcol_base_bcol_fns_table_init(struct mca_bcol_base_module_t *bcol_module); - -OMPI_DECLSPEC int mca_bcol_base_fn_table_construct(struct mca_bcol_base_module_t *bcol_module); - -OMPI_DECLSPEC int mca_bcol_base_fn_table_destroy(struct mca_bcol_base_module_t *bcol_module); - -OMPI_DECLSPEC int mca_bcol_base_set_attributes(struct mca_bcol_base_module_t *bcol_module, - mca_bcol_base_coll_fn_comm_attributes_t *comm_attribs, - mca_bcol_base_coll_fn_invoke_attributes_t *inv_attribs, - mca_bcol_base_module_collective_fn_primitives_t bcol_fn, - mca_bcol_base_module_collective_fn_primitives_t progress_fn); - -END_C_DECLS - -#endif /* MCA_BCOL_BASE_H */ diff --git a/ompi/mca/bcol/base/bcol_base_frame.c b/ompi/mca/bcol/base/bcol_base_frame.c deleted file mode 100644 index e7b6d68d26..0000000000 --- a/ompi/mca/bcol/base/bcol_base_frame.c +++ /dev/null @@ -1,374 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. 
All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - - -#include "ompi_config.h" -#include <stdio.h> - -#ifdef HAVE_UNISTD_H -#include <unistd.h> -#endif /* HAVE_UNISTD_H */ -#include "ompi/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/util/argv.h" - -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/bcol/base/base.h" -#include "ompi/include/ompi/constants.h" -#include "opal/mca/mpool/mpool.h" -#include "opal/class/opal_list.h" -/* - * The following file was created by configure. It contains extern - * statements and the definition of an array of pointers to each - * component's public mca_base_component_t struct. - */ - -#include "ompi/mca/bcol/base/static-components.h" - -static int mca_bcol_base_open(mca_base_open_flag_t flags); -static int mca_bcol_base_close (void); -static int mca_bcol_base_register(mca_base_register_flag_t flags); - -/* -** * Global variables -** */ -MCA_BASE_FRAMEWORK_DECLARE(ompi, bcol, NULL, mca_bcol_base_register, mca_bcol_base_open, mca_bcol_base_close, - mca_bcol_base_static_components, 0); - -OMPI_DECLSPEC opal_list_t mca_bcol_base_components_in_use = {{0}}; -OMPI_DECLSPEC char *ompi_bcol_bcols_string = NULL; -OMPI_DECLSPEC int bcol_mpool_compatibility[BCOL_SIZE][BCOL_SIZE] = {{0}}; -OMPI_DECLSPEC int bcol_mpool_index[BCOL_SIZE][BCOL_SIZE] = {{0}}; - -static void bcol_base_module_constructor(mca_bcol_base_module_t *module) -{ - int fnc; - - module->bcol_component = NULL; - module->network_context = NULL; - module->context_index = -1; - module->supported_mode = 0; - module->init_module = NULL; - module->sbgp_partner_module = NULL; - module->squence_number_offset = 0; - module->n_poll_loops = 0; - - for (fnc = 0; fnc < BCOL_NUM_OF_FUNCTIONS; fnc++) { - module->bcol_function_table[fnc] = NULL; - module->small_message_thresholds[fnc] = BCOL_THRESHOLD_UNLIMITED; - } - - module->set_small_msg_thresholds = NULL; - - module->header_size = 0; - module->bcol_memory_init = NULL; - - module->next_inorder = NULL; - - mca_bcol_base_fn_table_construct(module); -} - -static void bcol_base_module_destructor(mca_bcol_base_module_t *module) -{ - int fnc; - - module->bcol_component = NULL; - - module->context_index = -1; - module->init_module = NULL; - module->sbgp_partner_module = NULL; - module->squence_number_offset = 0; - module->n_poll_loops = 0; - - for (fnc = 0; fnc < BCOL_NUM_OF_FUNCTIONS; fnc++) { - module->bcol_function_table[fnc] = NULL; - } - - module->bcol_memory_init = NULL; -} - -OBJ_CLASS_INSTANCE(mca_bcol_base_module_t, - opal_object_t, - bcol_base_module_constructor, - bcol_base_module_destructor); - -static void bcol_base_network_context_constructor(bcol_base_network_context_t *nc) -{ - nc->context_id = -1; - nc->context_data = NULL; -} - -static void bcol_base_network_context_destructor(bcol_base_network_context_t *nc) -{ - nc->context_id = -1; - nc->context_data = NULL; - nc->register_memory_fn = NULL; - nc->deregister_memory_fn = NULL; -} - -OBJ_CLASS_INSTANCE(bcol_base_network_context_t, - opal_object_t, - bcol_base_network_context_constructor, - bcol_base_network_context_destructor); - -/* get list of subgrouping components to use */ -static int
mca_bcol_base_set_components_to_use(opal_list_t *bcol_components_avail, - opal_list_t *bcol_components_in_use) -{ - /* local variables */ - const mca_base_component_t *b_component; - - mca_base_component_list_item_t *b_cli; - mca_base_component_list_item_t *b_clj; - - char **bcols_requested; - const char *b_component_name; - - /* split the requst for the bcol modules */ - bcols_requested = opal_argv_split(ompi_bcol_bcols_string, ','); - if (NULL == bcols_requested) { - return OMPI_ERROR; - } - - /* Initialize list */ - OBJ_CONSTRUCT(bcol_components_in_use, opal_list_t); - - /* figure out basic collective modules to use */ - /* loop over list of components requested */ - for (int i = 0 ; bcols_requested[i] ; ++i) { - /* loop over discovered components */ - OPAL_LIST_FOREACH(b_cli, bcol_components_avail, mca_base_component_list_item_t) { - b_component = b_cli->cli_component; - b_component_name = b_component->mca_component_name; - - if (0 == strcmp (b_component_name, bcols_requested[i])) { - /* found selected component */ - b_clj = OBJ_NEW(mca_base_component_list_item_t); - if (NULL == b_clj) { - opal_argv_free (bcols_requested); - return OPAL_ERR_OUT_OF_RESOURCE; - } - - b_clj->cli_component = b_component; - opal_list_append(bcol_components_in_use, - (opal_list_item_t *) b_clj); - break; - } /* end check for bcol component */ - } - } - - /* Note: Need to add error checking to make sure all requested functions - ** were found */ - - /* - ** release resources - ** */ - - opal_argv_free (bcols_requested); - - return OMPI_SUCCESS; -} - -static int mca_bcol_base_register(mca_base_register_flag_t flags) -{ - /* figure out which bcol and sbgp components will actually be used */ - /* get list of sub-grouping functions to use */ - ompi_bcol_bcols_string = "basesmuma,basesmuma,iboffload,ptpcoll,ugni"; - (void) mca_base_var_register("ompi", "bcol", "base", "string", - "Default set of basic collective components to use", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &ompi_bcol_bcols_string); - - return OMPI_SUCCESS; -} - -/** - * Function for finding and opening either all MCA components, or the one - * that was specifically requested via a MCA parameter. 
- */ -static int mca_bcol_base_open(mca_base_open_flag_t flags) -{ - int ret; - - /* Open up all available components */ - if (OMPI_SUCCESS != - (ret = mca_base_framework_components_open(&ompi_bcol_base_framework, flags))) { - return ret; - } - - ret = mca_bcol_base_set_components_to_use(&ompi_bcol_base_framework.framework_components, - &mca_bcol_base_components_in_use); - if (OMPI_SUCCESS != ret) { - return ret; - } - - /* memory registration compatibilities */ - bcol_mpool_compatibility[BCOL_SHARED_MEMORY_UMA][BCOL_SHARED_MEMORY_UMA]=1; - bcol_mpool_compatibility[BCOL_SHARED_MEMORY_UMA][BCOL_SHARED_MEMORY_SOCKET]=1; - bcol_mpool_compatibility[BCOL_SHARED_MEMORY_UMA][BCOL_POINT_TO_POINT]=1; - bcol_mpool_compatibility[BCOL_SHARED_MEMORY_UMA][BCOL_IB_OFFLOAD]=1; - bcol_mpool_compatibility[BCOL_SHARED_MEMORY_SOCKET][BCOL_SHARED_MEMORY_UMA]=1; - bcol_mpool_compatibility[BCOL_POINT_TO_POINT] [BCOL_SHARED_MEMORY_UMA]=1; - bcol_mpool_compatibility[BCOL_IB_OFFLOAD] [BCOL_SHARED_MEMORY_UMA]=1; - - return OMPI_SUCCESS; -} - -static int mca_bcol_base_close (void) -{ - opal_list_item_t *item; - - while (NULL != (item = opal_list_remove_first (&mca_bcol_base_components_in_use))) { - OBJ_RELEASE(item); - } - - OBJ_DESTRUCT(&mca_bcol_base_components_in_use); - - return mca_base_framework_components_close(&ompi_bcol_base_framework, NULL); -} - -/* - * Prototype implementation of selection logic - */ -int mca_bcol_base_fn_table_construct(struct mca_bcol_base_module_t *bcol_module){ - - int bcol_fn; - /* Call all init functions */ - - /* Create a function table */ - for (bcol_fn = 0; bcol_fn < BCOL_NUM_OF_FUNCTIONS; bcol_fn++){ - /* Create a list object for each bcol type list */ - OBJ_CONSTRUCT(&(bcol_module->bcol_fns_table[bcol_fn]), opal_list_t); - } - - return OMPI_SUCCESS; -} - -int mca_bcol_base_fn_table_destroy(struct mca_bcol_base_module_t *bcol_module){ - - int bcol_fn; - - for (bcol_fn = 0; bcol_fn < BCOL_NUM_OF_FUNCTIONS; bcol_fn++){ - /* gvm FIX: Go through the list and destroy each item */ - /* Destroy the function table object for each bcol type list */ - OBJ_DESTRUCT(&(bcol_module->bcol_fns_table[bcol_fn])); - } - - return OMPI_SUCCESS; -} - -int mca_bcol_base_set_attributes(struct mca_bcol_base_module_t *bcol_module, - mca_bcol_base_coll_fn_comm_attributes_t *arg_comm_attribs, - mca_bcol_base_coll_fn_invoke_attributes_t *arg_inv_attribs, - mca_bcol_base_module_collective_fn_primitives_t bcol_fn, - mca_bcol_base_module_collective_fn_primitives_t progress_fn - ) -{ - mca_bcol_base_coll_fn_comm_attributes_t *comm_attribs = NULL; - mca_bcol_base_coll_fn_invoke_attributes_t *inv_attribs = NULL; - struct mca_bcol_base_coll_fn_desc_t *fn_filtered = NULL; - int coll_type; - - comm_attribs = malloc(sizeof(mca_bcol_base_coll_fn_comm_attributes_t)); - if (NULL == comm_attribs) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - inv_attribs = malloc(sizeof(mca_bcol_base_coll_fn_invoke_attributes_t)); - - if (NULL == inv_attribs) { - free(comm_attribs); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - coll_type = comm_attribs->bcoll_type = arg_comm_attribs->bcoll_type; - comm_attribs->comm_size_min = arg_comm_attribs->comm_size_min; - comm_attribs->comm_size_max = arg_comm_attribs->comm_size_max; - comm_attribs->data_src = arg_comm_attribs->data_src; - comm_attribs->waiting_semantics = arg_comm_attribs->waiting_semantics; - - inv_attribs->bcol_msg_min = arg_inv_attribs->bcol_msg_min; - inv_attribs->bcol_msg_max = arg_inv_attribs->bcol_msg_max ; - inv_attribs->datatype_bitmap = arg_inv_attribs->datatype_bitmap ; - 
inv_attribs->op_types_bitmap = arg_inv_attribs->op_types_bitmap; - - fn_filtered = OBJ_NEW(mca_bcol_base_coll_fn_desc_t); - - fn_filtered->coll_fn = bcol_fn; - fn_filtered->progress_fn = progress_fn; - - fn_filtered->comm_attr = comm_attribs; - fn_filtered->inv_attr = inv_attribs; - - - opal_list_append(&(bcol_module->bcol_fns_table[coll_type]),(opal_list_item_t*)fn_filtered); - - return OMPI_SUCCESS; -} - -int mca_bcol_base_bcol_fns_table_init(struct mca_bcol_base_module_t *bcol_module){ - - int ret, bcol_init_fn; - - for (bcol_init_fn =0; bcol_init_fn < BCOL_NUM_OF_FUNCTIONS; bcol_init_fn++) { - if (NULL != bcol_module->bcol_function_init_table[bcol_init_fn]) { - ret = (bcol_module->bcol_function_init_table[bcol_init_fn]) (bcol_module); - if (OMPI_SUCCESS != ret) { - return OMPI_ERROR; - } - } - } - - return OMPI_SUCCESS; -} - -static void mca_bcol_base_coll_fn_desc_constructor(mca_bcol_base_coll_fn_desc_t *fn) -{ - fn->comm_attr = NULL; - fn->inv_attr = NULL; -} - -static void mca_bcol_base_coll_fn_desc_destructor(mca_bcol_base_coll_fn_desc_t *fn) -{ - if (fn->comm_attr) { - free(fn->comm_attr); - } - - if (fn->inv_attr) { - free(fn->inv_attr); - } -} - -OBJ_CLASS_INSTANCE(mca_bcol_base_coll_fn_desc_t, - opal_list_item_t, - mca_bcol_base_coll_fn_desc_constructor, - mca_bcol_base_coll_fn_desc_destructor); - -static void lmngr_block_constructor(mca_bcol_base_lmngr_block_t *item) -{ - item->base_addr = NULL; -} - -static void lnmgr_block_destructor(mca_bcol_base_lmngr_block_t *item) -{ - /* I have nothing to do here */ -} -OBJ_CLASS_INSTANCE(mca_bcol_base_lmngr_block_t, - opal_list_item_t, - lmngr_block_constructor, - lnmgr_block_destructor); diff --git a/ompi/mca/bcol/base/bcol_base_init.c b/ompi/mca/bcol/base/bcol_base_init.c deleted file mode 100644 index f6f0360cd9..0000000000 --- a/ompi/mca/bcol/base/bcol_base_init.c +++ /dev/null @@ -1,45 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mca/mca.h" -#include "opal/mca/base/base.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/bcol/base/base.h" -#include "ompi/include/ompi/constants.h" - -int mca_bcol_base_init(bool enable_progress_threads, bool enable_mpi_threads) -{ - mca_bcol_base_component_t *bcol_component; - mca_base_component_list_item_t *cli; - int ret; - - OPAL_LIST_FOREACH(cli, &mca_bcol_base_components_in_use, mca_base_component_list_item_t) { - bcol_component = (mca_bcol_base_component_t *) cli->cli_component; - - if (false == bcol_component->init_done) { - ret = bcol_component->collm_init_query(true, true); - if (OMPI_SUCCESS != ret) { - return ret; - } - - bcol_component->init_done = true; - } - } - - return OMPI_SUCCESS; -} - - - diff --git a/ompi/mca/bcol/base/owner.txt b/ompi/mca/bcol/base/owner.txt deleted file mode 100644 index 1c86df367b..0000000000 --- a/ompi/mca/bcol/base/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. 
active, maintenance, unmaintained -# -owner: ORNL -status: unmaintained diff --git a/ompi/mca/bcol/basesmuma/Makefile.am b/ompi/mca/bcol/basesmuma/Makefile.am deleted file mode 100644 index 9a9d288f49..0000000000 --- a/ompi/mca/bcol/basesmuma/Makefile.am +++ /dev/null @@ -1,66 +0,0 @@ -# -# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. -# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. -# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - bcol_basesmuma.h \ - bcol_basesmuma_utils.h \ - bcol_basesmuma_bcast.c \ - bcol_basesmuma_component.c \ - bcol_basesmuma_module.c \ - bcol_basesmuma_buf_mgmt.c \ - bcol_basesmuma_mem_mgmt.c \ - bcol_basesmuma_fanin.c \ - bcol_basesmuma_fanout.c \ - bcol_basesmuma_progress.c \ - bcol_basesmuma_reduce.h \ - bcol_basesmuma_reduce.c \ - bcol_basesmuma_allreduce.c \ - bcol_basesmuma_setup.c \ - bcol_basesmuma_rd_barrier.c \ - bcol_basesmuma_rd_nb_barrier.c \ - bcol_basesmuma_rk_barrier.c \ - bcol_basesmuma_utils.c \ - bcol_basesmuma_bcast_prime.c \ - bcol_basesmuma_lmsg_knomial_bcast.c \ - bcol_basesmuma_lmsg_bcast.c \ - bcol_basesmuma_gather.c \ - bcol_basesmuma_allgather.c \ - bcol_basesmuma_smcm.h \ - bcol_basesmuma_smcm.c - -# Make the output library in this directory, and name it either -# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la -# (for static builds). - -component_noinst = -component_install = -if MCA_BUILD_ompi_bcol_basesmuma_DSO -component_install += mca_bcol_basesmuma.la -else -component_noinst += libmca_bcol_basesmuma.la -endif - -# See ompi/mca/btl/sm/Makefile.am for an explanation of -# libmca_common_sm.la. - -AM_CPPFLAGS = $(btl_portals_CPPFLAGS) - -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_bcol_basesmuma_la_SOURCES = $(sources) -mca_bcol_basesmuma_la_LDFLAGS = -module -avoid-version $(btl_portals_LDFLAGS) -mca_bcol_basesmuma_la_LIBADD = \ - $(btl_portals_LIBS) - - -noinst_LTLIBRARIES = $(component_noinst) -libmca_bcol_basesmuma_la_SOURCES =$(sources) -libmca_bcol_basesmuma_la_LDFLAGS = -module -avoid-version $(btl_portals_LDFLAGS) diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma.h b/ompi/mca/bcol/basesmuma/bcol_basesmuma.h deleted file mode 100644 index 7b6c69d2c3..0000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma.h +++ /dev/null @@ -1,1270 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved.
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#ifndef MCA_BCOL_basesmuma_EXPORT_H -#define MCA_BCOL_basesmuma_EXPORT_H - -#include "ompi_config.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/bcol/base/base.h" -#include "opal/mca/mpool/mpool.h" -#include "ompi/request/request.h" -#include "ompi/proc/proc.h" -#include "ompi/patterns/net/netpatterns.h" - -#include "ompi/mca/mca.h" -#include "opal/util/arch.h" -#include "opal/util/argv.h" -#include "opal/datatype/opal_datatype.h" -#include "opal/util/output.h" - -#include "bcol_basesmuma_smcm.h" -BEGIN_C_DECLS - -struct list_data_t { - opal_list_item_t super; - void *data; -}; -typedef struct list_data_t list_data_t; -OBJ_CLASS_DECLARATION(list_data_t); - -/* - * Macro's for manipulating the 64 bit shared memory control bits. - * The 64 bit field is devided into 4 bit fields - * - * | 48-63: src | 32-47: index | 16-31: flag | 0-15: sequence number | - * - * Only the low 16 bits of the sequence number will be put in the header - * space. We will use the fact that the use of the shared buffers is - * synchronous, and get the upper 48 bits from the local process space. - */ - -#define BASESMUMA_CACHE_LINE_SIZE 128 - -#define SHIFT_UP << -#define SHIFT_DOWN >> - -#define SEQ_WIDTH 16 -#define SEQ_BASE 0 -#define FIELD_SEQ_MASK ( ( 1 SHIFT_UP SEQ_WIDTH ) - 1 ) -#define INPLACE_SEQ_MASK ( (int64_t)FIELD_SEQ_MASK SHIFT_UP SEQ_BASE) - -#define FLAG_WIDTH 16 -#define FLAG_BASE 16 -#define FIELD_FLAG_MASK ( ( 1 SHIFT_UP FLAG_WIDTH ) - 1 ) -#define INPLACE_FLAG_MASK ( (int64_t)FIELD_FLAG_MASK SHIFT_UP FLAG_BASE) - -#define INDX_WIDTH 16 -#define INDX_BASE 32 -#define FIELD_INDX_MASK ( ( 1 SHIFT_UP INDX_WIDTH ) - 1 ) -#define INPLACE_INDX_MASK ( (int64_t)FIELD_INDX_MASK SHIFT_UP INDX_BASE) - -#define SRC_WIDTH 16 -#define SRC_BASE 48 -#define FIELD_SRC_MASK ( ( 1 SHIFT_UP SRC_WIDTH ) - 1 ) -#define INPLACE_SRC_MASK ( (int64_t)FIELD_SRC_MASK SHIFT_UP SRC_BASE) -/*int64_t INPLACE_SRC_MASK= ((int64_t)FIELD_SRC_MASK SHIFT_UP SRC_BASE); */ - - -#define EXTRACT_FLAG(INPUT, OUTPUT, OUTPUT_TYPE, FIELD_BASE, FIELD_MASK) \ - OUTPUT = (OUTPUT_TYPE) ( (INPUT SHIFT_DOWN FIELD_BASE ) & FIELD_MASK ) - -#define STORE_FLAG(INPUT, OUTPUT, INPUT_TYPE, OUTPUT_TYPE, FIELD_BASE, INPLACE_FIELD_MASK ) \ - OUTPUT = \ - ( \ - /* 3 */ \ - ( \ - /* 2 */ \ - ( \ - /* 1 - shift the input field to the proper location */ \ - (OUTPUT_TYPE)( \ - ((OUTPUT_TYPE)((INPUT_TYPE) (INPUT))) \ - SHIFT_UP FIELD_BASE ) \ - /* mask off the extra bits */ \ - & ((OUTPUT_TYPE)INPLACE_FIELD_MASK) \ - ) \ - /* store back to the OUTPUT field, w/o destroying other fields */ \ - ) | OUTPUT \ - ) - -/** - * Structure to hold the basic shared memory bcoll component. 
- */ -struct mca_bcol_basesmuma_component_t { - /** Base coll component */ - mca_bcol_base_component_2_0_0_t super; - - /* management data for collectives with no user data */ - - /** MCA parameter: number of memory banks */ - int basesmuma_num_mem_banks; - - /** MCA parameter: number of regions per memory bank */ - int basesmuma_num_regions_per_bank; - - /** MCA parameter: Number of simultaneous groups supported */ - int n_groups_supported; - - /* management data for collectives with user data (ud) - the memory - * is actually obtained at the ML level - */ - - /** MCA paramenter: number of polling loops to run while waiting - * for children or parent to complete their work - */ - int n_poll_loops; - - /* mpool size */ - size_t mpool_size; - - - /* mpool inited - will use this to test whether or not the - * shared memory has been inited - */ - bool mpool_inited; - - /* shared memory control buffer - the control structures reside - * in shared memory */ - bcol_basesmuma_smcm_mmap_t *sm_ctl_structs; - - /* shared memory payload buffer - */ - bcol_basesmuma_smcm_mmap_t *sm_payload_structs; - - /* - * list of shared memory control structures - */ - opal_list_t ctl_structures; - - - /** opal list in which the list of peers that I am "connected" to is stored - */ - opal_list_t sm_connections_list; - - /* opal list in which the list of payload peers that I am "connected" to - * is stored - */ - opal_list_t sm_payload_connections_list; - - /* - * list of non-blocking admin barriers to progress */ - opal_mutex_t nb_admin_barriers_mutex; - opal_list_t nb_admin_barriers; - - /* - * order of fan-in tree - */ - int radix_fanin; - - /* - * order of fan-out tree - */ - int radix_fanout; - - /* - * Order of read tree - */ - int radix_read_tree; - - /* - * order of reduction fan-out tree - */ - int order_reduction_tree; - - /* - * K-nomial tree radix - */ - int k_nomial_radix; - - /* - * K-ary scatter tree radix - */ - int scatter_kary_radix; - - /* - * number of polling loops - */ - int num_to_probe; - - /* - * Portals addressing info - * void*: because wanted to keep portal library dependencies - * as local as possible - */ - void *portals_info; - bool portals_init; - - /* - * verbosity level - */ - int verbose; - - /* - * control file name base string - */ - char *clt_base_fname; - - /* - * data file name base string - */ - char *payload_base_fname; - - /* - * shared memory scratch space. This is mapped at the end of the - * segement of memory holding the control structures. - */ - char *my_scratch_shared_memory; - - /* - * size of scratch memory - */ - size_t my_scratch_shared_memory_size; - - /* the offset will be the same for all ranks */ - size_t scratch_offset_from_base_ctl_file; -}; - -static inline int mca_bcol_basesmuma_err(const char* fmt, ...) 
-{ - va_list list; - int ret; - - va_start(list, fmt); - ret = vfprintf(stderr, fmt, list); - va_end(list); - return ret; -} - -#if OPAL_ENABLE_DEBUG -#define BASESMUMA_VERBOSE(level, args) \ - do { \ - if(mca_bcol_basesmuma_component.verbose >= level) { \ - mca_bcol_basesmuma_err("[%s]%s[%s:%d:%s] BCOL-BASESMUMA ", \ - ompi_process_info.nodename, \ - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \ - __FILE__, __LINE__, __func__); \ - mca_bcol_basesmuma_err args; \ - mca_bcol_basesmuma_err("\n"); \ - } \ - } while(0) -#else -#define BASESMUMA_VERBOSE(level, args) -#endif - - -/** - * Convenience typedef */ -typedef struct mca_bcol_basesmuma_component_t mca_bcol_basesmuma_component_t; - -#if 0 -/* - * Implemented function index list - */ - -/* barrier */ -enum{ - FANIN_FAN_OUT_BARRIER_FN, - RECURSIVE_DOUBLING_BARRIER_FN, - N_BARRIER_FNS -}; - -/* reduce */ -enum{ - FANIN_REDUCE_FN, - REDUCE_SCATTER_GATHER_FN, - N_REDUCE_FNS -}; -enum{ - SHORT_DATA_FN_REDUCE, - LONG_DATA_FN_REDUCE, - N_REDUCE_FNS_USED -}; - -/* all-reduce */ -enum{ - FANIN_FANOUT_ALLREDUCE_FN, - REDUCE_SCATTER_ALLGATHER_FN, - N_ALLREDUCE_FNS -}; -enum{ - SHORT_DATA_FN_ALLREDUCE, - LONG_DATA_FN_ALLREDUCE, - N_ALLREDUCE_FNS_USED -}; - - -/* enum for node type */ -enum{ - ROOT_NODE, - LEAF_NODE, - INTERIOR_NODE -}; - - -/* - * N-order tree node description - */ -struct tree_node_t { - /* my rank within the group */ - int my_rank; - /* my node type - root, leaf, or interior */ - int my_node_type; - /* number of nodes in the tree */ - int tree_size; - /* number of parents (0/1) */ - int n_parents; - /* number of children */ - int n_children; - /* parent rank within the group */ - int parent_rank; - /* chidren ranks within the group */ - int *children_ranks; -}; -typedef struct tree_node_t tree_node_t; - -/* - * Pair-wise data exchange - */ -/* enum for node type */ -enum{ - EXCHANGE_NODE, - EXTRA_NODE -}; - -struct pair_exchange_node_t { - - /* my rank within the group */ - int my_rank; - - /* number of nodes this node will exchange data with */ - int n_exchanges; - - /* ranks of nodes involved in data exchnge */ - int *rank_exchanges; - - /* number of extra sources of data - outside largest power of 2 in - * this group */ - int n_extra_sources; - - /* rank of the extra source */ - int rank_extra_source; - - /* number of tags needed per stripe */ - int n_tags; - - /* log 2 of largest full power of 2 for this node set */ - int log_2; - - /* largest power of 2 that fits in this group */ - int n_largest_pow_2; - - /* node type */ - int node_type; - -}; -typedef struct pair_exchange_node_t pair_exchange_node_t; -#endif -/* - * descriptor for managing the admin nonblocking barrier routine. - * This is an sm internal routine, and assumes only 1 outstanding - * nb-barrier collective call per block. - */ -/* forward declarations */ -struct mca_bcol_basesmuma_module_t; -struct sm_buffer_mgmt; - -struct sm_nbbar_desc_t { - /* make sure we can put this on a list */ - opal_list_item_t super; - - /* phase of the collective operation - needed to know how to continue - * progressing the nb-barrier */ - int collective_phase; - - /* iteration to continue at */ - int recursive_dbl_iteration; - - /* pointer to the collective module this is associated with */ - struct mca_bcol_basesmuma_module_t *sm_module; - - /* pointer to payload/control structs buffers */ - struct sm_buffer_mgmt *coll_buff; - - /* pool index */ - int pool_index; - - /* pointer to the mca_bcol_base_memory_block_desc_t structure - * that is actually managing this registration. 
- * This is meaningful when these control structures - * are used in conjunction with the user payload - * data that is allocated at the ml level. - */ - void *ml_memory_block_descriptor; - -}; -typedef struct sm_nbbar_desc_t sm_nbbar_desc_t; - -/* - * Barrier request objects - */ - -/* shared memory data strucutures */ -struct mca_bcol_basesmuma_nb_request_process_shared_mem_t { - volatile uint64_t coll_index; - /* flag used to indicate the status of this memory region */ - volatile uint64_t flag; - volatile uint64_t index; - - /* pading */ - /* Note: need to change this so it takes less memory */ - char padding[BASESMUMA_CACHE_LINE_SIZE-3*sizeof(uint64_t)]; -}; - -typedef struct mca_bcol_basesmuma_nb_request_process_shared_mem_t -mca_bcol_basesmuma_nb_request_process_shared_mem_t; - -/* enum for phase at which the nb barrier is in */ -enum{ - NB_BARRIER_INACTIVE, - - /* fan-in/fan-out */ - NB_BARRIER_FAN_IN, - NB_BARRIER_FAN_OUT, - - /* recursive doubling */ - NB_PRE_PHASE, - NB_RECURSIVE_DOUBLING, - NB_POST_PHASE, - - /* done and not started are the same for all practicle - * purposes, as the init funtion always sets this flag - */ - NB_BARRIER_DONE -}; - - - -/* forward declartion */ -struct mca_bcol_basesmuma_module_t; - -struct mca_basesmuma_ctrl_4_hdl_t { - int fd; - bool status; - volatile char buf[128]; - /*volatile char buf[OPAL_PATH_MAX];*/ -}; -typedef struct mca_basesmuma_ctrl_4_hdl_t mca_basesmuma_ctrl_4_hdl_t; - -/* control segment for shared memory */ -struct mca_bcol_basesmuma_ctl_struct_t { - /* collective identifier */ - volatile int64_t sequence_number; - volatile int64_t flag; - volatile int64_t index; - volatile int64_t offset; - volatile int64_t offset_zip; - - - /* used for non-blocking algorithms */ - int status; - int active_requests; - int iteration; - - int *src_ptr; - - int start; - - /* process private data */ - int starting_flag_value; - - /* experiment for large data colls */ - int n_sends; - int length; - - /* hdl framework control structure*/ - /* no need to pad at this level anymore */ - volatile int64_t data_hdl; - volatile mca_basesmuma_ctrl_4_hdl_t hdl_ctrl; - -#ifdef __PORTALS_AVAIL__ - struct mca_bcol_basesmuma_portal_buf_addr_t portals_buf_addr; -#endif - /* padding */ - /*char padding[BASESMUMA_CACHE_LINE_SIZE-4*sizeof(uint64_t)-3*sizeof(int)];*/ - char padding[BASESMUMA_CACHE_LINE_SIZE-6*sizeof(int64_t)-5*sizeof(int)]; -}; -typedef struct mca_bcol_basesmuma_ctl_struct_t mca_bcol_basesmuma_ctl_struct_t; - - -#define SM_BCOLS_MAX 2 - -/* enum for signaling flag bank, when - * adding to this list, please keep - * it alphabetical - */ -enum { - ALLGATHER_FLAG, - ALLREDUCE_FLAG, - BARRIER_FANIN_FLAG, - BARRIER_FANOUT_FLAG, - BARRIER_RKING_FLAG, - BCAST_FLAG, - GATHER_FLAG, - REDUCE_FLAG, - NUM_SIGNAL_FLAGS -}; - - -/* control region for colls with user data - shared memory */ -struct mca_bcol_basesmuma_header_t { - /* collective identifier */ - volatile int64_t sequence_number; - volatile int8_t flags[NUM_SIGNAL_FLAGS][SM_BCOLS_MAX]; - volatile int32_t src; /* src of bcast data for unknown root, - bcol id for known root - */ - /* starting flag - hierarchies */ - int8_t starting_flag_value[SM_BCOLS_MAX]; - int8_t ready_flag; - - /* Manju: Cached array of receive buffer offsets - * - * This array stores the receive buffer offsets (rbuf_offsets) of data buffer. - * In general, we assume that sbuf_offset and rbuf_offset of - * processes invoking the collective primitive is same. 
This is - * true when the order in which processes invoke their hierarchies are - * same. - * - * For some algorithms (like broadcast, reduce) we split the ML buffer - * and use first half as - * source and second half as receive buffer. We swap these buffers for - * each change when we change levels i.e., if first half is source for - * level 1, in the level 2 of hierarchy it becomes the receive buffer. - * For reduce algorithm, each process can invoke hierarchies - * (primitives) in different order. For example, process 1 might have level 1 as SM - * and level 2 as p2p, and process 2 might have different order where its - * level 1 is p2p and level 2 SM. In this case, if in basesmuma reduce - * algorithm, if parent assumes its rbuf_offset as child's rbuf_offset - * it is wrong. So we cache rbuf_offset of each process so - * it could be accessed by processes to obtain the data. - */ - - volatile int32_t roffsets[SM_BCOLS_MAX]; - - /* Manju Start: Experimental ctl fields and should be removed later; - * This is used for lmsg reduce for testing - * during transition to HDL primitives - */ -#if 0 - int lmsg_reduce_snd_completes; - /* There can be atmost 20 ranks in the subgroup. Since this - * only for testing this should be good enough */ - int lmsg_reduce_peers[20]; - int lmsg_reduce_send_offsets[20]; - /* End: Experimental ctl fields */ - - - /* no need to pad at this level anymore */ - volatile int64_t data_hdl; -#endif -}; -typedef struct mca_bcol_basesmuma_header_t mca_bcol_basesmuma_header_t; - -/* data needed for large messages */ -struct mca_bcol_basesmuma_large_msg_t { - /* scatter allgather data */ - uint64_t offset; - uint64_t n_sends; - uint64_t length; - - /* portals data */ - -}; -typedef struct mca_bcol_basesmuma_large_msg_t mca_bcol_basesmuma_large_msg_t; - -/* payload struct */ -struct mca_bcol_basesmuma_payload_t { - - /* base pointer to shared memory control structure */ - mca_bcol_basesmuma_header_t *ctl_struct; - void *payload; - -}; - -typedef struct mca_bcol_basesmuma_payload_t mca_bcol_basesmuma_payload_t; - - - - -/* memory bank memory management structure */ -struct mem_bank_management_t { - - /* generation counter */ - uint64_t bank_gen_counter; - - /* descriptor for the non-blocking barrier. This is - * used to manage this bank of memory. - */ - sm_nbbar_desc_t nb_barrier_desc; - - /* the number of buffers that are not in use, and are - * available. The assumption is that the buffers are - * recycled all at once, so are available for re-use - * until all buffers have been made available for re-use. 
- */ - volatile int available_buffers; - - /* - * number of buffers freed */ - volatile int n_buffs_freed; - - /* mutex to ensure atomic recycling of resrouces */ - opal_mutex_t mutex; - - /* number of buffers being managed */ - int number_of_buffers; - - /* shared memory control structures */ - int index_shared_mem_ctl_structs; - - -}; -typedef struct mem_bank_management_t mem_bank_management_t; - -/* data structure for shared buffers */ -struct sm_buffer_mgmt { - /* number of buffers per process */ - int number_of_buffs; - - /* size of group */ - int size_of_group; - - /* number of memory banks */ - int num_mem_banks; - - /* number of buffers per memory bank */ - int num_buffs_per_mem_bank; - - /* log base 2 of num_buffs_per_mem_bank */ - int log2_num_buffs_per_mem_bank; - - /* log base 2 total number of buffers */ - int log2_number_of_buffs; - - /* mask - masks off the bits corresponding to buffer index */ - int mask; - - /* control buffers - these point to regions in shared memory */ - /* leading dimension is the group size - all pointers for a given - * set of buffers appear consecutively in this array - */ - volatile void **ctl_buffs; - - /* management data for the control structures - - * one per bank of control structures - Will be used for - * the payload buffers as well. - */ - mem_bank_management_t *ctl_buffs_mgmt; - - /* data buffers - these point to regions in shared memory */ - /* leading dimension is the group size - all pointers for a given - * set of buffers appear consecutively in this array - */ - - volatile mca_bcol_basesmuma_payload_t *data_buffs; - - - -}; -typedef struct sm_buffer_mgmt sm_buffer_mgmt; - - -struct mca_bcol_basesmuma_nb_coll_buff_desc_t { - void *data_addr; - uint64_t bank_index; - uint64_t buffer_index; - int active_requests; - ompi_request_t **requests; - int data_src; - int radix_mask; - int radix_mask_pow; - int iteration; - int status; - /* this is for testing */ - int tag; - - volatile mca_bcol_basesmuma_ctl_struct_t **ctl_structs; - volatile mca_bcol_basesmuma_ctl_struct_t *my_ctl_pointer; - volatile mca_bcol_basesmuma_ctl_struct_t *parent_ctl_pointer; - volatile mca_bcol_basesmuma_ctl_struct_t *extra_partner_ctl_pointer; -}; - -typedef struct mca_bcol_basesmuma_nb_coll_buff_desc_t mca_bcol_basesmuma_nb_coll_buff_desc_t; - -struct mca_bcol_basesmuma_local_mlmem_desc_t { - - uint32_t bank_index_for_release; - struct mca_bcol_base_memory_block_desc_t *ml_mem_desc; - uint32_t num_banks; - uint32_t num_buffers_per_bank; - uint32_t size_buffer; - uint32_t *bank_release_counter; - - /* - * Number of descriptors allocated is equivalent to number of ml buffers - * (number of banks * number of buffers per bank) - */ - mca_bcol_basesmuma_nb_coll_buff_desc_t *nb_coll_desc; -}; - -typedef struct mca_bcol_basesmuma_local_mlmem_desc_t mca_bcol_basesmuma_local_mlmem_desc_t; - -#ifdef __PORTALS_AVAIL__ -#define MAX_SM_GROUP_SIZE 32 - - -struct portals_scatter_allgather_nb_bcast_state_t -{ - /* local variables */ - uint64_t length; - int my_rank, src, matched; - int src_list[MAX_SM_GROUP_SIZE]; - int group_size; - int64_t ready_flag; - int pow_2, pow_2_levels; - int src_list_index; - uint64_t fragment_size; /* user buffer size */ - - /* Input argument variables */ - void *my_userbuf; - int64_t sequence_number; - - /* Extra source variables */ - bool secondary_root; - int partner , extra_partner; - - /* Scatter Allgather offsets */ - uint64_t local_sg_offset , global_sg_offset , partner_offset ; - - /* Portals messaging relevant variables */ - /* - * 
ptl_handle_eq_t allgather_eq_h; - */ - ptl_handle_eq_t read_eq; - ptl_event_t allgather_event; - bool msg_posted; - - /* OMPI module and component variables */ - mca_bcol_basesmuma_component_t *cs; - struct mca_bcol_basesmuma_module_t *bcol_module; - - /* Control structure and payload variables */ - volatile mca_bcol_basesmuma_ctl_struct_t **ctl_structs; - volatile mca_bcol_basesmuma_ctl_struct_t *my_ctl_pointer; - volatile mca_bcol_basesmuma_ctl_struct_t *parent_ctl_pointer; /* scatter source */ - volatile mca_bcol_basesmuma_ctl_struct_t *extra_partner_ctl_pointer; /* scatter source */ - - int phase; -}; - - -typedef struct portals_scatter_allgather_nb_bcast_state_t sg_state_t; -#endif - -#define SM_ARRAY_INDEX(LEAD_DIM,BUF_INDEX,PROC_INDEX) \ - ((LEAD_DIM)*(BUF_INDEX)+(PROC_INDEX)) -/* debug */ -#define BARRIER_BANK_LIST_SIZE 32 -/* end debug */ - -struct mca_bcol_basesmuma_module_t { - /* base structure */ - mca_bcol_base_module_t super; - - /* free list item with the control structures used for - * the no user data collective operations - */ - list_data_t *no_userdata_ctl; - - /* free list item with the control structures used for - * the with user data collective operations - */ - list_data_t *userdata_ctl; - - /* - * information on sm control backing files for the subgroup - * associated with this module. - */ - bcol_basesmuma_smcm_proc_item_t **ctl_backing_files_info; - - /* - * information on sm payload backing files for the subgroup - * associated with this module. - */ - bcol_basesmuma_smcm_proc_item_t **payload_backing_files_info; - - /* - * buffers for the collective that do not involve user data - - * barrier, fanin, fanout. - */ - sm_buffer_mgmt colls_no_user_data; - - /* - * buffers for the collective with user data. - */ - sm_buffer_mgmt colls_with_user_data; - - /* recursive-doubling tree node */ - netpatterns_pair_exchange_node_t recursive_doubling_tree; - - /* k-nomial gather/allgather tree */ - netpatterns_k_exchange_node_t knomial_allgather_tree; - - /* fanin tree node - root is rank 0 */ - netpatterns_tree_node_t fanin_node; - - /* fanout tree node - root is rank 0 */ - netpatterns_tree_node_t fanout_node; - - /* index of blocking barrier memory region to use */ - int index_blocking_barrier_memory_bank; - - /* comm to shared memory map */ - int *comm_to_sm_map; - - /* reduction fanout tree */ - netpatterns_tree_node_t* reduction_tree; - - /* broadcast fanout tree */ - netpatterns_tree_node_t* fanout_read_tree; - - /* scatter - k-ary tree */ - int scatter_kary_radix; - netpatterns_tree_node_t *scatter_kary_tree; - - /* Knomial exchange tree */ - /* Currently used for only large message reduce */ - netpatterns_k_exchange_node_t knomial_exchange_tree; - - /* sequence number offset - want to make sure that we start - * id'ing collectives with id 0, so we can have simple - * resource management. 
- */ - int64_t squence_number_offset; - - /* basesmuma specific header size into ml buffer - * was calculated at ml level - it is the sum of - * all headers from all bcols and then aligned to - * whatever alignment was requested - */ - uint32_t total_header_size; - - /* list of possible sources */ - int *src_list; - - /* Number of possible sources */ - int src_size; - - /* smallest power of k that is smaller - * than or equal in size to the uma group - */ - int pow_k_levels; - - /* size of power-of-k group */ - int pow_k; - - /* smallest power of 2 that is smaller - * than or equal to the smuma group size - */ - int pow_2_levels; - - /* size of power-of-2 group */ - int pow_2; - - /* pointer to the shared memory scratch array of each - * process in the group. - */ - void **shared_memory_scratch_space; - - /* - * Caching information for re-entrant collectives - */ - mca_bcol_basesmuma_local_mlmem_desc_t ml_mem; - - /* - * Cached offsets for lmsg reduce - */ - int **reduce_offsets; - - /*XXX: - * Starting to explore the beauty of zero-copy for large message - */ - struct mca_hdl_base_module_t **hdl_module; - -#ifdef __PORTALS_AVAIL__ - /* - * Store state for NB blocking functions - */ - sg_state_t sg_state; - -#endif -}; - -typedef struct mca_bcol_basesmuma_module_t mca_bcol_basesmuma_module_t; -OBJ_CLASS_DECLARATION(mca_bcol_basesmuma_module_t); - -/* shared memory specific arguments for the bcol registration function */ -typedef struct bcol_basesmuma_registration_data_t { - char *file_name; /* filename for payload */ - void *base_addr; /* base address to be mapped */ - size_t size; /* size of memory block to be "registered" */ - size_t size_ctl_structure; - size_t data_seg_alignment; - bcol_basesmuma_smcm_mmap_t *sm_mmap; /* shared memory map struct */ - mca_bcol_base_release_buff_fn_t buff_release_cb; /* buffer release - call back */ -} bcol_basesmuma_registration_data_t; - - -enum { - BUFFER_AVAILABLE, - STARTED, - FANIN, - FANOUT -}; - -/* enum used for non-blocking large - * message bcast - */ - -enum { - INIT, - START, - NOT_STARTED, - SCATTER, - ALLGATHER, - EXTRA_RANK, - PROBE, - SCATTER_ROOT_WAIT, - SCATTER_EXTRA_ROOT_WAIT, - SCATTER_PARENT_WAIT, - FINISHED -}; - -/** - * Global component instance - */ -OMPI_MODULE_DECLSPEC extern mca_bcol_basesmuma_component_t mca_bcol_basesmuma_component; - - -/* - * coll module functions - */ - -/* query to see if the component is available for use, and can - * satisfy the thread and progress requirements - */ -int mca_bcol_basesmuma_init_query(bool enable_progress_threads, - bool enable_mpi_threads); - -/* query to see if the module is available for use on the given - * communicator, and if so, what it's priority is. 
- */ -mca_bcol_base_module_t ** -mca_bcol_basesmuma_comm_query(mca_sbgp_base_module_t *module, int *num_modules); - - - -/* shared memory specific memory registration function - this will be passed into the mpool */ -int mca_bcol_basesmuma_register_sm(void *context_data, void *base, size_t size, - void **reg); - -/* shared memory specific memory deregistration function - also needed by the mpool */ -int mca_bcol_basesmuma_deregister_sm(void *context_data, void *reg); - -/* setup the new k_nomial tree for collectives */ -int bcol_basesmuma_setup_knomial_tree(mca_bcol_base_module_t *super); - -/* allocate the memory pool for the shared memory control structures */ -int mca_bcol_basesmuma_allocate_pool_memory(mca_bcol_basesmuma_component_t - *component); - -/* initialize the internal scratch buffers and control structs that will be - used by the module */ -int base_bcol_basesmuma_setup_library_buffers( - mca_bcol_basesmuma_module_t *sm_module, - mca_bcol_basesmuma_component_t *cs); - - -/* shared memory recursive doubling initialization */ -int bcol_basesmuma_rd_barrier_init(mca_bcol_base_module_t *module); - -/* shared memory recusive double barrier */ -int bcol_basesmuma_recursive_double_barrier(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); -/* shared memory fanin */ -int bcol_basesmuma_fanin_init(mca_bcol_base_module_t *super); - -/* shared memory fanout */ -int bcol_basesmuma_fanout_init(mca_bcol_base_module_t *super); - -/* shared memory recursive k-ing non-blocking barrier */ -int bcol_basesmuma_barrier_init(mca_bcol_base_module_t *super); - -/* Shared memory broadcast */ -int bcol_basesmuma_bcast_init(mca_bcol_base_module_t *super); - -int bcol_basesmuma_bcast(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -/* Shared memory non-blocking broadcast */ -int bcol_basesmuma_bcast_k_nomial_anyroot(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -int bcol_basesmuma_bcast_k_nomial_knownroot(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -/* Shared memory non-blocking broadcast - Large message anyroot */ -int bcol_basesmuma_binary_scatter_allgather_segment(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -#if 0 -/*FIXME: having fun here*/ -int bcol_basesmuma_hdl_zerocopy_bcast(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); -#endif - -int bcol_basesmuma_lmsg_bcast_k_nomial_anyroot(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -int bcol_basesmuma_lmsg_scatter_allgather_portals_bcast(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -int bcol_basesmuma_lmsg_scatter_allgather_portals_nb_bcast(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -int bcol_basesmuma_lmsg_scatter_allgather_portals_nb_knownroot_bcast(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -/* - * shared memory scatter - */ -int bcol_basesmuma_scatter_init(mca_bcol_base_module_t *super); - -/* shared memory nonblocking scatter - known root */ -int bcol_basesmuma_nb_scatter_k_array_knownroot( - bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -/* shared memory non-blocking k-nomial barrier init */ -int bcol_basesmuma_k_nomial_barrier_init(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - -/* shared memory non-blocking k-nomial barrier progress */ -int 
bcol_basesmuma_k_nomial_barrier_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - -/*shared memory non-blocking k-nomial allgather init */ -int bcol_basesmuma_k_nomial_allgather_init(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - -/* shared memory non-blocking k-nomial allgather progress */ -int bcol_basesmuma_k_nomial_allgather_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - -/* shared memory allgather -- selection logic api */ -int bcol_basesmuma_allgather_init(mca_bcol_base_module_t *super); - -/* shared memory blocking k-nomial gather */ -int bcol_basesmuma_k_nomial_gather(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -/* shared memory non blocking k-nomial gather */ -int bcol_basesmuma_k_nomial_gather_init(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -/* shared memory non blocking k-nomial gather progress*/ -int bcol_basesmuma_k_nomial_gather_progress(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -/* shared memory init */ -int bcol_basesmuma_gather_init(mca_bcol_base_module_t *super); - -/* allocate shared memory control memory */ -int mca_bcol_basesmuma_allocate_sm_ctl_memory( - mca_bcol_basesmuma_component_t *cs); - -/* Shared memory basesmuma reduce */ -int bcol_basesmuma_reduce_init(mca_bcol_base_module_t *super); -int bcol_basesmuma_reduce_intra_fanin(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); -int bcol_basesmuma_reduce_intra_fanin_old(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -int bcol_basesmuma_reduce_intra_reducescatter_gather(void *sbuf, void *rbuf, - int count, struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); - -/* Shared memory basesmuma allreduce */ -int bcol_basesmuma_allreduce_init(mca_bcol_base_module_t *super); - -int bcol_basesmuma_allreduce_intra_fanin_fanout(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -int bcol_basesmuma_allreduce_intra_recursive_doubling(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -/* initialize non-blocking barrier for recycling the memory buffers. - * This is not a general purpose nb_barrier, and relies on the - * fact that we will have only one outstanding nb-barrier per bank - * at a time. - */ -int bcol_basesmuma_rd_nb_barrier_init_admin(sm_nbbar_desc_t *sm_desc); - -/* admin nonblocking barrier - progress function */ -int bcol_basesmuma_rd_nb_barrier_progress_admin(sm_nbbar_desc_t *sm_desc); - -/* Memory syncronization registration function */ -int bcol_basesmuma_memsync_init(mca_bcol_base_module_t *super); - -/* smcm allgather function used to exchange file offsets. 
*/ -int bcol_basesmuma_smcm_allgather_connection( - mca_bcol_basesmuma_module_t *sm_bcol_module, - mca_sbgp_base_module_t *module, - opal_list_t *peer_list, - bcol_basesmuma_smcm_proc_item_t ***backing_files, - ompi_communicator_t *comm, - bcol_basesmuma_smcm_file_t input, char *base_fname, - bool map_all); - -/* clean up the backing files associated with a basesmuma bcol module */ -int bcol_basesmuma_smcm_release_connections (mca_bcol_basesmuma_module_t *sm_bcol_module, - mca_sbgp_base_module_t *sbgp_module, opal_list_t *peer_list, - bcol_basesmuma_smcm_proc_item_t ***back_files); - -/* - * this function initializes the internal scratch buffers and control - * structures that will be used by the module - */ -int base_bcol_masesmuma_setup_library_buffers( - mca_bcol_basesmuma_module_t *sm_bcol_module, - mca_bcol_basesmuma_component_t *sm_bcol_component); - -/* get the index of the shared memory buffer to be used */ -int bcol_basesmuma_get_buff_index( sm_buffer_mgmt * buff_block, - uint64_t buff_id ); - -int bcol_basesmuma_free_buff( sm_buffer_mgmt * buff_block, - uint64_t buff_id ); - -/* bank init which is used for shared memory optimization, fall back to - * the bank init above if this causes problems - */ -int bcol_basesmuma_bank_init_opti(struct mca_bcol_base_memory_block_desc_t *payload_block, - uint32_t data_offset, - mca_bcol_base_module_t *bcol_module, - void *reg_data); - -/* cleanup nb_coll_buff_desc */ -void cleanup_nb_coll_buff_desc(mca_bcol_basesmuma_nb_coll_buff_desc_t **desc, - uint32_t num_banks, - uint32_t num_buffers_per_bank); - - -/* used for shared memory offset exchange */ -int base_bcol_basesmuma_exchange_offsets( - mca_bcol_basesmuma_module_t *sm_bcol_module, - void **result_array, uint64_t mem_offset, int loop_limit, - int leading_dim); - - -/* the progress function to be called from the opal progress function - */ -int bcol_basesmuma_progress(void); - -/* Macro for initializing my shared memory control structure */ -#define BASESMUMA_HEADER_INIT(my_ctl_pointer,ready_flag, seqn, bcol_id) \ - do{ \ - int i,j; \ - int8_t flag_offset = 0; \ - /* setup resource recycling */ \ - if( (my_ctl_pointer)->sequence_number < (seqn) ) { \ - /* Signal arrival */ \ - for( j = 0; j < SM_BCOLS_MAX; j++){ \ - (my_ctl_pointer)->starting_flag_value[j]=0; \ - for( i = 0; i < NUM_SIGNAL_FLAGS; i++){ \ - (my_ctl_pointer)->flags[i][j] = -1; \ - } \ - } \ - } \ - /* increment the starting flag by one and return */ \ - flag_offset = (my_ctl_pointer)->starting_flag_value[(bcol_id)]; \ - (ready_flag) = flag_offset + 1; \ - opal_atomic_wmb(); \ - (my_ctl_pointer)->sequence_number = (seqn); \ - }while(0) - -/* these are all the same, am using a single macro for all collectives */ - -#define IS_PEER_READY(peer, my_flag, my_sequence_number,flag_index, bcol_id) \ - (((peer)->sequence_number == (my_sequence_number) && \ - (peer)->flags[flag_index][bcol_id] >= (my_flag))? true : false ) - -#if 0 -#define IS_AR_DATA_READY(peer, my_flag, my_sequence_number) \ - (((peer)->sequence_number == (my_sequence_number) && \ - (peer)->flags[ALLREDUCE_FLAG][bcol_id] >= (my_flag) \ - )? true : false ) - -#define IS_GDATA_READY(peer, my_flag, my_sequence_number) \ - (((peer)->sequence_number == (my_sequence_number) && \ - (peer)->flags[GATHER_FLAG][bcol_id] == (my_flag) \ - )? 
true : false ) - -#define IS_PEER_READY(peer, my_flag, flag_index, my_sequence_number) \ - ((((volatile int64_t)(peer)->sequence_number > (my_sequence_number)) || \ - (((volatile int64_t)(peer)->sequence_number == (my_sequence_number)) && \ - ((peer)->flags[flag_index][bcol_id] == (my_flag))) \ - )? true : false ) - -#define IS_ALLREDUCE_PEER_READY(peer, my_flag, my_sequence_number) \ - ((((volatile int64_t)(peer)->sequence_number == (my_sequence_number)) && \ - (((peer)->flags[ALLREDUCE_FLAG][bcol_id] == (my_flag))||((peer)->flags[ALLREDUCE_FLAG][bcol_id] == (my_flag) + 1)) \ - )? true : false ) -#endif - -#define IS_LAST_BCOL_FUNC(ml_args) \ - ((((ml_args)->n_of_this_type_in_collective == \ - (ml_args)->index_of_this_type_in_collective + 1 ) )? true : false) - -static inline __opal_attribute_always_inline__ -size_t bcol_basesmuma_data_offset_calc( - mca_bcol_basesmuma_module_t *basesmuma_module) -{ - uint32_t offset = basesmuma_module->super.header_size; - offset = ((offset + BCOL_HEAD_ALIGN - 1) / BCOL_HEAD_ALIGN) * BCOL_HEAD_ALIGN; - - return (size_t) offset; -} - - -END_C_DECLS - -#endif /* MCA_BCOL_basesmuma_EXPORT_H */ diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_allgather.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_allgather.c deleted file mode 100644 index 97a857ef0c..0000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_allgather.c +++ /dev/null @@ -1,352 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/include/ompi/constants.h" -#include "ompi/mca/bcol/base/base.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/bcol/basesmuma/bcol_basesmuma.h" -/* - #define IS_AGDATA_READY(peer, my_flag, my_sequence_number)\ - (((peer)->sequence_number == (my_sequence_number) && \ - (peer)->flags[ALLGATHER_FLAG][bcol_id] >= (my_flag) \ - )? 
true : false ) -*/ - -#define CALC_ACTIVE_REQUESTS(active_requests,peers, tree_order) \ - do{ \ - for( j = 0; j < (tree_order - 1); j++){ \ - if( 0 > peers[j] ) { \ - /* set the bit */ \ - *active_requests ^= (1<<j); \ - } \ - } \ - }while(0) - -/* allgather init function */ -int bcol_basesmuma_k_nomial_allgather_init(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - /* local variables */ - mca_bcol_basesmuma_module_t *bcol_module = (mca_bcol_basesmuma_module_t *) const_args->bcol_module; - netpatterns_k_exchange_node_t *exchange_node = &bcol_module->knomial_allgather_tree; - int bcol_id = (int) bcol_module->super.bcol_id; - uint32_t buffer_index = input_args->buffer_index; - int *active_requests = - &(bcol_module->ml_mem.nb_coll_desc[buffer_index].active_requests); - - int *iteration = &bcol_module->ml_mem.nb_coll_desc[buffer_index].iteration; - int *status = &bcol_module->ml_mem.nb_coll_desc[buffer_index].status; - int leading_dim, buff_idx, idx; - - int64_t sequence_number = input_args->sequence_num; - int my_rank = bcol_module->super.sbgp_partner_module->my_index; - - volatile mca_bcol_basesmuma_payload_t *data_buffs; - - /* control structures */ - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer; - - volatile int8_t ready_flag; - - /* initialize the iteration counter */ - buff_idx = input_args->src_desc->buffer_index; - leading_dim = bcol_module->colls_no_user_data.size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - data_buffs=(volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs+idx; - - /* Set pointer to current proc ctrl region */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - - /* initialize headers and ready flag */ - BASESMUMA_HEADER_INIT(my_ctl_pointer, ready_flag, sequence_number, bcol_id); - - /* initialize these */ - *iteration = -1; - *active_requests = 0; - *status = ready_flag; - - if (EXTRA_NODE == exchange_node->node_type) { - /* I am ready at this level */ - opal_atomic_wmb (); - my_ctl_pointer->flags[ALLGATHER_FLAG][bcol_id] = ready_flag; - } - - return bcol_basesmuma_k_nomial_allgather_progress (input_args, const_args); -} - - -/* allgather progress function */ - -int bcol_basesmuma_k_nomial_allgather_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - /* local variables */ - int8_t flag_offset; - uint32_t buffer_index = input_args->buffer_index; - volatile int8_t ready_flag; - mca_bcol_basesmuma_module_t *bcol_module = (mca_bcol_basesmuma_module_t *) const_args->bcol_module; - netpatterns_k_exchange_node_t *exchange_node = &bcol_module->knomial_allgather_tree; - int group_size = bcol_module->colls_no_user_data.size_of_group; - int *list_connected = bcol_module->super.list_n_connected; /* critical for hierarchical colls */ - int bcol_id = (int) bcol_module->super.bcol_id; - mca_bcol_basesmuma_component_t *cm = &mca_bcol_basesmuma_component; - int *active_requests = - &(bcol_module->ml_mem.nb_coll_desc[buffer_index].active_requests); - - int *iteration = &bcol_module->ml_mem.nb_coll_desc[buffer_index].iteration; - int *status = &bcol_module->ml_mem.nb_coll_desc[buffer_index].status; - int leading_dim, idx, buff_idx; - - int i, j, probe; - int knt; - int src; - int recv_offset, recv_len; - int max_requests = 0; /* critical to set this */ - int pow_k, tree_order; - - int64_t sequence_number=input_args->sequence_num; - int my_rank = bcol_module->super.sbgp_partner_module->my_index; - - int pack_len = input_args->count * input_args->dtype->super.size; - - void *data_addr = (void*)( - (unsigned char *) input_args->sbuf + - (size_t) input_args->sbuf_offset); - volatile mca_bcol_basesmuma_payload_t *data_buffs; - volatile char *peer_data_pointer; - - /* control structures */ - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer; - volatile
-
-
-/* allgather progress function */
-
-int bcol_basesmuma_k_nomial_allgather_progress(bcol_function_args_t *input_args,
-                                               struct mca_bcol_base_function_t *const_args)
-{
-    /* local variables */
-    int8_t flag_offset;
-    uint32_t buffer_index = input_args->buffer_index;
-    volatile int8_t ready_flag;
-    mca_bcol_basesmuma_module_t *bcol_module = (mca_bcol_basesmuma_module_t *) const_args->bcol_module;
-    netpatterns_k_exchange_node_t *exchange_node = &bcol_module->knomial_allgather_tree;
-    int group_size = bcol_module->colls_no_user_data.size_of_group;
-    int *list_connected = bcol_module->super.list_n_connected; /* critical for hierarchical colls */
-    int bcol_id = (int) bcol_module->super.bcol_id;
-    mca_bcol_basesmuma_component_t *cm = &mca_bcol_basesmuma_component;
-    int *active_requests =
-        &(bcol_module->ml_mem.nb_coll_desc[buffer_index].active_requests);
-
-    int *iteration = &bcol_module->ml_mem.nb_coll_desc[buffer_index].iteration;
-    int *status = &bcol_module->ml_mem.nb_coll_desc[buffer_index].status;
-    int leading_dim, idx, buff_idx;
-
-    int i, j, probe;
-    int knt;
-    int src;
-    int recv_offset, recv_len;
-    int max_requests = 0; /* critical to set this */
-    int pow_k, tree_order;
-
-    int64_t sequence_number = input_args->sequence_num;
-    int my_rank = bcol_module->super.sbgp_partner_module->my_index;
-
-    int pack_len = input_args->count * input_args->dtype->super.size;
-
-    void *data_addr = (void *)(
-        (unsigned char *) input_args->sbuf +
-        (size_t) input_args->sbuf_offset);
-    volatile mca_bcol_basesmuma_payload_t *data_buffs;
-    volatile char *peer_data_pointer;
-
-    /* control structures */
-    volatile mca_bcol_basesmuma_header_t *my_ctl_pointer;
-    volatile mca_bcol_basesmuma_header_t *peer_ctl_pointer;
-
-#if 0
-    fprintf(stderr,"%d: entering sm allgather progress active requests %d iter %d ready_flag %d\n", my_rank,
-            *active_requests, *iteration, *status);
-#endif
-
-    buff_idx = input_args->src_desc->buffer_index;
-    leading_dim = bcol_module->colls_no_user_data.size_of_group;
-    idx = SM_ARRAY_INDEX(leading_dim,buff_idx,0);
-    data_buffs = (volatile mca_bcol_basesmuma_payload_t *)
-        bcol_module->colls_with_user_data.data_buffs + idx;
-
-    /* Set pointer to current proc ctrl region */
-    my_ctl_pointer = data_buffs[my_rank].ctl_struct;
-
-    /* increment the starting flag by one and return */
-    /* flag offset seems unnecessary here */
-    flag_offset = my_ctl_pointer->starting_flag_value[bcol_id];
-    ready_flag = *status;
-    my_ctl_pointer->sequence_number = sequence_number;
-    /* k-nomial parameters */
-    tree_order = exchange_node->tree_order;
-    pow_k = exchange_node->log_tree_order;
-
-    /* calculate the maximum number of requests:
-     * at each level each rank communicates with at most (k - 1) peers,
-     * so if we set (k - 1) bit fields in "max_requests" we get
-     * max_requests == 2^(k - 1) - 1
-     */
-    for (i = 0; i < (tree_order - 1); i++) {
-        max_requests ^= (1<<i);
-    }
-
-    /* first pass through for this buffer: handle the extra ranks and
-     * their proxies before the exchange phase
-     */
-    if (-1 == *iteration) {
-
-        if (EXTRA_NODE == exchange_node->node_type) {
-            /* If I'm in here, then I must be looking for data */
-            ready_flag = flag_offset + 1 + pow_k + 2;
-
-            src = exchange_node->rank_extra_sources_array[0];
-            peer_data_pointer = data_buffs[src].payload;
-            peer_ctl_pointer = data_buffs[src].ctl_struct;
-
-            /* calculate the count */
-            for (i = 0, knt = 0 ; i < group_size ; ++i) {
-                knt += list_connected[i];
-            }
-
-            for (i = 0 ; i < cm->num_to_probe ; ++i) {
-                if (IS_PEER_READY(peer_ctl_pointer, ready_flag, sequence_number, ALLGATHER_FLAG, bcol_id)) {
-                    /* we receive the entire message */
-                    opal_atomic_mb ();
-                    memcpy (data_addr, (void *) peer_data_pointer, knt * pack_len);
-
-                    goto FINISHED;
-                }
-            }
-
-            /* haven't found it, state is saved, bail out */
-            return BCOL_FN_STARTED;
-        } else if (0 < exchange_node->n_extra_sources) {
-            /* I am a proxy for someone */
-            src = exchange_node->rank_extra_sources_array[0];
-            peer_data_pointer = data_buffs[src].payload;
-            peer_ctl_pointer = data_buffs[src].ctl_struct;
-
-            /* calculate the offset */
-            for (i = 0, knt = 0 ; i < src ; ++i) {
-                knt += list_connected[i];
-            }
-
-            /* probe for extra rank's arrival */
-            for (i = 0 ; i < cm->num_to_probe ; ++i) {
-                if (IS_PEER_READY(peer_ctl_pointer, ready_flag, sequence_number, ALLGATHER_FLAG, bcol_id)) {
-                    opal_atomic_mb ();
-                    /* copy it in */
-                    memcpy ((void *) ((uintptr_t) data_addr + knt * pack_len),
-                            (void *) ((uintptr_t) peer_data_pointer + knt * pack_len),
-                            pack_len * list_connected[src]);
-                    break;
-                }
-            }
-
-            if (i == cm->num_to_probe) {
-                return BCOL_FN_STARTED;
-            }
-        }
-
-        /* bump the ready flag to indicate extra node exchange complete */
-        ++ready_flag;
-        *iteration = 0;
-    }
-
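The ready_flag values used above and in the exchange loop below form a fixed ladder. Assuming BASESMUMA_HEADER_INIT leaves the flag at flag_offset + 1, a proxy rank bumps it once after absorbing the extra rank's data, once per completed k-nomial level, and once more to release the extra rank, landing on flag_offset + 1 + pow_k + 2, which is exactly the value an EXTRA_NODE polls for. A self-contained check of that arithmetic (values illustrative):

    #include <assert.h>

    int main(void)
    {
        int flag_offset = 0;
        int pow_k = 2;                  /* e.g. tree order 3, 9 ranks */
        int flag = flag_offset + 1;     /* value after header init */

        flag += 1;                      /* extra-rank pre-exchange done */
        flag += pow_k;                  /* one bump per completed level */
        flag += 1;                      /* final bump releases the extra rank */

        assert(flag == flag_offset + 1 + pow_k + 2);
        return 0;
    }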
-    /* start the recursive k-ing phase */
-    for (i = *iteration ; i < pow_k ; ++i) {
-        /* I am ready at this level */
-        opal_atomic_wmb ();
-        my_ctl_pointer->flags[ALLGATHER_FLAG][bcol_id] = ready_flag;
-
-        if (0 == *active_requests) {
-            /* flip some bits, if we don't have active requests from a previous visit */
-            CALC_ACTIVE_REQUESTS(active_requests,exchange_node->rank_exchanges[i],tree_order);
-        }
-
-        for (j = 0; j < (tree_order - 1); ++j) {
-            /* recv phase */
-            src = exchange_node->rank_exchanges[i][j];
-
-            if (src < 0) {
-                /* then not a valid rank, continue */
-                continue;
-            }
-
-            if (!(*active_requests&(1<<j))) {
-                /* this peer's data has not been copied in yet */
-                peer_data_pointer = data_buffs[src].payload;
-                peer_ctl_pointer = data_buffs[src].ctl_struct;
-
-                recv_offset = exchange_node->payload_info[i][j].r_offset * pack_len;
-                recv_len = exchange_node->payload_info[i][j].r_len * pack_len;
-
-                /* I am putting the probe loop as the innermost loop to achieve
-                 * better temporal locality
-                 */
-                for (probe = 0 ; probe < cm->num_to_probe ; ++probe) {
-                    if (IS_PEER_READY(peer_ctl_pointer, ready_flag, sequence_number, ALLGATHER_FLAG, bcol_id)) {
-                        /* flip the request's bit */
-                        *active_requests ^= (1<<j);
-                        /* copy in the peer's data */
-                        opal_atomic_mb ();
-                        memcpy ((void *) ((uintptr_t) data_addr + recv_offset),
-                                (void *) ((uintptr_t) peer_data_pointer + recv_offset),
-                                recv_len);
-                        break;
-                    }
-                }
-            }
-        }
-
-        /* check whether this level is complete */
-        if (max_requests == *active_requests) {
-            /* bump the ready flag and move on to the next level */
-            ++ready_flag;
-            *active_requests = 0;
-        } else {
-            /* save state and hop out */
-            *status = my_ctl_pointer->flags[ALLGATHER_FLAG][bcol_id];
-            *iteration = i;
-            return BCOL_FN_STARTED;
-        }
-    }
-
-    /* bump the flag one more time for the extra rank */
-    ready_flag = flag_offset + 1 + pow_k + 2;
-
-    /* finish off the last piece, send the data back to the extra */
-    if( 0 < exchange_node->n_extra_sources ) {
-        /* simply announce my arrival */
-        opal_atomic_wmb ();
-        my_ctl_pointer->flags[ALLGATHER_FLAG][bcol_id] = ready_flag;
-    }
-
-FINISHED:
-    /* bump this up for others to see */
-    my_ctl_pointer->starting_flag_value[bcol_id]++;
-    return BCOL_FN_COMPLETE;
-}
-
-/* Register allgather functions to the BCOL function table,
- * so they can be selected
- */
-int bcol_basesmuma_allgather_init(mca_bcol_base_module_t *super)
-{
-    mca_bcol_base_coll_fn_comm_attributes_t comm_attribs;
-    mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs;
-
-    comm_attribs.bcoll_type = BCOL_ALLGATHER;
-    comm_attribs.comm_size_min = 0;
-    comm_attribs.comm_size_max = 1024 * 1024;
-    comm_attribs.waiting_semantics = NON_BLOCKING;
-
-    inv_attribs.bcol_msg_min = 0;
-    inv_attribs.bcol_msg_max = 20000; /* range 1 */
-
-    inv_attribs.datatype_bitmap = 0xffffffff;
-    inv_attribs.op_types_bitmap = 0xffffffff;
-
-    comm_attribs.data_src = DATA_SRC_KNOWN;
-
-    mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
-                                 bcol_basesmuma_k_nomial_allgather_init,
-                                 bcol_basesmuma_k_nomial_allgather_progress);
-
-    return OMPI_SUCCESS;
-}
diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_allreduce.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_allreduce.c
deleted file mode 100644
index 0058ec770f..0000000000
--- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_allreduce.c
+++ /dev/null
@@ -1,611 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
-/*
- * Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved.
- * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
- *                         reserved.
- * Copyright (c) 2015      Cisco Systems, Inc. All rights reserved.
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/constants.h" -#include "ompi/op/op.h" -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/communicator/communicator.h" - -#include "opal/include/opal_stdint.h" - -#include "ompi/mca/bcol/base/base.h" -#include "bcol_basesmuma.h" - -static int bcol_basesmuma_allreduce_intra_fanin_fanout_progress (bcol_function_args_t *input_args, mca_bcol_base_function_t *c_input_args); - -int bcol_basesmuma_allreduce_init(mca_bcol_base_module_t *super) -{ - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - comm_attribs.bcoll_type = BCOL_ALLREDUCE; - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1048576; - comm_attribs.data_src = DATA_SRC_KNOWN; - - /* selection logic at the ml level specifies a - * request for a non-blocking algorithm - * however, these algorithms are blocking - * following what was done at the p2p level - * we will specify non-blocking, but beware, - * these algorithms are blocking and will not make use - * of the progress engine - */ - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - /* Set attributes for fanin fanout algorithm */ - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_basesmuma_allreduce_intra_fanin_fanout, - bcol_basesmuma_allreduce_intra_fanin_fanout_progress); - - inv_attribs.bcol_msg_min = 20000; - inv_attribs.bcol_msg_max = 10485760; /* range 4 */ - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_basesmuma_allreduce_intra_fanin_fanout, - bcol_basesmuma_allreduce_intra_fanin_fanout_progress); - - /* Differs only in comm size */ - - comm_attribs.data_src = DATA_SRC_UNKNOWN; - comm_attribs.waiting_semantics = BLOCKING; - - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 8; - - /* Set attributes for recursive doubling algorithm */ - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_basesmuma_allreduce_intra_recursive_doubling, - NULL); - - - return OMPI_SUCCESS; -} - -/* - * Small data fanin reduce - * ML buffers are used for both payload and control structures - * This functions works with hierarchical allreduce and - * progress engine - */ -static inline int reduce_children (mca_bcol_basesmuma_module_t *bcol_module, volatile void *rbuf, netpatterns_tree_node_t *my_reduction_node, - int *iteration, volatile mca_bcol_basesmuma_header_t *my_ctl_pointer, ompi_datatype_t *dtype, - volatile mca_bcol_basesmuma_payload_t *data_buffs, int count, struct ompi_op_t *op, int process_shift) -{ - volatile mca_bcol_basesmuma_header_t *child_ctl_pointer; - int bcol_id = (int) bcol_module->super.bcol_id; - int64_t sequence_number = my_ctl_pointer->sequence_number; - int8_t ready_flag = my_ctl_pointer->ready_flag; - int group_size = bcol_module->colls_no_user_data.size_of_group; - - if (LEAF_NODE != my_reduction_node->my_node_type) { - volatile char *child_data_pointer; - volatile void *child_rbuf; - - /* for each child */ - /* my_result_data = child_result_data (op) my_source_data */ - - for (int child = *iteration ; child < my_reduction_node->n_children ; ++child) { - int child_rank = my_reduction_node->children_ranks[child] + process_shift; - - if (group_size <= child_rank){ - child_rank -= group_size; - } - - child_ctl_pointer = 
data_buffs[child_rank].ctl_struct; - - if (!IS_PEER_READY(child_ctl_pointer, ready_flag, sequence_number, ALLREDUCE_FLAG, bcol_id)) { - *iteration = child; - return BCOL_FN_STARTED; - } - - child_data_pointer = data_buffs[child_rank].payload; - child_rbuf = child_data_pointer + child_ctl_pointer->roffsets[bcol_id]; - - ompi_op_reduce(op, (void *)child_rbuf, (void *)rbuf, count, dtype); - } /* end child loop */ - } - - if (ROOT_NODE != my_reduction_node->my_node_type) { - opal_atomic_wmb (); - my_ctl_pointer->flags[ALLREDUCE_FLAG][bcol_id] = ready_flag; - } - - /* done with this step. move on to fan out */ - *iteration = -1; - - return BCOL_FN_COMPLETE; -} - -static int allreduce_fanout (mca_bcol_basesmuma_module_t *bcol_module, volatile mca_bcol_basesmuma_header_t *my_ctl_pointer, - volatile void *my_data_pointer, int process_shift, volatile mca_bcol_basesmuma_payload_t *data_buffs, - int sequence_number, int group_size, int rbuf_offset, size_t pack_len) -{ - volatile mca_bcol_basesmuma_header_t *parent_ctl_pointer; - int bcol_id = (int) bcol_module->super.bcol_id; - int8_t ready_flag = my_ctl_pointer->ready_flag + 1; - netpatterns_tree_node_t *my_fanout_read_tree; - volatile void *parent_data_pointer; - int my_fanout_parent, my_rank; - void *parent_rbuf, *rbuf; - - my_rank = bcol_module->super.sbgp_partner_module->my_index; - my_fanout_read_tree = &(bcol_module->fanout_read_tree[my_rank]); - - if (ROOT_NODE != my_fanout_read_tree->my_node_type) { - my_fanout_parent = my_fanout_read_tree->parent_rank + process_shift; - if (group_size <= my_fanout_parent) { - my_fanout_parent -= group_size; - } - - rbuf = (void *)((char *) my_data_pointer + rbuf_offset); - - /* - * Get parent payload data and control data. - * Get the pointer to the base address of the parent's payload buffer. - * Get the parent's control buffer. - */ - parent_data_pointer = data_buffs[my_fanout_parent].payload; - parent_ctl_pointer = data_buffs[my_fanout_parent].ctl_struct; - - parent_rbuf = (void *) ((char *) parent_data_pointer + rbuf_offset); - - /* Wait until parent signals that data is ready */ - /* The order of conditions checked in this loop is important, as it can - * result in a race condition. 
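- * (the peer's sequence number is required to match the current collective
- * before its flag value is trusted)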
- */ - if (!IS_PEER_READY(parent_ctl_pointer, ready_flag, sequence_number, ALLREDUCE_FLAG, bcol_id)) { - return BCOL_FN_STARTED; - } - - assert (parent_ctl_pointer->flags[ALLREDUCE_FLAG][bcol_id] == ready_flag); - - /* Copy the rank to a shared buffer writable by the current rank */ - memcpy ((void *) rbuf, (const void*) parent_rbuf, pack_len); - } - - if (LEAF_NODE != my_fanout_read_tree->my_node_type) { - opal_atomic_wmb (); - - /* Signal to children that they may read the data from my shared buffer (bump the ready flag) */ - my_ctl_pointer->flags[ALLREDUCE_FLAG][bcol_id] = ready_flag; - } - - my_ctl_pointer->starting_flag_value[bcol_id] += 1; - - return BCOL_FN_COMPLETE; - -} - -static int bcol_basesmuma_allreduce_intra_fanin_fanout_progress (bcol_function_args_t *input_args, mca_bcol_base_function_t *c_input_args) -{ - mca_bcol_basesmuma_module_t *bcol_module = (mca_bcol_basesmuma_module_t *) c_input_args->bcol_module; - int buff_idx = input_args->src_desc->buffer_index; - int *iteration = &bcol_module->ml_mem.nb_coll_desc[buff_idx].iteration; - void *data_addr = (void *) input_args->src_desc->data_addr; - int my_node_index, my_rank, group_size, leading_dim, idx; - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer; - int64_t sequence_number = input_args->sequence_num; - volatile mca_bcol_basesmuma_payload_t *data_buffs; - struct ompi_datatype_t *dtype = input_args->dtype; - netpatterns_tree_node_t *my_reduction_node; - struct ompi_op_t *op = input_args->op; - volatile void *my_data_pointer; - int count = input_args->count; - int rc, process_shift; - ptrdiff_t lb, extent; - volatile void *rbuf; - - /* get addressing information */ - my_rank = bcol_module->super.sbgp_partner_module->my_index; - group_size = bcol_module->colls_no_user_data.size_of_group; - leading_dim = bcol_module->colls_no_user_data.size_of_group; - idx = SM_ARRAY_INDEX(leading_dim,buff_idx,0); - - /* Align node index to around sbgp root */ - process_shift = input_args->root; - my_node_index = my_rank - input_args->root; - if (0 > my_node_index ) { - my_node_index += group_size; - } - - data_buffs = (volatile mca_bcol_basesmuma_payload_t *) bcol_module->colls_with_user_data.data_buffs + idx; - /* Get control structure and payload buffer */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - my_data_pointer = (volatile char *) data_addr; - - my_data_pointer = (volatile char *) data_addr; - rbuf = (volatile void *)((char *) my_data_pointer + input_args->rbuf_offset); - - /*************************** - * Fan into root phase - ***************************/ - - my_reduction_node = &(bcol_module->reduction_tree[my_node_index]); - if (-1 != *iteration) { - rc = reduce_children (bcol_module, rbuf, my_reduction_node, iteration, my_ctl_pointer, - dtype, data_buffs, count, op, process_shift); - if (BCOL_FN_COMPLETE != rc) { - return rc; - } - } - - /* there might be non-contig dtype - so compute the length with get_extent */ - ompi_datatype_get_extent(dtype, &lb, &extent); - - /*************************** - * Fan out from root - ***************************/ - - /* all nodes will have the result after fanout */ - input_args->result_in_rbuf = true; - - /* Signal that you are ready for fanout phase */ - return allreduce_fanout (bcol_module, my_ctl_pointer, my_data_pointer, process_shift, data_buffs, - sequence_number, group_size, input_args->rbuf_offset, count * (size_t) extent); -} - -/** - * Shared memory blocking allreduce. 
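- * Data is reduced up the reduction tree to the root (fan-in), then the
- * result is read back down the fan-out read tree by every rank.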
- */ -int bcol_basesmuma_allreduce_intra_fanin_fanout(bcol_function_args_t *input_args, mca_bcol_base_function_t *c_input_args) -{ - /* local variables */ - mca_bcol_basesmuma_module_t *bcol_module = (mca_bcol_basesmuma_module_t *) c_input_args->bcol_module; - int buff_idx = input_args->src_desc->buffer_index; - int *iteration = &bcol_module->ml_mem.nb_coll_desc[buff_idx].iteration; - void *data_addr = (void *) input_args->src_desc->data_addr; - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer; - volatile mca_bcol_basesmuma_payload_t *data_buffs; - struct ompi_datatype_t *dtype = input_args->dtype; - int bcol_id = (int) bcol_module->super.bcol_id; - int rc, my_rank, leading_dim, idx; - volatile void *my_data_pointer; - volatile void *sbuf, *rbuf; - int8_t ready_flag; - - /* get addressing information */ - my_rank = bcol_module->super.sbgp_partner_module->my_index; - leading_dim = bcol_module->colls_no_user_data.size_of_group; - idx = SM_ARRAY_INDEX(leading_dim, buff_idx, 0); - - data_buffs = (volatile mca_bcol_basesmuma_payload_t *) bcol_module->colls_with_user_data.data_buffs + idx; - /* Get control structure */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - - my_data_pointer = (volatile char *) data_addr; - rbuf = (volatile void *)((char *) my_data_pointer + input_args->rbuf_offset); - sbuf = (volatile void *)((char *) my_data_pointer + input_args->sbuf_offset); - - /* Setup resource recycling */ - /* Set for multiple instances of bcols */ - BASESMUMA_HEADER_INIT(my_ctl_pointer, ready_flag, input_args->sequence_num, bcol_id); - - if (sbuf != rbuf) { - rc = ompi_datatype_copy_content_same_ddt (dtype, input_args->count, (char *)rbuf, - (char *)sbuf); - if( 0 != rc ) { - return OMPI_ERROR; - } - } - - *iteration = 0; - my_ctl_pointer->ready_flag = ready_flag; - - return bcol_basesmuma_allreduce_intra_fanin_fanout_progress (input_args, c_input_args); -} - - - -/* this thing uses the old bcol private control structures */ -int bcol_basesmuma_allreduce_intra_recursive_doubling(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - - int my_rank,group_size,my_node_index; - int pair_rank, exchange, extra_rank, payload_len; - size_t dt_size; - int read_offset, write_offset; - volatile void *my_data_pointer; - volatile mca_bcol_basesmuma_ctl_struct_t *my_ctl_pointer = NULL, - *partner_ctl_pointer = NULL, - *extra_ctl_pointer = NULL; - volatile void *my_read_pointer, *my_write_pointer, *partner_read_pointer, - *extra_rank_readwrite_data_pointer,*extra_rank_read_data_pointer; - mca_bcol_basesmuma_module_t* bcol_module = - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; - - int8_t ready_flag; - int sbuf_offset,rbuf_offset,flag_offset; - int root,count; - struct ompi_op_t *op; - int64_t sequence_number=input_args->sequence_num; - struct ompi_datatype_t *dtype; - int first_instance = 0; - int leading_dim,idx; - int buff_idx; - mca_bcol_basesmuma_ctl_struct_t **ctl_structs; - /*volatile void **data_buffs;*/ - volatile mca_bcol_basesmuma_payload_t *data_buffs; - netpatterns_pair_exchange_node_t *my_exchange_node; - - - /* - * Get addressing information - */ - buff_idx = input_args->src_desc->buffer_index; - - my_rank = bcol_module->super.sbgp_partner_module->my_index; - group_size = bcol_module->colls_no_user_data.size_of_group; - leading_dim = bcol_module->colls_no_user_data.size_of_group; - idx = SM_ARRAY_INDEX(leading_dim,buff_idx,0); - - /* - * Get SM control structures and payload buffers - */ - ctl_structs = (mca_bcol_basesmuma_ctl_struct_t **) - 
bcol_module->colls_with_user_data.ctl_buffs+idx; - /*data_buffs = (volatile void **) - bcol_module->colls_with_user_data.data_buffs+idx;*/ - - data_buffs = (volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs + idx; - - - /* - * Get control structure and payload buffer - */ - my_ctl_pointer = ctl_structs[my_rank]; - if (my_ctl_pointer->sequence_number < sequence_number) { - first_instance=1; - } - my_data_pointer = data_buffs[my_rank].payload; - - /* - * Align node index to around sbgp root - */ - root = input_args->root; - my_node_index = my_rank - root; - if (0 > my_node_index) { - my_node_index += group_size; - } - - /* - * Get data from arguments - */ - sbuf_offset = input_args->sbuf_offset; - rbuf_offset = input_args->rbuf_offset; - op = input_args->op; - count = input_args->count; - dtype = input_args->dtype; - - /* - * Get my node for the reduction tree - */ - my_exchange_node = &(bcol_module->recursive_doubling_tree); - - - if (first_instance) { - my_ctl_pointer->index = 1; - my_ctl_pointer->starting_flag_value = 0; - flag_offset = 0; - my_ctl_pointer->flag = -1; - /* - for( i = 0; i < NUM_SIGNAL_FLAGS; i++){ - my_ctl_pointer->flags[ALLREDUCE_FLAG] = -1; - } - */ - } else { - my_ctl_pointer->index++; - flag_offset = my_ctl_pointer->starting_flag_value; - } - - /* signal that I have arrived */ - /* opal_atomic_wmb (); */ - my_ctl_pointer->sequence_number = sequence_number; - - /* If we use this buffer more than once by an sm module in - * a given collective, will need to distinguish between instances, so - * we pick up the right data. - */ - ready_flag = flag_offset + sequence_number + 1; - - /* - * Set up pointers for using during recursive doubling phase - */ - read_offset = sbuf_offset; - write_offset = rbuf_offset; - fprintf(stderr,"read offset %d write offset %d\n",read_offset,write_offset); - my_read_pointer = (volatile void *)((char *) my_data_pointer + read_offset); - my_write_pointer = (volatile void *)((char *) my_data_pointer + write_offset); - - /* - * When there are non-power 2 nodes, the extra nodes' data is copied and - * reduced by partner exchange nodes. 
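- * For example, with a 6-rank group the nearest power of two is 4: ranks
- * 4 and 5 are extra nodes, and ranks 0-3 run the log2(4) = 2 exchanges.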
- * Extra nodes: Nodes with rank greater nearest power of 2 - * Exchange nodes: Nodes with rank lesser than nearest power of 2 that - * partner with extras nodes during reduction - */ - - if (0 < my_exchange_node->n_extra_sources) { - /* - * Signal extra node that data is ready - */ - opal_atomic_wmb (); - - my_ctl_pointer->flag = ready_flag; - - if (EXCHANGE_NODE == my_exchange_node->node_type) { - extra_rank = my_exchange_node->rank_extra_source; - extra_ctl_pointer = ctl_structs[extra_rank]; - extra_rank_readwrite_data_pointer = (void *) ((char *) data_buffs[extra_rank].payload + - read_offset); - - /* - * Wait for data to get ready - */ - while (!((sequence_number == extra_ctl_pointer->sequence_number) && - (extra_ctl_pointer->flag >= ready_flag))){ - } - - ompi_op_reduce(op,(void *)extra_rank_readwrite_data_pointer, - (void *)my_read_pointer, count, dtype); - } - } - - - /* --Exchange node that reduces with extra node --: Signal to extra node that data is read - * --Exchange node that doesn't reduce data with extra node --: This assignment - * is used so it can sync with other nodes during exchange phase - * --Extra node--: It can pass to next phase - */ - ready_flag++; - /*my_ctl_pointer->flags[ALLREDUCE_FLAG] = ready_flag;*/ - my_ctl_pointer->flag = ready_flag; - - - /* - * Exchange data with all the nodes that are less than max_power_2 - */ - for (exchange=0 ; exchange < my_exchange_node->n_exchanges ; exchange++) { - int tmp=0; - - /*my_ctl_pointer->flags[ALLREDUCE_FLAG] = ready_flag;*/ - my_ctl_pointer->flag = ready_flag; - pair_rank=my_exchange_node->rank_exchanges[exchange]; - partner_ctl_pointer = ctl_structs[pair_rank]; - partner_read_pointer = (volatile void *) ((char *)data_buffs[pair_rank].payload + read_offset); - - my_read_pointer = (volatile void *)((char *) my_data_pointer + read_offset); - my_write_pointer = (volatile void *)((char *) my_data_pointer + write_offset); - - /* - * Wait for partner to be ready, so we can read - */ - /* - JSL ---- FIX ME !!!!! 
MAKE ME COMPLIANT WITH NEW BUFFERS - while (!IS_ALLREDUCE_PEER_READY(partner_ctl_pointer, - ready_flag, sequence_number)) { - } - */ - - /* - * Perform reduction operation - */ - ompi_3buff_op_reduce(op,(void *)my_read_pointer, (void *)partner_read_pointer, - (void *)my_write_pointer, count, dtype); - - - /* - * Signal that I am done reading my partner's data - */ - ready_flag++; - /*my_ctl_pointer->flags[ALLREDUCE_FLAG] = ready_flag;*/ - my_ctl_pointer->flag = ready_flag; - - while (ready_flag > partner_ctl_pointer->flag){ - opal_progress(); - } - - /* - * Swap read and write offsets - */ - tmp = read_offset; - read_offset = write_offset; - write_offset = tmp; - - } - - - /* - * Copy data in from the "extra" source, if need be - */ - - if (0 < my_exchange_node->n_extra_sources) { - - if (EXTRA_NODE == my_exchange_node->node_type) { - - int extra_rank_read_offset=-1,my_write_offset=-1; - - /* Offset the ready flag to sync with - * exchange node which might going through exchange phases - * unlike the extra node - */ - ready_flag = ready_flag + my_exchange_node->log_2; - - if (my_exchange_node->log_2%2) { - extra_rank_read_offset = rbuf_offset; - my_write_offset = rbuf_offset; - - } else { - extra_rank_read_offset = sbuf_offset; - my_write_offset = sbuf_offset; - - } - - my_write_pointer = (volatile void*)((char *)my_data_pointer + my_write_offset); - extra_rank = my_exchange_node->rank_extra_source; - extra_ctl_pointer = ctl_structs[extra_rank]; - - extra_rank_read_data_pointer = (volatile void *) ((char *)data_buffs[extra_rank].payload + - extra_rank_read_offset); - - /* - * Wait for the exchange node to be ready - */ - ompi_datatype_type_size(dtype, &dt_size); - payload_len = count*dt_size; -#if 0 - fix me JSL !!!!! - while (!IS_DATA_READY(extra_ctl_pointer, ready_flag, sequence_number)){ - } -#endif - memcpy((void *)my_write_pointer,(const void *) - extra_rank_read_data_pointer, payload_len); - - ready_flag++; - /*my_ctl_pointer->flags[ALLREDUCE_FLAG] = ready_flag;*/ - my_ctl_pointer->flag = ready_flag; - - - } else { - - /* - * Signal parent that data is ready - */ - opal_atomic_wmb (); - /*my_ctl_pointer->flags[ALLREDUCE_FLAG] = ready_flag;*/ - my_ctl_pointer->flag = ready_flag; - - /* wait until child is done to move on - this buffer will - * be reused for the next stripe, so don't want to move - * on too quick. - */ - extra_rank = my_exchange_node->rank_extra_source; - extra_ctl_pointer = ctl_structs[extra_rank]; - } - } - - input_args->result_in_rbuf = my_exchange_node->log_2 & 1; - - my_ctl_pointer->starting_flag_value += 1; - - return BCOL_FN_COMPLETE; -} diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast.c deleted file mode 100644 index 340c0c4c7f..0000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast.c +++ /dev/null @@ -1,487 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "ompi_config.h" - -#include "ompi/constants.h" -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/communicator/communicator.h" - -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/bcol/base/base.h" - -#include "bcol_basesmuma.h" - -#define __TEST_BLOCKING__ 1 -#define __TEST_WAIT__ 0 -#define __TEST_TEST__ 0 - -/* debug - * #include "opal/sys/timer.h" - * - * extern uint64_t timers[7]; - * end debug */ - -/* debug */ -/* end debug */ -int bcol_basesmuma_bcast_init(mca_bcol_base_module_t *super) -{ - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - comm_attribs.bcoll_type = BCOL_BCAST; - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1048576; - comm_attribs.data_src = DATA_SRC_KNOWN; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_basesmuma_bcast_k_nomial_knownroot, - bcol_basesmuma_bcast_k_nomial_knownroot); - - inv_attribs.bcol_msg_min = 10000000; - inv_attribs.bcol_msg_max = 10485760; /* range 4 */ - - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_basesmuma_bcast_k_nomial_knownroot, - bcol_basesmuma_bcast_k_nomial_knownroot); - - comm_attribs.data_src = DATA_SRC_UNKNOWN; - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_basesmuma_bcast_k_nomial_anyroot, - bcol_basesmuma_bcast_k_nomial_anyroot); - - comm_attribs.data_src = DATA_SRC_UNKNOWN; - inv_attribs.bcol_msg_min = 10000000; - inv_attribs.bcol_msg_max = 10485760; /* range 4 */ - -#ifdef __PORTALS_AVAIL__ - - comm_attribs.waiting_semantics = BLOCKING; - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_basesmuma_lmsg_scatter_allgather_portals_bcast, - bcol_basesmuma_lmsg_scatter_allgather_portals_bcast); - - - comm_attribs.waiting_semantics = NON_BLOCKING; - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_basesmuma_lmsg_scatter_allgather_portals_nb_bcast, - bcol_basesmuma_lmsg_scatter_allgather_portals_nb_bcast); - - comm_attribs.data_src = DATA_SRC_KNOWN; - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_basesmuma_lmsg_scatter_allgather_portals_nb_knownroot_bcast, - bcol_basesmuma_lmsg_scatter_allgather_portals_nb_knownroot_bcast); - -#else - /* - if (super->use_hdl) { - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_basesmuma_hdl_zerocopy_bcast, - bcol_basesmuma_hdl_zerocopy_bcast); - } else { */ - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, NULL, NULL); - /* - bcol_basesmuma_binary_scatter_allgather_segment, - bcol_basesmuma_binary_scatter_allgather_segment); - */ - /* } */ -#endif - - return OMPI_SUCCESS; -} - -/* includes shared memory optimization */ - -/** - * Shared memory blocking Broadcast - fanin, for small data buffers. - * This routine assumes that buf (the input buffer) is a single writer - * multi reader (SWMR) shared memory buffer owned by the calling rank - * which is the only rank that can write to this buffers. 
- * It is also assumed that the buffers are registered and fragmented - * at the ML level and that buf is sufficiently large to hold the data. - * - * - * @param buf - SWMR shared buffer within a sbgp that the - * executing rank can write to. - * @param count - the number of elements in the shared buffer. - * @param dtype - the datatype of a shared buffer element. - * @param root - the index within the sbgp of the root. - * @param module - basesmuma module. - */ -int bcol_basesmuma_bcast(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - /* local variables */ - int group_size, process_shift, my_node_index; - int my_rank; - int rc = OMPI_SUCCESS; - int my_fanout_parent; - int leading_dim, buff_idx, idx; - volatile int8_t ready_flag; - int count=input_args->count; - struct ompi_datatype_t* dtype=input_args->dtype; - int root=input_args->root; - int64_t sequence_number=input_args->sequence_num; - mca_bcol_basesmuma_module_t* bcol_module= - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; - int bcol_id = (int) bcol_module->super.bcol_id; - volatile mca_bcol_basesmuma_payload_t *data_buffs; - volatile char* parent_data_pointer; - mca_bcol_basesmuma_header_t *my_ctl_pointer; - volatile mca_bcol_basesmuma_header_t *parent_ctl_pointer; - netpatterns_tree_node_t* my_fanout_read_tree; - size_t pack_len = 0, dt_size; - - void *data_addr = (void *)((unsigned char *)input_args->src_desc->data_addr ); - -#if 0 - fprintf(stderr,"Entering sm broadcast input_args->sbuf_offset %d \n",input_args->sbuf_offset); - fflush(stderr); -#endif - - - /* we will work only on packed data - so compute the length*/ - ompi_datatype_type_size(dtype, &dt_size); - pack_len=count*dt_size; - - buff_idx = input_args->src_desc->buffer_index; - - /* Get addressing information */ - my_rank = bcol_module->super.sbgp_partner_module->my_index; - group_size = bcol_module->colls_no_user_data.size_of_group; - leading_dim=bcol_module->colls_no_user_data.size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - data_buffs=(volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs+idx; - - /* Align node index to around sbgp root */ - process_shift = root; - my_node_index = my_rank - root; - if(0 > my_node_index ) { - my_node_index += group_size; - } - - /* get my node for the bcast tree */ - my_fanout_read_tree = &(bcol_module->fanout_read_tree[my_node_index]); - my_fanout_parent = my_fanout_read_tree->parent_rank + process_shift; - if(group_size <= my_fanout_parent){ - my_fanout_parent -= group_size; - } - - /* Set pointer to current proc ctrl region */ - /*my_ctl_pointer = ctl_structs[my_rank]; */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - - /* setup resource recycling */ - - BASESMUMA_HEADER_INIT(my_ctl_pointer, ready_flag, sequence_number, bcol_id); - - /* - * Fan out from root - */ - if(ROOT_NODE == my_fanout_read_tree->my_node_type) { - input_args->result_in_rbuf = false; - /* Root should only signal it is ready */ - my_ctl_pointer->flags[BCAST_FLAG][bcol_id] = ready_flag; - - }else if(LEAF_NODE == my_fanout_read_tree->my_node_type) { - input_args->result_in_rbuf = false; - /* - * Get parent payload data and control data. - * Get the pointer to the base address of the parent's payload buffer. - * Get the parent's control buffer. 
- */ - parent_data_pointer = data_buffs[my_fanout_parent].payload; - parent_ctl_pointer = data_buffs[my_fanout_parent].ctl_struct; - - /* Wait until parent signals that data is ready */ - /* The order of conditions checked in this loop is important, as it can - * result in a race condition. - */ - while (!IS_PEER_READY(parent_ctl_pointer, ready_flag, sequence_number, BCAST_FLAG, bcol_id)){ - opal_progress(); - } - - /* Copy the rank to a shared buffer writable by the current rank */ - memcpy(data_addr, (void *)parent_data_pointer, pack_len); - - if( 0 != rc ) { - return OMPI_ERROR; - } - - }else{ - input_args->result_in_rbuf = false; - /* Interior node */ - - /* Get parent payload data and control data */ - parent_data_pointer = data_buffs[my_fanout_parent].payload; - parent_ctl_pointer = data_buffs[my_fanout_parent].ctl_struct; - - - /* Wait until parent signals that data is ready */ - /* The order of conditions checked in this loop is important, as it can - * result in a race condition. - */ - while (!IS_PEER_READY(parent_ctl_pointer, ready_flag, sequence_number, BCAST_FLAG, bcol_id)){ - opal_progress(); - } - - /* Copy the rank to a shared buffer writable by the current rank */ - memcpy(data_addr, (void *)parent_data_pointer,pack_len); - - /* Signal to children that they may read the data from my shared buffer */ - opal_atomic_wmb (); - my_ctl_pointer->flags[BCAST_FLAG][bcol_id] = ready_flag; - } - - /* if I am the last instance of a basesmuma function in this collectie, - * release the resrouces */ - - my_ctl_pointer->starting_flag_value[bcol_id]++; - - return rc; -} - - -/*zero-copy large massage communication methods*/ -#if 0 -int bcol_basesmuma_hdl_zerocopy_bcast(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - /* local variables */ - int group_size, process_shift, my_node_index; - int my_rank, first_instance=0, flag_offset; - int rc = OMPI_SUCCESS; - int my_fanout_parent; - int leading_dim, buff_idx, idx; - volatile int64_t ready_flag; - int count=input_args->count; - struct ompi_datatype_t* dtype=input_args->dtype; - int root=input_args->root; - int64_t sequence_number=input_args->sequence_num; - mca_bcol_basesmuma_module_t* bcol_module= - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; - - netpatterns_tree_node_t* my_fanout_read_tree; - size_t pack_len = 0, dt_size; - - void *data_addr = (void *)((unsigned char *)input_args->src_desc->data_addr); - - struct mca_hdl_base_descriptor_t *hdl_desc; - struct mca_hdl_base_segment_t *hdl_seg; - int ret, completed, ridx/*remote rank index*/; - bool status; - volatile mca_bcol_basesmuma_ctl_struct_t **ctl_structs; - mca_bcol_basesmuma_ctl_struct_t *my_ctl_pointer= NULL; - volatile mca_bcol_basesmuma_ctl_struct_t *parent_ctl_pointer= NULL; - volatile mca_bcol_basesmuma_ctl_struct_t *child_ctl_pointer= NULL; - struct mca_hdl_base_module_t* hdl = bcol_module->hdl_module[0]; - - - /* we will work only on packed data - so compute the length*/ - ompi_datatype_type_size(dtype, &dt_size); - pack_len = count * dt_size; - - buff_idx = input_args->src_desc->buffer_index; - - /* Get addressing information */ - my_rank = bcol_module->super.sbgp_partner_module->my_index; - group_size = bcol_module->colls_no_user_data.size_of_group; - leading_dim=bcol_module->colls_no_user_data.size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - ctl_structs = (volatile mca_bcol_basesmuma_ctl_struct_t **) - bcol_module->colls_with_user_data.ctl_buffs+idx; - my_ctl_pointer = ctl_structs[my_rank]; - - /* Align node index 
to around sbgp root */ - process_shift = root; - my_node_index = my_rank - root; - if(0 > my_node_index ) { - my_node_index += group_size; - } - - /* get my node for the bcast tree */ - my_fanout_read_tree = &(bcol_module->fanout_read_tree[my_node_index]); - my_fanout_parent = my_fanout_read_tree->parent_rank + process_shift; - if(group_size <= my_fanout_parent){ - my_fanout_parent -= group_size; - } - - /* setup resource recycling */ - if( my_ctl_pointer->sequence_number < sequence_number ) { - first_instance = 1; - } - - if( first_instance ) { - /* Signal arrival */ - my_ctl_pointer->flag = -1; - my_ctl_pointer->index = 1; - /* this does not need to use any flag values , so only need to - * set the value for subsequent values that may need this */ - my_ctl_pointer->starting_flag_value = 0; - flag_offset = 0; - } else { - /* only one thread at a time will be making progress on this - * collective, so no need to make this atomic */ - my_ctl_pointer->index++; - } - - - /* increment the starting flag by one and return */ - flag_offset = my_ctl_pointer->starting_flag_value; - ready_flag = flag_offset + sequence_number + 1; - my_ctl_pointer->sequence_number = sequence_number; - - hdl_desc = (mca_hdl_base_descriptor_t *) - malloc (sizeof (mca_hdl_base_descriptor_t) * 1); - - /*prepare a hdl data segment*/ - hdl_seg = (mca_hdl_base_segment_t*) - malloc ( sizeof (mca_hdl_base_segment_t) * 1); - hdl_seg->seg_addr.pval = input_args->sbuf; - hdl_seg->seg_len = pack_len; - - - hdl->endpoint->ready_flag = ready_flag; - hdl->endpoint->local_ctrl = my_ctl_pointer; - hdl->endpoint->sbgp_contextid = - bcol_module->super.sbgp_partner_module->group_comm->c_contextid; - - /* - * Fan out from root - */ - if(ROOT_NODE == my_fanout_read_tree->my_node_type) { - input_args->result_in_rbuf = false; - - hdl_desc->des_src = hdl_seg; - hdl_desc->des_src_cnt = 1; - hdl_desc->isroot = true; - - /*As the general semantics, there might multiple pairs of send/recv - *on the topology tree*/ - for (ridx = 0; ridx < my_fanout_read_tree->n_children; ridx++) { - child_ctl_pointer = - ctl_structs[my_fanout_read_tree->children_ranks[ridx]]; - hdl->endpoint->remote_ctrl = child_ctl_pointer; - ret = hdl->hdl_send(hdl, hdl->endpoint, hdl_desc); - if (ret != OMPI_SUCCESS) { - BASESMUMA_VERBOSE(1, ("send eror on rank %d ........", my_rank)); - goto exit_ERROR; - } - } - }else if(LEAF_NODE == my_fanout_read_tree->my_node_type) { - input_args->result_in_rbuf = false; - /* - * Get parent payload data and control data. - * Get the pointer to the base address of the parent's payload buffer. - * Get the parent's control buffer. 
- */ - parent_ctl_pointer = ctl_structs[my_fanout_parent]; - - hdl_desc->des_dst = hdl_seg; - hdl_desc->des_dst_cnt = 1; - hdl_desc->isroot = false; - hdl->endpoint->remote_ctrl = parent_ctl_pointer; - -#if __TEST_BLOCKING__ - ret = hdl->hdl_recv(hdl, hdl->endpoint, hdl_desc); -#else - ret = hdl->hdl_recvi(hdl, hdl->endpoint, NULL, 0, 0, &hdl_desc); -#endif - -#if __TEST_WAIT__ - ret = hdl->hdl_wait(hdl, hdl->endpoint, hdl_desc); - BASESMUMA_VERBOSE(1,("wait on rank %d is done!", my_rank)); -#endif - if (OMPI_SUCCESS != ret) { - BASESMUMA_VERBOSE(1, ("recvi eror on rank %d ........", my_rank)); - goto exit_ERROR; - } - - status = false; -#if __TEST_TEST__ - while (!status) { - hdl->hdl_test(&hdl_desc, &completed, &status); - opal_progress(); - BASESMUMA_VERBOSE(1, ("test on rank %d ........", my_rank)); - } -#endif - - goto Release; - - }else{ - input_args->result_in_rbuf = false; - /* Interior node */ - - /* Get parent payload data and control data */ - parent_ctl_pointer = ctl_structs[my_fanout_parent]; - - hdl_desc->des_dst = hdl_seg; - hdl_desc->des_dst_cnt = 1; - hdl_desc->isroot = false; - - hdl->endpoint->remote_ctrl = parent_ctl_pointer; - - ret = hdl->hdl_recv(hdl, hdl->endpoint, hdl_desc); - if (OMPI_SUCCESS != ret) { - goto exit_ERROR; - } - if (OMPI_SUCCESS != ret) { - BASESMUMA_VERBOSE(1, ("recvi eror on rank %d ........", my_rank)); - goto exit_ERROR; - } - - /* Signal to children that they may read the data from my shared buffer */ - opal_atomic_wmb (); - hdl_desc->des_src = hdl_seg; - hdl_desc->des_src_cnt = 1; - for (ridx = 0; ridx < my_fanout_read_tree->n_children; ridx++) { - child_ctl_pointer = - ctl_structs[my_fanout_read_tree->children_ranks[ridx]]; - hdl->endpoint->remote_ctrl = child_ctl_pointer; - - ret = hdl->hdl_send(hdl, hdl->endpoint, hdl_desc); - if (ret != OMPI_SUCCESS) { - BASESMUMA_VERBOSE(1, ("send eror on rank %d ........", my_rank)); - goto exit_ERROR; - } - } - goto Release; - } - - Release: - /* if I am the last instance of a basesmuma function in this collectie, - * release the resrouces */ - if (IS_LAST_BCOL_FUNC(c_input_args)) { - rc = bcol_basesmuma_free_buff( - &(bcol_module->colls_with_user_data), - sequence_number); - } - - my_ctl_pointer->starting_flag_value += 1; - - return BCOL_FN_COMPLETE; - exit_ERROR: - return OMPI_ERROR; -} -#endif diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast_prime.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast_prime.c deleted file mode 100644 index 4bb451f907..0000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast_prime.c +++ /dev/null @@ -1,895 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "ompi/constants.h" -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/communicator/communicator.h" - -#include "bcol_basesmuma_utils.h" -#include "bcol_basesmuma.h" - -/* debug - * #include "opal/sys/timer.h" - * - * extern uint64_t timers[7]; - * end debug */ - -/* debug */ -#include -/* end debug */ - -/* includes shared memory optimization */ - -#define BCOL_BASESMUMA_SM_PROBE(src_list, n_src, my_index, matched, src) \ - do { \ - int j; \ - for( j = 0; j < n_src; j++) { \ - parent_ctl_pointer = data_buffs[src_list[j]].ctl_struct; \ - parent_data_pointer = data_buffs[src_list[j]].payload; \ - if( IS_DATA_READY(parent_ctl_pointer,ready_flag,sequence_number)) { \ - src = src_list[j]; \ - matched = 1; \ - break; \ - } \ - } \ - } while(0) - -/* - #define IS_LARGE_DATA_READY(peer, my_flag, my_sequence_number) \ - (((peer)->sequence_number == (my_sequence_number) && \ - (peer)->flags[BCAST_FLAG] >= (my_flag) \ - )? true : false ) -*/ - -/* - #define IS_KNOWN_ROOT_DATA_READY(peer, my_flag, my_sequence_number) \ - (((peer)->sequence_number == (my_sequence_number) && \ - (peer)->flags[BCAST_FLAG][bcol_id] >= (my_flag) \ - )? true : false ) -*/ - -#define BCOL_BASESMUMA_SM_LARGE_MSG_PROBE(src_list, n_src, my_index, matched, src, flag_index, bcol_id) \ - do { \ - int j; \ - for( j = 0; j < n_src; j++) { \ - /* fprintf(stderr,"my_rank %d and %d\n",my_rank,1); */ \ - if(src_list[j] != -1) { \ - parent_ctl_pointer = ctl_structs[src_list[j]]; \ - parent_data_pointer = (void *) data_buffs[src_list[j]].ctl_struct; \ - /*fprintf(stderr,"my_rank %d ready flag %d partner flag %d and %d\n",my_rank,ready_flag,parent_ctl_pointer->flag,2); */ \ - if( IS_PEER_READY(parent_ctl_pointer,ready_flag,sequence_number, flag_index, bcol_id)) { \ - src = src_list[j]; \ - matched = 1; \ - index = j; \ - /* fprintf(stderr,"found it from %d!\n",src);*/ \ - break; \ - } \ - } \ - } \ - } while(0) - -#define K_NOMIAL_DATA_SRC(radix, my_group_index, group_size, group_root, data_src, radix_mask) \ - do { \ - int relative_rank = (my_group_index >= group_root) ? 
my_group_index - group_root : \ - my_group_index - group_root + group_size; \ - radix_mask = 1; \ - while (radix_mask < group_size) { \ - if (relative_rank % (radix * radix_mask)) { \ - data_src = relative_rank/(radix * radix_mask) * (radix * radix_mask) + group_root; \ - if (data_src >= group_size) data_src -= group_size; \ - break; \ - } \ - radix_mask *= radix; \ - } \ - } while (0) - -int bcol_basesmuma_bcast_k_nomial_knownroot(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - /* local variables */ - mca_bcol_basesmuma_module_t* bcol_module= - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - int i, matched = 0; - int group_size; - int my_rank; - int leading_dim, - buff_idx, - idx; - int count = input_args->count; - struct ompi_datatype_t* dtype = input_args->dtype; - int64_t sequence_number = input_args->sequence_num; - int radix = - mca_bcol_basesmuma_component.k_nomial_radix; - int radix_mask; - int16_t data_src = -1; - - volatile int8_t ready_flag; - int bcol_id = (int) bcol_module->super.bcol_id; - volatile mca_bcol_basesmuma_payload_t *data_buffs; - volatile char* parent_data_pointer; - volatile mca_bcol_basesmuma_header_t *parent_ctl_pointer; - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer; - - size_t pack_len = 0; - void *data_addr = (void *)((unsigned char *)input_args->src_desc->data_addr + - input_args->sbuf_offset); - -#if 0 - fprintf(stderr,"Entering nb-sm broadcast input_args->sbuf_offset %d \n",input_args->sbuf_offset); - fflush(stderr); -#endif - - - /* we will work only on packed data - so compute the length*/ - BASESMUMA_VERBOSE(3, ("Calling bcol_basesmuma_bcast_k_nomial_knownroot")); - - pack_len = mca_bcol_base_get_buff_length(dtype, count); - /* Some hierarchical algorithms have data that is accumulated at each step - * this factor accounts for this - */ - pack_len = pack_len*input_args->hier_factor; - buff_idx = input_args->buffer_index; - - /* Get addressing information */ - my_rank = bcol_module->super.sbgp_partner_module->my_index; - group_size = bcol_module->colls_no_user_data.size_of_group; - leading_dim = bcol_module->colls_no_user_data.size_of_group; - idx = SM_ARRAY_INDEX(leading_dim,buff_idx,0); - data_buffs = (volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs + idx; - - /* Set pointer to current proc ctrl region */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - - /* setup resource recycling */ - BASESMUMA_HEADER_INIT(my_ctl_pointer, ready_flag, sequence_number, bcol_id); - /* removing dependence on sequence number */ - /* I believe this is resolved now with the signaling flags */ - /* - ready_temp = 1 + (int8_t) flag_offset + (int8_t) bcol_id; - if( ready_temp >= my_ctl_pointer->flags[BCAST_FLAG][bcol_id]) { - ready_flag = ready_temp; - } else { - ready_flag = my_ctl_pointer->flags[BCAST_FLAG][bcol_id]; - } - opal_atomic_wmb (); - my_ctl_pointer->sequence_number = sequence_number; - */ - - - /* non-blocking broadcast algorithm */ - - /* If I am the root, then signal ready flag */ - if(input_args->root_flag) { - BASESMUMA_VERBOSE(10,("I am the root of the data")); - /* - * signal ready flag - */ - opal_atomic_wmb (); - my_ctl_pointer->flags[BCAST_FLAG][bcol_id] = ready_flag; - - /* root is finished */ - goto Release; - } - - - /* Calculate source of the data */ - K_NOMIAL_DATA_SRC(radix, my_rank, group_size, - input_args->root_route->rank, data_src, radix_mask); - - - parent_ctl_pointer = 
data_buffs[data_src].ctl_struct; - parent_data_pointer = data_buffs[data_src].payload; - - for( i = 0; i < cs->num_to_probe && 0 == matched; i++) { - - if(IS_PEER_READY(parent_ctl_pointer,ready_flag,sequence_number, BCAST_FLAG, bcol_id)) { - matched = 1; - break; - } - } - - /* If not matched, then hop out and put me on progress list */ - if(0 == matched ) { - BASESMUMA_VERBOSE(10,("Shared memory probe didn't find a match")); - return BCOL_FN_NOT_STARTED; - } - - /* else, we found our root within the group ... */ - BASESMUMA_VERBOSE(10,("Shared memory probe was matched, the root is %d", data_src)); - - /* copy the data */ - memcpy(data_addr, (void *) parent_data_pointer, pack_len); - /* set the memory barrier to ensure completion */ - opal_atomic_wmb (); - /* signal that I am done */ - my_ctl_pointer->flags[BCAST_FLAG][bcol_id] = ready_flag; - - - Release: - my_ctl_pointer->starting_flag_value[bcol_id]++; - return BCOL_FN_COMPLETE; -} - - -/** - * Shared memory non-blocking Broadcast - K-nomial fan-out for small data buffers. - * This routine assumes that buf (the input buffer) is a single writer - * multi reader (SWMR) shared memory buffer owned by the calling rank - * which is the only rank that can write to this buffers. - * It is also assumed that the buffers are registered and fragmented - * at the ML level and that buf is sufficiently large to hold the data. - * - * - * @param buf - SWMR shared buffer within a sbgp that the - * executing rank can write to. - * @param count - the number of elements in the shared buffer. - * @param dtype - the datatype of a shared buffer element. - * @param root - the index within the sbgp of the root. - * @param module - basesmuma module. - */ -int bcol_basesmuma_bcast_k_nomial_anyroot(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - /* local variables */ - mca_bcol_basesmuma_module_t* bcol_module= - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - int i; - int group_size; - int my_rank; - int leading_dim, buff_idx, idx; - int count=input_args->count; - struct ompi_datatype_t* dtype=input_args->dtype; - int64_t sequence_number=input_args->sequence_num; - int radix = cs->k_nomial_radix; - int radix_mask; - int relative_rank; - int pow_k_group_size; - - volatile int8_t ready_flag; - int bcol_id = (int) bcol_module->super.bcol_id; - volatile mca_bcol_basesmuma_payload_t *data_buffs; - volatile void* parent_data_pointer; - - volatile mca_bcol_basesmuma_header_t *child_ctl_pointer; - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer; - - size_t pack_len = 0; - void *data_addr = (void *)((unsigned char *)input_args->src_desc->data_addr + - input_args->sbuf_offset); - -#if 0 - fprintf(stderr,"Entering nb-sm broadcast input_args->sbuf_offset %d \n",input_args->sbuf_offset); - fflush(stderr); -#endif - - - - /* we will work only on packed data - so compute the length*/ - pack_len = mca_bcol_base_get_buff_length(dtype, count); - - buff_idx = input_args->buffer_index; - - /* Get addressing information */ - my_rank = bcol_module->super.sbgp_partner_module->my_index; - group_size = bcol_module->colls_no_user_data.size_of_group; - leading_dim=bcol_module->colls_no_user_data.size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - - /* get pow_k_levels and pow_k_group_size */ - pow_k_group_size = bcol_module->pow_k; - - - data_buffs=(volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs+idx; - - /* Set pointer to 
current proc ctrl region */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - - BASESMUMA_HEADER_INIT(my_ctl_pointer, ready_flag, sequence_number, bcol_id); - - /* non-blocking broadcast algorithm */ - - /* If I am the root, then signal ready flag */ - if(input_args->root_flag) { - - BASESMUMA_VERBOSE(10,("I am the root of the data")); - /* - * set the radix_mask */ - radix_mask = pow_k_group_size; - /* send to children */ - opal_atomic_wmb (); - BASESMUMA_K_NOMIAL_SEND_CHILDREN(radix_mask, - radix,0, - my_rank,group_size, ready_flag); - /* root is finished */ - goto Release; - } - - /* If I am not the root, then poll on possible "senders'" control structs */ - for( i = 0; i < cs->num_to_probe; i++) { - - if( ready_flag == my_ctl_pointer->flags[BCAST_FLAG][bcol_id]) { - - /* else, we found our root within the group ... */ - parent_data_pointer = data_buffs[my_ctl_pointer->src].payload; - BASESMUMA_VERBOSE(5,("%d found it from %d \n",my_rank,my_ctl_pointer->src)); - /* memcopy the data */ - memcpy(data_addr, (void *) parent_data_pointer, pack_len); - /* compute my relative rank */ - relative_rank = (my_rank - my_ctl_pointer->src) < 0 ? my_rank - - my_ctl_pointer->src + group_size : my_rank - my_ctl_pointer->src; - - /* compute my radix mask */ - radix_mask = 1; - while(radix_mask < group_size ){ - if( 0 != relative_rank % (radix*radix_mask)) { - /* found it */ - break; - } - radix_mask *= radix; - } - /* go one step back */ - radix_mask /= radix; - - /* send to children */ - opal_atomic_wmb (); - BASESMUMA_K_NOMIAL_SEND_CHILDREN(radix_mask, - radix, relative_rank, - my_rank, group_size, ready_flag); - /* bail */ - - goto Release; - } - - } - - - - /* If not matched, then hop out and put me on progress list */ - BASESMUMA_VERBOSE(10,("Shared memory probe didn't find a match")); - /*fprintf(stderr,"bcol_id %d Not started\n",bcol_id);*/ - return BCOL_FN_NOT_STARTED; - - - - Release: - - - my_ctl_pointer->starting_flag_value[bcol_id]++; - - return BCOL_FN_COMPLETE; -} - - -/* non-blocking binary scatter allgather anyroot algorithm for large data - * broadcast - */ - - -#if 0 -/* prototype code for shared memory scatter/allgather algorithm. Signaling scheme - * works, should be used as a reference for other types of shared memory scatter/allgather - * algorithms. 
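- * Roughly: the root publishes its fragment parameters, the power-of-two
- * subgroup completes a recursive-doubling allgather, and ranks above the
- * power of two copy the assembled result from a partner at the end.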
- */ -int bcol_basesmuma_binary_scatter_allgather_segment(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - - /* local variables */ - int i, j; - int length; - int start; - int my_rank, parent_rank; - int partner; - int src = -1; - int matched = 0; - int group_size; - int first_instance=0; - int leading_dim, buff_idx, idx; - int64_t sequence_number=input_args->sequence_num; - - int64_t ready_flag; - int64_t local_offset; - - int flag_offset; - int pow_2, pow_2_levels; - int index = -1; - - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - mca_bcol_basesmuma_module_t *bcol_module = - (mca_bcol_basesmuma_module_t *) c_input_args->bcol_module; - /* use the old control structs for large messages, - * otherwise we will destroy the shared memory - * optimization - */ - mca_bcol_basesmuma_ctl_struct_t **ctl_structs; - mca_bcol_basesmuma_ctl_struct_t *my_ctl_pointer; - mca_bcol_basesmuma_ctl_struct_t *parent_ctl_pointer; /* binomial fanout */ - mca_bcol_basesmuma_ctl_struct_t *partner_ctl_pointer; /* recursive double */ - - /* for now, we use the payload buffer for single fragment */ - volatile mca_bcol_basesmuma_payload_t *data_buffs; - volatile void *parent_data_pointer; /* binomial scatter */ - volatile void *partner_data_pointer; /* recursive double */ - - uint32_t fragment_size; /* ml buffer size for now */ - - /* we will transfer the entire buffer, - * so start at the base address of the ml buffer - */ - void *data_addr = (void *) ((unsigned char *) input_args->src_desc->base_data_addr); -#if 0 - fprintf(stderr,"AAA Entering nb-sm large msg broadcast input_args->frag_size %d \n",input_args->frag_size); - fflush(stderr); -#endif - - buff_idx = input_args->src_desc->buffer_index; - - group_size = bcol_module->colls_no_user_data.size_of_group; - leading_dim=bcol_module->colls_no_user_data.size_of_group; - - /* get the largest power of two that is smaller than - * or equal to the group size - */ - pow_2_levels = bcol_module->pow_2_levels; - pow_2 = bcol_module->pow_2; - - /* get the fragment size - */ - - /* still just the size of the entire buffer */ - fragment_size = input_args->buffer_size; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - my_rank = bcol_module->super.sbgp_partner_module->my_index; - - - /* grab the control structs */ - ctl_structs = (mca_bcol_basesmuma_ctl_struct_t **) - bcol_module->colls_with_user_data.ctl_buffs+idx; - - /* grab the data buffs */ - data_buffs = (mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs+idx; - - my_ctl_pointer = ctl_structs[my_rank]; - - if(my_ctl_pointer->sequence_number < sequence_number) { - first_instance = 1; - } - - if(first_instance) { - my_ctl_pointer->flag = -1; - my_ctl_pointer->index = 1; - - my_ctl_pointer->starting_flag_value = 0; - - flag_offset = 0; - - } else { - - my_ctl_pointer->index++; - } - - /* increment the starting flag by one and return */ - flag_offset = my_ctl_pointer->starting_flag_value; - ready_flag = flag_offset + sequence_number + 1; - - my_ctl_pointer->sequence_number = sequence_number; - - /* am I the root */ - if(input_args->root_flag) { - /* if I've already been here, then - * hop down to the allgather - */ - if(ALLGATHER == my_ctl_pointer->status) { - goto Allgather; - } - BASESMUMA_VERBOSE(10,("I am the root of the data")); - /* debug print */ - /*fprintf(stderr,"I am the root %d\n",my_rank);*/ - /* - * signal ready flag - */ - /* set the offset into the buffer */ - my_ctl_pointer->offset = 0; - /* how many children do I have */ - 
my_ctl_pointer->n_sends = pow_2_levels; - /* my data length */ - my_ctl_pointer->length = fragment_size; - - /* important that these be set before my children - * see the ready flag raised - */ - opal_atomic_wmb (); - my_ctl_pointer->flag = ready_flag; - - /* root is finished */ - if( my_rank < pow_2 ) { - /* if I'm in the power of two group, - * then goto the allgather - */ - my_ctl_pointer->status = ALLGATHER; - goto Allgather; - - } else { - - /* if I'm not, then I'm done and release */ - goto Release; - } - - } - - /* what phase am I participating in - */ - switch(my_ctl_pointer->status) { - - case SCATTER: - goto Scatter; - break; - - case ALLGATHER: - goto Allgather; - break; - - case EXTRA_RANK: - goto Extra; - break; - - default: - break; - } - - - Extra: - /* am I part of the non-power-of-2 group */ - if( my_rank >= pow_2 ) { - /* find parent to copy from */ - parent_rank = my_rank&(pow_2-1); - parent_ctl_pointer = ctl_structs[parent_rank]; - /* start at the base */ - parent_data_pointer = (void *) data_buffs[parent_rank].ctl_struct; - - /* now, I need to do some arithmetic to - * arrive at the value everyone else does - * when they have completed the algorithm - */ - - /* compute ready flag value to poll on */ - ready_flag = ready_flag + pow_2_levels; - - /* start to poll */ - for( i = 0; i < cs->num_to_probe; i++) { - if(IS_LARGE_DATA_READY(parent_ctl_pointer, ready_flag, sequence_number)) { - /* copy the data and bail */ - memcpy(data_addr, (void *)parent_data_pointer, fragment_size); - goto Release; - } - /* - else { - opal_progress(); - } - */ - } - my_ctl_pointer->status = EXTRA_RANK; - - /* hop out and put me onto a progress queue */ - return BCOL_FN_NOT_STARTED; - } - - Scatter: - - /* on first entry, compute the list of possible sources */ - if( NULL == my_ctl_pointer->src_ptr ) { - my_ctl_pointer->src_ptr = (int *) malloc(sizeof(int)*(pow_2_levels+1)); - - for( i = 0; i < pow_2_levels; i++) { - my_ctl_pointer->src_ptr[i] = my_rank ^ (1<<i); - } - /* take care of the extra (non-power-of-two) rank, if I have one */ - if( my_rank < (group_size - pow_2) ) { - /* extra rank that may also source the data */ - my_ctl_pointer->src_ptr[i] = my_rank + pow_2; - } else { - /* no extra rank to worry about */ - my_ctl_pointer->src_ptr[i] = -1; - } - } - - /* If I am not the root, then poll on possible "senders'" control structs */ - for( i = 0; i < cs->num_to_probe && 0 == matched; i++) { - - /* Shared memory iprobe */ - BCOL_BASESMUMA_SM_LARGE_MSG_PROBE(my_ctl_pointer->src_ptr, pow_2_levels+1, - my_rank, matched, src); - } - - /* If not matched, then hop out and put me on progress list */ - if(0 == matched ) { - - BASESMUMA_VERBOSE(10,("Shared memory probe didn't find a match")); - - my_ctl_pointer->status = SCATTER; - return BCOL_FN_NOT_STARTED; - - } else if ( src >= pow_2 ){ - - /* If matched from an extra rank, then get the whole message from partner */ - memcpy((void *) data_addr, (void *) parent_data_pointer, - parent_ctl_pointer->length); - - /* now I am the pseudo-root in the power-of-two group */ - my_ctl_pointer->offset = 0; - my_ctl_pointer->length = parent_ctl_pointer->length; - my_ctl_pointer->n_sends = parent_ctl_pointer->n_sends; - - /* set the memory barrier */ - opal_atomic_wmb (); - - /* fire the ready flag */ - my_ctl_pointer->flag = ready_flag; - my_ctl_pointer->status = ALLGATHER; - /* go to the allgather */ - goto Allgather; - }
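The source-candidate list built above is the heart of the anyroot scheme: at scatter step i a rank can only have received data from the peer that differs from it in bit i, plus possibly one extra (non-power-of-two) partner in the last slot. A minimal stand-alone sketch of that enumeration, with a hypothetical helper name rather than the bcol API:

    #include <stdio.h>

    /* Enumerate the peers that may source data for `rank` in a group whose
     * largest power-of-two subset has `pow2_levels` levels (size 1 << pow2_levels).
     * Slot pow2_levels holds the optional extra partner, -1 if there is none. */
    void candidate_sources(int rank, int group_size, int pow2_levels, int *src)
    {
        int pow2 = 1 << pow2_levels;
        for (int i = 0; i < pow2_levels; i++) {
            src[i] = rank ^ (1 << i);   /* recursive-doubling partner at level i */
        }
        src[pow2_levels] = (rank < group_size - pow2) ? rank + pow2 : -1;
    }

    int main(void)
    {
        int src[4];
        candidate_sources(2 /* rank */, 11 /* group */, 3 /* levels */, src);
        for (int i = 0; i < 4; i++) printf("%d ", src[i]);  /* prints: 3 0 6 10 */
        return 0;
    }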
/* we need to see whether this is really - * who we are looking for - */ - for( i = 0; i < parent_ctl_pointer->n_sends; i++) { - /* debug print */ - /* - fprintf(stderr,"I am %d checking on a hit from %d with n_sends %d\n",my_rank,src,parent_ctl_pointer->n_sends); - fflush(stderr); - */ - /* end debug */ - if( my_rank == (src^(1<<i)) ) { - /* found the sender; record my send count for this subtree */ - my_ctl_pointer->n_sends = i; - - if ( i > 0) { - /* compute the size of the chunk to copy */ - length = (parent_ctl_pointer->length)/ - (1<<(parent_ctl_pointer->n_sends - my_ctl_pointer->n_sends)); - my_ctl_pointer->length = length; - my_ctl_pointer->offset = - parent_ctl_pointer->offset+length; - - /*fprintf(stderr,"%d's offset %d and length %d \n",my_rank,my_ctl_pointer->offset,length);*/ - - /* now we can copy the data */ - memcpy((void *) ((uint64_t) data_addr+my_ctl_pointer->offset), - (void *) ((uint64_t) parent_data_pointer+(uint64_t) parent_ctl_pointer->offset + - (uint64_t) length), - (size_t)length); - } else { - /* this "trick" takes care of the first level - * of recursive doubling - */ - length = parent_ctl_pointer->length/ - (1<<(parent_ctl_pointer->n_sends - 1)); - my_ctl_pointer->length = length; - my_ctl_pointer->offset = parent_ctl_pointer->offset; - - /*fprintf(stderr,"%d's offset %d and length %d\n",my_rank,my_ctl_pointer->offset,length);*/ - /* now we can copy the data */ - memcpy((void *) ((uint64_t) data_addr+my_ctl_pointer->offset), - (void *) ((uint64_t) parent_data_pointer+(uint64_t) my_ctl_pointer->offset), - (size_t)length); - } - /* set the memory barrier to ensure completion */ - opal_atomic_wmb (); - /* signal that I am done */ - my_ctl_pointer->flag = ready_flag; - /* set my status */ - my_ctl_pointer->status = ALLGATHER; - /* time for allgather phase */ - goto Allgather; - } - - } - - /* this is not who we are looking for, - * mark as false positive so we don't - * poll here again - */ - my_ctl_pointer->src_ptr[index] = -1; - /* probably we should jump out and put onto progress list */ - my_ctl_pointer->status = SCATTER; - return BCOL_FN_NOT_STARTED; - - Allgather: - - /* zip it back up - we have already taken care of first level */ - /* needed for non-blocking conditional */ - matched = 0; - - /* get my local_offset */ - local_offset = my_ctl_pointer->offset; - - /* bump the ready flag */ - ready_flag++; - - /* first level of zip up */ - length = 2*fragment_size/pow_2; - - /* first level of zip-up - * already includes first level of - * recursive doubling - */ - start = 1; - - /* for non-blocking, check to see if I need to reset the state */ - if(my_ctl_pointer->flag >= ready_flag) { - /* then reset the state */ - ready_flag = my_ctl_pointer->flag; - start = my_ctl_pointer->start; - /* get the local offset */ - local_offset = my_ctl_pointer->offset_zip; - /* compute the correct length */ - length = length*(1<<(start - 1));
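The save/restore block above is what makes the routine re-entrant: everything the allgather loop needs (the last flag fired, the next level, the zip offset) lives in the control structure, so a rank that bails out with BCOL_FN_STARTED can resume exactly where it stopped. A reduced sketch of the pattern, using a hypothetical state struct rather than the bcol types:

    #include <stdbool.h>

    /* Hypothetical per-buffer progress state, mirroring flag/start above. */
    struct nb_state {
        long flag;   /* last signal value fired       */
        int  start;  /* next recursive-doubling level */
    };

    /* Returns true when all `levels` are complete; false means "re-queue me". */
    bool zip_progress(struct nb_state *st, long ready_flag, int levels,
                      bool (*level_done)(int level))
    {
        int i = (st->flag >= ready_flag) ? st->start : 0;  /* restore on re-entry */
        for (; i < levels; i++) {
            if (!level_done(i)) {
                st->start = i;                   /* save progress and hop out */
                st->flag  = ready_flag + i;      /* levels finished so far    */
                return false;                    /* caller re-queues us       */
            }
        }
        return true;
    }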
/* careful! skip over the opal_atomic_wmb () to avoid the - * cost on every re-entry - */ - goto Loop; - } - - - opal_atomic_wmb (); - /* I am ready, set the flag */ - my_ctl_pointer->flag = ready_flag; - - Loop: - - for( i = start; i < pow_2_levels; i++) { - /* get my partner for this level */ - partner = my_rank^(1<<i); - partner_ctl_pointer = ctl_structs[partner]; - /* partner's data, starting at the base of its buffer */ - partner_data_pointer = (void *) data_buffs[partner].ctl_struct; - - /* probe for the partner's data */ - for( j = 0; j < cs->num_to_probe && matched == 0; j++) { - if(IS_LARGE_DATA_READY(partner_ctl_pointer, ready_flag, sequence_number)) { - - /* debug prints - fprintf(stderr,"666 I am %d and sequence num is %d partner is %d ready_flag %d parent ready_flag %d buff_idx %d partner_offset %d\n", - my_rank,sequence_number,partner, ready_flag,partner_ctl_pointer->flag,buff_idx,partner_ctl_pointer->offset); - */ - /* debug print */ -#if 0 - fprintf(stderr,"I am %d and sequence num is %d partner is %d ready_flag %d parent ready_flag %d buff_idx %d \n", - my_rank,sequence_number,partner, ready_flag,parent_ctl_pointer->flag,buff_idx); -#endif - /* end debug prints */ - - assert(partner_ctl_pointer->flag >= ready_flag); - /* found it */ - matched = 1; - /* only copy it, if you sit at a lower level in the tree */ - if( my_ctl_pointer->n_sends <= partner_ctl_pointer->n_sends ) { - - /* calculate the local offset based on partner's remote offset */ - if( partner_ctl_pointer->offset < my_ctl_pointer->offset ) { - /* then I'm looking "up" the tree */ - local_offset -= length; - /* debug print */ - /*fprintf(stderr,"I am %d and partner is %d partner offset %d length %d \n",my_rank,partner, local_offset,length);*/ - /* end debug */ - memcpy((void *) ((uint64_t) data_addr + (uint64_t) local_offset), - (void *) ((uint64_t) partner_data_pointer + (uint64_t) local_offset), - length); - } else { - /* I'm looking "down" the tree */ - local_offset += length; - /* debug print */ - /*fprintf(stderr,"I am %d and partner is %d partner offset %d length %d \n",my_rank,partner, local_offset,length);*/ - /* end debug */ - memcpy((void *) ((uint64_t) data_addr + (uint64_t) local_offset), - (void *) ((uint64_t) partner_data_pointer + (uint64_t) local_offset), - length); - /* reset my local offset */ - local_offset -= length; - } - - } - /* bump the ready flag */ - ready_flag++; - /* ensure completion */ - opal_atomic_wmb (); - - /* fire the flag for the next level */ - my_ctl_pointer->flag = ready_flag; - - /* double the length */ - length *= 2; - } - } - /* check to see what kind of progress I've made */ - if( 0 == matched ) { - /* save state, hop out and try again later */ - my_ctl_pointer->start = i; - /* save the local offset */ - my_ctl_pointer->offset_zip = local_offset; - /* put in progress queue */ - return BCOL_FN_STARTED; - } - /* else, start next level of recursive doubling */ - matched = 0; - - } - - - /* cleanup */ - if(NULL != my_ctl_pointer->src_ptr) { - free(my_ctl_pointer->src_ptr); - my_ctl_pointer->src_ptr = NULL; - } - - Release: - - - /* If I am the last instance, release the resource */ - /* - if( IS_LAST_BCOL_FUNC(c_input_args)) { - rc = bcol_basesmuma_free_buff( - &(bcol_module->colls_with_user_data), - sequence_number); - } - */ - - my_ctl_pointer->starting_flag_value++; - my_ctl_pointer->status = FINISHED; - return BCOL_FN_COMPLETE; - -} -#endif - -#if 0 -int mca_bcol_basesmuma_bcast_binomial_scatter_allgather(void *desc) -{ - /* local variables */ - int rc, n_frags_sent; - uint32_t stripe_number; - int count, count_processed; - size_t dt_size; - uint32_t n_data_segments_to_schedule; - ompi_datatype_t *dtype; - message_descriptor_t *message_descriptor; - mca_bcol_basesmuma_module_t *bcol_module; - int pipe_depth; - -
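The skeleton that follows only outlines the pipelined variant, so the fragment accounting never made it in. The arithmetic the outline alludes to is a ceiling division of the packed message into ML-buffer-sized stripes; a hedged sketch, with an illustrative helper name:

    #include <stddef.h>

    /* Number of fragments needed to move `count` elements of size `dt_size`
     * through buffers of `frag_size` bytes (classic ceiling division). */
    size_t n_fragments(size_t count, size_t dt_size, size_t frag_size)
    {
        size_t bytes = count * dt_size;
        return (bytes + frag_size - 1) / frag_size;
    }
    /* e.g. 1000 doubles through 4096-byte fragments: (8000 + 4095) / 4096 = 2 */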
/* get the full message descriptor */ - - - /* compute the number of fragments to send */ - - - /* start to fill the pipeline */ - - - return OMPI_SUCCESS; - - - - -} -#endif diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_buf_mgmt.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_buf_mgmt.c deleted file mode 100644 index 86a2811b00..0000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_buf_mgmt.c +++ /dev/null @@ -1,486 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#include "ompi_config.h" -#include "ompi/constants.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/bcol/base/base.h" -#include "ompi/patterns/comm/coll_ops.h" - -#include "opal/dss/dss.h" - -#include "bcol_basesmuma.h" -/* - * With support for nonblocking collectives, we don't have an upper - * limit on the number of outstanding collectives per communicator. - * Also, since we want to avoid communication to figure out which - * buffers other ranks in the group will use, we will rely on the - * fact that collective operations are called in the same order - * in each process, to assign a unique ID to each collective operation. - * We use this to create a static mapping from the index to the buffer - * that will be used. Also, because there is no limit to the number of - * outstanding collective operations, we use a generation index for each - * memory bank, so the collective will use the buffer only when the - * correct generation of the bank is ready for use. - */ -int bcol_basesmuma_get_buff_index( sm_buffer_mgmt *buff_block, - uint64_t buff_id ) -{ - /* local variables */ - int memory_bank; - uint64_t generation; - int index = -1; - - - /* get the bank index that will be used */ - memory_bank = buff_id & buff_block->mask; - memory_bank = memory_bank SHIFT_DOWN buff_block->log2_num_buffs_per_mem_bank; - - /* get the generation of the bank this maps to */ - generation = buff_id SHIFT_DOWN (buff_block->log2_number_of_buffs); - 
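Concretely, with 16 buffers per bank (log2_num_buffs_per_mem_bank = 4) and 64 buffers total (log2_number_of_buffs = 6, i.e. 4 banks), a collective's sequence number maps to a bank and a generation exactly as the routine computes with its SHIFT_DOWN macro; plain C stand-ins and illustrative geometry below:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        const uint64_t mask = 63;      /* 2^log2_number_of_buffs - 1          */
        const int log2_per_bank = 4;   /* 16 buffers per bank                 */
        for (uint64_t buff_id = 60; buff_id < 68; buff_id++) {
            int      bank       = (int)((buff_id & mask) >> log2_per_bank);
            uint64_t generation = buff_id >> 6;
            printf("id %2llu -> bank %d gen %llu\n",
                   (unsigned long long) buff_id, bank,
                   (unsigned long long) generation);
        }
        /* ids 60..63 land in bank 3 of generation 0; 64..67 wrap to bank 0
         * of generation 1, which is usable only once the bank is recycled. */
        return 0;
    }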
/* check to see if the bank is available */ - if( generation == buff_block->ctl_buffs_mgmt[memory_bank].bank_gen_counter ) { - - /* get the buffer index that will be returned */ - index = buff_id & buff_block->mask; - - /* no in-use counter increment, as the mapping is static, and - * all we need to know is the number of collectives that have completed */ - - } else { - /* progress communications so that resources can be freed up */ - opal_progress(); - } - - /* return */ - return index; -} - -/* release the shared memory buffers - * buf_id is the unique ID assigned to the particular buffer - */ -int bcol_basesmuma_free_buff( sm_buffer_mgmt * buff_block, - uint64_t buff_id ) -{ - /* local variables */ - int ret = OMPI_SUCCESS; - int memory_bank; - uint64_t generation; - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - - /* get the bank index that will be used */ - memory_bank = buff_id & buff_block->mask; - memory_bank = memory_bank SHIFT_DOWN buff_block->log2_num_buffs_per_mem_bank; - - /* get the generation of the bank this maps to */ - generation = buff_id SHIFT_DOWN (buff_block->log2_number_of_buffs); - - /* the generation counter should not change until all resources - * associated with this bank have been freed. - */ - assert(generation == buff_block->ctl_buffs_mgmt[memory_bank].bank_gen_counter); - (void)generation; // silence compiler warning - - /* - * increment counter of completed buffers - */ - OPAL_THREAD_ADD32(&(buff_block->ctl_buffs_mgmt[memory_bank].n_buffs_freed), - 1); - - /* - * If I am the last to check in - initiate resource recycling - */ - if( buff_block->ctl_buffs_mgmt[memory_bank].n_buffs_freed == - buff_block->ctl_buffs_mgmt[memory_bank].number_of_buffers ) { - - /* Lock to ensure atomic recycling of resources */ - OPAL_THREAD_LOCK(&(buff_block->ctl_buffs_mgmt[memory_bank].mutex)); - - /* make sure someone else did not already get to this */ - if( buff_block->ctl_buffs_mgmt[memory_bank].n_buffs_freed != - buff_block->ctl_buffs_mgmt[memory_bank].number_of_buffers ) { - /* release lock and exit */ - OPAL_THREAD_UNLOCK(&(buff_block->ctl_buffs_mgmt[memory_bank].mutex)); - } else { - sm_nbbar_desc_t *p_sm_nb_desc = NULL; - /* initiate the freeing of resources. Need to make sure the other - * ranks in the group are also done with their resources before this - * block is made available for use again. - * No one else will try to allocate from this block or free back to - * this block until the next generation counter has been incremented, - * so we just reset the number of freed buffers to 0, so no one else - * will try to also initialize the recycling of these resources - */ - buff_block->ctl_buffs_mgmt[memory_bank].n_buffs_freed = 0; - - /* Start the nonblocking barrier */ - p_sm_nb_desc = &(buff_block->ctl_buffs_mgmt[memory_bank].nb_barrier_desc); - p_sm_nb_desc->coll_buff = buff_block; - bcol_basesmuma_rd_nb_barrier_init_admin(p_sm_nb_desc); - - if( NB_BARRIER_DONE != - buff_block->ctl_buffs_mgmt[memory_bank]. 
- nb_barrier_desc.collective_phase) { - - opal_list_t *list=&(cs->nb_admin_barriers); - opal_list_item_t *append_item; - - /* put this onto the progression list */ - OPAL_THREAD_LOCK(&(cs->nb_admin_barriers_mutex)); - append_item=(opal_list_item_t *) - &(buff_block->ctl_buffs_mgmt[memory_bank].nb_barrier_desc); - opal_list_append(list,append_item); - OPAL_THREAD_UNLOCK(&(cs->nb_admin_barriers_mutex)); - /* progress communications so that resources can be freed up */ - opal_progress(); - } else { - /* mark the block as available */ - (buff_block->ctl_buffs_mgmt[memory_bank].bank_gen_counter)++; - } - - /* get out of here */ - OPAL_THREAD_UNLOCK(&(buff_block->ctl_buffs_mgmt[memory_bank].mutex)); - } - - } - - /* return */ - return ret; -} - -/* - * Allocate buffers for storing non-blocking collective descriptions, required - * for making code re-entrant - * - */ -static int init_nb_coll_buff_desc(mca_bcol_basesmuma_nb_coll_buff_desc_t **desc, - void *base_addr, uint32_t num_banks, - uint32_t num_buffers_per_bank, - uint32_t size_buffer, - uint32_t header_size, - int group_size, - int pow_k) -{ - uint32_t i, j, ci; - mca_bcol_basesmuma_nb_coll_buff_desc_t *tmp_desc = NULL; - int k_nomial_radix = mca_bcol_basesmuma_component.k_nomial_radix; - int pow_k_val = (0 == pow_k) ? 1 : pow_k; - int num_to_alloc = (k_nomial_radix - 1) * pow_k_val * 2 + 1 ; - - - *desc = (mca_bcol_basesmuma_nb_coll_buff_desc_t *)calloc(num_banks * num_buffers_per_bank, sizeof(mca_bcol_basesmuma_nb_coll_buff_desc_t)); - if (NULL == *desc) { - return OMPI_ERROR; - } - - tmp_desc = *desc; - - for (i = 0; i < num_banks; i++) { - for (j = 0; j < num_buffers_per_bank; j++) { - ci = i * num_buffers_per_bank + j; - tmp_desc[ci].bank_index = i; - tmp_desc[ci].buffer_index = j; - /* *2 is for gather session +1 for extra peer */ - tmp_desc[ci].requests = (ompi_request_t **) - calloc(num_to_alloc, sizeof(ompi_request_t *)); - tmp_desc[ci].data_addr = (void *) - ((unsigned char*)base_addr + ci * size_buffer + header_size); - BASESMUMA_VERBOSE(10, ("ml memory cache setup %d %d - %p", i, j, tmp_desc[ci].data_addr)); - } - } - - return OMPI_SUCCESS; -} - - -/* - * Free buffers for storing non-blocking collective descriptions. 
- * - */ -void cleanup_nb_coll_buff_desc(mca_bcol_basesmuma_nb_coll_buff_desc_t **desc, - uint32_t num_banks, - uint32_t num_buffers_per_bank) -{ - uint32_t ci; - if (NULL != *desc) { - for (ci = 0; ci < num_banks * num_buffers_per_bank; ci++) { - if (NULL != (*desc)[ci].requests) { - free((*desc)[ci].requests); - } - } - free(*desc); - *desc = NULL; - } -} - -#if 0 -/* initialize the pre-registered payload buffers - the control structure - * is cached at the head of each buffer, data_offset bytes in - */ -int bcol_basesmuma_bank_init_opti(struct mca_bcol_base_memory_block_desc_t *payload_block, - uint32_t data_offset, - mca_bcol_base_module_t *bcol_module, - void *reg_data) -{ - /* local variables */ - int ret = OMPI_SUCCESS, i, j, my_idx, array_id; - int leading_dim, loop_limit, buf_id; - size_t malloc_size; - unsigned char *base_ptr; - void **results_array = NULL, *mem_offset; - mca_bcol_basesmuma_header_t *ctl_ptr; - bcol_basesmuma_smcm_file_t input_file; - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - mca_bcol_basesmuma_module_t *sm_bcol_module = (mca_bcol_basesmuma_module_t *) bcol_module; - mca_bcol_basesmuma_module_t *sm_bcol = sm_bcol_module; - struct mca_bcol_base_memory_block_desc_t *ml_block = payload_block; - bcol_basesmuma_registration_data_t *sm_reg_data = (bcol_basesmuma_registration_data_t *) reg_data; - sm_buffer_mgmt *pload_mgmt; - mca_bcol_basesmuma_local_mlmem_desc_t *ml_mem = &sm_bcol_module->ml_mem; - - /* first, we get a pointer to the payload buffer management struct */ - pload_mgmt = &(sm_bcol->colls_with_user_data); - - /* go ahead and get the header size that is cached on the payload block - */ - sm_bcol->total_header_size = data_offset; - - /* allocate memory for pointers to mine and my peers' payload buffers - * difference here is that now we use our new data struct - */ - malloc_size = ml_block->num_banks*ml_block->num_buffers_per_bank* - pload_mgmt->size_of_group *sizeof(mca_bcol_basesmuma_payload_t); - pload_mgmt->data_buffs = (mca_bcol_basesmuma_payload_t *) malloc(malloc_size); - if( !pload_mgmt->data_buffs) { - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - - /* allocate some memory to hold the offsets */ - results_array = (void **) malloc(pload_mgmt->size_of_group * sizeof (void *)); - if (NULL == results_array) { - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - - /* setup the input file for the shared memory connection manager */ - input_file.file_name = sm_reg_data->file_name; - input_file.size = sm_reg_data->size; - input_file.size_ctl_structure = 0; - input_file.data_seg_alignment = BASESMUMA_CACHE_LINE_SIZE; - input_file.mpool_size = sm_reg_data->size; - - /* call the connection manager and map my shared memory peers' file - */ - ret = bcol_basesmuma_smcm_allgather_connection( - sm_bcol, - sm_bcol->super.sbgp_partner_module, - &(cs->sm_connections_list), - &(sm_bcol->payload_backing_files_info), - sm_bcol->super.sbgp_partner_module->group_comm, - input_file, cs->payload_base_fname, - false); - if( OMPI_SUCCESS != ret ) { - goto exit_ERROR; - } - - - /* now we exchange offset info - don't assume symmetric virtual memory - */ - - mem_offset = (void *) ((uintptr_t) ml_block->block->base_addr - - (uintptr_t) cs->sm_payload_structs->data_addr); - - /* call into the exchange offsets function */ - ret=comm_allgather_pml(&mem_offset, results_array, sizeof (void *), MPI_BYTE, - sm_bcol_module->super.sbgp_partner_module->my_index, - sm_bcol_module->super.sbgp_partner_module->group_size, - sm_bcol_module->super.sbgp_partner_module->group_list, - sm_bcol_module->super.sbgp_partner_module->group_comm); - if( OMPI_SUCCESS != ret ) { - goto exit_ERROR; - } - - /* convert memory offset to virtual address in current rank */ - leading_dim = pload_mgmt->size_of_group; - loop_limit = ml_block->num_banks*ml_block->num_buffers_per_bank; - for (i=0; i < sm_bcol_module->super.sbgp_partner_module->group_size; i++) { - - /* get the base pointer */ - int array_id=SM_ARRAY_INDEX(leading_dim,0,i); - if( i == sm_bcol_module->super.sbgp_partner_module->my_index) { - /* me */ - base_ptr=cs->sm_payload_structs->map_addr; - } else { - base_ptr=sm_bcol_module->payload_backing_files_info[i]-> - sm_mmap->map_addr; - } - - /* first, set the pointer to the control struct */ - pload_mgmt->data_buffs[array_id].ctl_struct=(mca_bcol_basesmuma_header_t *) - (uintptr_t)(((uint64_t)(uintptr_t)results_array[array_id])+(uint64_t)(uintptr_t)base_ptr); - /* second, calculate where to set the data pointer */ - pload_mgmt->data_buffs[array_id].payload=(void *) - (uintptr_t)((uint64_t)(uintptr_t) pload_mgmt->data_buffs[array_id].ctl_struct + - (uint64_t)(uintptr_t) data_offset); - - for( buf_id = 1 ; buf_id < loop_limit ; buf_id++ ) { - int array_id_m1=SM_ARRAY_INDEX(leading_dim,(buf_id-1),i); - 
array_id=SM_ARRAY_INDEX(leading_dim,buf_id,i); - /* now, play the same game as above - * - * first, set the control struct's position */ - pload_mgmt->data_buffs[array_id].ctl_struct=(mca_bcol_basesmuma_header_t *) - (uintptr_t)(((uint64_t)(uintptr_t)(pload_mgmt->data_buffs[array_id_m1].ctl_struct) + - (uint64_t)(uintptr_t)ml_block->size_buffer)); - - /* second, set the payload pointer */ - pload_mgmt->data_buffs[array_id].payload =(void *) - (uintptr_t)((uint64_t)(uintptr_t) pload_mgmt->data_buffs[array_id].ctl_struct + - (uint64_t)(uintptr_t) data_offset); - } - - } - - /* done with the index array */ - free (results_array); - results_array = NULL; - - /* initialize my control structures!! */ - my_idx = sm_bcol_module->super.sbgp_partner_module->my_index; - leading_dim = sm_bcol_module->super.sbgp_partner_module->group_size; - for( buf_id = 0; buf_id < loop_limit; buf_id++){ - array_id = SM_ARRAY_INDEX(leading_dim,buf_id,my_idx); - ctl_ptr = pload_mgmt->data_buffs[array_id].ctl_struct; - - /* initialize the data structures */ - for( j = 0; j < SM_BCOLS_MAX; j++){ - for( i = 0; i < NUM_SIGNAL_FLAGS; i++){ - ctl_ptr->flags[i][j] = -1; - } - } - ctl_ptr->sequence_number = -1; - ctl_ptr->src = -1; - } - - - - - /* setup the data structures needed for releasing the payload - * buffers back to the ml level - */ - for( i=0 ; i < (int) ml_block->num_banks ; i++ ) { - sm_bcol->colls_with_user_data. - ctl_buffs_mgmt[i].nb_barrier_desc.ml_memory_block_descriptor= - ml_block; - } - - ml_mem->num_banks = ml_block->num_banks; - ml_mem->bank_release_counter = calloc(ml_block->num_banks, sizeof(uint32_t)); - ml_mem->num_buffers_per_bank = ml_block->num_buffers_per_bank; - ml_mem->size_buffer = ml_block->size_buffer; - /* pointer to ml level descriptor */ - ml_mem->ml_mem_desc = ml_block; - - if (OMPI_SUCCESS != init_nb_coll_buff_desc(&ml_mem->nb_coll_desc, - ml_block->block->base_addr, - ml_mem->num_banks, - ml_mem->num_buffers_per_bank, - ml_mem->size_buffer, - data_offset, - sm_bcol_module->super.sbgp_partner_module->group_size, - sm_bcol_module->pow_k)) { - - BASESMUMA_VERBOSE(10, ("Failed to allocate memory descriptors for storing state of non-blocking collectives\n")); - return OMPI_ERROR; - } - - return OMPI_SUCCESS; - -exit_ERROR: - if (NULL != results_array) { - free(results_array); - } - return ret; -} - -#endif - - - -/* Basesmuma interface function used for buffer release */ -#if 0 -/* gvm - * A collective operation calls this routine to release the payload buffer. - * All processes in the shared memory sub-group of a bcol should call the non-blocking - * barrier on the last payload buffer of a memory bank. On the completion - * of the non-blocking barrier, the ML callback is called which is responsible - * for recycling the memory bank. - */ -mca_bcol_basesmuma_module_t *sm_bcol_module -int bcol_basesmuma_free_payload_buff( - struct mca_bcol_base_memory_block_desc_t *block, - sm_buffer_mgmt *ctl_mgmt, - uint64_t buff_id) -{ - /* local variables */ - int ret = OMPI_SUCCESS; - - memory_bank = BANK_FROM_BUFFER_IDX(buff_id); - ctl_mgmt->ctl_buffs_mgmt[memory_bank].n_buffs_freed++; - - OPAL_THREAD_ADD32(&(ctl_mgmt->ctl_buffs_mgmt[memory_bank].n_buffs_freed),1); - - if (ctl_mgmt->ctl_buffs_mgmt[memory_bank].n_buffs_freed == block->size_buffers_bank){ - - /* start non-blocking barrier */ - bcol_basesmuma_rd_nb_barrier_init_admin( - &(ctl_mgmt->ctl_buffs_mgmt[memory_bank].nb_barrier_desc)); - - if (NB_BARRIER_DONE != - ctl_mgmt->ctl_buffs_mgmt[memory_bank]. 
- nb_barrier_desc.collective_phase){ - - /* progress the barrier */ - opal_progress(); - } - else{ - /* free the buffer - i.e. initiate callback to ml level */ - block->ml_release_cb(block,memory_bank); - } - } - return ret; -} -#endif diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_component.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_component.c deleted file mode 100644 index 787188522a..0000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_component.c +++ /dev/null @@ -1,380 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#include "ompi_config.h" - -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "opal/mca/mpool/base/base.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/bcol/base/base.h" -#include "opal/align.h" -#include "bcol_basesmuma.h" - -/* - * Public string showing the coll ompi_sm V2 component version number - */ -const char *mca_bcol_basesmuma_component_version_string = - "Open MPI bcol - basesmuma collective MCA component version " OMPI_VERSION; - -/* - * Local functions - */ - -static int basesmuma_register(void); -static int basesmuma_open(void); -static int basesmuma_close(void); -static int mca_bcol_basesmuma_deregister_ctl_sm( - mca_bcol_basesmuma_component_t *bcol_component); - - -static inline int mca_bcol_basesmuma_param_register_int( - const char* param_name, int default_value, int *storage) -{ - *storage = default_value; - return mca_base_component_var_register(&mca_bcol_basesmuma_component.super.bcol_version, param_name, - NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); -} - -static inline int mca_bcol_basesmuma_param_register_bool( - const char* param_name, bool default_value, bool *storage) -{ - *storage = default_value; - return mca_base_component_var_register(&mca_bcol_basesmuma_component.super.bcol_version, param_name, - NULL, MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); -} - -/* - * Instantiate the public struct with all of our public information - * and pointers to our public functions in it - */ - -mca_bcol_basesmuma_component_t mca_bcol_basesmuma_component = { - - /* First, fill in the super */ - - { - /* First, the mca_component_t struct containing meta - information about the component itself */ - - .bcol_version = { - MCA_BCOL_BASE_VERSION_2_0_0, - - /* Component name and version */ - - .mca_component_name = "basesmuma", - MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION), - - /* Component open and close functions */ - - .mca_open_component = basesmuma_open, - .mca_close_component = basesmuma_close, - .mca_register_component_params = basesmuma_register, - }, - - /* Initialization / querying functions */ - - .collm_init_query = mca_bcol_basesmuma_init_query, - .collm_comm_query = mca_bcol_basesmuma_comm_query, - .init_done = false, - .need_ordering = false, - .priority = 0, /* (default) priority */ - }, -}; - -/* - * Register the component - */ -static int basesmuma_register(void) -{ - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - - /* set component priority */ - 
mca_bcol_basesmuma_param_register_int("priority", 90, &cs->super.priority); - - /* Number of memory banks */ - mca_bcol_basesmuma_param_register_int("basesmuma_num_ctl_banks", 2, - &cs->basesmuma_num_mem_banks); - - /* Number of regions per memory bank */ - mca_bcol_basesmuma_param_register_int("basesmuma_num_buffs_per_bank", 16, - &cs->basesmuma_num_regions_per_bank); - - /* number of polling loops to allow pending resources to - * complete their work - */ - mca_bcol_basesmuma_param_register_int("n_poll_loops", 4, &cs->n_poll_loops); - - - /* Number of groups supported */ - mca_bcol_basesmuma_param_register_int("n_groups_supported", 100, - &cs->n_groups_supported); - - /* order of fanin tree */ - mca_bcol_basesmuma_param_register_int("radix_fanin", 2, &cs->radix_fanin); - - /* order of fanout tree */ - mca_bcol_basesmuma_param_register_int("radix_fanout", 2, &cs->radix_fanout); - - /* order of read tree */ - mca_bcol_basesmuma_param_register_int("radix_read_tree", 3, - &cs->radix_read_tree); - - /* order of reduction fanout tree */ - mca_bcol_basesmuma_param_register_int("order_reduction_tree", 2, - &cs->order_reduction_tree); - - /* k-nomial radix */ - mca_bcol_basesmuma_param_register_int("k_nomial_radix", 3, &cs->k_nomial_radix); - - /* number of polling loops for non-blocking algorithms */ - mca_bcol_basesmuma_param_register_int("num_to_probe", 10, &cs->num_to_probe); - - /* radix of the k-ary scatter tree */ - mca_bcol_basesmuma_param_register_int("scatter_kary_radix", 4, - &cs->scatter_kary_radix); - - /* register parameters controlling message fragmentation */ - mca_bcol_basesmuma_param_register_int("min_frag_size", getpagesize(), - &cs->super.min_frag_size); - mca_bcol_basesmuma_param_register_int("max_frag_size", FRAG_SIZE_NO_LIMIT, - &cs->super.max_frag_size); - - /* by default use pre-registered shared memory segments */ - /* RLG NOTE: When we have a systematic way to handle single memory - * copy semantics, we need to update this logic - */ - mca_bcol_basesmuma_param_register_bool("can_use_user_buffers", false, - &cs->super.can_use_user_buffers); - - mca_bcol_basesmuma_param_register_int("verbose", 0, &cs->verbose); - - return OMPI_SUCCESS; -} - -/* - * Open the component - */ -static int basesmuma_open(void) -{ - - /* local variables */ - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - int ret = OMPI_SUCCESS; - opal_mutex_t *mutex_ptr; - int dummy; - - /* - * Make sure that the number of banks is a power of 2 - */ - cs->basesmuma_num_mem_banks= - ompi_roundup_to_power_radix(2,cs->basesmuma_num_mem_banks, &dummy); - if ( 0 == cs->basesmuma_num_mem_banks ) { - ret=OMPI_ERROR; - goto exit_ERROR; - } - - /* - * Make sure that the number of buffers is a power of 2 - */ - cs->basesmuma_num_regions_per_bank= - ompi_roundup_to_power_radix(2,cs->basesmuma_num_regions_per_bank, &dummy); - if ( 0 == cs->basesmuma_num_regions_per_bank ) { - ret=OMPI_ERROR; - goto exit_ERROR; - } - - /* Portals initialization */ - cs->portals_init = false; - cs->portals_info = NULL; - - /* - * initialization - */ - cs->sm_ctl_structs=NULL; - OBJ_CONSTRUCT(&(cs->sm_connections_list),opal_list_t); - OBJ_CONSTRUCT(&(cs->nb_admin_barriers),opal_list_t); - mutex_ptr= &(cs->nb_admin_barriers_mutex); - OBJ_CONSTRUCT(mutex_ptr, opal_mutex_t); - - /* Control structures object construct - */ - OBJ_CONSTRUCT(&(cs->ctl_structures), opal_list_t); - - /* shared memory has not been registered yet */ - cs->mpool_inited = false; - - /* initialize base file names */ - 
cs->clt_base_fname="sm_ctl_mem_"; - cs->payload_base_fname="sm_payload_mem_"; - - /* initialize the size of the shared memory scratch region */ - cs->my_scratch_shared_memory_size=getpagesize(); - cs->my_scratch_shared_memory=NULL; - cs->scratch_offset_from_base_ctl_file=0; - - /* - * register the progress function - */ - ret=opal_progress_register(bcol_basesmuma_progress); - if (MPI_SUCCESS != ret) { - opal_output(ompi_bcol_base_framework.framework_output, "failed to register the progress function"); - } - - return ret; - - exit_ERROR: - return ret; -} - -/* - * release the control structure backing file - */ -static int mca_bcol_basesmuma_deregister_ctl_sm(mca_bcol_basesmuma_component_t *bcol_component) -{ - if (NULL != bcol_component->sm_ctl_structs) { - OBJ_RELEASE(bcol_component->sm_ctl_structs); - } - - return OMPI_SUCCESS; -} - - -/* - * Close the component - */ -static int basesmuma_close(void) -{ - int ret; - bcol_basesmuma_registration_data_t *net_ctx; - bcol_base_network_context_t *net_reg; - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - - /* gvm Leak FIX */ - OPAL_LIST_DESTRUCT (&cs->ctl_structures); - - /* deregister the progress function */ - ret=opal_progress_unregister(bcol_basesmuma_progress); - if (MPI_SUCCESS != ret) { - opal_output(ompi_bcol_base_framework.framework_output, "failed to unregister the progress function"); - } - - /* remove the control structure backing file */ - ret=mca_bcol_basesmuma_deregister_ctl_sm(&mca_bcol_basesmuma_component); - if (MPI_SUCCESS != ret) { - opal_output(ompi_bcol_base_framework.framework_output, "failed to remove control structure backing file"); - } - - /* remove the network contexts - only one network context defined for - * this component. - */ - /* file_name returned by asprintf, so need to free the resource */ - if(mca_bcol_basesmuma_component.super.network_contexts ) { - net_reg=(bcol_base_network_context_t *) - mca_bcol_basesmuma_component.super.network_contexts[0]; - if(net_reg) { - net_ctx=(bcol_basesmuma_registration_data_t *)net_reg->context_data; - if( net_ctx) { - if(net_ctx->file_name) { - free(net_ctx->file_name); - } - free(net_ctx); - } - free(net_reg); - } - free(mca_bcol_basesmuma_component.super.network_contexts); - mca_bcol_basesmuma_component.super.network_contexts=NULL; - } - - /* normal return */ - return OMPI_SUCCESS; -} - -/* query to see if the component is available for use, and can - * satisfy the thread and progress requirements - */ -int mca_bcol_basesmuma_init_query(bool enable_progress_threads, - bool enable_mpi_threads) -{ - /* done */ - return OMPI_SUCCESS; -} - 
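For the allocation routine that follows, the segment size falls out mechanically from the component defaults registered above (2 banks, 16 regions per bank, 100 groups). A sketch of the same computation with stand-in numbers; the 128-byte ctl struct size and 4 KiB page are assumed purely for illustration:

    #include <stddef.h>
    #include <stdio.h>

    int main(void)
    {
        size_t n_banks = 2, n_regions = 16, n_groups = 100;
        size_t ctl_struct_size = 128;            /* illustrative only      */
        size_t page = 4096, scratch = page;      /* one page of scratch    */

        /* (banks*regions + banks) ctl structs per group, doubled because
         * each group gets two flavors of banks (with and without user
         * payload), plus the library-internal scratch region */
        size_t len = (n_banks * n_regions + n_banks) * ctl_struct_size * n_groups;
        len *= 2;
        len += scratch;
        len = (len + page - 1) & ~(page - 1);    /* round up to page size  */

        printf("ctl segment: %zu bytes\n", len); /* prints: 876544         */
        return 0;
    }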
/* This routine is used to allocate shared memory for the shared - * memory control regions. - */ -int mca_bcol_basesmuma_allocate_sm_ctl_memory(mca_bcol_basesmuma_component_t *cs) -{ - /* local variables */ - int name_length, ret = OMPI_SUCCESS; - size_t ctl_length; - char *name; - size_t page_size = getpagesize (); - - /* set the file name */ - name_length=asprintf(&name, - "%s"OPAL_PATH_SEP"%s""%0d", - ompi_process_info.job_session_dir, - cs->clt_base_fname, - (int)getpid()); - if( 0 > name_length ) { - return OMPI_ERROR; - } - /* make sure name is not too long */ - if ( OPAL_PATH_MAX < (name_length-1) ) { - free (name); - return OMPI_ERROR; - } - - /* compute segment length */ - - ctl_length=(cs->basesmuma_num_mem_banks* - cs->basesmuma_num_regions_per_bank+cs->basesmuma_num_mem_banks) - *sizeof(mca_bcol_basesmuma_ctl_struct_t)*cs->n_groups_supported; - /* need two banks of memory per group - for algorithms that have - * user payload, and those that don't - */ - ctl_length*=2; - - /* add space for internal library management purposes */ - ctl_length+=cs->my_scratch_shared_memory_size; - - /* round up to multiple of page size */ - ctl_length = OPAL_ALIGN(ctl_length, page_size, size_t); - - /* allocate the shared file */ - cs->sm_ctl_structs=bcol_basesmuma_smcm_mem_reg (NULL, ctl_length, getpagesize(), name); - if( !cs->sm_ctl_structs) { - opal_output (ompi_bcol_base_framework.framework_output, - "In mca_bcol_basesmuma_allocate_sm_ctl_memory failed to allocate the backing file %s\n", name); - ret = OMPI_ERR_OUT_OF_RESOURCE; - } - - /* free the memory allocated by asprintf for the file name - - * in mca_base_smcm_mem_reg this name is copied into a new - * memory location */ - free (name); - - /* successful return */ - return ret; -} diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_fanin.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_fanin.c deleted file mode 100644 index 670b9af94c..0000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_fanin.c +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/* Recursive doubling blocking barrier */ - -#include "ompi_config.h" -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/patterns/net/netpatterns.h" - -#include "opal/sys/atomic.h" - -#include "ompi/mca/bcol/base/base.h" -#include "bcol_basesmuma.h" - -/********************************************************************************/ -/********************************** New Fan-In **********************************/ -/********************************************************************************/ - -static int bcol_basesmuma_fanin_new(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - /* local variables */ - int64_t sequence_number; - - mca_bcol_basesmuma_module_t* bcol_module = - (mca_bcol_basesmuma_module_t *) c_input_args->bcol_module; - - int i, child_rank, idx, n_children, probe, - my_rank = bcol_module->super.sbgp_partner_module->my_index, - leading_dim = bcol_module->colls_no_user_data.size_of_group; - int8_t ready_flag; - int8_t bcol_id = (int8_t) bcol_module->super.bcol_id; - int buff_index = input_args->buffer_index; - int *active_requests = - &(bcol_module->ml_mem.nb_coll_desc[buff_index].active_requests); - mca_bcol_basesmuma_component_t *cm = &mca_bcol_basesmuma_component; - int matched = 0; - - - volatile mca_bcol_basesmuma_payload_t *ctl_structs; - - /* control structures */ - volatile mca_bcol_basesmuma_header_t *my_ctl; - volatile mca_bcol_basesmuma_header_t *child_ctl; - - - netpatterns_tree_node_t *my_tree_node = &(bcol_module->fanin_node); - - /* Figure out - what instance of the basesmuma bcol I am */ - sequence_number = input_args->sequence_num; - - idx = SM_ARRAY_INDEX(leading_dim, buff_index, 0); - ctl_structs = (volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs + idx; - my_ctl = ctl_structs[my_rank].ctl_struct; - - /* Init the header */ - BASESMUMA_HEADER_INIT(my_ctl, ready_flag, sequence_number, bcol_id); - - /* Cache num of children value in a local variable */ - n_children = my_tree_node->n_children; - - /* initialize the active requests */ - *active_requests = 0; - /* create a bit map for children */ - for( i = 0; i < n_children; i++){ - *active_requests ^= (1<<i); - } - - /* Wait until my children arrive */ - for (i = 0; i < n_children; ++i) { - matched = 0; - child_rank = my_tree_node->children_ranks[i]; - child_ctl = ctl_structs[child_rank].ctl_struct; - /* I'm sacrificing cache for concurrency */ - for( probe = 0; probe < cm->num_to_probe && (0 == matched); probe++){ - if(IS_PEER_READY(child_ctl, ready_flag, sequence_number, BARRIER_FANIN_FLAG, bcol_id)) { - matched = 1; - /* flip the bit */ - *active_requests ^= (1<<i); - } - } - } - - if (0 == *active_requests) { - if (ROOT_NODE != my_tree_node->my_node_type){ - /* I have no more active requests, - signal my parent */ - my_ctl->flags[BARRIER_FANIN_FLAG][bcol_id] = ready_flag; - } - } else { - return BCOL_FN_STARTED; - } - - my_ctl->starting_flag_value[bcol_id]++; - - return BCOL_FN_COMPLETE; -} - 
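The XOR bookkeeping above is just a bitmap: bit i is raised for every child at setup and cleared as child i checks in, so completion reduces to a compare against zero. A stand-alone sketch of the same idiom, outside the bcol machinery:

    #include <stdio.h>

    int main(void)
    {
        int n_children = 3, active = 0;

        /* raise one bit per child: 0b111 */
        for (int i = 0; i < n_children; i++) active ^= 1 << i;

        /* children 2 and 0 arrive, in any order */
        active ^= 1 << 2;
        active ^= 1 << 0;
        printf("waiting on bitmap 0x%x\n", active);  /* 0x2: child 1 outstanding */

        active ^= 1 << 1;
        printf("done: %s\n", 0 == active ? "yes" : "no");
        return 0;
    }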
static int bcol_basesmuma_fanin_new_progress(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - /* local variables */ - int64_t sequence_number; - - mca_bcol_basesmuma_module_t* bcol_module = - (mca_bcol_basesmuma_module_t *) c_input_args->bcol_module; - - int i, child_rank, flag_offset, idx, n_children, probe, - my_rank = bcol_module->super.sbgp_partner_module->my_index, - leading_dim = bcol_module->colls_no_user_data.size_of_group; - int8_t ready_flag; - int8_t bcol_id = (int8_t) bcol_module->super.bcol_id; - int buff_index = input_args->buffer_index; - int *active_requests = - &(bcol_module->ml_mem.nb_coll_desc[buff_index].active_requests); - mca_bcol_basesmuma_component_t *cm = &mca_bcol_basesmuma_component; - int matched = 0; - - - volatile mca_bcol_basesmuma_payload_t *ctl_structs; - - /* control structures */ - volatile mca_bcol_basesmuma_header_t *my_ctl; - volatile mca_bcol_basesmuma_header_t *child_ctl; - - - netpatterns_tree_node_t *my_tree_node = &(bcol_module->fanin_node); - - sequence_number = input_args->sequence_num; - - idx = SM_ARRAY_INDEX(leading_dim, buff_index, 0); - ctl_structs = (volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs + idx; - my_ctl = ctl_structs[my_rank].ctl_struct; - - - flag_offset = my_ctl->starting_flag_value[bcol_id]; - ready_flag = flag_offset + 1; - my_ctl->sequence_number = sequence_number; - - /* Cache num of children value in a local variable */ - n_children = my_tree_node->n_children; - - - /* Wait until my children arrive */ - for (i = 0; i < n_children; ++i) { - matched = 0; - /* Get child ctl struct */ - if ( 1 == ((*active_requests >> i)&1) ) { - child_rank = my_tree_node->children_ranks[i]; - child_ctl = ctl_structs[child_rank].ctl_struct; - /* I'm sacrificing cache for concurrency */ - for( probe = 0; probe < cm->num_to_probe && (0 == matched); probe++){ - if(IS_PEER_READY(child_ctl, ready_flag, sequence_number, BARRIER_FANIN_FLAG,bcol_id)) { - matched = 1; - /* flip the bit */ - *active_requests ^= (1<<i); - } - } - } - } - - if (0 == *active_requests) { - if (ROOT_NODE != my_tree_node->my_node_type){ - /* If I am not the root of the fanin tree, - then signal my parent */ - my_ctl->flags[BARRIER_FANIN_FLAG][bcol_id] = ready_flag; - } - } else { - return BCOL_FN_STARTED; - } - - my_ctl->starting_flag_value[bcol_id]++; - - return BCOL_FN_COMPLETE; -} - - -int bcol_basesmuma_fanin_init(mca_bcol_base_module_t *super) -{ - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - BASESMUMA_VERBOSE(10, ("Basesmuma Fan-In register.\n")); - - comm_attribs.bcoll_type = BCOL_FANIN; - - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - comm_attribs.data_src = DATA_SRC_KNOWN; - - mca_bcol_base_set_attributes(super, - &comm_attribs, &inv_attribs, - bcol_basesmuma_fanin_new, - bcol_basesmuma_fanin_new_progress); - - return OMPI_SUCCESS; -} - - diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_fanout.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_fanout.c deleted file mode 100644 index f3d3d23c40..0000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_fanout.c +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/* Recursive doubling blocking barrier */ - -#include "ompi_config.h" -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/patterns/net/netpatterns.h" - -#include "opal/sys/atomic.h" - -#include "ompi/mca/bcol/base/base.h" -#include "bcol_basesmuma.h" - -/***********************************************************************************/ -/*********************************** New Fan-Out ***********************************/ -/***********************************************************************************/ - -static int bcol_basesmuma_fanout_new( - bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - /* local variables */ - int64_t sequence_number; - - mca_bcol_basesmuma_module_t* bcol_module = - (mca_bcol_basesmuma_module_t *) c_input_args->bcol_module; - - int idx, probe, - my_rank = bcol_module->super.sbgp_partner_module->my_index, - leading_dim = bcol_module->colls_no_user_data.size_of_group; - int8_t ready_flag; - int8_t bcol_id = (int8_t) bcol_module->super.bcol_id; - int buff_index = input_args->buffer_index; - mca_bcol_basesmuma_component_t *cm = &mca_bcol_basesmuma_component; - - - volatile mca_bcol_basesmuma_payload_t *ctl_structs; - - /* control structures */ - volatile mca_bcol_basesmuma_header_t *my_ctl; - volatile mca_bcol_basesmuma_header_t *parent_ctl; - - - netpatterns_tree_node_t *my_tree_node = &(bcol_module->fanin_node); - - /* Figure out - what instance of the basesmuma bcol I am */ - sequence_number = input_args->sequence_num; - - idx = SM_ARRAY_INDEX(leading_dim, buff_index, 0); - ctl_structs = (volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs + idx; - my_ctl = ctl_structs[my_rank].ctl_struct; - - /* init the header */ - BASESMUMA_HEADER_INIT(my_ctl, ready_flag, sequence_number, bcol_id); - - /* Wait on my parent to arrive */ - if (my_tree_node->n_parents) { - parent_ctl = ctl_structs[my_tree_node->parent_rank].ctl_struct; - for( probe = 0; probe < cm->num_to_probe; probe++){ - if (IS_PEER_READY(parent_ctl, ready_flag, sequence_number, BARRIER_FANOUT_FLAG, bcol_id)) { - /* signal my children */ - my_ctl->flags[BARRIER_FANOUT_FLAG][bcol_id] = ready_flag; - /* bump the starting flag */ - my_ctl->starting_flag_value[bcol_id]++; - return BCOL_FN_COMPLETE; - - } - } - - } else { - /* I am the root of the fanout */ - my_ctl->flags[BARRIER_FANOUT_FLAG][bcol_id] = ready_flag; - /* bump the starting flag */ - my_ctl->starting_flag_value[bcol_id]++; - return BCOL_FN_COMPLETE; - } - - - - - - return BCOL_FN_STARTED; -} - -int bcol_basesmuma_fanout_init(mca_bcol_base_module_t *super) -{ - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - BASESMUMA_VERBOSE(10, ("Basesmuma Fan-Out register.\n")); - - comm_attribs.bcoll_type = BCOL_FANOUT; - - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - comm_attribs.data_src = DATA_SRC_KNOWN; - - mca_bcol_base_set_attributes(super, - &comm_attribs, &inv_attribs, - bcol_basesmuma_fanout_new, - bcol_basesmuma_fanout_new); - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_gather.c 
b/ompi/mca/bcol/basesmuma/bcol_basesmuma_gather.c deleted file mode 100644 index ef3d856b88..0000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_gather.c +++ /dev/null @@ -1,1106 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "ompi/mca/bcol/base/base.h" -#include "ompi/mca/bcol/basesmuma/bcol_basesmuma.h" -#include "ompi/constants.h" -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/communicator/communicator.h" - -/* debug - * #include "opal/sys/timer.h" - * - * extern uint64_t timers[7]; - * end debug */ - -/* debug */ -#include -/* end debug */ - -/* non-blocking gather routines: init and progress functions */ -int bcol_basesmuma_gather_init(mca_bcol_base_module_t *super) -{ - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - comm_attribs.bcoll_type = BCOL_GATHER; - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1048576; - comm_attribs.data_src = DATA_SRC_KNOWN; - comm_attribs.waiting_semantics = BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; - inv_attribs.datatype_bitmap = 0x11111111; - inv_attribs.op_types_bitmap = 0x11111111; - - /* Set attributes for fanin fanout algorithm */ - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_basesmuma_k_nomial_gather_init, - bcol_basesmuma_k_nomial_gather_progress); - - return OMPI_SUCCESS; -} - -int bcol_basesmuma_k_nomial_gather_init(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - /* local variables */ - int leading_dim, buff_idx, idx; - int src, i, j, k_temp1, k_temp2; - int pseudo_root, proxy_root, pseudo_base_adj; - volatile int8_t ready_flag; - int count=input_args->count; - struct ompi_datatype_t* dtype=input_args->dtype; - int root=input_args->root; - int base_adj, base; - int total_peers, my_pow_k=0; - int64_t sequence_number=input_args->sequence_num; - mca_bcol_basesmuma_module_t* bcol_module= - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; - int bcol_id = (int) bcol_module->super.bcol_id; - int my_rank = bcol_module->super.sbgp_partner_module->my_index; - netpatterns_k_exchange_node_t *exchange_node = - &bcol_module->knomial_allgather_tree; - uint32_t buffer_index = input_args->buffer_index; - int *active_requests = - &(bcol_module->ml_mem.nb_coll_desc[buffer_index].active_requests); - - int *iteration = &bcol_module->ml_mem.nb_coll_desc[buffer_index].iteration; - int *status = &bcol_module->ml_mem.nb_coll_desc[buffer_index].status; - - int buff_offset = bcol_module->super.hier_scather_offset; - - /* "indirectors" */ - int *inv_map = exchange_node->inv_reindex_map; - int *reindex_map = exchange_node->reindex_map; - int stray = exchange_node->k_nomial_stray; - - /* tree radix */ - int tree_order = exchange_node->tree_order; - /* tree depth */ - int pow_k = exchange_node->log_tree_order; - /* largest power of k less than or equal to np */ - int cnt = exchange_node->n_largest_pow_tree_order; - - /* payload structures */ - volatile mca_bcol_basesmuma_payload_t *data_buffs; - - /* control structures */ - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer; - - size_t 
pack_len = 0, dt_size; - -#if 0 - fprintf(stderr,"Entering sm gather input_args->sbuf_offset %d \n",input_args->sbuf_offset); - fflush(stderr); -#endif - - - /* we will work only on packed data - so compute the length*/ - /* this is the size of my data, this is not gatherv so it's the same - * for all ranks in the communicator. - */ - ompi_datatype_type_size(dtype, &dt_size); - pack_len=count*dt_size; - /* now set the "real" offset */ - buff_offset = buff_offset*pack_len; - - buff_idx = input_args->src_desc->buffer_index; - - /* Get addressing information */ - my_rank = bcol_module->super.sbgp_partner_module->my_index; - - leading_dim=bcol_module->colls_no_user_data.size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - data_buffs=(volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs+idx; - - /* Set pointer to current proc ctrl region */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - - /* init the header */ - BASESMUMA_HEADER_INIT(my_ctl_pointer, ready_flag, sequence_number, bcol_id); - - /* init active requests, iteration, and status */ - *iteration = 0; - *active_requests = 0; - *status = -1; - /* calculate the number of steps necessary for this collective */ - - /* first thing we do is figure out where the root is in our new indexing */ - /* find root in new indexing */ - pseudo_root = inv_map[root]; - /* see if this is larger than the stray */ - if (pseudo_root >= stray) { - /* then we need to define the proxy root, everyone can do this */ - proxy_root = pseudo_root - cnt; - } else { - proxy_root = pseudo_root; - } - 
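In the k-nomial tree walked below, a rank polls up to (tree_order - 1) peers at each of pow_k levels, so request bit (tree_order - 1)*i + j uniquely names peer j of level i; those are exactly the bits the gather flips in its active_requests word. A small sketch of the numbering with illustrative parameters:

    #include <stdio.h>

    int main(void)
    {
        int tree_order = 3, pow_k = 2;   /* radix-3 tree, 9 = 3^2 ranks */
        for (int i = 0; i < pow_k; i++) {              /* level         */
            for (int j = 0; j < tree_order - 1; j++) { /* peer in level */
                printf("level %d peer %d -> bit %d\n",
                       i, j, (tree_order - 1) * i + j);
            }
        }
        return 0;  /* bits 0..3: two peers per level, two levels */
    }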
/* do some figuring */ - if (EXCHANGE_NODE == exchange_node->node_type) { - total_peers = 0; - my_pow_k = pow_k; - k_temp1 = tree_order; - k_temp2 = 1; - for( i = 0; i < pow_k; i++) { - /* then find the base */ - FIND_BASE(base,exchange_node->reindex_myid,i+1,tree_order); - /* now find the adjusted base */ - base_adj = base + (base + proxy_root)%k_temp1; - /* ok, now find out WHO is occupying this slot */ - pseudo_base_adj = reindex_map[base_adj]; - - if(my_rank == pseudo_base_adj ) { - /* then go ahead and poll for children's data */ - for( j = 0; j < (tree_order - 1); j++ ) { - /* send phase - */ - /* get communication partner */ - - src = exchange_node->rank_exchanges[i][j]; - /* remember, if we have extra ranks, then we won't participate - * with at least one peer. Make a check - */ - if( src < 0 ){ - continue; - }else{ - - /* flip a bit to represent this request */ - *active_requests ^= (1<<((tree_order - 1)*i + j)); - total_peers++; - } - } - } else { - /* I am not the base at this level - my part of the tree ends here */ - my_pow_k = i; - break; - } - - k_temp1 = k_temp1*tree_order; - k_temp2 = k_temp2*tree_order; - } - } - - /* see whether I can already signal readiness */ - if (EXTRA_NODE == exchange_node->node_type || 0 == exchange_node->n_extra_sources) { - if (0 == my_pow_k || EXTRA_NODE == exchange_node->node_type) { - opal_atomic_rmb (); - - my_ctl_pointer->flags[GATHER_FLAG][bcol_id] = ready_flag; - } - - if ((EXTRA_NODE == exchange_node->node_type && root != my_rank) || 0 == my_pow_k) { - /* nothing more to do */ - my_ctl_pointer->starting_flag_value[bcol_id]++; - - return BCOL_FN_COMPLETE; - } - } - - return BCOL_FN_STARTED; -} - - -int bcol_basesmuma_k_nomial_gather_progress(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - /* local variables */ - int group_size; - int flag_offset; - int leading_dim, buff_idx, idx; - int src, knt, i, j, k_temp1, k_temp2; - volatile int8_t ready_flag; - int count=input_args->count; - struct ompi_datatype_t* dtype=input_args->dtype; - int root=input_args->root; - int probe; - int matched; - int64_t sequence_number=input_args->sequence_num; - mca_bcol_basesmuma_module_t* bcol_module= - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; - int bcol_id = (int) bcol_module->super.bcol_id; - int my_rank = bcol_module->super.sbgp_partner_module->my_index; - mca_bcol_basesmuma_component_t *cm = &mca_bcol_basesmuma_component; - netpatterns_k_exchange_node_t *exchange_node = - &bcol_module->knomial_allgather_tree; - uint32_t buffer_index = input_args->buffer_index; - int *active_requests = - &(bcol_module->ml_mem.nb_coll_desc[buffer_index].active_requests); - int *iteration = &bcol_module->ml_mem.nb_coll_desc[buffer_index].iteration; - int *status = &bcol_module->ml_mem.nb_coll_desc[buffer_index].status; - int buff_offset = bcol_module->super.hier_scather_offset; - /* "indirectors" */ - int *list_connected = bcol_module->super.list_n_connected; - /* tree radix */ - int tree_order = exchange_node->tree_order; - /* payload structures */ - volatile mca_bcol_basesmuma_payload_t *data_buffs; - volatile char *child_data_pointer; - /* control structures */ - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer; - volatile mca_bcol_basesmuma_header_t *child_ctl_pointer; - /*volatile mca_bcol_basesmuma_ctl_struct_t* parent_ctl_pointer; */ - - size_t pack_len = 0, dt_size; - void *data_addr = (void *)((unsigned char *)input_args->src_desc->data_addr); - - -#if 0 - fprintf(stderr,"Entering sm gather input_args->sbuf_offset %d \n",input_args->sbuf_offset); - fflush(stderr); -#endif - - - /* we will work only on packed data - so compute the length*/ - /* this is the size of my data, this is not gatherv so it's the same - * for all ranks in the communicator. 
- */ - ompi_datatype_type_size(dtype, &dt_size); - pack_len=count*dt_size; - /* now set the "real" offset */ - buff_offset = buff_offset*pack_len; - - buff_idx = input_args->src_desc->buffer_index; - - /* Get addressing information */ - my_rank = bcol_module->super.sbgp_partner_module->my_index; - - group_size = bcol_module->colls_no_user_data.size_of_group; - leading_dim=bcol_module->colls_no_user_data.size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - data_buffs=(volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs+idx; - - /* Set pointer to current proc ctrl region */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - /* restart the ready_flag state */ - flag_offset = my_ctl_pointer->starting_flag_value[bcol_id]; - ready_flag = flag_offset + 1; - - /* calculate the number of steps necessary for this collective */ - - /* first thing we do is figure out where the root is in our new indexing */ - /* find root in new indexing */ - if( EXTRA_NODE == exchange_node->node_type ) { - - /* poll for data from proxy */ - src = exchange_node->rank_extra_sources_array[0]; - /* get src data buffer */ - child_data_pointer = data_buffs[src].payload; - child_ctl_pointer = data_buffs[src].ctl_struct; - /* remember to bump your flag */ - ready_flag++; - - /* in this case, you must block */ - for (i = 0 ; i < cm->num_to_probe ; ++i) { - if (IS_PEER_READY(child_ctl_pointer,ready_flag,sequence_number, GATHER_FLAG, bcol_id)){ - /* receive the data from the proxy, aka pseudo-root */ - memcpy((void *) ((unsigned char *) data_addr + buff_offset), - (void *) ((unsigned char *) child_data_pointer+buff_offset), - pack_len * group_size); - - goto FINISHED; - } - } - - return BCOL_FN_STARTED; - } - - - if (0 < exchange_node->n_extra_sources && (-1 == (*status))) { - /* am a proxy, poll for pack_len data from extra */ - src = exchange_node->rank_extra_sources_array[0]; - /* get src data buffer */ - child_data_pointer = data_buffs[src].payload; - child_ctl_pointer = data_buffs[src].ctl_struct; - knt = 0; - for( i = 0; i < src; i++){ - knt += list_connected[i]; - } - /* must block here also */ - matched = 0; - for (i = 0, matched = 0 ; i < cm->num_to_probe && (0 == matched) ; ++i) { - if(IS_PEER_READY(child_ctl_pointer,ready_flag,sequence_number, GATHER_FLAG, bcol_id)){ - matched = 1; - memcpy((void *) ((unsigned char *) data_addr + buff_offset + pack_len*knt), - (void *) ((unsigned char *) child_data_pointer + buff_offset + - pack_len*knt), pack_len*list_connected[src]); - *status = 0; - if( 0 == *active_requests ){ - goto LAST_STEP; - } - - break; - } - } - if( 0 == matched ){ - return BCOL_FN_STARTED; - } - } - - /* start the k-nomial gather phase */ - /* only "active ranks participate, once a rank has forwarded its data, it becomes inactive */ - for (probe = 0 ; probe < cm->num_to_probe ; ++probe) { - k_temp1 = tree_order; - k_temp2 = 1; - for (i = 0 ; i < *(iteration) ; ++i) { - - /* then go ahead and poll for children's data */ - for (j = 0 ; j < (tree_order - 1) ; ++j) { - /* send phase - */ - /* get communication partner */ - - src = exchange_node->rank_exchanges[i][j]; - /* remember, if we have extra ranks, then we won't participate - * with a least one peer. 
Make a check - */ - /* if the bit that corresponds to this child has been set to zero, - * then it has already checked in and data received - */ - if (src < 0 || 1 != ((*active_requests >> ((tree_order - 1)*i + j))&1)){ - continue; - } - child_data_pointer = data_buffs[src].payload; - child_ctl_pointer = data_buffs[src].ctl_struct; - - if(IS_PEER_READY(child_ctl_pointer,ready_flag,sequence_number, GATHER_FLAG, bcol_id)){ - /* copy the data */ - memcpy((void *) ((unsigned char *) data_addr + buff_offset + - exchange_node->payload_info[i][j].r_offset*pack_len), - (void *) ((unsigned char *) child_data_pointer + buff_offset + - exchange_node->payload_info[i][j].r_offset*pack_len), - exchange_node->payload_info[i][j].r_len*pack_len); - /* flip the bit to zero */ - *active_requests ^= (1<<((tree_order - 1)*i + j)); - if(0 == (*active_requests)) { - goto LAST_STEP; - } - } - } - } - - k_temp1 = k_temp1*tree_order; - k_temp2 = k_temp2*tree_order; - } - - - return BCOL_FN_STARTED; - -LAST_STEP: - /* last step, proxies send full data back to the extra ranks */ - if( 0 < exchange_node->n_extra_sources && - root == exchange_node->rank_extra_sources_array[0]) { - /* regardless, I will bump the ready flag and set it in case someone is watching */ - /* announce that data is ready */ - ready_flag++; - } - - /* signal that data is ready */ - opal_atomic_wmb (); - my_ctl_pointer->flags[GATHER_FLAG][bcol_id] = ready_flag; - -FINISHED: - - - my_ctl_pointer->starting_flag_value[bcol_id]++; - - return BCOL_FN_COMPLETE; -} - - -/* Blocking routines, used to prototype and test signaling, - * as well as debug hierarchical algorithm - */ -#if 0 -int bcol_basesmuma_gather_init(mca_bcol_base_module_t *super) -{ - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - comm_attribs.bcoll_type = BCOL_GATHER; - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 16; - comm_attribs.data_src = DATA_SRC_KNOWN; - comm_attribs.waiting_semantics = BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; - inv_attribs.datatype_bitmap = 0x11111111; - inv_attribs.op_types_bitmap = 0x11111111; - - - /* Set attributes for fanin fanout algorithm */ - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, bcol_basesmuma_k_nomial_gather, - bcol_basesmuma_k_nomial_gather); - - return OMPI_SUCCESS; -} -#endif - - -/* original, fully blocking, fully synchronous gather - should result in worst performance when used */ -#if 0 -int bcol_basesmuma_k_nomial_gather(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - /* local variables */ - int group_size; - int first_instance=0, flag_offset; - int rc = OMPI_SUCCESS; - int leading_dim, buff_idx, idx; - int *group_list; - int src, comm_src, knt, i, k, j, k_temp1, k_temp2; - int pseudo_root, proxy_root, pseudo_base_adj; - volatile int64_t ready_flag; - int count=input_args->count; - struct ompi_datatype_t* dtype=input_args->dtype; - int root=input_args->root; - int base_adj, base; - int64_t sequence_number=input_args->sequence_num; - mca_bcol_basesmuma_module_t* bcol_module= - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; - int my_rank = bcol_module->super.sbgp_partner_module->my_index; - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - netpatterns_k_exchange_node_t *exchange_node = - &bcol_module->knomial_allgather_tree; - - int buff_offset = bcol_module->super.hier_scather_offset; - - /* "indirectors" */ - int *list_connected = 
bcol_module->super.list_n_connected; - int *inv_map = exchange_node->inv_reindex_map; - int *reindex_map = exchange_node->reindex_map; - /*int *reindex_map = exchange_node->reindex_map;*/ - /* stray rank == first rank in the extra set */ - int stray = exchange_node->k_nomial_stray; - - /* tree radix */ - int tree_order = exchange_node->tree_order; - /* tree depth */ - int pow_k = exchange_node->log_tree_order; - /* largest power of k less than or equal to np */ - int cnt = exchange_node->n_largest_pow_tree_order; - - /*fprintf(stderr,"tree order %d pow_k %d stray %d root %d\n",tree_order, pow_k, stray, root);*/ - /* payload structures */ - volatile mca_bcol_basesmuma_payload_t *data_buffs; - volatile char *child_data_pointer; - - /* control structures */ - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer; - volatile mca_bcol_basesmuma_header_t *child_ctl_pointer; - /*volatile mca_bcol_basesmuma_ctl_struct_t* parent_ctl_pointer; */ - - size_t pack_len = 0, dt_size; - void *data_addr = (void *)((unsigned char *)input_args->src_desc->data_addr); - - /* active in the algorithm */ - bool active = true; - -#if 0 - fprintf(stderr,"Entering sm gather input_args->sbuf_offset %d \n",input_args->sbuf_offset); - fflush(stderr); -#endif - - - /* we will work only on packed data - so compute the length*/ - /* this is the size of my data, this is not gatherv so it's the same - * for all ranks in the communicator. - */ - ompi_datatype_type_size(dtype, &dt_size); - pack_len=count*dt_size; - /* now set the "real" offset */ - buff_offset = buff_offset*pack_len; - - buff_idx = input_args->src_desc->buffer_index; - - /* Get addressing information */ - my_rank = bcol_module->super.sbgp_partner_module->my_index; - /* I have a feeling that I'll need this */ - group_list = bcol_module->super.sbgp_partner_module->group_list; - - group_size = bcol_module->colls_no_user_data.size_of_group; - leading_dim=bcol_module->colls_no_user_data.size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - /*ctl_structs=(mca_bcol_basesmuma_ctl_struct_t **) - bcol_module->colls_with_user_data.ctl_buffs+idx; - */ - data_buffs=(volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs+idx; - - /* Set pointer to current proc ctrl region */ - /*my_ctl_pointer = ctl_structs[my_rank]; */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - - /* setup resource recycling */ - if( my_ctl_pointer->sequence_number < sequence_number ) { - first_instance=1; - } - - if( first_instance ) { - /* Signal arrival */ - my_ctl_pointer->flag = -1; - my_ctl_pointer->gflag = -1; - my_ctl_pointer->index=1; - /* this does not need to use any flag values , so only need to - * set the value for subsequent values that may need this */ - my_ctl_pointer->starting_flag_value=0; - flag_offset=0; - - } else { - /* only one thread at a time will be making progress on this - * collective, so no need to make this atomic */ - my_ctl_pointer->index++; - } - - - /* increment the starting flag by one and return */ - flag_offset = my_ctl_pointer->starting_flag_value; - ready_flag = flag_offset + sequence_number + 1; - my_ctl_pointer->sequence_number = sequence_number; - -/* debug - fprintf(stderr," sequence_number %lld flag_offset %d starting flag val %d\n",sequence_number,flag_offset, my_ctl_pointer->starting_flag_value); - fflush(stderr); - end debug */ - - - /* - * Fan out from root - */ - /* don't need this either */ - /* root is the local leader */ - /* calculate the number of steps necessary for this collective */ - - /* first 
thing we do is figure out where the root is in our new indexing */ - /* find root in new indexing */ - pseudo_root = inv_map[root]; - /* see if this is larger than the stray */ - if( pseudo_root >= stray ) { - /* then we need to define the proxy root, everyone can do this */ - proxy_root = pseudo_root - cnt; - }else { - proxy_root = pseudo_root; - } - - - - if( EXTRA_NODE == exchange_node->node_type ) { - - /* signal arrival */ - my_ctl_pointer->gflag = ready_flag; - - /* send is done */ - - /* poll for data only if I am the root */ - /* bump the ready flag */ - ready_flag++; - if( root == my_rank ){ - /* poll for data from proxy */ - src = exchange_node->rank_extra_sources_array[0]; - /* get src data buffer */ - child_data_pointer = data_buffs[src].payload; - child_ctl_pointer = data_buffs[src].ctl_struct; - while(!IS_GDATA_READY(child_ctl_pointer,ready_flag,sequence_number)){ - opal_progress(); - } - /* receive the data from the proxy, aka pseudo-root */ - - memcpy((void *) ((unsigned char *) data_addr + buff_offset),(void *) ((unsigned char *) child_data_pointer+buff_offset) - ,pack_len*group_size); - } - goto FINISHED; - - - } else if( 0 < exchange_node->n_extra_sources ) { - - /* am a proxy, poll for pack_len data from extra */ - src = exchange_node->rank_extra_sources_array[0]; - /* get src data buffer */ - child_data_pointer = data_buffs[src].payload; - child_ctl_pointer = data_buffs[src].ctl_struct; - knt = 0; - for( i = 0; i < src; i++){ - knt += list_connected[i]; - } - while(!IS_GDATA_READY(child_ctl_pointer,ready_flag,sequence_number)){ - opal_progress(); - } - memcpy((void *) ((unsigned char *) data_addr + buff_offset + pack_len*knt), - (void *) ((unsigned char *) child_data_pointer + buff_offset + - pack_len*knt), pack_len*list_connected[src]); - /*fprintf(stderr,"999 proxy received data from %d at offset %d of length %d\n",src, - buff_offset+pack_len*knt,pack_len*list_connected[src]); - */ - } - - /* start the k-nomial gather phase */ - /* only "active ranks participate, once a rank has forwarded its data, it becomes inactive */ - knt = 0; - while(active){ - k_temp1 = tree_order; - k_temp2 = 1; - for( i = 0; i < pow_k; i++) { - /* then find the base */ - /*FIND_BASE(base,my_rank,i+1,tree_order);*/ - FIND_BASE(base,exchange_node->reindex_myid,i+1,tree_order); - /* now find the adjusted base */ - base_adj = base + (base + proxy_root)%k_temp1; - /* ok, now find out WHO is occupying this slot */ - /*pseudo_base_adj = inv_map[base_adj];*/ - pseudo_base_adj = reindex_map[base_adj]; - - if(my_rank == pseudo_base_adj ) { - /* then go ahead and poll for children's data */ - for( j = 0; j < (tree_order - 1); j++ ) { - /* send phase - */ - /* get communication partner */ - - src = exchange_node->rank_exchanges[i][j]; - /*fprintf(stderr,"comm_src %d\n",comm_src);*/ - /* remember, if we have extra ranks, then we won't participate - * with a least one peer. 
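[Editorial sketch] When the group is not a full power of the radix, a root whose re-indexed position lands in the "extra" set (at or beyond the stray boundary) sits outside the k-nomial core of cnt ranks, so its role is folded onto a proxy inside the core, exactly as the branch above does. Hypothetical helper:

static int proxy_root_of(int pseudo_root, int stray, int cnt)
{
    /* extra ranks (>= stray) fold back onto a proxy inside the core */
    return (pseudo_root >= stray) ? pseudo_root - cnt : pseudo_root;
}

Under this convention, with tree_order 2 and 6 ranks (cnt = 4, stray = 4), a root re-indexed to 5 would be proxied by core rank 1.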
Make a check - */ - if( src < 0 ){ - continue; - } - - /*fprintf(stderr,"src %d\n",src);*/ - child_data_pointer = data_buffs[src].payload; - child_ctl_pointer = data_buffs[src].ctl_struct; - while(!IS_GDATA_READY(child_ctl_pointer,ready_flag,sequence_number)){ - opal_progress(); - } - memcpy((void *) ((unsigned char *) data_addr + buff_offset + - exchange_node->payload_info[i][j].r_offset*pack_len), - (void *) ((unsigned char *) child_data_pointer + buff_offset + - exchange_node->payload_info[i][j].r_offset*pack_len), - exchange_node->payload_info[i][j].r_len*pack_len); - /* - fprintf(stderr,"999 receiving data from %d at offset %d of length %d\n", - exchange_node->rank_exchanges[i][j], buff_offset + exchange_node->payload_info[i][j].r_offset, - exchange_node->payload_info[i][j].r_len*pack_len); - */ - opal_atomic_wmb (); - knt++; - if(knt == exchange_node->n_actual_exchanges) { - /* this is the trick to break the root out, - * only the root should be able to satisfy this - */ - /* - fprintf(stderr,"hello n_actual is %d \n",knt); - fprintf(stderr,"hello n_actual_exch is %d \n", - exchange_node->n_actual_exchanges); - */ - goto LAST_STEP; - } - } - } else { - /* announce my arrival */ - my_ctl_pointer->gflag = ready_flag; - active = false; - break; - } - - k_temp1 = k_temp1*tree_order; - k_temp2 = k_temp2*tree_order; - } - } -LAST_STEP: - /* last step, proxies send full data back to the extra ranks */ - if( 0 < exchange_node->n_extra_sources && - root == exchange_node->rank_extra_sources_array[0]) { - /* regardless, I will bump the ready flag and set it in case someone is watching */ - /* announce that data is ready */ - ready_flag++; - my_ctl_pointer->gflag = ready_flag; - } - - -FINISHED: - -/* debug - fprintf(stderr," my_ctl_pointer->index %d n of this type %d %u \n", - my_ctl_pointer->index,c_input_args->n_of_this_type_in_collective,getpid()); - fflush(stderr); - end debug */ - - my_ctl_pointer->starting_flag_value+=1; - - return BCOL_FN_COMPLETE; -} - -#endif - - -#if 0 -/* blocking, asynchronous polling gather routine */ -int bcol_basesmuma_k_nomial_gather(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - /* local variables */ - int group_size; - int first_instance=0, flag_offset; - int rc = OMPI_SUCCESS; - int leading_dim, buff_idx, idx; - int *group_list; - int src, comm_src, knt, i, k, j, k_temp1, k_temp2; - int pseudo_root, proxy_root, pseudo_base_adj; - volatile int64_t ready_flag; - int count=input_args->count; - struct ompi_datatype_t* dtype=input_args->dtype; - int root=input_args->root; - int base_adj, base; - int total_peers, my_pow_k; - int probe; - int matched; - int64_t sequence_number=input_args->sequence_num; - mca_bcol_basesmuma_module_t* bcol_module= - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; - int my_rank = bcol_module->super.sbgp_partner_module->my_index; - mca_bcol_basesmuma_component_t *cm = &mca_bcol_basesmuma_component; - netpatterns_k_exchange_node_t *exchange_node = - &bcol_module->knomial_allgather_tree; - - int buff_offset = bcol_module->super.hier_scather_offset; - - /* "indirectors" */ - int *list_connected = bcol_module->super.list_n_connected; - int *inv_map = exchange_node->inv_reindex_map; - int *reindex_map = exchange_node->reindex_map; - /*int *reindex_map = exchange_node->reindex_map;*/ - /* stray rank == first rank in the extra set */ - int stray = exchange_node->k_nomial_stray; - - /* tree radix */ - int tree_order = exchange_node->tree_order; - /* tree depth */ - int pow_k = 
exchange_node->log_tree_order; - /* largest power of k less than or equal to np */ - int cnt = exchange_node->n_largest_pow_tree_order; - - /*fprintf(stderr,"tree order %d pow_k %d stray %d root %d\n",tree_order, pow_k, stray, root);*/ - /* payload structures */ - volatile mca_bcol_basesmuma_payload_t *data_buffs; - volatile char *child_data_pointer; - - /* control structures */ - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer; - volatile mca_bcol_basesmuma_header_t *child_ctl_pointer; - /*volatile mca_bcol_basesmuma_ctl_struct_t* parent_ctl_pointer; */ - - size_t pack_len = 0, dt_size; - void *data_addr = (void *)((unsigned char *)input_args->src_desc->data_addr); - - /* active in the algorithm */ - bool active = true; - -#if 0 - fprintf(stderr,"Entering sm gather root %d \n",root); - fflush(stderr); -#endif - - - /* we will work only on packed data - so compute the length*/ - /* this is the size of my data, this is not gatherv so it's the same - * for all ranks in the communicator. - */ - ompi_datatype_type_size(dtype, &dt_size); - pack_len=count*dt_size; - /* now set the "real" offset */ - buff_offset = buff_offset*pack_len; - - buff_idx = input_args->src_desc->buffer_index; - - /* Get addressing information */ - my_rank = bcol_module->super.sbgp_partner_module->my_index; - /* I have a feeling that I'll need this */ - group_list = bcol_module->super.sbgp_partner_module->group_list; - - group_size = bcol_module->colls_no_user_data.size_of_group; - leading_dim=bcol_module->colls_no_user_data.size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - /*ctl_structs=(mca_bcol_basesmuma_ctl_struct_t **) - bcol_module->colls_with_user_data.ctl_buffs+idx; - */ - data_buffs=(volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs+idx; - - /* Set pointer to current proc ctrl region */ - /*my_ctl_pointer = ctl_structs[my_rank]; */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - - /* setup resource recycling */ - if( my_ctl_pointer->sequence_number < sequence_number ) { - first_instance=1; - } - - if( first_instance ) { - /* Signal arrival */ - my_ctl_pointer->flag = -1; - my_ctl_pointer->gflag = -1; - my_ctl_pointer->index=1; - /* this does not need to use any flag values , so only need to - * set the value for subsequent values that may need this */ - my_ctl_pointer->starting_flag_value=0; - flag_offset=0; - - } else { - /* only one thread at a time will be making progress on this - * collective, so no need to make this atomic */ - my_ctl_pointer->index++; - } - - - /* increment the starting flag by one and return */ - flag_offset = my_ctl_pointer->starting_flag_value; - ready_flag = flag_offset + sequence_number + 1; - my_ctl_pointer->sequence_number = sequence_number; - -/* debug - fprintf(stderr," sequence_number %lld flag_offset %d starting flag val %d\n",sequence_number,flag_offset, my_ctl_pointer->starting_flag_value); - fflush(stderr); - end debug */ - - - /* - * Fan out from root - */ - /* don't need this either */ - /* root is the local leader */ - /* calculate the number of steps necessary for this collective */ - - /* first thing we do is figure out where the root is in our new indexing */ - /* find root in new indexing */ - pseudo_root = inv_map[root]; - /* see if this is larger than the stray */ - if( pseudo_root >= stray ) { - /* then we need to define the proxy root, everyone can do this */ - proxy_root = pseudo_root - cnt; - }else { - proxy_root = pseudo_root; - } - if( EXTRA_NODE == exchange_node->node_type ) { - - /* signal arrival 
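[Editorial sketch] Both blocking variants repeat the same flag-recycling setup: the first collective seen on a buffer resets the control fields, and the value peers must observe is derived from the per-buffer starting value plus the sequence number, so a stale flag left by an earlier collective can never satisfy a wait. The struct below is illustrative, mirroring only the fields referenced above:

#include <stdint.h>

struct ctl_sketch {
    volatile int64_t sequence_number;
    volatile int64_t gflag;           /* gather signalling flag        */
    int starting_flag_value;          /* bumped after each collective  */
};

static int64_t compute_ready_flag(struct ctl_sketch *ctl, int64_t seq)
{
    if (ctl->sequence_number < seq) { /* first instance on this buffer */
        ctl->gflag = -1;
        ctl->starting_flag_value = 0;
    }
    ctl->sequence_number = seq;
    return ctl->starting_flag_value + seq + 1; /* value peers wait for */
}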
*/ - my_ctl_pointer->gflag = ready_flag; - - /* send is done */ - - /* poll for data only if I am the root */ - /* bump the ready flag */ - ready_flag++; - if( root == my_rank ){ - /* poll for data from proxy */ - src = exchange_node->rank_extra_sources_array[0]; - /* get src data buffer */ - child_data_pointer = data_buffs[src].payload; - child_ctl_pointer = data_buffs[src].ctl_struct; - /* in this case, you must block */ - while(!IS_GDATA_READY(child_ctl_pointer,ready_flag,sequence_number)){ - opal_progress(); - } - /* receive the data from the proxy, aka pseudo-root */ - - memcpy((void *) ((unsigned char *) data_addr + buff_offset), - (void *) ((unsigned char *) child_data_pointer+buff_offset) - ,pack_len*group_size); - } - goto FINISHED; - - - } else if( 0 < exchange_node->n_extra_sources ) { - - /* am a proxy, poll for pack_len data from extra */ - src = exchange_node->rank_extra_sources_array[0]; - /* get src data buffer */ - child_data_pointer = data_buffs[src].payload; - child_ctl_pointer = data_buffs[src].ctl_struct; - knt = 0; - for( i = 0; i < src; i++){ - knt += list_connected[i]; - } - /* must block here also */ - while(!IS_GDATA_READY(child_ctl_pointer,ready_flag,sequence_number)){ - opal_progress(); - } - memcpy((void *) ((unsigned char *) data_addr + buff_offset + pack_len*knt), - (void *) ((unsigned char *) child_data_pointer + buff_offset + - pack_len*knt), pack_len*list_connected[src]); - /*fprintf(stderr,"999 proxy received data from %d at offset %d of length %d\n",src, - buff_offset+pack_len*knt,pack_len*list_connected[src]); - */ - } - /* do some figuring */ - - total_peers = 0; - my_pow_k = pow_k; - k_temp1 = tree_order; - k_temp2 = 1; - for( i = 0; i < pow_k; i++) { - /* then find the base */ - /*FIND_BASE(base,my_rank,i+1,tree_order);*/ - FIND_BASE(base,exchange_node->reindex_myid,i+1,tree_order); - /* now find the adjusted base */ - base_adj = base + (base + proxy_root)%k_temp1; - /* ok, now find out WHO is occupying this slot */ - /*pseudo_base_adj = inv_map[base_adj];*/ - pseudo_base_adj = reindex_map[base_adj]; - - if(my_rank == pseudo_base_adj ) { - /* then go ahead and poll for children's data */ - for( j = 0; j < (tree_order - 1); j++ ) { - /* send phase - */ - /* get communication partner */ - - src = exchange_node->rank_exchanges[i][j]; - /*fprintf(stderr,"comm_src %d\n",comm_src);*/ - /* remember, if we have extra ranks, then we won't participate - * with a least one peer. Make a check - */ - if( src < 0 ){ - continue; - }else{ - total_peers++; - } - - - } - } else { - /* announce my arrival */ - my_pow_k = i; - break; - } - - k_temp1 = k_temp1*tree_order; - k_temp2 = k_temp2*tree_order; - } - - if( 0 == my_pow_k ){ - /* signal arrival */ - my_ctl_pointer->gflag = ready_flag; - - goto FINISHED; - } - - - - /* start the k-nomial gather phase */ - /* only "active ranks participate, once a rank has forwarded its data, it becomes inactive */ - knt = 0; - while(active){ - k_temp1 = tree_order; - k_temp2 = 1; - for( i = 0; i < my_pow_k; i++) { - - /* then go ahead and poll for children's data */ - for( j = 0; j < (tree_order - 1); j++ ) { - matched = 0; - /* send phase - */ - /* get communication partner */ - - src = exchange_node->rank_exchanges[i][j]; - /*fprintf(stderr,"comm_src %d\n",comm_src);*/ - /* remember, if we have extra ranks, then we won't participate - * with a least one peer. 
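[Editorial sketch] The "figuring" pass above walks the k-nomial levels while this rank remains the base of its slot; the level at which that stops (my_pow_k) bounds the receive loop, and total_peers counts the valid partners seen on the way, negative entries being holes left by extra ranks. A reduced sketch of the count, with the rank_exchanges lookup abstracted behind a hypothetical callback:

static int count_expected_peers(int my_pow_k, int tree_order,
                                int (*partner_at)(int level, int slot))
{
    int total = 0;

    for (int i = 0; i < my_pow_k; i++) {
        for (int j = 0; j < tree_order - 1; j++) {
            if (partner_at(i, j) >= 0) { /* skip holes from extra ranks */
                total++;
            }
        }
    }
    return total;
}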
Make a check - */ - if( src < 0 ){ - continue; - } - - /*fprintf(stderr,"src %d\n",src);*/ - child_data_pointer = data_buffs[src].payload; - child_ctl_pointer = data_buffs[src].ctl_struct; - - /* if child has been marked, then skip */ - if( sequence_number == child_ctl_pointer->mark ){ - continue; - } - - - for( probe = 0; probe < cm->num_to_probe && (0 == matched); probe++){ - if(IS_GDATA_READY(child_ctl_pointer,ready_flag,sequence_number)){ - /* mark the child's pointer */ - child_ctl_pointer->mark = sequence_number; - /* copy the data */ - - memcpy((void *) ((unsigned char *) data_addr + buff_offset + - exchange_node->payload_info[i][j].r_offset*pack_len), - (void *) ((unsigned char *) child_data_pointer + buff_offset + - exchange_node->payload_info[i][j].r_offset*pack_len), - exchange_node->payload_info[i][j].r_len*pack_len); - /* - fprintf(stderr,"999 receiving data from %d at offset %d of length %d\n", - exchange_node->rank_exchanges[i][j], buff_offset + exchange_node->payload_info[i][j].r_offset, - exchange_node->payload_info[i][j].r_len*pack_len); - */ - knt++; - if(knt == total_peers) { - /* this is the trick to break the root out, - * only the root should be able to satisfy this - */ - /* - fprintf(stderr,"hello n_actual is %d \n",knt); - fprintf(stderr,"hello n_actual_exch is %d \n", - exchange_node->n_actual_exchanges); - */ - opal_atomic_wmb (); - my_ctl_pointer->gflag = ready_flag; - - goto LAST_STEP; - } - matched = 1; - }else{ - opal_progress(); - } - } - } - } - - k_temp1 = k_temp1*tree_order; - k_temp2 = k_temp2*tree_order; - } -LAST_STEP: - /* last step, proxies send full data back to the extra ranks */ - if( 0 < exchange_node->n_extra_sources && - root == exchange_node->rank_extra_sources_array[0]) { - /* regardless, I will bump the ready flag and set it in case someone is watching */ - /* announce that data is ready */ - ready_flag++; - my_ctl_pointer->gflag = ready_flag; - } - - -FINISHED: - -/* debug - fprintf(stderr," my_ctl_pointer->index %d n of this type %d %u \n", - my_ctl_pointer->index,c_input_args->n_of_this_type_in_collective,getpid()); - fflush(stderr); - end debug */ - - my_ctl_pointer->starting_flag_value+=1; - - return BCOL_FN_COMPLETE; -} -#endif diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_bcast.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_bcast.c deleted file mode 100644 index c985a6889a..0000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_bcast.c +++ /dev/null @@ -1,1878 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#ifdef __PORTALS_AVAIL__ -#define __PORTALS_ENABLE__ - -#include "ompi/constants.h" -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/communicator/communicator.h" - -#include "bcol_basesmuma.h" -#include "bcol_basesmuma_portals.h" -#include "bcol_basesmuma_lmsg_bcast.h" -#include "bcol_basesmuma_utils.h" - - - -/* - * Scatter/Gather Broadcast algorithm - * - * Algorithm highlights: - * - * Uses portals for data transfer - * - * All processes participating in the broadcast are arranged in a - * binmoial tree. - * - * Phase1: Scatter the broadcast data to all the children - * Phase2: All processes in the tree participates in recursive doubling - * algorithm to obtain the missing data. 
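[Editorial sketch] The probing loop above suppresses duplicates by stamping a child's control structure with the current sequence number once its contribution has been copied, so later passes over the same source list skip it without touching payload again. Names are illustrative:

#include <stdbool.h>
#include <stdint.h>

struct child_ctl_sketch {
    volatile int64_t mark; /* sequence number of last consumed gather */
};

static bool already_consumed(const struct child_ctl_sketch *child,
                             int64_t seq)
{
    return seq == child->mark;
}

static void consume_child(struct child_ctl_sketch *child, int64_t seq)
{
    /* copy the payload first, then stamp so re-probes skip this child */
    child->mark = seq;
}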
- */ - - -static int completed_scatter = 0; -#if 0 -int bcol_basesmuma_lmsg_scatter_allgather_portals_bcast_old(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - - /* local variables */ - int i; - uint64_t length; - int my_rank, parent_rank, src =-1, matched = 0; - int *src_list = NULL; - int group_size = -1, dummy_group_size; - int first_instance=0; - int rc = OMPI_SUCCESS; - int leading_dim, buff_idx, idx; - int count=input_args->count; - size_t pack_len = 0, dt_size =0 ; - int64_t ready_flag; - int flag_offset; - int pow_2, pow_2_levels; - int src_list_index = -1; - uint64_t fragment_size; /* user buffer size */ - int sg_matchbits = 0; - /* Input argument variables */ - void *my_userbuf = (void*)((unsigned char*)input_args->userbuf); - int64_t sequence_number=input_args->sequence_num; - struct ompi_datatype_t* dtype=input_args->dtype; - - /* Extra source variables */ - bool secondary_root = false; - int partner = -1, extra_partner = -1; - - /* Scatter Allgather offsets */ - uint64_t local_sg_offset = 0, global_sg_offset = 0, partner_offset = 0; - - /* Portals messaging relevant variables */ - mca_bcol_basesmuma_portal_proc_info_t *portals_info; - ptl_handle_eq_t allgather_eq_h; - ptl_event_t allgather_event; - bool blocked_post = false; - bool msg_posted = false; - int total_msg_posts = -1, scatter_posts = -1, allgather_posts = -1, extra_src_posts = -1; - - /* OMPI module and component variables */ - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - mca_bcol_basesmuma_module_t *bcol_module = - (mca_bcol_basesmuma_module_t *) c_input_args->bcol_module; - - /* Control structure and payload variables */ - volatile mca_bcol_basesmuma_payload_t *data_buffs; - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer = NULL; - volatile mca_bcol_basesmuma_header_t *parent_ctl_pointer = NULL; - volatile mca_bcol_basesmuma_header_t *partner_ctl_pointer = NULL; - - struct mca_bcol_basesmuma_portal_buf_addr_t *my_lmsg_ctl_pointer = NULL; - struct mca_bcol_basesmuma_portal_buf_addr_t *parent_lmsg_ctl_pointer = NULL; - struct mca_bcol_basesmuma_portal_buf_addr_t *partner_lmsg_ctl_pointer = NULL; - - /* Make sure there userbuffer is not null */ - assert(my_userbuf != NULL); - - /* Get portals info*/ - portals_info = (mca_bcol_basesmuma_portal_proc_info_t*)cs->portals_info; - - /* Get addresing information */ - buff_idx = input_args->src_desc->buffer_index; - group_size = bcol_module->colls_no_user_data.size_of_group; - leading_dim=bcol_module->colls_no_user_data.size_of_group; - my_rank = bcol_module->super.sbgp_partner_module->my_index; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - - /* calculate the largest power of two that is smaller than - * or equal to the group size - */ - pow_2_levels = pow_sm_k(2,group_size, &(dummy_group_size)); - if( group_size < (1<colls_with_user_data.data_buffs+idx; - - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - my_lmsg_ctl_pointer = (mca_bcol_basesmuma_portal_buf_addr_t*) - data_buffs[my_rank].payload; - - if(my_ctl_pointer->sequence_number < sequence_number) { - first_instance = 1; - } - - if(first_instance) { - my_ctl_pointer->flag = -1; - my_ctl_pointer->index = 1; - - my_ctl_pointer->starting_flag_value = 0; - flag_offset = 0; - - } else { - my_ctl_pointer->index++; - } - - assert( -1 == my_ctl_pointer->flag); - - /* increment the starting flag by one and return */ - flag_offset = my_ctl_pointer->starting_flag_value; - ready_flag = flag_offset + sequence_number + 1; - - my_ctl_pointer->sequence_number = 
sequence_number; - sg_matchbits = sequence_number ; - - /* Construct my portal buffer address and copy to payload buffer */ - mca_bcol_basesmuma_construct_portal_address(my_lmsg_ctl_pointer, - portals_info->portal_id.nid, - portals_info->portal_id.pid, - sg_matchbits, - bcol_module->super.sbgp_partner_module->group_comm->c_contextid); - - my_lmsg_ctl_pointer->userbuf = my_userbuf; - my_lmsg_ctl_pointer->userbuf_length = fragment_size; - - - /* - * If I am the root of bcast, scatter the data to my children - */ - if (input_args->root_flag) { - BASESMUMA_VERBOSE(10,("I am the root of the data")); - my_lmsg_ctl_pointer->offset = 0; - my_lmsg_ctl_pointer->n_sends = pow_2_levels; - my_lmsg_ctl_pointer->length = fragment_size; - - rc = PtlEQAlloc(((mca_bcol_basesmuma_portal_proc_info_t*) - cs->portals_info)->ni_h, MAX_PORTAL_EVENTS_IN_Q, - PTL_EQ_HANDLER_NONE, &allgather_eq_h); - - if (rc != PTL_OK) { - BASESMUMA_VERBOSE(10,( "PtlEQAlloc() failed: %d \n",rc)); - goto Release; - } - - /* Compute number of posts required - * We post the data buffer for both scatter and allgather phase at once so to avoid - * posting overhead - */ - if (my_rank >= pow_2) { - /* I am root and my rank is greater than pow_2, I will hand - * over to rank (that is < pow_2) to act as secondary root - */ - total_msg_posts = 1; - } - else { - - extra_src_posts = (my_rank + pow_2 < group_size ) ? 1: 0; - scatter_posts = my_lmsg_ctl_pointer->n_sends; - allgather_posts = pow_2_levels - 1; - - total_msg_posts = scatter_posts + allgather_posts + extra_src_posts ; - } - - mca_bcol_basesmuma_portals_post_msg(cs, my_lmsg_ctl_pointer, - my_userbuf, fragment_size, allgather_eq_h, - total_msg_posts, - blocked_post, - PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE | PTL_MD_OP_GET | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE); - msg_posted = true ; - /* important that these be set before my children - * see the ready flag raised - */ - opal_atomic_wmb (); - my_ctl_pointer->flag = ready_flag; - - /* Wait for my scatter partner */ - if (my_rank >= pow_2) { - int scatter_partner = -1; - volatile mca_bcol_basesmuma_header_t *scatter_partner_ctl_pointer = NULL; - - scatter_partner = my_rank - pow_2; - scatter_partner_ctl_pointer = - data_buffs[scatter_partner].ctl_struct; - - while(!IS_SG_DATA_READY(scatter_partner_ctl_pointer, ready_flag, - sequence_number)){ - opal_progress(); - } - - goto Release; - } - else { - wait_for_peers(my_rank, my_lmsg_ctl_pointer->n_sends, data_buffs, - ready_flag, sequence_number); - } - - goto Allgather; - } - - -Extra : - if( my_rank >= pow_2 ) { - parent_rank = my_rank & (pow_2-1); - parent_ctl_pointer = data_buffs[parent_rank].ctl_struct; - parent_lmsg_ctl_pointer = - (mca_bcol_basesmuma_portal_buf_addr_t*)data_buffs[parent_rank].payload; - - ready_flag = ready_flag + pow_2_levels; - - while(!IS_SG_DATA_READY(parent_ctl_pointer, ready_flag, sequence_number)) { - - opal_progress(); - - } - - - mca_bcol_basesmuma_portals_get_msg_fragment_no_eq_h(cs, my_lmsg_ctl_pointer, - parent_lmsg_ctl_pointer, 0, - 0, fragment_size); - - my_ctl_pointer->flag = ready_flag; - - goto Release; - } - -Scatter: - - /* I am not root of bcast compute the list of possible - * where I will receive bcast data from. 
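[Editorial sketch] A non-root can be fed at any level of the binomial scatter, so the probe below watches every possible parent, my_rank ^ 2^i for each of the pow_2_levels levels, plus the matching extra rank my_rank + pow_2 when the group is not a power of two. Hypothetical helper building that list:

static int build_src_list(int *src_list, int my_rank, int pow_2_levels,
                          int group_size)
{
    int pow_2 = 1 << pow_2_levels;
    int n = 0;

    for (int i = 0; i < pow_2_levels; i++) {
        src_list[n++] = my_rank ^ (1 << i);      /* potential parents  */
    }
    /* an extra (>= pow_2) rank may hold the original root's data */
    src_list[n++] = (my_rank + pow_2 < group_size) ? my_rank + pow_2 : -1;

    return n;                                    /* pow_2_levels + 1   */
}

A -1 entry marks a slot with no sender; the probe also writes -1 back into a slot after a false positive so it is not polled again.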
- */ - src_list = (int *) malloc(sizeof(int) * (pow_2_levels + 1)); - for( i = 0; i < pow_2_levels; i++) { - src_list[i] = my_rank ^ (1< pow_2 */ - if ((my_rank + pow_2) < group_size) { - src_list[i] = my_rank + pow_2; - } else { - src_list[i] = -1; - } - -Probe: - - /* If I am not the root, then poll on possible "senders'" control structs */ - /* For portals we block for now */ - while (!matched) { - /* Shared memory iprobe */ - SG_LARGE_MSG_PROBE(src_list, pow_2_levels + 1, - src_list_index, matched, src, data_buffs, parent_ctl_pointer, - parent_lmsg_ctl_pointer,ready_flag, sequence_number); - } - - /* If I am a secondary root - * Secondary root acts as root of bcast data when real root of data - * is process with group rank greater than pow_2 */ - if ((matched) && (src == pow_2 + my_rank)) { - volatile mca_bcol_basesmuma_header_t *extra_src_ctl_pointer = NULL; - struct mca_bcol_basesmuma_portal_buf_addr_t *extra_src_lmsg_ctl_pointer = NULL; - - secondary_root = true; - BASESMUMA_VERBOSE(10,("I am the secondary root for the data")); - my_lmsg_ctl_pointer->offset = 0; - my_lmsg_ctl_pointer->n_sends = pow_2_levels; - my_lmsg_ctl_pointer->length = fragment_size; - - extra_src_ctl_pointer = data_buffs[src].ctl_struct; - extra_src_lmsg_ctl_pointer = (mca_bcol_basesmuma_portal_buf_addr_t*)data_buffs[src].payload; - - /* create an event queue for the incoming buffer */ - rc = PtlEQAlloc(((mca_bcol_basesmuma_portal_proc_info_t*) - cs->portals_info)->ni_h, MAX_PORTAL_EVENTS_IN_Q, - PTL_EQ_HANDLER_NONE, &allgather_eq_h); - - if (rc != PTL_OK) { - BASESMUMA_VERBOSE(10,( "PtlEQAlloc() failed: %d \n",rc)); - goto Release; - } - - mca_bcol_basesmuma_portals_get_msg_fragment_no_eq_h(cs, my_lmsg_ctl_pointer, - extra_src_lmsg_ctl_pointer, 0, - 0, fragment_size); - - - extra_src_posts = 0; - scatter_posts = my_lmsg_ctl_pointer->n_sends; - allgather_posts = pow_2_levels - 1; - - total_msg_posts = scatter_posts + allgather_posts + extra_src_posts ; - - mca_bcol_basesmuma_portals_post_msg(cs, my_lmsg_ctl_pointer, - my_userbuf, fragment_size, allgather_eq_h, - total_msg_posts, - blocked_post, - PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE | PTL_MD_OP_GET | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE); - msg_posted = true ; - /* important that these be set before my children - * see the ready flag raised - */ - opal_atomic_wmb (); - my_ctl_pointer->flag = ready_flag; - - wait_for_peers(my_rank, my_lmsg_ctl_pointer->n_sends, data_buffs, - ready_flag, sequence_number); - goto Allgather; - } - - /* Verify whether we got the right - * source of the data, by computing the source's intended - * destinations - */ - for( i = 0; i < parent_lmsg_ctl_pointer->n_sends; i++) { - uint64_t local_offset = 0; - uint64_t remote_offset = 0; - - BASESMUMA_VERBOSE(5,("%d found it from %d \n",my_rank,src)); - - if( my_rank == (src^(1<n_sends = i; - - /* Am I source for other process during scatter phase */ - if ( i > 0) { - - /* compute the size of the chunk to copy */ - length = (parent_lmsg_ctl_pointer->length)/ - (1<<(parent_lmsg_ctl_pointer->n_sends - my_lmsg_ctl_pointer->n_sends)); - my_lmsg_ctl_pointer->length = length; - my_lmsg_ctl_pointer->offset = - parent_lmsg_ctl_pointer->offset + length; - - - local_offset = my_lmsg_ctl_pointer->offset; - remote_offset = parent_lmsg_ctl_pointer->offset + length; - - mca_bcol_basesmuma_portals_get_msg_fragment_no_eq_h(cs, my_lmsg_ctl_pointer, - parent_lmsg_ctl_pointer,local_offset, - remote_offset, length); - rc = 
PtlEQAlloc(((mca_bcol_basesmuma_portal_proc_info_t*) - cs->portals_info)->ni_h, MAX_PORTAL_EVENTS_IN_Q, - PTL_EQ_HANDLER_NONE, - &allgather_eq_h); - - if (rc != PTL_OK) { - BASESMUMA_VERBOSE(10,( "PtlEQAlloc() failed: %d \n",rc)); - goto Release; - } - - /* Now post the message for other children to read */ - extra_src_posts = (my_rank + pow_2 < group_size ) ? 1: 0; - scatter_posts = my_lmsg_ctl_pointer->n_sends; - allgather_posts = pow_2_levels - 1; - - total_msg_posts = scatter_posts + allgather_posts + extra_src_posts ; - - - mca_bcol_basesmuma_portals_post_msg(cs, my_lmsg_ctl_pointer, - my_userbuf, my_lmsg_ctl_pointer->userbuf_length, - allgather_eq_h, - total_msg_posts, - blocked_post, - PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE - | PTL_MD_OP_GET | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE - ); - msg_posted = true; - /* set the memory barrier to ensure completion - * and signal I am done getting scatter data*/ - opal_atomic_wmb (); - my_ctl_pointer->flag = ready_flag; - - wait_for_peers(my_rank, my_lmsg_ctl_pointer->n_sends, data_buffs, - ready_flag, sequence_number); - - } else { - /* takes care of first level recurssive double */ - length = parent_lmsg_ctl_pointer->length/ - (1<<(parent_lmsg_ctl_pointer->n_sends - 1)); - my_lmsg_ctl_pointer->length = length; - my_lmsg_ctl_pointer->offset = parent_lmsg_ctl_pointer->offset; - - local_offset = my_lmsg_ctl_pointer->offset; - remote_offset = my_lmsg_ctl_pointer->offset; - - - while(!IS_SG_DATA_READY(parent_ctl_pointer, ready_flag, sequence_number)) { - opal_progress(); - } - - mca_bcol_basesmuma_portals_get_msg_fragment_no_eq_h(cs, my_lmsg_ctl_pointer, - parent_lmsg_ctl_pointer,local_offset, - remote_offset, length); - - /* signal that I am done reading data from parent */ - opal_atomic_wmb (); - my_ctl_pointer->flag = ready_flag; - } - - /* time for allgather phase */ - input_args->status = ALLGATHER; - - BASESMUMA_VERBOSE(5,("Completed %d found it from %d \n",my_rank,src)); - - while(ready_flag > parent_ctl_pointer->flag); - - goto Allgather; - } - } - - { - /* this is not who we are looking for, - * mark as false positive so we don't - * poll here again - */ - src_list[src_list_index] = -1; - matched = 0; - goto Probe; - } - -Allgather: - - BASESMUMA_VERBOSE(5,(" %d Completed Scatter %d times \n", my_rank, completed_scatter)); - - /* zip it back up - we have already taken care of first level */ - global_sg_offset = my_lmsg_ctl_pointer->offset; - - /* first level of zip up */ - length = 2 * fragment_size/pow_2; - - - if (!msg_posted) { - rc = PtlEQAlloc(((mca_bcol_basesmuma_portal_proc_info_t*) - cs->portals_info)->ni_h, MAX_PORTAL_EVENTS_IN_Q, - PTL_EQ_HANDLER_NONE, &allgather_eq_h); - - /* Posting for all phases of recursive doubling */ - extra_src_posts = (my_rank + pow_2 < group_size ) ? 
1: 0; - allgather_posts = pow_2_levels - 1; - total_msg_posts = allgather_posts + extra_src_posts ; - - - mca_bcol_basesmuma_portals_post_msg(cs, my_lmsg_ctl_pointer, - my_userbuf, my_lmsg_ctl_pointer->userbuf_length, - allgather_eq_h, total_msg_posts , blocked_post, - PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE - | PTL_MD_OP_GET | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE - ); - msg_posted = true; - } - - - ready_flag++; - opal_atomic_wmb (); - my_ctl_pointer->flag = ready_flag; - - /* - * Recursive doubling allgather implementation - */ - for( i = 1; i < pow_2_levels; i++) { - /* get my partner for this level */ - partner = my_rank^(1<flag >= ready_flag); - - if (partner_lmsg_ctl_pointer->offset < my_lmsg_ctl_pointer->offset) { - global_sg_offset -= length; - local_sg_offset = global_sg_offset; - } else { - local_sg_offset = global_sg_offset + length; - } - - - BASESMUMA_VERBOSE(10,("Allgather Phase: Get message from process %d, length %d", partner, length)); - mca_bcol_basesmuma_portals_get_msg_fragment_no_eq_h(cs, my_lmsg_ctl_pointer, - partner_lmsg_ctl_pointer,local_sg_offset, - local_sg_offset, length); - - ready_flag++; - opal_atomic_wmb (); - my_ctl_pointer->flag = ready_flag; - - /* Block until partner completed this level of recursive-doubling stage */ - while(!IS_SG_DATA_READY(partner_ctl_pointer, ready_flag, sequence_number)) { - opal_progress(); - } - - /* - * Compute length for next recursive doubling phase - */ - length *= 2; - } - - - /* If I am source for non-power 2 children wait for them */ - /* If I am secondary root then my partner would be real root - * so no need for exchange of data with the extra partner */ - extra_partner = my_rank + pow_2 ; - if ((extra_partner < group_size) && (!secondary_root)) { - volatile mca_bcol_basesmuma_header_t *extra_partner_ctl_pointer = NULL; - - extra_partner_ctl_pointer = data_buffs[extra_partner].ctl_struct; - /* Block until extra partner has copied data */ - while(!IS_SG_DATA_READY(extra_partner_ctl_pointer, ready_flag, sequence_number)) { - opal_progress(); - } - - } - -Release: - - /* free the event queue */ - rc = PtlEQFree(allgather_eq_h); - if (rc != PTL_OK) { - BASESMUMA_VERBOSE(10,("PtlEQFree() failed: %d )\n",rc)); - } - - my_ctl_pointer->starting_flag_value++; - input_args->status = FINISHED; - - return BCOL_FN_COMPLETE; - -} -#endif - -/* - * Blocking Portals Scatter Allgather - * - * - * - * - * - */ - -int bcol_basesmuma_lmsg_scatter_allgather_portals_bcast(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - - /* local variables */ - int i; - uint64_t length; - int my_rank, parent_rank, src =-1, matched = 0; - int *src_list = NULL; - int group_size = -1, dummy_group_size; - int first_instance=0; - int rc = OMPI_SUCCESS; - int leading_dim, buff_idx, idx; - int count=input_args->count; - size_t pack_len = 0, dt_size =0 ; - volatile int8_t ready_flag; - int flag_offset; - int pow_2, pow_2_levels; - int src_list_index = -1; - uint64_t fragment_size; /* user buffer size */ - int sg_matchbits; - - /* Input argument variables */ - void *my_userbuf = (void*)((unsigned char*)input_args->userbuf); - int64_t sequence_number=input_args->sequence_num; - struct ompi_datatype_t* dtype=input_args->dtype; - - /* Extra source variables */ - bool secondary_root = false; - int partner = -1, extra_partner = -1; - - /* Scatter Allgather offsets */ - uint64_t local_sg_offset = 0, global_sg_offset = 0, partner_offset = 0; - - /* Portals messaging relevant variables */ - 
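[Editorial sketch] The recursive-doubling loop above moves data with PtlGet; the sketch here tracks only the window bookkeeping: at step i the partner is my_rank ^ 2^i, the local window slides down by the current length when the partner holds the lower block (otherwise the upper block is fetched in place), and the merged window doubles for the next step. Hypothetical helper:

#include <stdint.h>

static int allgather_step(int my_rank, int level,
                          uint64_t partner_offset,
                          uint64_t *window_offset, uint64_t *length)
{
    int partner = my_rank ^ (1 << level);

    if (partner_offset < *window_offset) {
        *window_offset -= *length; /* window now starts at partner's block */
    }
    *length *= 2;                  /* block size for the next exchange     */

    return partner;
}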
mca_bcol_basesmuma_portal_proc_info_t *portals_info; - ptl_handle_eq_t allgather_eq_h; - ptl_event_t allgather_event; - bool blocked_post = false; - bool msg_posted = false; - int total_msg_posts = -1, scatter_posts = -1, allgather_posts = -1, extra_src_posts = -1; - - /* OMPI module and component variables */ - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - mca_bcol_basesmuma_module_t *bcol_module = - (mca_bcol_basesmuma_module_t *) c_input_args->bcol_module; - - /* Control structure and payload variables */ - volatile mca_bcol_basesmuma_ctl_struct_t **ctl_structs; - volatile mca_bcol_basesmuma_ctl_struct_t *my_ctl_pointer = NULL; - volatile mca_bcol_basesmuma_ctl_struct_t *parent_ctl_pointer = NULL; /* binomial fanout */ - volatile mca_bcol_basesmuma_ctl_struct_t *partner_ctl_pointer = NULL; /* recursive double */ - - /* Make sure there userbuffer is not null */ - assert(my_userbuf != NULL); - - /* Get portals info*/ - portals_info = (mca_bcol_basesmuma_portal_proc_info_t*)cs->portals_info; - - /* Get addresing information */ - buff_idx = input_args->src_desc->buffer_index; - group_size = bcol_module->colls_no_user_data.size_of_group; - leading_dim=bcol_module->colls_no_user_data.size_of_group; - my_rank = bcol_module->super.sbgp_partner_module->my_index; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - - /* calculate the largest power of two that is smaller than - * or equal to the group size - */ - pow_2_levels = pow_sm_k(2,group_size, &(dummy_group_size)); - if( group_size < (1<colls_with_user_data.ctl_buffs+idx; - - - my_ctl_pointer = ctl_structs[my_rank]; - if(my_ctl_pointer->sequence_number < sequence_number) { - first_instance = 1; - } - - if(first_instance) { - for( i = 0; i < NUM_SIGNAL_FLAGS; i++){ - my_ctl_pointer->flags[i] = -1; - } - my_ctl_pointer->index = 1; - - my_ctl_pointer->starting_flag_value = 0; - flag_offset = 0; - - } else { - my_ctl_pointer->index++; - } - - - /* increment the starting flag by one and return */ - flag_offset = my_ctl_pointer->starting_flag_value; - /*ready_flag = flag_offset + sequence_number + 1;*/ - ready_flag = flag_offset + 1; - - my_ctl_pointer->sequence_number = sequence_number; - sg_matchbits = sequence_number ; - - /* Construct my portal buffer address and copy to payload buffer */ - mca_bcol_basesmuma_construct_portal_address(&my_ctl_pointer->portals_buf_addr, - portals_info->portal_id.nid, - portals_info->portal_id.pid, - sg_matchbits, - bcol_module->super.sbgp_partner_module->group_comm->c_contextid); - - my_ctl_pointer->portals_buf_addr.userbuf = my_userbuf; - my_ctl_pointer->portals_buf_addr.userbuf_length = fragment_size; - - - if (input_args->root_flag) { - my_ctl_pointer->offset = 0; - my_ctl_pointer->n_sends = pow_2_levels; - my_ctl_pointer->length = fragment_size; - - rc = PtlEQAlloc(((mca_bcol_basesmuma_portal_proc_info_t*) - cs->portals_info)->ni_h, MAX_PORTAL_EVENTS_IN_Q, - PTL_EQ_HANDLER_NONE, &allgather_eq_h); - - if (rc != PTL_OK) { - BASESMUMA_VERBOSE(10,( "PtlEQAlloc() failed: %d \n",rc)); - goto Release; - } - - /* Compute number of posts required */ - if (my_rank >= pow_2) { - /* I am root and my rank is greater than pow_2, I will hand - * over to rank (that is < pow_2) to act as secondary root - */ - total_msg_posts = 1; - } - else { - - extra_src_posts = (my_rank + pow_2 < group_size ) ? 
1: 0; - scatter_posts = my_ctl_pointer->n_sends; - allgather_posts = pow_2_levels - 1; - - total_msg_posts = scatter_posts + allgather_posts + extra_src_posts ; - } - - mca_bcol_basesmuma_portals_post_msg(cs, - &my_ctl_pointer->portals_buf_addr, - my_userbuf, fragment_size, allgather_eq_h, - total_msg_posts, - blocked_post, - PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE | PTL_MD_OP_GET | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE); - msg_posted = true ; - - /* important that these be set before my children - * see the ready flag raised - */ - opal_atomic_wmb (); - my_ctl_pointer->flags[BCAST_FLAGS] = ready_flag; - BASESMUMA_VERBOSE(1,("I am the root(ctl_pointer %x) of the data flag value %d",my_ctl_pointer, my_ctl_pointer->flag)); - /* Wait for my scatter partner */ - if (my_rank >= pow_2) { - int scatter_partner = -1; - volatile mca_bcol_basesmuma_ctl_struct_t *scatter_partner_ctl_pointer = NULL; - - scatter_partner = my_rank - pow_2; - scatter_partner_ctl_pointer = - ctl_structs[scatter_partner]; - - while(!IS_SG_DATA_READY(scatter_partner_ctl_pointer, ready_flag, - sequence_number)){ -SCATTER_WAIT_FOR_EXTRA: - opal_progress(); - } - - goto Release; - } - else { - - wait_for_peers_nb(my_rank, my_ctl_pointer->n_sends, ctl_structs, - ready_flag, sequence_number); - } - - goto Allgather; - } - - -Extra : - if( my_rank >= pow_2 ) { - parent_rank = my_rank & (pow_2-1); - parent_ctl_pointer = ctl_structs[parent_rank]; - - ready_flag = ready_flag + pow_2_levels; - - while(!IS_SG_DATA_READY(parent_ctl_pointer, ready_flag, sequence_number)) { - - opal_progress(); - - } - - - mca_bcol_basesmuma_portals_get_msg_fragment_no_eq_h(cs, &my_ctl_pointer->portals_buf_addr, - &parent_ctl_pointer->portals_buf_addr, 0, - 0, fragment_size); - - my_ctl_pointer->flags[BCAST_FLAG] = ready_flag; - - goto Release; - } - -Scatter: - - /* compute the list of possible sources */ - src_list = (int *) malloc(sizeof(int) * (pow_2_levels + 1)); - for( i = 0; i < pow_2_levels; i++) { - src_list[i] = my_rank ^ (1< pow_2 */ - if ((my_rank + pow_2) < group_size) { - src_list[i] = my_rank + pow_2; - } else { - src_list[i] = -1; - } - -Probe: - - /* If I am not the root, then poll on possible "senders'" control structs */ - /* For portals we block for now */ - while (!matched) { - /* Shared memory iprobe */ - SG_LARGE_MSG_NB_PROBE(src_list, pow_2_levels + 1, - src_list_index, matched, src, ctl_structs, - parent_ctl_pointer, ready_flag, sequence_number); - } - - BASESMUMA_VERBOSE(1,("Scatter : Im non-root match received")); - /* If I am a secondary root */ - if ((matched) && (src == pow_2 + my_rank)) { - volatile mca_bcol_basesmuma_ctl_struct_t *extra_src_ctl_pointer = NULL; - - secondary_root = true; - BASESMUMA_VERBOSE(10,("I am the secondary root for the data")); - my_ctl_pointer->offset = 0; - my_ctl_pointer->n_sends = pow_2_levels; - my_ctl_pointer->length = fragment_size; - - extra_src_ctl_pointer = ctl_structs[src]; - - /* create an event queue for the incoming buffer */ - rc = PtlEQAlloc(((mca_bcol_basesmuma_portal_proc_info_t*) - cs->portals_info)->ni_h, MAX_PORTAL_EVENTS_IN_Q, - PTL_EQ_HANDLER_NONE, &allgather_eq_h); - - if (rc != PTL_OK) { - BASESMUMA_VERBOSE(10,( "PtlEQAlloc() failed: %d \n",rc)); - goto Release; - } - - mca_bcol_basesmuma_portals_get_msg_fragment_no_eq_h(cs, - &my_ctl_pointer->portals_buf_addr, - &extra_src_ctl_pointer->portals_buf_addr, 0, - 0, fragment_size); - - - extra_src_posts = 0; - scatter_posts = my_ctl_pointer->n_sends; - allgather_posts = 
pow_2_levels - 1; - - total_msg_posts = scatter_posts + allgather_posts + extra_src_posts ; - - mca_bcol_basesmuma_portals_post_msg(cs, - &my_ctl_pointer->portals_buf_addr, - my_userbuf, fragment_size, allgather_eq_h, - total_msg_posts, - blocked_post, - PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE | PTL_MD_OP_GET - | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE); - msg_posted = true ; - - /* important that these be set before my children - * see the ready flag raised - */ - opal_atomic_wmb (); - my_ctl_pointer->flags[BCAST_FLAG] = ready_flag; - - wait_for_peers_nb(my_rank, my_ctl_pointer->n_sends, ctl_structs, - ready_flag, sequence_number); - goto Allgather; - } - - /* we need to see whether this is really - * who we are looking for - */ - for( i = 0; i < parent_ctl_pointer->n_sends; i++) { - uint64_t local_offset = 0; - uint64_t remote_offset = 0; - - BASESMUMA_VERBOSE(1,("%d found it from %d \n",my_rank,src)); - - if( my_rank == (src^(1<n_sends = i; - - /* Am I source for other process during scatter phase */ - if ( i > 0) { - - /* compute the size of the chunk to copy */ - length = (parent_ctl_pointer->length)/ - (1<<(parent_ctl_pointer->n_sends - my_ctl_pointer->n_sends)); - my_ctl_pointer->length = length; - my_ctl_pointer->offset = - parent_ctl_pointer->offset + length; - - - local_offset = my_ctl_pointer->offset; - remote_offset = parent_ctl_pointer->offset + length; - - mca_bcol_basesmuma_portals_get_msg_fragment_no_eq_h(cs, &my_ctl_pointer->portals_buf_addr, - &parent_ctl_pointer->portals_buf_addr,local_offset, - remote_offset, length); - rc = PtlEQAlloc(((mca_bcol_basesmuma_portal_proc_info_t*) - cs->portals_info)->ni_h, MAX_PORTAL_EVENTS_IN_Q, - PTL_EQ_HANDLER_NONE, - &allgather_eq_h); - - if (rc != PTL_OK) { - BASESMUMA_VERBOSE(10,( "PtlEQAlloc() failed: %d \n",rc)); - goto Release; - } - - /* Now post the message for other children to read */ - extra_src_posts = (my_rank + pow_2 < group_size ) ? 
1: 0; - scatter_posts = my_ctl_pointer->n_sends; - allgather_posts = pow_2_levels - 1; - - total_msg_posts = scatter_posts + allgather_posts + extra_src_posts ; - - - mca_bcol_basesmuma_portals_post_msg(cs, &my_ctl_pointer->portals_buf_addr, - my_userbuf, my_ctl_pointer->portals_buf_addr.userbuf_length, - allgather_eq_h, - total_msg_posts, - blocked_post, - PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE - | PTL_MD_OP_GET | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE - ); - msg_posted = true; - /* set the memory barrier to ensure completion */ - opal_atomic_wmb (); - /* signal that I am done */ - my_ctl_pointer->flags[BCAST_FLAG] = ready_flag; - - wait_for_peers_nb(my_rank, my_ctl_pointer->n_sends, ctl_structs, - ready_flag, sequence_number); - - } else { - /* takes care of first level recurssive double */ - length = parent_ctl_pointer->length/ - (1<<(parent_ctl_pointer->n_sends - 1)); - my_ctl_pointer->length = length; - my_ctl_pointer->offset = parent_ctl_pointer->offset; - - local_offset = my_ctl_pointer->offset; - remote_offset = my_ctl_pointer->offset; - - - while(!IS_SG_DATA_READY(parent_ctl_pointer, ready_flag, sequence_number)) { - opal_progress(); - } - - mca_bcol_basesmuma_portals_get_msg_fragment_no_eq_h(cs, - &my_ctl_pointer->portals_buf_addr, - &parent_ctl_pointer->portals_buf_addr, local_offset, - remote_offset, length); - - /* signal that I am done reading data from parent */ - opal_atomic_wmb (); - my_ctl_pointer->flags[BCAST_FLAG] = ready_flag; - } - - /* time for allgather phase */ - input_args->status = ALLGATHER; - - BASESMUMA_VERBOSE(5,("Completed %d found it from %d \n",my_rank,src)); - - while(ready_flag > parent_ctl_pointer->flags[BCAST_FLAG]); - - goto Allgather; - } - } - - { - /* this is not who we are looking for, - * mark as false positive so we don't - * poll here again - */ - src_list[src_list_index] = -1; - matched = 0; - goto Probe; - } - -Allgather: - - BASESMUMA_VERBOSE(5,(" %d Completed Scatter %d times \n", my_rank, completed_scatter)); - - /* zip it back up - we have already taken care of first level */ - global_sg_offset = my_ctl_pointer->offset; - - /* first level of zip up */ - length = 2 * fragment_size/pow_2; - - - if (!msg_posted) { - rc = PtlEQAlloc(((mca_bcol_basesmuma_portal_proc_info_t*) - cs->portals_info)->ni_h, MAX_PORTAL_EVENTS_IN_Q, - PTL_EQ_HANDLER_NONE, &allgather_eq_h); - - /* Posting for all phases of recursive doubling */ - extra_src_posts = (my_rank + pow_2 < group_size ) ? 
1: 0; - allgather_posts = pow_2_levels - 1; - total_msg_posts = allgather_posts + extra_src_posts ; - - - mca_bcol_basesmuma_portals_post_msg(cs, &my_ctl_pointer->portals_buf_addr, - my_userbuf, my_ctl_pointer->portals_buf_addr.userbuf_length, - allgather_eq_h, total_msg_posts , blocked_post, - PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE - | PTL_MD_OP_GET | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE - ); - msg_posted = true; - } - - ready_flag++; - opal_atomic_wmb (); - my_ctl_pointer->flags[BCAST_FLAG] = ready_flag; - - for( i = 1; i < pow_2_levels; i++) { - /* get my partner for this level */ - partner = my_rank^(1<flags[BCAST_FLAG] >= ready_flag); - - if (partner_ctl_pointer->offset < my_ctl_pointer->offset) { - global_sg_offset -= length; - local_sg_offset = global_sg_offset; - } else { - local_sg_offset = global_sg_offset + length; - } - - - BASESMUMA_VERBOSE(10,("Allgather Phase: Get message from process %d, length %d", partner, length)); - mca_bcol_basesmuma_portals_get_msg_fragment_no_eq_h(cs, - &my_ctl_pointer->portals_buf_addr, - &partner_ctl_pointer->portals_buf_addr,local_sg_offset, - local_sg_offset, length); - - ready_flag++; - opal_atomic_wmb (); - my_ctl_pointer->flags[BCAST_FLAG] = ready_flag; - - /* Block until partner is at this level of recursive-doubling stage */ - while(!IS_SG_DATA_READY(partner_ctl_pointer, ready_flag, sequence_number)) { - opal_progress(); - } - - /* double the length */ - length *= 2; - } - - - /* If I am source for non-power 2 children wait for them */ - /* If I am secondary root then my partner would be real root - * so no need for exchange of data with the extra partner */ - extra_partner = my_rank + pow_2 ; - if ((extra_partner < group_size) && (!secondary_root)) { - volatile mca_bcol_basesmuma_ctl_struct_t *extra_partner_ctl_pointer = NULL; - - extra_partner_ctl_pointer = ctl_structs[extra_partner]; - /* Block until extra partner has copied data */ - while(!IS_SG_DATA_READY(extra_partner_ctl_pointer, ready_flag, sequence_number)) { - opal_progress(); - } - - } - -Release: - - /* free the event queue */ - rc = PtlEQFree(allgather_eq_h); - if (rc != PTL_OK) { - BASESMUMA_VERBOSE(10,("PtlEQFree() failed: %d )\n",rc)); - } - - my_ctl_pointer->starting_flag_value++; - input_args->status = FINISHED; - - return BCOL_FN_COMPLETE; - -} - - -/* - * static sg_state_t *sg_state = NULL; - */ - -int bcol_basesmuma_lmsg_scatter_allgather_portals_nb_bcast(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - int i; - mca_bcol_basesmuma_portal_proc_info_t *portals_info; - int dummy_group_size; - int rc = OMPI_SUCCESS; - int buff_idx; - int count=input_args->count; - size_t pack_len = 0, dt_size =0 ; - struct ompi_datatype_t* dtype=input_args->dtype; - int completed_posts = 0; - sg_state_t *sg_state = NULL; - mca_bcol_basesmuma_module_t *bcol_module = NULL; - int extra_src_posts = -1,allgather_posts = -1, total_msg_posts = -1; - - bcol_module = (mca_bcol_basesmuma_module_t *) c_input_args->bcol_module; - /* - sg_state = (sg_state_t*)bcol_module->sg_state; - */ - sg_state = (sg_state_t*)&(bcol_module->sg_state); - /* Re-entering the algorithm */ - switch (sg_state->phase) { - case PROBE: - if (input_args->root_flag) { - /* I became a root for this group */ - sg_state->phase = START; - goto Start; - } - goto Probe; - break; - - case SCATTER_ROOT_WAIT: - goto Scatter_root_wait; - - case SCATTER_EXTRA_ROOT_WAIT: - goto Scatter_extra_root_wait; - - case SCATTER_PARENT_WAIT: - goto Scatter_parent_wait; 
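[Editorial sketch] The switch above is the heart of the nonblocking variant: whenever a wait cannot complete within the probe budget, the phase is recorded in sg_state and BCOL_FN_STARTED is returned, and the next invocation dispatches straight back to the matching label instead of restarting. A reduced sketch of that mapping, enum values illustrative:

typedef enum {
    SG_PHASE_INIT,
    SG_PHASE_PROBE,                  /* still looking for a parent */
    SG_PHASE_SCATTER_ROOT_WAIT,      /* root waiting on children   */
    SG_PHASE_SCATTER_EXTRA_ROOT_WAIT,
    SG_PHASE_SCATTER_PARENT_WAIT
} sg_phase_sketch_t;

static const char *resume_point(sg_phase_sketch_t phase)
{
    switch (phase) {
    case SG_PHASE_PROBE:                   return "Probe";
    case SG_PHASE_SCATTER_ROOT_WAIT:       return "Scatter_root_wait";
    case SG_PHASE_SCATTER_EXTRA_ROOT_WAIT: return "Scatter_extra_root_wait";
    case SG_PHASE_SCATTER_PARENT_WAIT:     return "Scatter_parent_wait";
    default:                               return "Start";
    }
}

Keeping this state in the per-buffer sg_state rather than on the stack is what lets the progress engine drive many outstanding broadcasts concurrently.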
- - default: - break; - } - - sg_state->phase = INIT; - - BASESMUMA_VERBOSE(1,("Im entering portals_nb_bcast Unknown root ")); - /* Allocate space for algorithm state */ - /* - sg_state = (sg_state_t *) malloc(sizeof(sg_state_t)); - bcol_module->sg_state = (void *)sg_state; - - assert(NULL != sg_state); - */ - - sg_state->secondary_root = false; - sg_state->msg_posted = false; - sg_state->matched = 0; - sg_state->phase = SCATTER; - /* Copy input args to local variables */ - sg_state->my_userbuf = (void*)((unsigned char*)input_args->userbuf); - assert(sg_state->my_userbuf != NULL); - sg_state->sequence_number=input_args->sequence_num; - sg_state->cs = &mca_bcol_basesmuma_component; - sg_state->bcol_module = (mca_bcol_basesmuma_module_t *) c_input_args->bcol_module; - /* Should this be buffer index (ML) or control buffer index ? */ - buff_idx = input_args->src_desc->buffer_index; - - /* Initialize SM group info used for control signaling */ - init_sm_group_info(sg_state, buff_idx); - - /* calculate the largest power of two that is smaller than - * or equal to the group size - */ - sg_state->pow_2_levels = pow_sm_k(2, sg_state->group_size, &(dummy_group_size)); - if( sg_state->group_size < (1 << sg_state->pow_2_levels)) { - sg_state->pow_2_levels--; - } - /* power-of-two group size */ - sg_state->pow_2 = 1 << sg_state->pow_2_levels; - - - /* we will work only on packed data - so compute the length*/ - ompi_datatype_type_size(dtype, &dt_size); - sg_state->fragment_size = count*dt_size; - - - /* Init portals scatter allgather info */ - rc = init_sm_portals_sg_info(sg_state); - - if (rc != OMPI_SUCCESS) { - goto Release; - } - -Start : -Extra : - /* - * My rank > pow2 groupsize - */ - if( sg_state->my_rank >= sg_state->pow_2 ) { - - if (input_args->root_flag){ - - rc = sm_portals_extra_root_scatter(sg_state); - if (rc != OMPI_SUCCESS) { - goto Release; - } - - } else { - /* - * Wait for my partner to receive bcast data, and copy from it - */ - int extra_parent_rank; - volatile mca_bcol_basesmuma_ctl_struct_t *extra_parent_ctl_pointer = NULL; /* binomial fanout */ - extra_parent_rank = sg_state->my_rank & (sg_state->pow_2-1); - extra_parent_ctl_pointer = sg_state->ctl_structs[extra_parent_rank]; - - sg_state->ready_flag = sg_state->ready_flag + sg_state->pow_2_levels; - - while(!IS_SG_DATA_READY(extra_parent_ctl_pointer, sg_state->ready_flag, - sg_state->sequence_number)) { - opal_progress(); - - } - - mca_bcol_basesmuma_portals_get_msg_fragment(sg_state->cs, - sg_state->read_eq, - &sg_state->my_ctl_pointer->portals_buf_addr, - &extra_parent_ctl_pointer->portals_buf_addr, 0, - 0, sg_state->fragment_size); - - sg_state->my_ctl_pointer->flag = sg_state->ready_flag; - } - - goto Release; - } - - if (input_args->root_flag) { - - BASESMUMA_VERBOSE(1,("Scatter : Im root (bcol_module %x,ctl_pointer %x) my ready flag %d \n", - sg_state->bcol_module, sg_state->my_ctl_pointer, sg_state->ready_flag)); - rc = sm_portals_root_scatter(sg_state); - - /* gvm Fix: Redudant - opal_atomic_wmb (); - */ - - sg_state->my_ctl_pointer->flag = sg_state->ready_flag; - - if (rc != OMPI_SUCCESS) { - goto Release; - } - -Scatter_root_wait: - - BASESMUMA_VERBOSE(5,("Scatter: Im root waiting for children to complete my flag %d", - sg_state->my_ctl_pointer->flag)); - - for( i = 0; i < sg_state->cs->num_to_probe && completed_posts < sg_state->my_ctl_pointer->n_sends; - i++) { - - completed_posts = wait_for_post_complete_nb(sg_state->my_rank, - sg_state->my_ctl_pointer->n_sends, sg_state->ctl_structs, - sg_state->ready_flag, 
sg_state->sequence_number); - - } - - if (completed_posts < sg_state->my_ctl_pointer->n_sends) { - sg_state->phase = SCATTER_ROOT_WAIT; - return BCOL_FN_STARTED; - } - - goto Allgather; - } - - -Scatter: - - BASESMUMA_VERBOSE(1,("Scatter : Im non-root probing for data ")); - /* compute the list of possible sources */ - /* - sg_state->src_list = (int *) malloc(sizeof(int) * (sg_state->pow_2_levels + 1)); - */ - assert(MAX_SM_GROUP_SIZE > sg_state->pow_2_levels+1); - - for( i = 0; i < sg_state->pow_2_levels; i++) { - sg_state->src_list[i] = sg_state->my_rank ^ (1< pow_2 */ - - if ((sg_state->my_rank + sg_state->pow_2) < sg_state->group_size) { - sg_state->src_list[i] = sg_state->my_rank + sg_state->pow_2; - } else { - sg_state->src_list[i] = -1; - } - - - BASESMUMA_VERBOSE(1,("Scatter : Ready flag %d Im non-root probing for %d procs %d:%d \n", - sg_state->ready_flag,sg_state->pow_2_levels,sg_state->src_list[0],sg_state->src_list[1])); -Probe: - /* If I am not the root, then poll on possible "senders'" control structs */ - /* For portals we block for now */ - /* Shared memory iprobe */ - - - /* - SG_LARGE_MSG_NB_PROBE(sg_state->src_list, sg_state->pow_2_levels + 1, - sg_state->src_list_index, sg_state->matched, sg_state->src, - sg_state->ctl_structs, - sg_state->parent_ctl_pointer, sg_state->ready_flag, sg_state->sequence_number); - */ - - for( i = 0; i < sg_state->cs->num_to_probe && 0 == sg_state->matched; - i++) { - sg_large_msg_probe(sg_state); - } - - if (!sg_state->matched) { - sg_state->phase = PROBE; - return BCOL_FN_STARTED; - } - - BASESMUMA_VERBOSE(1,("Scatter : Im non-root match received")); - /* If I am a secondary root */ - if ((sg_state->matched) && (sg_state->src == sg_state->pow_2 + sg_state->my_rank)) { - - BASESMUMA_VERBOSE(5,("Scatter : Im secondary root \n")); - - rc = sm_portals_secondary_root_scatter(sg_state); - if (rc != OMPI_SUCCESS) { - goto Release; - } - -Scatter_extra_root_wait: - - for( i = 0; i < sg_state->cs->num_to_probe && completed_posts < sg_state->my_ctl_pointer->n_sends; - i++) { - - completed_posts = wait_for_post_complete_nb(sg_state->my_rank, sg_state->my_ctl_pointer->n_sends, - sg_state->ctl_structs, sg_state->ready_flag, sg_state->sequence_number); - - } - - if (completed_posts < sg_state->my_ctl_pointer->n_sends) { - sg_state->phase = SCATTER_EXTRA_ROOT_WAIT; - return BCOL_FN_STARTED; - } - - goto Allgather; - } - - /* we need to see whether this is really - * who we are looking for - */ - for( i = 0; i < sg_state->parent_ctl_pointer->n_sends; i++) { - uint64_t local_offset = 0; - uint64_t remote_offset = 0; - - BASESMUMA_VERBOSE(5,("%d found it from %d \n",sg_state->my_rank,sg_state->src)); - - if( sg_state->my_rank == (sg_state->src^(1<parent_ctl_pointer = sg_state->ctl_structs[sg_state->src]; - - /* we found our root within the group ... 
*/ - BASESMUMA_VERBOSE(5,("Shared memory probe was matched, the root is %d ",sg_state->src)); - - sg_state->my_ctl_pointer->n_sends = i; - - /* Am I source for other process during scatter phase */ - if ( i > 0) { - BASESMUMA_VERBOSE(1,("Scatter : Im Internal node \n")); - - rc = sm_portals_internode_scatter(sg_state); - - if (rc != OMPI_SUCCESS) { - goto Release; - } - -Scatter_parent_wait: - - for( i = 0; i < sg_state->cs->num_to_probe && completed_posts < sg_state->my_ctl_pointer->n_sends; - i++) { - - completed_posts = wait_for_post_complete_nb(sg_state->my_rank, - sg_state->my_ctl_pointer->n_sends, - sg_state->ctl_structs, - sg_state->ready_flag, sg_state->sequence_number); - } - - if (completed_posts < sg_state->my_ctl_pointer->n_sends) { - sg_state->phase = SCATTER_PARENT_WAIT; - return BCOL_FN_STARTED; - } - - } else { - - BASESMUMA_VERBOSE(1,("Scatter : Im leaf node \n")); - - /* takes care of first level recurssive double */ - sg_state->length = sg_state->parent_ctl_pointer->length/ - (1<<(sg_state->parent_ctl_pointer->n_sends - 1)); - sg_state->my_ctl_pointer->length = sg_state->length; - sg_state->my_ctl_pointer->offset = sg_state->parent_ctl_pointer->offset; - - - while(!IS_SG_DATA_READY(sg_state->parent_ctl_pointer, - sg_state->ready_flag, sg_state->sequence_number)) { - opal_progress(); - } - - mca_bcol_basesmuma_portals_get_msg_fragment(sg_state->cs, - sg_state->read_eq, - &sg_state->my_ctl_pointer->portals_buf_addr, - &sg_state->parent_ctl_pointer->portals_buf_addr, - sg_state->my_ctl_pointer->offset, - sg_state->my_ctl_pointer->offset, sg_state->length); - - /* signal that I am done reading data from parent */ - /* - opal_atomic_wmb (); - */ - sg_state->my_ctl_pointer->flag = sg_state->ready_flag; - } - - BASESMUMA_VERBOSE(1,("Completed %d found it from %d \n", - sg_state->my_rank, sg_state->src)); - - while(sg_state->ready_flag > sg_state->parent_ctl_pointer->flag); - - goto Allgather; - } - } - - { - /* this is not who we are looking for, - * mark as false positive so we don't - * poll here again - */ - sg_state->src_list[sg_state->src_list_index] = -1; - sg_state->matched = 0; - goto Probe; - } - -Allgather: - - BASESMUMA_VERBOSE(5,("Completed Scatter phase")); - - /* zip it back up - we have already taken care of first level */ - sg_state->global_sg_offset = sg_state->my_ctl_pointer->offset; - - /* first level of zip up */ - sg_state->length = 2 * sg_state->fragment_size/sg_state->pow_2; - - - /* Posting for all phases of recursive doubling */ - extra_src_posts = (sg_state->my_rank + sg_state->pow_2 < sg_state->group_size ) ? 
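/* illustration, not from the sources: with group_size = 6 and pow_2 = 4,
 * only ranks 0 and 1 have an extra partner (ranks 4 and 5), so they post
 * one additional read descriptor; ranks 2 and 3 post none */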
1: 0; - allgather_posts = sg_state->pow_2_levels - 1; - total_msg_posts = allgather_posts + extra_src_posts ; - - if ((!sg_state->msg_posted) && (total_msg_posts > 0)){ - - mca_bcol_basesmuma_portals_post_msg(sg_state->cs, &sg_state->my_ctl_pointer->portals_buf_addr, - sg_state->my_userbuf, sg_state->my_ctl_pointer->portals_buf_addr.userbuf_length, - PTL_EQ_NONE, total_msg_posts, blocked_post, - PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE - | PTL_MD_OP_GET | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE - ); - sg_state->msg_posted = true; - } - - BASESMUMA_VERBOSE(5,("Done with allgather phase")); - /* I reached an allgather phase */ - sg_state->ready_flag++; - opal_atomic_wmb (); - sg_state->my_ctl_pointer->flag = sg_state->ready_flag; - - rc = sm_portals_bcasts_allgather_phase(sg_state); - - if (rc != OMPI_SUCCESS) { - BASESMUMA_VERBOSE(10,("Error in Bcast's allgather phase ")); - goto Release; - } - - /* If I am source for non-power 2 children wait for them */ - /* If I am secondary root then my partner would be real root - * so no need for exchange of data with the extra partner */ - sg_state->extra_partner = sg_state->my_rank + sg_state->pow_2 ; - if ((sg_state->extra_partner < sg_state->group_size) && (!sg_state->secondary_root)) { - - sg_state->extra_partner_ctl_pointer = sg_state->ctl_structs[sg_state->extra_partner]; - /* Block until extra partner has copied data */ - while(!IS_SG_DATA_READY(sg_state->extra_partner_ctl_pointer, - sg_state->ready_flag, sg_state->sequence_number)) { - opal_progress(); - } - - } - -Release: - - BASESMUMA_VERBOSE(1,("Im done ")); - - sg_state->my_ctl_pointer->starting_flag_value++; - sg_state->phase = FINISHED; - - - return BCOL_FN_COMPLETE; - -} - - -int bcol_basesmuma_lmsg_scatter_allgather_portals_nb_knownroot_bcast(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - - int i; - mca_bcol_basesmuma_portal_proc_info_t *portals_info; - int dummy_group_size; - int rc = OMPI_SUCCESS; - int buff_idx; - int count=input_args->count; - size_t pack_len = 0, dt_size =0 ; - struct ompi_datatype_t* dtype=input_args->dtype; - int completed_posts = 0; - sg_state_t *sg_state = NULL; - mca_bcol_basesmuma_module_t *bcol_module=NULL; - int extra_src_posts = -1,allgather_posts = -1, total_msg_posts = -1; - bcol_module = (mca_bcol_basesmuma_module_t *) c_input_args->bcol_module; - - sg_state = (sg_state_t*)(&bcol_module->sg_state); - - BASESMUMA_VERBOSE(1,("Im entering nb_knownroot_bcast bcol = %x ", - c_input_args->bcol_module)); - /* Re-entering the algorithm */ - switch (sg_state->phase) { - case PROBE: - if (input_args->root_flag) { - /* I became a root for this group */ - sg_state->phase = START; - goto Start; - } - goto Probe; - break; - - case SCATTER_ROOT_WAIT: - goto Scatter_root_wait; - - case SCATTER_EXTRA_ROOT_WAIT: - goto Scatter_extra_root_wait; - - case SCATTER_PARENT_WAIT: - goto Scatter_parent_wait; - - default: - break; - } - - /* Allocate space for algorithm state */ - /* - sg_state = (sg_state_t *) malloc(sizeof(sg_state_t)); - bcol_module->sg_state = (void*) sg_state; - */ - - /* Make sure there userbuffer is not null */ - - sg_state->phase = INIT; - sg_state->secondary_root = false; - sg_state->msg_posted = false; - sg_state->matched = 0; - /* Copy input args to local variables */ - sg_state->my_userbuf = (void*)((unsigned char*)input_args->userbuf); - assert(sg_state->my_userbuf != NULL); - sg_state->sequence_number=input_args->sequence_num; - sg_state->cs = 
&mca_bcol_basesmuma_component; - sg_state->bcol_module = bcol_module; - buff_idx = input_args->src_desc->buffer_index; - - /* Initialize SM group info used for control signaling */ - init_sm_group_info(sg_state, buff_idx); - - /* calculate the largest power of two that is smaller than - * or equal to the group size - */ - sg_state->pow_2_levels = pow_sm_k(2, sg_state->group_size, &(dummy_group_size)); - if( sg_state->group_size < (1 << sg_state->pow_2_levels)) { - sg_state->pow_2_levels--; - } - /* power-of-two group size */ - sg_state->pow_2 = 1 << sg_state->pow_2_levels; - - - /* we will work only on packed data - so compute the length*/ - ompi_datatype_type_size(dtype, &dt_size); - sg_state->fragment_size = count*dt_size; - - - /* Init portals scatter allgather info */ - rc = init_sm_portals_sg_info(sg_state); - - if (rc != OMPI_SUCCESS) { - goto Release; - } -Start: -Extra : - /* - * My rank > pow2 groupsize - */ - if( sg_state->my_rank >= sg_state->pow_2 ) { - - if (input_args->root_flag){ - - rc = sm_portals_extra_root_scatter(sg_state); - if (rc != OMPI_SUCCESS) { - goto Release; - } - - } else { - /* - * Wait for my partner to receive bcast data, and copy from it - */ - int extra_parent_rank; - volatile mca_bcol_basesmuma_ctl_struct_t *extra_parent_ctl_pointer = NULL; /* binomial fanout */ - extra_parent_rank = sg_state->my_rank & (sg_state->pow_2-1); - extra_parent_ctl_pointer = sg_state->ctl_structs[extra_parent_rank]; - - sg_state->ready_flag = sg_state->ready_flag + sg_state->pow_2_levels; - - while(!IS_SG_DATA_READY(extra_parent_ctl_pointer, sg_state->ready_flag, - sg_state->sequence_number)) { - opal_progress(); - - } - - mca_bcol_basesmuma_portals_get_msg_fragment(sg_state->cs, - sg_state->read_eq, - &sg_state->my_ctl_pointer->portals_buf_addr, - &extra_parent_ctl_pointer->portals_buf_addr, 0, - 0, sg_state->fragment_size); - - sg_state->my_ctl_pointer->flag = sg_state->ready_flag; - } - - goto Release; - } - - if (input_args->root_flag) { - - BASESMUMA_VERBOSE(1,("Scatter : Im root (bcol_module %x,ctl_pointer %x) my ready flag %d \n", - bcol_module, sg_state->my_ctl_pointer, sg_state->ready_flag)); - rc = sm_portals_root_scatter(sg_state); - - sg_state->my_ctl_pointer->flag = sg_state->ready_flag; - - if (rc != OMPI_SUCCESS) { - goto Release; - } - -Scatter_root_wait: - - BASESMUMA_VERBOSE(5,("Scatter: Im root waiting for children to complete my flag %d", - sg_state->my_ctl_pointer->flag)); - for( i = 0; i < sg_state->cs->num_to_probe && completed_posts < sg_state->my_ctl_pointer->n_sends; - i++) { - completed_posts = wait_for_post_complete_nb(sg_state->my_rank, - sg_state->my_ctl_pointer->n_sends, sg_state->ctl_structs, - sg_state->ready_flag, sg_state->sequence_number); - } - - if (completed_posts < sg_state->my_ctl_pointer->n_sends) { - sg_state->phase = SCATTER_ROOT_WAIT; - return BCOL_FN_STARTED; - } - - goto Allgather; - } - - -Probe: - - sg_state->src = compute_src_from_root(input_args->root_route->rank, sg_state->my_rank, - sg_state->pow_2, sg_state->group_size); - - sg_state->parent_ctl_pointer = sg_state->ctl_structs[sg_state->src]; - - while(!IS_SG_DATA_READY(sg_state->parent_ctl_pointer, sg_state->ready_flag, - sg_state->sequence_number)) { - opal_progress(); - - } - sg_state->matched = true; - - /* If I am a secondary root */ - if ((sg_state->matched) && (sg_state->src == sg_state->pow_2 + sg_state->my_rank)) { - - rc = sm_portals_secondary_root_scatter(sg_state); - if (rc != OMPI_SUCCESS) { - goto Release; - } -Scatter_extra_root_wait: - - for( i = 0; i < 
sg_state->cs->num_to_probe && completed_posts < sg_state->my_ctl_pointer->n_sends; - i++) { - - completed_posts = wait_for_post_complete_nb(sg_state->my_rank, sg_state->my_ctl_pointer->n_sends, - sg_state->ctl_structs, sg_state->ready_flag, sg_state->sequence_number); - - } - - if (completed_posts < sg_state->my_ctl_pointer->n_sends) { - sg_state->phase = SCATTER_EXTRA_ROOT_WAIT; - return BCOL_FN_STARTED; - } - - goto Allgather; - } - - /* we need to see whether this is really - * who we are looking for - */ - for( i = 0; i < sg_state->parent_ctl_pointer->n_sends; i++) { - uint64_t local_offset = 0; - uint64_t remote_offset = 0; - - BASESMUMA_VERBOSE(5,("%d found it from %d \n",sg_state->my_rank,sg_state->src)); - - if( sg_state->my_rank == (sg_state->src^(1<parent_ctl_pointer = sg_state->ctl_structs[sg_state->src]; - - /* we found our root within the group ... */ - BASESMUMA_VERBOSE(10,("Shared memory probe was matched, the root is %d ",sg_state->src)); - - sg_state->my_ctl_pointer->n_sends = i; - - /* Am I source for other process during scatter phase */ - if ( i > 0) { - - rc = sm_portals_internode_scatter(sg_state); - - if (rc != OMPI_SUCCESS) { - goto Release; - } -Scatter_parent_wait: - - for( i = 0; i < sg_state->cs->num_to_probe && completed_posts < sg_state->my_ctl_pointer->n_sends; - i++) { - - completed_posts = wait_for_post_complete_nb(sg_state->my_rank, - sg_state->my_ctl_pointer->n_sends, - sg_state->ctl_structs, - sg_state->ready_flag, sg_state->sequence_number); - } - - if (completed_posts < sg_state->my_ctl_pointer->n_sends) { - sg_state->phase = SCATTER_PARENT_WAIT; - return BCOL_FN_STARTED; - } - - } else { - - /* takes care of first level recursive double */ - sg_state->length = sg_state->parent_ctl_pointer->length/ - (1<<(sg_state->parent_ctl_pointer->n_sends - 1)); - sg_state->my_ctl_pointer->length = sg_state->length; - sg_state->my_ctl_pointer->offset = sg_state->parent_ctl_pointer->offset; - - - while(!IS_SG_DATA_READY(sg_state->parent_ctl_pointer, - sg_state->ready_flag, sg_state->sequence_number)) { - opal_progress(); - } - - mca_bcol_basesmuma_portals_get_msg_fragment(sg_state->cs, - sg_state->read_eq, - &sg_state->my_ctl_pointer->portals_buf_addr, - &sg_state->parent_ctl_pointer->portals_buf_addr, - sg_state->my_ctl_pointer->offset, - sg_state->my_ctl_pointer->offset, sg_state->length); - - /* signal that I am done reading data from parent */ - sg_state->my_ctl_pointer->flag = sg_state->ready_flag; - } - - BASESMUMA_VERBOSE(5,("Completed %d found it from %d \n", - sg_state->my_rank, sg_state->src)); - - while(sg_state->ready_flag > sg_state->parent_ctl_pointer->flag); - - goto Allgather; - } - } - - { - /* this is not who we are looking for, - * mark as false positive so we don't - * poll here again - */ - sg_state->src_list[sg_state->src_list_index] = -1; - sg_state->matched = 0; - goto Probe; - } - -Allgather: - - /* zip it back up - we have already taken care of first level */ - sg_state->global_sg_offset = sg_state->my_ctl_pointer->offset; - - /* first level of zip up */ - sg_state->length = 2 * sg_state->fragment_size/sg_state->pow_2; - - /* Posting for all phases of recursive doubling */ - extra_src_posts = (sg_state->my_rank + sg_state->pow_2 < sg_state->group_size ) ? 
1: 0; - allgather_posts = sg_state->pow_2_levels - 1; - total_msg_posts = allgather_posts + extra_src_posts ; - - if ((!sg_state->msg_posted) && (total_msg_posts > 0)){ - - mca_bcol_basesmuma_portals_post_msg(sg_state->cs, &sg_state->my_ctl_pointer->portals_buf_addr, - sg_state->my_userbuf, sg_state->my_ctl_pointer->portals_buf_addr.userbuf_length, - PTL_EQ_NONE, total_msg_posts, blocked_post, - PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE - | PTL_MD_OP_GET | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE - ); - sg_state->msg_posted = true; - } - - sg_state->ready_flag++; - opal_atomic_wmb (); - sg_state->my_ctl_pointer->flag = sg_state->ready_flag; - - rc = sm_portals_bcasts_allgather_phase(sg_state); - - if (rc != OMPI_SUCCESS) { - BASESMUMA_VERBOSE(10,("Error in Bcast's allgather phase ")); - goto Release; - } - - /* If I am source for non-power 2 children wait for them */ - /* If I am secondary root then my partner would be real root - * so no need for exchange of data with the extra partner */ - sg_state->extra_partner = sg_state->my_rank + sg_state->pow_2 ; - if ((sg_state->extra_partner < sg_state->group_size) && (!sg_state->secondary_root)) { - - sg_state->extra_partner_ctl_pointer = sg_state->ctl_structs[sg_state->extra_partner]; - /* Block until extra partner has copied data */ - while(!IS_SG_DATA_READY(sg_state->extra_partner_ctl_pointer, - sg_state->ready_flag, sg_state->sequence_number)) { - opal_progress(); - } - - } - -Release: - - BASESMUMA_VERBOSE(1,("Im done ")); - - sg_state->my_ctl_pointer->starting_flag_value++; - sg_state->phase = FINISHED; - - return BCOL_FN_COMPLETE; - -} -#endif /* __PORTALS_AVAIL__ */ diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_bcast.h b/ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_bcast.h deleted file mode 100644 index d15851b036..0000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_bcast.h +++ /dev/null @@ -1,626 +0,0 @@ -#ifdef __PORTALS_AVAIL__ -#define __PORTALS_ENABLE__ - -#include - -#include "ompi_config.h" -#include "ompi/constants.h" -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/communicator/communicator.h" - -#include "bcol_basesmuma_utils.h" -#include "bcol_basesmuma_portals.h" -#include "bcol_basesmuma.h" - -#if 0 -struct scatter_allgather_nb_bcast_state_t -{ - /* local variables */ - uint64_t length; - int my_rank, src, matched; - int *src_list; - int group_size; - int64_t ready_flag; - int pow_2, pow_2_levels; - int src_list_index; - uint64_t fragment_size; /* user buffer size */ - - /* Input argument variables */ - void *my_userbuf; - int64_t sequence_number; - - /* Extra source variables */ - bool secondary_root; - int partner , extra_partner; - - /* Scatter Allgather offsets */ - uint64_t local_sg_offset , global_sg_offset , partner_offset ; - - /* Portals messaging relevant variables */ - ptl_handle_eq_t allgather_eq_h; - ptl_handle_eq_t read_eq; - ptl_event_t allgather_event; - bool msg_posted; - - /* OMPI module and component variables */ - mca_bcol_basesmuma_component_t *cs; - mca_bcol_basesmuma_module_t *bcol_module; - - /* Control structure and payload variables */ - volatile mca_bcol_basesmuma_ctl_struct_t **ctl_structs; - volatile mca_bcol_basesmuma_ctl_struct_t *my_ctl_pointer; - volatile mca_bcol_basesmuma_ctl_struct_t *parent_ctl_pointer; /* scatter source */ - volatile mca_bcol_basesmuma_ctl_struct_t *extra_partner_ctl_pointer; /* scatter source */ - - int phase; -}; - -typedef struct scatter_allgather_nb_bcast_state_t sg_state_t; -#endif - -bool 
blocked_post = false; - -#define IS_SG_DATA_READY(peer, my_flag, my_sequence_number) \ - (((peer)->sequence_number == (my_sequence_number) && \ - (peer)->flags[BCAST_FLAG] >= (my_flag) \ - )? true : false ) - - - -#define SG_LARGE_MSG_PROBE(src_list, n_src, src_list_index, matched, \ - src, data_buffs, data_src_ctl_pointer, \ - data_src_lmsg_ctl_pointer, ready_flag, \ - sequence_number) \ -do { \ - int j; \ - for( j = 0; j < n_src; j++) { \ - if(src_list[j] != -1) { \ - data_src_ctl_pointer = data_buffs[src_list[j]].ctl_struct; \ - data_src_lmsg_ctl_pointer = (mca_bcol_basesmuma_portal_buf_addr_t*) \ - data_buffs[src_list[j]].payload; \ - if( IS_SG_DATA_READY(data_src_ctl_pointer,ready_flag,sequence_number)) { \ - src = src_list[j]; \ - matched = 1; \ - src_list_index = j; \ - break; \ - } \ - } \ - } \ -} while(0) - -#define SG_LARGE_MSG_NB_PROBE(src_list, n_src, src_list_index, matched, \ - src, ctl_structs, data_src_ctl_pointer, \ - ready_flag, sequence_number) \ -do { \ - int j; \ - for( j = 0; j < n_src; j++) { \ - if(src_list[j] != -1) { \ - data_src_ctl_pointer = ctl_structs[src_list[j]]; \ - if( IS_SG_DATA_READY(data_src_ctl_pointer,ready_flag,sequence_number)) { \ - src = src_list[j]; \ - matched = 1; \ - src_list_index = j; \ - break; \ - } \ - } \ - } \ -} while(0) - - - - - -static inline __opal_attribute_always_inline__ -int wait_for_peers(int my_rank, int npeers, volatile mca_bcol_basesmuma_payload_t *data_buffs, - int flag_value, int sn) -{ - int *peers_list = NULL; - int counter = 0, diter = 0; - volatile mca_bcol_basesmuma_header_t *peer_ctl_pointer = NULL; - - peers_list = (int *)malloc(sizeof(int) * npeers); - - for (diter = 0; diter < npeers; diter++ ){ - peers_list[diter] = my_rank ^ (1<pow_2_levels+1; - - - for( j = 0; j < n_src; j++) { - if(sg_state->src_list[j] != -1) { - sg_state->parent_ctl_pointer = sg_state->ctl_structs[sg_state->src_list[j]]; - - BASESMUMA_VERBOSE(5,("Parent %d ctl pointer (parent=%x, my ctl=%x) flag %d", - sg_state->src_list[j],sg_state->parent_ctl_pointer, - sg_state->my_ctl_pointer, - sg_state->parent_ctl_pointer->flag)); - - if (IS_SG_DATA_READY(sg_state->parent_ctl_pointer, - sg_state->ready_flag, sg_state->sequence_number)) { - sg_state->src = sg_state->src_list[j]; - sg_state->matched = 1; - sg_state->src_list_index = j; - break; - } - } - } - - return 0; -} -/* - * I will post message for all the my children - */ -static inline __opal_attribute_always_inline__ -int sm_portals_root_scatter(sg_state_t *sg_state) -{ - int extra_src_posts = -1, scatter_posts = -1, allgather_posts = -1, - total_msg_posts = -1; - - BASESMUMA_VERBOSE(10,("I am the root of the data")); - sg_state->my_ctl_pointer->offset = 0; - sg_state->my_ctl_pointer->n_sends = sg_state->pow_2_levels; - sg_state->my_ctl_pointer->length = sg_state->fragment_size; - - - - extra_src_posts = (sg_state->my_rank + sg_state->pow_2 < sg_state->group_size ) ? 
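/* illustration, not from the sources: only pow_2_levels - 1 allgather
 * posts are needed below because the first doubling level is already
 * satisfied by the chunk the partner fetched during the scatter */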
1: 0; - scatter_posts = sg_state->my_ctl_pointer->n_sends; - allgather_posts = sg_state->pow_2_levels - 1; - - total_msg_posts = scatter_posts + allgather_posts + extra_src_posts ; - - if ( total_msg_posts <= 0) { - BASESMUMA_VERBOSE(10,("No need to post the data ")); - return OMPI_SUCCESS; - } - - mca_bcol_basesmuma_portals_post_msg(sg_state->cs, - &sg_state->my_ctl_pointer->portals_buf_addr, - sg_state->my_userbuf, sg_state->fragment_size, - PTL_EQ_NONE, - total_msg_posts, - blocked_post, - PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE | - PTL_MD_OP_GET | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE); - - /* - mca_bcol_basesmuma_portals_post_msg(sg_state->cs, - &sg_state->my_ctl_pointer->portals_buf_addr, - sg_state->my_userbuf, sg_state->fragment_size, - sg_state->allgather_eq_h, - total_msg_posts, - blocked_post, - PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE | - PTL_MD_OP_GET | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE); - */ - - sg_state->msg_posted = true ; - - /* - opal_atomic_wmb(); - */ - sg_state->my_ctl_pointer->flag = sg_state->ready_flag; - - return OMPI_SUCCESS; -} - -/* - * Im root but my rank > pow2_groupsize, so will copy to partner who - * will act as root (secondary) - */ -static inline __opal_attribute_always_inline__ -int sm_portals_extra_root_scatter(sg_state_t *sg_state) -{ - int scatter_partner = -1; - volatile mca_bcol_basesmuma_ctl_struct_t *scatter_partner_ctl_pointer = NULL; - - int total_msg_posts = 1; - - if ( total_msg_posts <= 0) { - BASESMUMA_VERBOSE(10,("No need to post the data ")); - } - else { - mca_bcol_basesmuma_portals_post_msg(sg_state->cs, - &sg_state->my_ctl_pointer->portals_buf_addr, - sg_state->my_userbuf, sg_state->fragment_size, - PTL_EQ_NONE, - total_msg_posts, - blocked_post, - PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE | PTL_MD_OP_GET - | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE); - sg_state->msg_posted = true ; - - } - - opal_atomic_wmb(); - sg_state->my_ctl_pointer->flag = sg_state->ready_flag; - - - - scatter_partner = sg_state->my_rank - sg_state->pow_2; - scatter_partner_ctl_pointer = - sg_state->ctl_structs[scatter_partner]; - - while(!IS_SG_DATA_READY(scatter_partner_ctl_pointer, sg_state->ready_flag, - sg_state->sequence_number)){ - opal_progress(); - } - - return OMPI_SUCCESS; -} - -/* - * Gets msg from the partner (> pow2_groupsize) and posts the - * message acting as root - */ -static inline __opal_attribute_always_inline__ -int sm_portals_secondary_root_scatter(sg_state_t *sg_state) -{ - - volatile mca_bcol_basesmuma_ctl_struct_t *extra_src_ctl_pointer = NULL; - int scatter_posts, allgather_posts, extra_src_posts, total_msg_posts; - - sg_state->secondary_root = true; - BASESMUMA_VERBOSE(10,("I am the secondary root for the data")); - sg_state->my_ctl_pointer->offset = 0; - sg_state->my_ctl_pointer->n_sends = sg_state->pow_2_levels; - sg_state->my_ctl_pointer->length = sg_state->fragment_size; - - extra_src_ctl_pointer = sg_state->ctl_structs[sg_state->src]; - - mca_bcol_basesmuma_portals_get_msg_fragment(sg_state->cs, - sg_state->read_eq, - &sg_state->my_ctl_pointer->portals_buf_addr, - &extra_src_ctl_pointer->portals_buf_addr, 0, - 0, sg_state->fragment_size); - - - extra_src_posts = 0; - scatter_posts = sg_state->my_ctl_pointer->n_sends; - allgather_posts = sg_state->pow_2_levels - 1; - - total_msg_posts = scatter_posts + allgather_posts + extra_src_posts ; - - if (total_msg_posts > 0) { - 
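/* illustration, not from the sources: with pow_2_levels = 3 the secondary
 * root posts 3 scatter + 2 allgather + 0 extra = 5 read descriptors; a
 * real root with an extra (>= pow_2) partner would post one more */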
mca_bcol_basesmuma_portals_post_msg(sg_state->cs, - &sg_state->my_ctl_pointer->portals_buf_addr, - sg_state->my_userbuf, sg_state->fragment_size, - PTL_EQ_NONE, - total_msg_posts, - blocked_post, - PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE | PTL_MD_OP_GET - | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE); - sg_state->msg_posted = true ; - } - opal_atomic_wmb(); - sg_state->my_ctl_pointer->flag = sg_state->ready_flag; - - return OMPI_SUCCESS; -} - -/* - * Internode Scatter: Get data from my parent and post for my children - */ - -static inline __opal_attribute_always_inline__ -int sm_portals_internode_scatter(sg_state_t *sg_state) -{ - - int scatter_posts, allgather_posts, extra_src_posts, - total_msg_posts; - uint64_t local_offset, remote_offset; - - /* compute the size of the chunk to copy */ - sg_state->length = (sg_state->parent_ctl_pointer->length)/ - (1<<(sg_state->parent_ctl_pointer->n_sends - sg_state->my_ctl_pointer->n_sends)); - sg_state->my_ctl_pointer->length = sg_state->length; - sg_state->my_ctl_pointer->offset = - sg_state->parent_ctl_pointer->offset + sg_state->length; - - - local_offset = sg_state->my_ctl_pointer->offset; - remote_offset = sg_state->parent_ctl_pointer->offset + - sg_state->length; - - mca_bcol_basesmuma_portals_get_msg_fragment(sg_state->cs, - sg_state->read_eq, - &sg_state->my_ctl_pointer->portals_buf_addr, - &sg_state->parent_ctl_pointer->portals_buf_addr,local_offset, - remote_offset,sg_state->length); - - /* Now post the message for other children to read */ - extra_src_posts = (sg_state->my_rank + sg_state->pow_2 < - sg_state->group_size ) ? 1: 0; - scatter_posts = sg_state->my_ctl_pointer->n_sends; - allgather_posts = sg_state->pow_2_levels - 1; - - total_msg_posts = scatter_posts + allgather_posts + extra_src_posts ; - - if (total_msg_posts > 0) { - mca_bcol_basesmuma_portals_post_msg(sg_state->cs, &sg_state->my_ctl_pointer->portals_buf_addr, - sg_state->my_userbuf, sg_state->my_ctl_pointer->portals_buf_addr.userbuf_length, - PTL_EQ_NONE, - total_msg_posts, - blocked_post, - PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE - | PTL_MD_OP_GET | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE); - - sg_state->msg_posted = true; - } - /* - opal_atomic_wmb(); - */ - sg_state->my_ctl_pointer->flag = sg_state->ready_flag; - - return OMPI_SUCCESS; -} - -/* - * Bcast's Allgather Phase: - * Combines data from all processes using recursive doubling algorithm - */ -static inline __opal_attribute_always_inline__ -int sm_portals_bcasts_allgather_phase(sg_state_t *sg_state) -{ - int ag_loop, partner; - volatile mca_bcol_basesmuma_ctl_struct_t *partner_ctl_pointer = NULL; /* recursive double */ - - - for( ag_loop = 1; ag_loop < sg_state->pow_2_levels; ag_loop++) { - /* get my partner for this level */ - partner = sg_state->my_rank^(1<ctl_structs[partner]; - - - /* Block until partner is at this level of recursive-doubling stage */ - while(!IS_SG_DATA_READY(partner_ctl_pointer, sg_state->ready_flag, - sg_state->sequence_number)) { - opal_progress(); - } - assert(partner_ctl_pointer->flag >= sg_state->ready_flag); - - if (partner_ctl_pointer->offset < sg_state->my_ctl_pointer->offset) { - sg_state->global_sg_offset -= sg_state->length; - sg_state->local_sg_offset = sg_state->global_sg_offset; - } else { - sg_state->local_sg_offset = sg_state->global_sg_offset + sg_state->length; - } - - - BASESMUMA_VERBOSE(10,("Allgather Phase: Get message from process %d, length %d", - partner, sg_state->length)); - 
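/*
 * Editorial sketch, not part of the deleted sources: this loop pairs ranks
 * by XOR with the current level bit; whichever partner owns the lower
 * offset prepends the incoming chunk and the other appends it, so after
 * each round the pair holds one contiguous region of twice the length.
 * Hypothetical stand-alone version of the offset bookkeeping:
 */
#include <stdint.h>

static uint64_t next_read_offset(uint64_t *global_off, uint64_t my_off,
                                 uint64_t partner_off, uint64_t length)
{
    if (partner_off < my_off) {
        *global_off -= length;    /* partner's chunk sits just below mine */
        return *global_off;
    }
    return *global_off + length;  /* partner's chunk sits just above mine */
}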
mca_bcol_basesmuma_portals_get_msg_fragment(sg_state->cs, - sg_state->read_eq, - &sg_state->my_ctl_pointer->portals_buf_addr, - &partner_ctl_pointer->portals_buf_addr,sg_state->local_sg_offset, - sg_state->local_sg_offset, sg_state->length); - - sg_state->ready_flag++; - opal_atomic_wmb(); - sg_state->my_ctl_pointer->flag = sg_state->ready_flag; - - /* Block until partner is at this level of recursive-doubling stage */ - while(!IS_SG_DATA_READY(partner_ctl_pointer, sg_state->ready_flag, - sg_state->sequence_number)) { - opal_progress(); - } - - /* double the length */ - sg_state->length *= 2; - } - - return OMPI_SUCCESS; - -} - - -static inline __opal_attribute_always_inline__ -int init_sm_group_info(sg_state_t *sg_state, int buff_idx) -{ - int idx, leading_dim; - int first_instance=0; - int flag_offset; - - /* Get addresing information */ - sg_state->group_size = sg_state->bcol_module->colls_no_user_data.size_of_group; - leading_dim = sg_state->bcol_module->colls_no_user_data.size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - - BASESMUMA_VERBOSE(1,("My buffer idx %d group size %d, leading dim %d, idx %d", - buff_idx,sg_state->group_size,leading_dim,idx)); - /* grab the ctl buffs */ - sg_state->ctl_structs = (volatile mca_bcol_basesmuma_ctl_struct_t **) - sg_state->bcol_module->colls_with_user_data.ctl_buffs+idx; - - sg_state->my_rank = sg_state->bcol_module->super.sbgp_partner_module->my_index; - sg_state->my_ctl_pointer = sg_state->ctl_structs[sg_state->my_rank]; - - if (sg_state->my_ctl_pointer->sequence_number < sg_state->sequence_number) { - first_instance = 1; - } - - if(first_instance) { - sg_state->my_ctl_pointer->flag = -1; - sg_state->my_ctl_pointer->index = 1; - - sg_state->my_ctl_pointer->starting_flag_value = 0; - flag_offset = 0; - - } else { - sg_state->my_ctl_pointer->index++; - } - - /* For bcast we shud have only entry to this bcol - assert(sg_state->my_ctl_pointer->flag == -1); - */ - - /* increment the starting flag by one and return */ - flag_offset = sg_state->my_ctl_pointer->starting_flag_value; - sg_state->ready_flag = flag_offset + sg_state->sequence_number + 1; - - sg_state->my_ctl_pointer->sequence_number = sg_state->sequence_number; - - return OMPI_SUCCESS; - -} - -static inline __opal_attribute_always_inline__ -int init_sm_portals_sg_info(sg_state_t *sg_state) -{ -/* Get portals info*/ - mca_bcol_basesmuma_portal_proc_info_t *portals_info; - int rc = OMPI_SUCCESS; - int sg_matchbits; - - portals_info = (mca_bcol_basesmuma_portal_proc_info_t*)sg_state->cs->portals_info; - - sg_matchbits = sg_state->sequence_number ; - - /* Construct my portal buffer address and copy to payload buffer */ - mca_bcol_basesmuma_construct_portal_address(&sg_state->my_ctl_pointer->portals_buf_addr, - portals_info->portal_id.nid, - portals_info->portal_id.pid, - sg_matchbits, - sg_state->bcol_module->super.sbgp_partner_module->group_comm->c_contextid); - - sg_state->my_ctl_pointer->portals_buf_addr.userbuf = sg_state->my_userbuf; - sg_state->my_ctl_pointer->portals_buf_addr.userbuf_length = sg_state->fragment_size; - - return OMPI_SUCCESS; -} - -static inline __opal_attribute_always_inline__ -int compute_src_from_root(int group_root, int my_group_rank, int pow2, int - group_size) -{ - - int root, relative_rank, src, i; - - if (group_root < pow2) { - root = group_root; - } else { - /* the source of the data is extra node, - the real root it represented by some rank from - pow2 group */ - root = group_root - pow2; - /* shortcut for the case when my rank is root for the group 
*/ - if (my_group_rank == root) { - return group_root; - } - } - - relative_rank = (my_group_rank - root) < 0 ? my_group_rank - root + pow2 : - my_group_rank - root; - - for (i = 1; i < pow2; i<<=1) { - if (relative_rank & i) { - src = my_group_rank ^ i; - if (src >= pow2) - src -= pow2; - - return src; - } - } - - return -1; -} - -int bcol_basesmuma_lmsg_scatter_allgather_portals_bcast(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -int bcol_basesmuma_lmsg_scatter_allgather_portals_nb_bcast(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -int bcol_basesmuma_lmsg_scatter_allgather_portals_nb_knownroot_bcast(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -#endif diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_knomial_bcast.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_knomial_bcast.c deleted file mode 100644 index a1454102a8..0000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_knomial_bcast.c +++ /dev/null @@ -1,452 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -/* #define __PORTALS_AVAIL__ */ -#ifdef __PORTALS_AVAIL__ - -#define __PORTALS_ENABLE__ -#include "ompi/mca/bcol/basesmuma/bcol_basesmuma.h" -#include "ompi/constants.h" -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/communicator/communicator.h" -#include "bcol_basesmuma_utils.h" - -#include "bcol_basesmuma_portals.h" - -/* debug */ -#include -/* end debug */ - - -/** - * Shared memory non-blocking Broadcast - K-nomial fan-out for small data buffers. - * This routine assumes that buf (the input buffer) is a single writer - * multi reader (SWMR) shared memory buffer owned by the calling rank - * which is the only rank that can write to this buffers. - * It is also assumed that the buffers are registered and fragmented - * at the ML level and that buf is sufficiently large to hold the data. - * - * - * @param buf - SWMR shared buffer within a sbgp that the - * executing rank can write to. - * @param count - the number of elements in the shared buffer. - * @param dtype - the datatype of a shared buffer element. - * @param root - the index within the sbgp of the root. - * @param module - basesmuma module. 
- */ -int bcol_basesmuma_lmsg_bcast_k_nomial_anyroot(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ -#if 0 - /* local variables */ - mca_bcol_basesmuma_module_t* bcol_module= - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - int i, matched = 0; - int src=-1; - int group_size; - int my_rank, first_instance=0, flag_offset; - int rc = OMPI_SUCCESS; - int leading_dim, buff_idx, idx; - int count=input_args->count; - struct ompi_datatype_t* dtype=input_args->dtype; - int64_t sequence_number=input_args->sequence_num; - - volatile int64_t ready_flag; - volatile mca_bcol_basesmuma_payload_t *data_buffs; - volatile char* parent_data_pointer; - volatile mca_bcol_basesmuma_header_t *parent_ctl_pointer; - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer; - void *userbuf = (void *)((unsigned char *)input_args->userbuf); - - size_t pack_len = 0, dt_size; - - struct mca_bcol_basesmuma_portal_buf_addr_t *my_lmsg_ctl_pointer = NULL; - struct mca_bcol_basesmuma_portal_buf_addr_t *parent_lmsg_ctl_pointer = NULL; - mca_bcol_basesmuma_portal_proc_info_t *portals_info; - portals_info = (mca_bcol_basesmuma_portal_proc_info_t*)cs->portals_info; - - /* we will work only on packed data - so compute the length*/ - ompi_datatype_type_size(dtype, &dt_size); - pack_len=count*dt_size; - buff_idx = input_args->src_desc->buffer_index; - - /* Get addressing information */ - my_rank = bcol_module->super.sbgp_partner_module->my_index; - group_size = bcol_module->colls_no_user_data.size_of_group; - leading_dim=bcol_module->colls_no_user_data.size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - - data_buffs=(volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs+idx; - - /* Set pointer to current proc ctrl region */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - my_lmsg_ctl_pointer = (mca_bcol_basesmuma_portal_buf_addr_t*) data_buffs[my_rank].payload; - - /* setup resource recycling */ - if( my_ctl_pointer->sequence_number < sequence_number ) { - first_instance=1; - } - - if( first_instance ) { - /* Signal arrival */ - my_ctl_pointer->flag = -1; - my_ctl_pointer->index=1; - /* this does not need to use any flag values , so only need to - * set the value for subsequent values that may need this */ - my_ctl_pointer->starting_flag_value=0; - flag_offset=0; - - } else { - /* only one thread at a time will be making progress on this - * collective, so no need to make this atomic */ - my_ctl_pointer->index++; - } - - - /* increment the starting flag by one and return */ - flag_offset = my_ctl_pointer->starting_flag_value; - ready_flag = flag_offset + sequence_number + 1; - my_ctl_pointer->sequence_number = sequence_number; - - - /* Construct my portal buffer address and copy to payload buffer */ - mca_bcol_basesmuma_construct_portal_address(my_lmsg_ctl_pointer, - portals_info->portal_id.nid, - portals_info->portal_id.pid, - sequence_number, - bcol_module->super.sbgp_partner_module->group_comm->c_contextid); - - /* non-blocking broadcast algorithm */ - - /* If I am the root, then signal ready flag */ - if(input_args->root_flag) { - ptl_handle_eq_t eq_h; - ptl_event_t event; - int ret; - - BASESMUMA_VERBOSE(10,("I am the root of the data")); - - /* create an event queue for the incoming buffer */ - ret = PtlEQAlloc(((mca_bcol_basesmuma_portal_proc_info_t*) - cs->portals_info)->ni_h, MAX_PORTAL_EVENTS_IN_Q, PTL_EQ_HANDLER_NONE, &eq_h); - - if (ret != PTL_OK) { - 
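/* editorial note: this is the failure branch, so there is no queue to
 * free here; on success the matching PtlEQFree() is called once the root
 * has drained its children's get events, just before it jumps to Release */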
fprintf(stderr, "PtlEQAlloc() failed: %d \n",ret); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* Post the message using portal copy */ - - mca_bcol_basesmuma_portals_post_msg_nb_nopers(cs, my_lmsg_ctl_pointer, userbuf, - pack_len, eq_h, my_lmsg_ctl_pointer->nsends); - - /* - * signal ready flag - */ - my_ctl_pointer->flag = ready_flag; - - /* wait for a response from the client */ - mca_bcol_basesmuma_portals_wait_event_nopers(eq_h, POST_MSG_EVENT, - &event, my_lmsg_ctl_pointer->nsends); - - /* free the event queue */ - ret = PtlEQFree(eq_h); - if (ret != PTL_OK) { - fprintf(stderr, "PtlEQFree() failed: %d )\n",ret); - } - - /* root is finished */ - goto Release; - } - - /* If I am not the root, then poll on possible "senders'" control structs */ - for( i = 0; i < cs->num_to_probe && 0 == matched; i++) { - - /* Shared memory iprobe */ - /* - BCOL_BASESMUMA_SM_PROBE(bcol_module->src, bcol_module->src_size, - my_rank, matched, src); - */ - do { - int j, n_src, my_index; - n_src = bcol_module->src_size; - - for( j = 0; j < n_src; j++) { - parent_ctl_pointer = data_buffs[bcol_module->src[j]].ctl_struct; - parent_lmsg_ctl_pointer = (mca_bcol_basesmuma_portal_buf_addr_t *) - data_buffs[bcol_module->src[j]].payload; - if (IS_DATA_READY(parent_ctl_pointer,ready_flag,sequence_number)) { - - src = bcol_module->src[j]; - matched = 1; - break; - } - } - } while(0); - - } - - /* If not matched, then hop out and put me on progress list */ - if(0 == matched ) { - BASESMUMA_VERBOSE(10,("Shared memory probe didn't find a match")); - return BCOL_FN_NOT_STARTED; - } - - /* else, we found our root within the group ... */ - BASESMUMA_VERBOSE(10,("Shared memory probe was matched, the root is %d", src)); - - /* receive the data from sender */ - /* get the data buff */ - /* taken care of in the macro */ - /*parent_data_pointer = data_buffs[src].payload;*/ - /* copy the data */ - mca_bcol_basesmuma_portals_get_msg(cs, parent_lmsg_ctl_pointer, userbuf, pack_len); - - /* set the memory barrier to ensure completion */ - opal_atomic_wmb (); - /* signal that I am done */ - my_ctl_pointer->flag = ready_flag; - - /* am I the last one? 
If so, release buffer */ - -Release: - my_ctl_pointer->starting_flag_value++; - - return BCOL_FN_COMPLETE; -#endif -} - -#if 0 - -#define BASESMUMA_K_NOMIAL_SEND_SIGNAL(radix_mask, radix, my_relative_index, \ - my_group_index, group_size,sm_data_buffs,sender_ready_flag, \ - num_pending_sends) \ -{ \ - int k, rc; \ - int dst; \ - int comm_dst; \ - volatile mca_bcol_basesmuma_header_t *recv_ctl_pointer = NULL; \ - volatile mca_bcol_basesmuma_portal_buf_addr_t *recv_lmsg_ctl_pointer = NULL; \ - \ - num_pending_sends = 0; \ - while(radix_mask > 0) { \ - /* For each level of tree, do sends */ \ - for (k = 1; \ - k < radix && my_relative_index + radix_mask * k < group_size; \ - ++k) { \ - \ - dst = my_group_index + radix_mask * k; \ - if (dst >= group_size) { \ - dst -= group_size; \ - } \ - /* Signal the children to get data */ \ - recv_ctl_pointer = data_buffs[dst].ctl; \ - recv_lmsg_ctl_pointer = (mca_bcol_basesmuma_portal_buf_addr_t *) \ - data_buffs[dst].payload; \ - recv_lmsg_ctl_pointer->src_index = my_group_index; \ - recv_lmsg_ctl_pointer->flag = sender_ready_flag; \ - ++num_pending_sends; \ - } \ - radix_mask /= radix; \ - } \ - \ -} - - - -int bcol_basesmuma_lmsg_bcast_k_nomial_anyroot(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - /* local variables */ - mca_bcol_basesmuma_module_t* bcol_module= - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - int i, matched = 0; - int src=-1; - int group_size; - int my_rank, first_instance=0, flag_offset; - int rc = OMPI_SUCCESS; - int leading_dim, buff_idx, idx; - int count=input_args->count; - struct ompi_datatype_t* dtype=input_args->dtype; - int64_t sequence_number=input_args->sequence_num; - - volatile int64_t ready_flag; - volatile mca_bcol_basesmuma_payload_t *data_buffs; - volatile char* parent_data_pointer; - volatile mca_bcol_basesmuma_header_t *parent_ctl_pointer; - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer; - void *userbuf = (void *)((unsigned char *)input_args->userbuf); - - size_t pack_len = 0, dt_size; - - struct mca_bcol_basesmuma_portal_buf_addr_t *my_lmsg_ctl_pointer = NULL; - struct mca_bcol_basesmuma_portal_buf_addr_t *parent_lmsg_ctl_pointer = NULL; - mca_bcol_basesmuma_portal_proc_info_t *portals_info; - portals_info = (mca_bcol_basesmuma_portal_proc_info_t*)cs->portals_info; - - /* we will work only on packed data - so compute the length*/ - ompi_datatype_type_size(dtype, &dt_size); - pack_len=count*dt_size; - buff_idx = input_args->src_desc->buffer_index; - - /* Get addressing information */ - my_rank = bcol_module->super.sbgp_partner_module->my_index; - group_size = bcol_module->colls_no_user_data.size_of_group; - leading_dim=bcol_module->colls_no_user_data.size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - - data_buffs=(volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs+idx; - - /* Set pointer to current proc ctrl region */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - my_lmsg_ctl_pointer = (mca_bcol_basesmuma_portal_buf_addr_t*) data_buffs[my_rank].payload; - - /* setup resource recycling */ - if( my_ctl_pointer->sequence_number < sequence_number ) { - first_instance=1; - } - - if( first_instance ) { - /* Signal arrival */ - my_ctl_pointer->flag = -1; - my_ctl_pointer->index=1; - /* this does not need to use any flag values , so only need to - * set the value for subsequent values that may need this */ - my_ctl_pointer->starting_flag_value=0; 
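/*
 * Editorial sketch, not part of the deleted sources: the recycling logic
 * around this point derives each instance's ready flag from the buffer's
 * persistent starting_flag_value plus the collective's sequence number,
 * and a peer counts as ready once it has published at least that flag for
 * the same sequence number (cf. the IS_DATA_READY()/IS_SG_DATA_READY()
 * checks used throughout).  Hypothetical stand-alone version:
 */
#include <stdint.h>
#include <stdbool.h>

struct ctl_sketch {
    volatile int64_t sequence_number;
    volatile int64_t flag;
    int64_t starting_flag_value;
};

static int64_t compute_ready_flag(const struct ctl_sketch *my_ctl,
                                  int64_t sequence_number)
{
    /* same arithmetic as flag_offset + sequence_number + 1 above */
    return my_ctl->starting_flag_value + sequence_number + 1;
}

static bool peer_is_ready(const struct ctl_sketch *peer, int64_t ready_flag,
                          int64_t sequence_number)
{
    return peer->sequence_number == sequence_number &&
           peer->flag >= ready_flag;
}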
- flag_offset=0; - - } else { - /* only one thread at a time will be making progress on this - * collective, so no need to make this atomic */ - my_ctl_pointer->index++; - } - - - /* increment the starting flag by one and return */ - flag_offset = my_ctl_pointer->starting_flag_value; - ready_flag = flag_offset + sequence_number + 1; - my_ctl_pointer->sequence_number = sequence_number; - - - /* Construct my portal buffer address and copy to payload buffer */ - mca_bcol_basesmuma_construct_portal_address(my_lmsg_ctl_pointer, - portals_info->portal_id.nid, - portals_info->portal_id.pid, - sequence_number, - bcol_module->super.sbgp_partner_module->group_comm->c_contextid); - - my_lmsg_ctl_pointer->userbuf = userbuff; - my_lsmg_ctl_pointer->userbuf_length = fragment_length; - /* create an event queue */ - ret = PtlEQAlloc(((mca_bcol_basesmuma_portal_proc_info_t*) - cs->portals_info)->ni_h, MAX_PORTAL_EVENTS_IN_Q, PTL_EQ_HANDLER_NONE, &eq_h); - - /* non-blocking broadcast algorithm */ - - /* If I am the root, then signal ready flag */ - if(input_args->root_flag) { - ptl_handle_eq_t eq_h; - ptl_event_t event; - int ret; - int root_radix_mask = sm_module->pow_knum; - - BASESMUMA_VERBOSE(10,("I am the root of the data")); - - - if (ret != PTL_OK) { - fprintf(stderr, "PtlEQAlloc() failed: %d \n",ret); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - BASESMUMA_K_NOMIAL_SEND_SIGNAL(root_radix_mask, radix, 0, - my_rank, group_size, data_buffs, ready_flag, nsends) ; - - mca_bcol_basesmuma_portals_post_msg_nb_nopers(cs, my_lmsg_ctl_pointer, userbuf, - pack_len, eq_h, nsends); - - /* wait for a response from the client */ - mca_bcol_basesmuma_portals_wait_event_nopers(eq_h, POST_MSG_EVENT, - &event, nsends); - - /* root is finished */ - goto Release; - } - - /* Im not a root so wait until someone puts data and - * compute where to get data from */ - - while (my_ctl_pointer->flag != ready_flag) ; - - my_data_source_index = lmsg_ctl_pointer->src_index; - - parent_lmsg_ctl_pointer = (mca_bcol_basesmuma_portal_buf_addr_t *) - data_buffs[my_data_source_index].payload; - - mca_bcol_basesmuma_portals_get_msg(cs, parent_lmsg_ctl_pointer, userbuf, pack_len); - - - - - /* I am done getting data, should I send the data to someone */ - - my_relative_index = (my_rank - my_data_source_index) < 0 ? my_rank - - my_data_source_index + group_size : my_rank - my_data_source_index; - - /* - * 2. 
Locate myself in the tree: - * calculate number of radix steps that we should to take - */ - radix_mask = 1; - while (radix_mask < group_size) { - if (0 != my_relative_index % (radix * radix_mask)) { - /* I found my level in tree */ - break; - } - radix_mask *= radix; - } - - /* go one step back */ - radix_mask /=radix; - - BASESMUMA_K_NOMIAL_SEND_SIGNAL(radix_mask, radix, my_relative_index, - my_rank, group_size,data_buffs,ready_flag,nsends) - - mca_bcol_basesmuma_portals_post_msg_nb_nopers(cs, my_lmsg_ctl_pointer, userbuf, - pack_len, eq_h, nsends); - - /* wait for childrens to read */ - mca_bcol_basesmuma_portals_wait_event_nopers(eq_h, POST_MSG_EVENT, - &event, nsends); - - - -Release: - /* free the event queue */ - ret = PtlEQFree(eq_h); - if (ret != PTL_OK) { - fprintf(stderr, "PtlEQFree() failed: %d )\n",ret); - } - - - my_ctl_pointer->starting_flag_value++; - - return BCOL_FN_COMPLETE; -} - -#endif -#endif diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_mem_mgmt.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_mem_mgmt.c deleted file mode 100644 index eff6697ec2..0000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_mem_mgmt.c +++ /dev/null @@ -1,101 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "bcol_basesmuma.h" - - -/* Shared memory registration function: Calls into the "shared memory - connection manager" (aka - smcm) and registers a chunk of memory by - opening and mmaping a file. - - @input: - - void *reg_data - shared memory specific data needed by the registration - function. - - void *base - pointer to memory address. - - size_t size - size of memory chunk to be registered with sm. - - mca_mpool_base_registration_t *reg - registration data is cached here. - - @output: - - returns OMPI_SUCCESS on successful registration. - - returns OMPI_ERROR on failure. - -*/ - -int mca_bcol_basesmuma_register_sm(void *context_data, void *base, size_t size, - void **reg_desc) -{ - - /* local variables */ - int ret = OMPI_SUCCESS; - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - bcol_basesmuma_registration_data_t *sm_reg = - (bcol_basesmuma_registration_data_t*) context_data; - - /* cache some info on sm_reg aka "context_data", you'll need it later */ - sm_reg->base_addr = base; - sm_reg->size = size; - - /* call into the shared memory registration function in smcm - * we need to be sure that the memory is page aligned in order - * to "map_fixed" - */ - sm_reg->sm_mmap = bcol_basesmuma_smcm_mem_reg(base, size, - sm_reg->data_seg_alignment, - sm_reg->file_name); - if(NULL == sm_reg->sm_mmap) { - opal_output (ompi_bcol_base_framework.framework_output, "Bcol_basesmuma memory registration error"); - return OMPI_ERROR; - } - - /* don't let other communicators re-register me! */ - cs->mpool_inited = true; - /* alias back to component */ - cs->sm_payload_structs = sm_reg->sm_mmap; - - return ret; -} - -/* Shared memory deregistration function - deregisters memory by munmapping it and removing the - shared memory file. - - Basic steps (please let me know if this is incompatible with your notion of deregistration - or if it causes problems on cleanup): - - 1. munmap the shared memory file. - 2. 
set the base pointer to the mmaped memory to NULL. - 3. permanently remove the shared memory file from the directory. - -*/ - -int mca_bcol_basesmuma_deregister_sm(void *context_data, void *reg) -{ - /* local variables */ - bcol_basesmuma_registration_data_t *sm_reg = - (bcol_basesmuma_registration_data_t*) context_data; - - if (sm_reg->sm_mmap) { - OBJ_RELEASE(sm_reg->sm_mmap); - } - - /* set the pointer to NULL */ - sm_reg->base_addr = NULL; - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_module.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_module.c deleted file mode 100644 index 8770689ed2..0000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_module.c +++ /dev/null @@ -1,687 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#include "ompi_config.h" -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/bcol/base/base.h" -#include "ompi/patterns/net/netpatterns.h" - -#include "opal/util/show_help.h" -#include "opal/align.h" - -#include "ompi/mca/bcol/basesmuma/bcol_basesmuma_reduce.h" -#include "bcol_basesmuma.h" -#include "bcol_basesmuma_utils.h" - -#ifdef __PORTALS_AVAIL__ -#include "bcol_basesmuma_portals.h" -#endif - - -/* - * Local functions - */ -static int alloc_lmsg_reduce_offsets_array(mca_bcol_basesmuma_module_t *sm_module) -{ - int rc = OMPI_SUCCESS, i = 0; - netpatterns_k_exchange_node_t *k_node = &sm_module->knomial_exchange_tree; - int n_exchanges = k_node->n_exchanges; - - /* Precalculate the allreduce offsets */ - if (0 < k_node->n_exchanges) { - sm_module->reduce_offsets = (int **)malloc(n_exchanges * sizeof(int*)); - - if (!sm_module->reduce_offsets) { - rc = OMPI_ERROR; - return rc; - } - - for (i=0; i < n_exchanges ; i++) { - sm_module->reduce_offsets[i] = (int *)malloc (sizeof(int) * NOFFSETS); - - if (!sm_module->reduce_offsets[i]){ - rc = OMPI_ERROR; - return rc; - } - } - } - return rc; -} - -static int free_lmsg_reduce_offsets_array(mca_bcol_basesmuma_module_t *sm_module) -{ - int rc = OMPI_SUCCESS, i = 0; - netpatterns_k_exchange_node_t *k_node = &sm_module->knomial_exchange_tree; - int n_exchanges = k_node->n_exchanges; - - if (sm_module->reduce_offsets) { - for (i=0; i < n_exchanges; i++) { - free (sm_module->reduce_offsets[i]); - } - - free(sm_module->reduce_offsets); - } - return rc; -} - -static void -mca_bcol_basesmuma_module_construct(mca_bcol_basesmuma_module_t *module) -{ - /* initialize all values to 0 */ - memset((void*)((uintptr_t) module + sizeof (module->super)), 0, sizeof (*module) - sizeof (module->super)); - module->super.bcol_component = (mca_bcol_base_component_t *) &mca_bcol_basesmuma_component; - module->super.list_n_connected = NULL; - module->super.hier_scather_offset = 0; -} - -static void -mca_bcol_basesmuma_module_destruct(mca_bcol_basesmuma_module_t *sm_module) -{ - /* local variables */ - mca_sbgp_base_module_t *sbgp_module = sm_module->super.sbgp_partner_module; - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - - /* - * 
release allocated resrouces - */ - - /* ...but not until you're sure you have no outstanding collectives */ - while(0 != opal_list_get_size(&(cs->nb_admin_barriers))) { - opal_progress(); - } - -#ifdef __PORTALS_AVAIL__ - /* Remove portals bcast specific resources */ - if ( PTL_OK != PtlEQFree(sm_module->sg_state.read_eq)) { - BASESMUMA_VERBOSE(10,("PtlEQFree() failed: )")); - } -#endif - - /* Remove Lmsg Reduce Offsets Array */ - free_lmsg_reduce_offsets_array(sm_module); - - /* collective topology data */ - if( sm_module->fanout_read_tree) { - for (int i = 0 ; i < sm_module->super.size_of_subgroup ; i++ ) { - if(0 < sm_module->fanout_read_tree[i].n_children ) { - free(sm_module->fanout_read_tree[i].children_ranks); - sm_module->fanout_read_tree[i].children_ranks=NULL; - } - } - free(sm_module->fanout_read_tree); - sm_module->fanout_read_tree=NULL; - } - - /* gvm Leak FIX Reduction_tree[].children_ranks has - * to be removed. I don't how to get the size (which is - * size of subgroup) of array reduction_tree - */ - if( sm_module->reduction_tree) { - for (int i = 0 ; i < sm_module->super.size_of_subgroup ; i++ ) { - if(0 < sm_module->reduction_tree[i].n_children ) { - free(sm_module->reduction_tree[i].children_ranks); - sm_module->reduction_tree[i].children_ranks=NULL; - } - } - free(sm_module->reduction_tree); - sm_module->reduction_tree=NULL; - } - - /* gvm Leak FIX */ - if (sm_module->fanout_node.children_ranks){ - free(sm_module->fanout_node.children_ranks); - sm_module->fanout_node.children_ranks = NULL; - } - - if (sm_module->fanin_node.children_ranks){ - free(sm_module->fanin_node.children_ranks); - sm_module->fanin_node.children_ranks = NULL; - } - - /* colls_no_user_data resrouces */ - if(sm_module->colls_no_user_data.ctl_buffs_mgmt){ - free(sm_module->colls_no_user_data.ctl_buffs_mgmt); - sm_module->colls_no_user_data.ctl_buffs_mgmt=NULL; - } - if(sm_module->colls_no_user_data.ctl_buffs){ - free(sm_module->colls_no_user_data.ctl_buffs); - sm_module->colls_no_user_data.ctl_buffs=NULL; - } - - /* return control */ - opal_list_append (&cs->ctl_structures, (opal_list_item_t *) sm_module->no_userdata_ctl); - - /* colls_with_user_data resrouces */ - /* - *debug print */ - /* - fprintf(stderr,"AAA colls_with_user_data.ctl_buffs %p \n", - sm_module->colls_with_user_data.ctl_buffs_mgmt); - end debug */ - - if(sm_module->colls_with_user_data.ctl_buffs_mgmt){ - free(sm_module->colls_with_user_data.ctl_buffs_mgmt); - sm_module->colls_with_user_data.ctl_buffs_mgmt=NULL; - } - if(sm_module->colls_with_user_data.ctl_buffs){ - free(sm_module->colls_with_user_data.ctl_buffs); - sm_module->colls_with_user_data.ctl_buffs=NULL; - } - - if(sm_module->shared_memory_scratch_space) { - free(sm_module->shared_memory_scratch_space); - sm_module->shared_memory_scratch_space=NULL; - } - - /* return control */ - opal_list_append (&cs->ctl_structures, (opal_list_item_t *) sm_module->userdata_ctl); - -#if 1 - if(sm_module->scatter_kary_tree) { - for (int i = 0 ; i < sm_module->super.size_of_subgroup ; i++ ) { - if(0 < sm_module->scatter_kary_tree[i].n_children) { - free(sm_module->scatter_kary_tree[i].children_ranks); - sm_module->scatter_kary_tree[i].children_ranks=NULL; - } - } - free(sm_module->scatter_kary_tree); - } -#endif - - if(NULL != sm_module->super.list_n_connected ){ - free(sm_module->super.list_n_connected); - sm_module->super.list_n_connected = NULL; - } - - cleanup_nb_coll_buff_desc(&sm_module->ml_mem.nb_coll_desc, - sm_module->ml_mem.num_banks, - sm_module->ml_mem.num_buffers_per_bank); - - 
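/*
 * Editorial sketch, not part of the deleted sources: the destructor above
 * repeats one teardown pattern -- every per-rank tree node owns a
 * children_ranks array that must be freed before the node array itself.
 * Hypothetical helper capturing that pattern:
 */
#include <stdlib.h>

struct tree_node_sketch {
    int n_children;
    int *children_ranks;
};

static void free_tree_nodes(struct tree_node_sketch *nodes, int n_nodes)
{
    int i;

    if (NULL == nodes) {
        return;
    }
    for (i = 0; i < n_nodes; i++) {
        if (0 < nodes[i].n_children) {
            free(nodes[i].children_ranks);
            nodes[i].children_ranks = NULL;
        }
    }
    free(nodes);
}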
for (int i = 0; i < BCOL_NUM_OF_FUNCTIONS; i++){ - /* gvm FIX: Go through the list and destroy each item */ - /* Destroy the function table object for each bcol type list */ - OPAL_LIST_DESTRUCT((&sm_module->super.bcol_fns_table[i])); - } - - if (NULL != sm_module->payload_backing_files_info) { - bcol_basesmuma_smcm_release_connections (sm_module, sbgp_module, &cs->sm_connections_list, - &sm_module->payload_backing_files_info); - } - - if (NULL != sm_module->ctl_backing_files_info) { - bcol_basesmuma_smcm_release_connections (sm_module, sbgp_module, &cs->sm_connections_list, - &sm_module->ctl_backing_files_info); - } - - if (NULL != sm_module->ml_mem.bank_release_counter) { - free(sm_module->ml_mem.bank_release_counter); - sm_module->ml_mem.bank_release_counter = NULL; - } - - if (NULL != sm_module->colls_with_user_data.data_buffs) { - free((void *)sm_module->colls_with_user_data.data_buffs); - sm_module->colls_with_user_data.data_buffs = NULL; - } - - /* free the k-nomial allgather tree here */ - netpatterns_cleanup_recursive_knomial_allgather_tree_node(&sm_module->knomial_allgather_tree); - netpatterns_cleanup_recursive_doubling_tree_node(&sm_module->recursive_doubling_tree); - netpatterns_cleanup_recursive_knomial_tree_node(&sm_module->knomial_exchange_tree); - - /* done */ -} - -static void bcol_basesmuma_set_small_msg_thresholds(struct mca_bcol_base_module_t *super) -{ - mca_bcol_basesmuma_module_t *basesmuma_module = - (mca_bcol_basesmuma_module_t *) super; - - size_t basesmuma_offset = bcol_basesmuma_data_offset_calc(basesmuma_module); - - /* Set the Allreduce threshold, for Basesmuma it equals to ML buffer size - data offset */ - super->small_message_thresholds[BCOL_ALLREDUCE] = - basesmuma_module->ml_mem.ml_mem_desc->size_buffer - basesmuma_offset; - - /* Set the Bcast threshold, for Basesmuma it equals to ML buffer size - data offset */ - super->small_message_thresholds[BCOL_BCAST] = - basesmuma_module->ml_mem.ml_mem_desc->size_buffer - basesmuma_offset; - - /* Set the Gather threshold, for Basesmuma it equals to ML buffer size - data offset */ - super->small_message_thresholds[BCOL_GATHER] = - (basesmuma_module->ml_mem.ml_mem_desc->size_buffer - basesmuma_offset) / - ompi_comm_size(basesmuma_module->super.sbgp_partner_module->group_comm); - - /* Set the ALLgather threshold, for Basesmuma it equals to ML buffer size - data offset */ - super->small_message_thresholds[BCOL_ALLGATHER] = - (basesmuma_module->ml_mem.ml_mem_desc->size_buffer - basesmuma_offset) / - ompi_comm_size(basesmuma_module->super.sbgp_partner_module->group_comm); - - /* Set the Reduce threshold, for Basesmuma it equals to ML buffer size - data offset */ - super->small_message_thresholds[BCOL_REDUCE] = - basesmuma_module->ml_mem.ml_mem_desc->size_buffer - basesmuma_offset; - - /* Set the Scatter threshold, for Basesmuma it equals to ML buffer size - data offset */ - super->small_message_thresholds[BCOL_SCATTER] = - basesmuma_module->ml_mem.ml_mem_desc->size_buffer - basesmuma_offset; -} - -/* setup memory management and collective routines */ - -static void load_func(mca_bcol_base_module_t *super) -{ - int fnc; - - /* Loading memory management and collective functions */ - - for (fnc = 0; fnc < BCOL_NUM_OF_FUNCTIONS; fnc++) { - super->bcol_function_table[fnc] = NULL; - } - - /*super->bcol_function_table[BCOL_BARRIER] = bcol_basesmuma_recursive_double_barrier;*/ - -#ifdef __PORTALS_AVAIL__ - super->bcol_function_table[BCOL_BCAST] = bcol_basesmuma_lmsg_scatter_allgather_portals_bcast; - /* 
super->bcol_function_table[BCOL_BCAST] = - bcol_basesmuma_lmsg_bcast_k_nomial_anyroot; */ -#endif - - /*super->bcol_function_table[BCOL_BCAST] = bcol_basesmuma_bcast;*/ - /*super->bcol_function_table[BCOL_BCAST] = bcol_basesmuma_binary_scatter_allgather_segment;*/ - /*super->bcol_function_table[BCOL_BCAST] = bcol_basesmuma_bcast_k_nomial_anyroot;*/ - super->bcol_function_table[BCOL_BCAST] = bcol_basesmuma_bcast; -#ifdef __PORTALS_AVAIL__ - super->bcol_function_table[BCOL_BCAST] = - bcol_basesmuma_lmsg_scatter_allgather_portals_bcast; -#endif - /* super->bcol_function_table[BCOL_ALLREDUCE] = bcol_basesmuma_allreduce_intra_fanin_fanout; */ - super->bcol_function_table[BCOL_ALLREDUCE] = bcol_basesmuma_allreduce_intra_recursive_doubling; - super->bcol_function_table[BCOL_REDUCE] = bcol_basesmuma_reduce_intra_fanin_old; - /* memory management */ - super->bcol_memory_init = bcol_basesmuma_bank_init_opti; - - super->k_nomial_tree = bcol_basesmuma_setup_knomial_tree; - - /* Set thresholds */ - super->set_small_msg_thresholds = bcol_basesmuma_set_small_msg_thresholds; -} - -static void load_func_with_choices(mca_bcol_base_module_t *super) -{ - int fnc; - - /* Loading memory management and collective functions */ - - for (fnc=0; fnc < BCOL_NUM_OF_FUNCTIONS; fnc++) { - super->bcol_function_init_table[fnc] = NULL; - } - - super->bcol_function_init_table[BCOL_FANIN] = bcol_basesmuma_fanin_init; - super->bcol_function_init_table[BCOL_FANOUT] = bcol_basesmuma_fanout_init; - super->bcol_function_init_table[BCOL_BARRIER] = bcol_basesmuma_barrier_init; - - super->bcol_function_init_table[BCOL_BCAST] = bcol_basesmuma_bcast_init; - super->bcol_function_init_table[BCOL_ALLREDUCE] = bcol_basesmuma_allreduce_init; - super->bcol_function_init_table[BCOL_REDUCE] = bcol_basesmuma_reduce_init; - super->bcol_function_init_table[BCOL_GATHER] = bcol_basesmuma_gather_init; - super->bcol_function_init_table[BCOL_ALLGATHER] = bcol_basesmuma_allgather_init; - super->bcol_function_init_table[BCOL_SYNC] = bcol_basesmuma_memsync_init; - /* memory management */ - super->bcol_memory_init = bcol_basesmuma_bank_init_opti; - - super->k_nomial_tree = bcol_basesmuma_setup_knomial_tree; - -} - -static int load_recursive_knomial_info(mca_bcol_basesmuma_module_t - *sm_module) -{ - int rc = OMPI_SUCCESS; - rc = netpatterns_setup_recursive_knomial_tree_node(sm_module->super.sbgp_partner_module->group_size, - sm_module->super.sbgp_partner_module->my_index, - mca_bcol_basesmuma_component.k_nomial_radix, - &sm_module->knomial_exchange_tree); - return rc; -} - - -int bcol_basesmuma_setup_knomial_tree(mca_bcol_base_module_t *super) -{ - mca_bcol_basesmuma_module_t *sm_module = (mca_bcol_basesmuma_module_t *) super; - - return netpatterns_setup_recursive_knomial_allgather_tree_node(sm_module->super.sbgp_partner_module->group_size, - sm_module->super.sbgp_partner_module->my_index, - mca_bcol_basesmuma_component.k_nomial_radix, - super->list_n_connected, - &sm_module->knomial_allgather_tree); -} - - - - -/* query to see if the module is available for use on the given - * communicator, and if so, what its priority is. This is where - * the backing shared-memory file is created.
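 *
 * A minimal caller-side sketch, assuming only the signature below, with
 * error handling elided:
 *
 *   int num_modules = 0;
 *   mca_bcol_base_module_t **modules =
 *       mca_bcol_basesmuma_comm_query(sbgp_module, &num_modules);
 *   if (NULL == modules) {
 *       return NULL;                (module unavailable for this group)
 *   }
 *   mca_bcol_base_module_t *bcol = modules[0];  (always one module here)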
- */ -mca_bcol_base_module_t ** -mca_bcol_basesmuma_comm_query(mca_sbgp_base_module_t *module, int *num_modules) -{ - /* local variables */ - mca_bcol_base_module_t **sm_modules = NULL; - mca_bcol_basesmuma_module_t *sm_module; - bcol_basesmuma_registration_data_t *sm_reg_data; - int ret, my_rank, name_length; - char *name; - int i; - - int bcast_radix; - - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - /*mca_base_component_list_item_t *hdl_cli = NULL;*/ - /*int hdl_num;*/ - - /* at this point I think there is only a sinle shared - memory bcol that we need to be concerned with */ - - /* No group, no modules */ - if (OPAL_UNLIKELY(NULL == module)) { - return NULL; - } - - /* allocate and initialize an sm_bcol module */ - sm_module = OBJ_NEW(mca_bcol_basesmuma_module_t); - - /* set the subgroup */ - sm_module->super.sbgp_partner_module=module; - - (*num_modules)=1; - cs->super.n_net_contexts = *num_modules; - sm_module->reduction_tree = NULL; - sm_module->fanout_read_tree = NULL; - - ret=netpatterns_setup_recursive_doubling_tree_node( - module->group_size,module->my_index, - &(sm_module->recursive_doubling_tree)); - if(OMPI_SUCCESS != ret) { - opal_output (ompi_bcol_base_framework.framework_output, "Error setting up recursive_doubling_tree \n"); - return NULL; - } - - /* setup the fanin tree - this is used only as part of a hierarchical - * barrier, so will set this up with rank 0 as the root */ - my_rank=module->my_index; - ret=netpatterns_setup_narray_tree(cs->radix_fanin, - my_rank,module->group_size,&(sm_module->fanin_node)); - if(OMPI_SUCCESS != ret) { - opal_output (ompi_bcol_base_framework.framework_output, "Error setting up fanin tree \n"); - return NULL; - } - - /* setup the fanout tree - this is used only as part of a hierarchical - * barrier, so will set this up with rank 0 as the root */ - ret=netpatterns_setup_narray_tree(cs->radix_fanout, - my_rank,module->group_size,&(sm_module->fanout_node)); - if(OMPI_SUCCESS != ret) { - opal_output (ompi_bcol_base_framework.framework_output, "Error setting up fanout tree \n"); - return NULL; - } - - /* - * Setup the broadcast tree - this is used only as part of a hierarchical - * bcast, so will set this up with rank 0 as the root. 
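 *
 * Note that the loop below actually builds one n-ary tree per group
 * member, not only the rank-0 tree: a broadcast rooted at any rank i
 * can then use the precomputed fanout_read_tree[i] without further
 * setup.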
- */ - - /* set the radix of the bcast tree */ - bcast_radix = cs->radix_read_tree; - - /* initialize fan-out read tree */ - sm_module->fanout_read_tree=(netpatterns_tree_node_t*) malloc( - sizeof(netpatterns_tree_node_t)*module->group_size); - if( NULL == sm_module->fanout_read_tree ) { - goto Error; - } - - for(i = 0; i < module->group_size; i++){ - ret = netpatterns_setup_narray_tree(bcast_radix, - i, module->group_size, &(sm_module->fanout_read_tree[i])); - if(OMPI_SUCCESS != ret) { - goto Error; - } - } - - ret = load_recursive_knomial_info(sm_module); - if (OMPI_SUCCESS != ret) { - BASESMUMA_VERBOSE(10, ("Failed to load recursive knomial tree")); - goto Error; - } - - /* Allocate offsets array for lmsg reduce */ - ret = alloc_lmsg_reduce_offsets_array(sm_module); - if (OMPI_SUCCESS != ret) { - BASESMUMA_VERBOSE(10, ("Failed to allocate reduce offsets array")); - goto Error; - } - - /* initialize reduction tree */ - sm_module->reduction_tree=(netpatterns_tree_node_t *) malloc( - sizeof(netpatterns_tree_node_t )*module->group_size); - if( NULL == sm_module->reduction_tree ) { - goto Error; - } - - ret=netpatterns_setup_multinomial_tree( - cs->order_reduction_tree,module->group_size, - sm_module->reduction_tree); - if( MPI_SUCCESS != ret ) { - goto Error; - } - - /* get largest power of k for given group size */ - sm_module->pow_k_levels = pow_sm_k(cs->k_nomial_radix, - sm_module->super.sbgp_partner_module->group_size, - &(sm_module->pow_k)); - - /* get largest power of 2 for a given group size - * used in scatter allgather - */ - sm_module->pow_2_levels = pow_sm_k(2, - sm_module->super.sbgp_partner_module->group_size, - &(sm_module->pow_2)); - - /* - * setup scatter data - */ - sm_module->scatter_kary_radix=cs->scatter_kary_radix; - sm_module->scatter_kary_tree=NULL; - ret=netpatterns_setup_narray_tree_contigous_ranks( - sm_module->scatter_kary_radix, - sm_module->super.sbgp_partner_module->group_size, - &(sm_module->scatter_kary_tree)); - if(OMPI_SUCCESS != ret) { - opal_output (ompi_bcol_base_framework.framework_output, "In base_bcol_masesmuma_setup_library_buffers and scatter k-ary tree setup failed \n"); - return NULL; - } - - /* setup the module shared memory management */ - ret=base_bcol_basesmuma_setup_library_buffers(sm_module, cs); - - if(OMPI_SUCCESS != ret) { - opal_output (ompi_bcol_base_framework.framework_output, "In base_bcol_masesmuma_setup_library_buffers and mpool was not successfully setup!\n"); - return NULL; - } - - /* setup the collectives and memory management */ - - /* check to see whether or not the mpool has been inited */ - /* allocate some space for the network contexts */ - if(!cs->mpool_inited) { - /* if it's empty, then fill it for first time */ - cs->super.network_contexts = (bcol_base_network_context_t **) - malloc((cs->super.n_net_contexts)* - sizeof(bcol_base_network_context_t *)); - /* you need to do some basic setup - define the file name, - * set data seg alignment and size of cntl structure in sm - * file. 
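 *
 * Schematically, the payload file name assembled below is
 *
 *   <job_session_dir>/0<payload_base_fname><pid>
 *
 * so distinct jobs (different session directories) and distinct
 * processes within a job (different pids) never collide on a backing
 * file.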
- */ - /* give the payload sm file a name */ - name_length=asprintf(&name, - "%s"OPAL_PATH_SEP"0%s%0d", - ompi_process_info.job_session_dir, - cs->payload_base_fname, - (int)getpid()); - if( 0 > name_length ) { - opal_output (ompi_bcol_base_framework.framework_output, "Failed to assign the shared memory payload file a name\n"); - return NULL; - } - /* make sure name is not too long */ - if ( OPAL_PATH_MAX < (name_length-1) ) { - opal_output (ompi_bcol_base_framework.framework_output, "Shared memory file name is too long!\n"); - return NULL; - } - /* set the name and alignment characteristics */ - sm_reg_data = (bcol_basesmuma_registration_data_t *) malloc( - sizeof(bcol_basesmuma_registration_data_t)); - sm_reg_data->file_name = name; - - sm_reg_data->data_seg_alignment = getpagesize(); - sm_reg_data->size_ctl_structure = 0; - cs->super.network_contexts[0] = (bcol_base_network_context_t *) - malloc(sizeof(bcol_base_network_context_t)); - cs->super.network_contexts[0]->context_data = - (void *) sm_reg_data; - cs->super.network_contexts[0]-> - register_memory_fn = mca_bcol_basesmuma_register_sm; - cs->super.network_contexts[0]-> - deregister_memory_fn = mca_bcol_basesmuma_deregister_sm; - sm_module->super.network_context = cs->super.network_contexts[0]; - } else { - - sm_module->super.network_context = cs->super.network_contexts[0]; - } - - /* Set the header size */ - sm_module->super.header_size = sizeof(mca_bcol_basesmuma_header_t); - - /*initialize the hdl module if it's to be enabled*/ -#if 0 - if (module->use_hdl) { - sm_module->super.use_hdl = module->use_hdl; - hdl_cli = (mca_base_component_list_item_t *) - opal_list_get_first(&mca_hdl_base_components_in_use); - sm_module->hdl_module = ((mca_hdl_base_component_t*) - hdl_cli->cli_component)->hdl_comm_query(sm_module, &hdl_num); - if (1 != hdl_num || sm_module->hdl_module == NULL) { - ML_ERROR(("hdl modules are not successfully initialized!\n")); - goto Error; - } - } else { - sm_module->hdl_module = NULL; - } -#else - sm_module->hdl_module = NULL; -#endif - - - /* collective setup */ - load_func(&(sm_module->super)); - load_func_with_choices(&(sm_module->super)); - - /* - * This initializes all collective algorithms - */ - - ret = mca_bcol_base_bcol_fns_table_init(&(sm_module->super)); - - if (OMPI_SUCCESS != ret) { - - goto Error; - } - - sm_module->super.supported_mode = 0; - - /* NTH: this is not set anywhere on the trunk as of 08/13/13 */ -#if 0 - if (module->use_hdl) { - sm_module->super.supported_mode = MCA_BCOL_BASE_ZERO_COPY; - } -#endif - - /* Initializes portals library required for basesmuma large message */ -#ifdef __PORTALS_AVAIL__ - /* Enable zero copy mode */ - sm_module->super.supported_mode = MCA_BCOL_BASE_ZERO_COPY; - - ret = mca_bcol_basesmuma_portals_init(cs); - if (OMPI_SUCCESS != ret) { - return NULL; - } - - sm_module->sg_state.phase = INIT; - - ret = PtlEQAlloc(((mca_bcol_basesmuma_portal_proc_info_t*) - cs->portals_info)->ni_h, MAX_PORTAL_EVENTS_IN_Q, - PTL_EQ_HANDLER_NONE, &sm_module->sg_state.read_eq); - - if (ret != PTL_OK) { - BASESMUMA_VERBOSE(10,( "PtlEQAlloc() failed: %d",ret)); - return NULL; - } - -#endif - /* blocking recursive double barrier test */ - /* - { - opal_output (ompi_bcol_base_framework.framework_output, "BBB About to hit the barrier test\n"); - int rc; - bcol_function_args_t bogus; - rc = bcol_basesmuma_rd_barrier_init(&(sm_module->super)); - rc = bcol_basesmuma_recursive_double_barrier( - &bogus, &(sm_module->super)); - } - */ - - /* in this case we only expect a single network context. 
- in the future we should loop around this */ - sm_modules = (mca_bcol_base_module_t **) malloc(sizeof(mca_bcol_base_module_t *)); - if( !sm_modules ) { - opal_output (ompi_bcol_base_framework.framework_output, "In base_bcol_masesmuma_setup_library_buffers failed to allocate memory for sm_modules\n"); - return NULL; - } - - sm_modules[0] = &(sm_module->super); - - return sm_modules; - - Error: - - /* cleanup */ - if( sm_module->reduction_tree ) { - free(sm_module->reduction_tree); - sm_module->reduction_tree=NULL; - } - - return NULL; -} - -OBJ_CLASS_INSTANCE(mca_bcol_basesmuma_module_t, - mca_bcol_base_module_t, - mca_bcol_basesmuma_module_construct, - mca_bcol_basesmuma_module_destruct); diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_progress.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_progress.c deleted file mode 100644 index 7029c251ab..0000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_progress.c +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#include "ompi_config.h" -#include "ompi/constants.h" - -#include "bcol_basesmuma.h" - -/* the progress function to be called from the opal progress function - */ -int bcol_basesmuma_progress(void) -{ - /* local variables */ - volatile int32_t *cntr; - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - - /* check to see if release of memory blocks needs to be done */ - if( opal_list_get_size(&(cs->nb_admin_barriers)) ) { - sm_nbbar_desc_t *item_ptr; - opal_list_t *list=&(cs->nb_admin_barriers); - /* process only if the list is non-empty */ - if( !OPAL_THREAD_TRYLOCK(&cs->nb_admin_barriers_mutex)) { - - for (item_ptr = (sm_nbbar_desc_t*) opal_list_get_first(list); - item_ptr != (sm_nbbar_desc_t*) opal_list_get_end(list); - item_ptr = (sm_nbbar_desc_t*) opal_list_get_next(item_ptr) ) - { - bcol_basesmuma_rd_nb_barrier_progress_admin(item_ptr); - /* check to see if an complete */ - if( NB_BARRIER_DONE == item_ptr->collective_phase ) { - /* barrier is complete - remove from the list. No need - * to put it on another list, as it is part of the memory - * bank control structure, and will be picked up - * again when needed. - */ - int index= - item_ptr->pool_index; - /* old way - ctl_struct specific */ - /* - volatile uint64_t *cntr= (volatile uint64_t *) - &(item_ptr->sm_module->colls_no_user_data. - ctl_buffs_mgmt[index].bank_gen_counter); - */ - - cntr= (volatile int32_t *) &(item_ptr->coll_buff-> - ctl_buffs_mgmt[index].bank_gen_counter); - item_ptr=(sm_nbbar_desc_t*)opal_list_remove_item((opal_list_t *)list, - ( opal_list_item_t *)item_ptr); - /* increment the generation number */ - OPAL_THREAD_ADD32(cntr,1); - } - } - - OPAL_THREAD_UNLOCK(&cs->nb_admin_barriers_mutex); - } - - } - return OMPI_SUCCESS; - -} diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_barrier.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_barrier.c deleted file mode 100644 index 9749491e9f..0000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_barrier.c +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. 
All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/* Recursive doubling blocking barrier */ - -#include "ompi_config.h" -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/patterns/net/netpatterns.h" - -#include "opal/sys/atomic.h" - -#include "bcol_basesmuma.h" - -#if 0 -int bcol_basesmuma_recursive_double_barrier(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - - /* local variables */ - int ret=OMPI_SUCCESS, idx, leading_dim, loop_cnt, exchange, flag_to_set; - int pair_rank, flag_offset; - mca_bcol_basesmuma_ctl_struct_t **ctl_structs; - netpatterns_pair_exchange_node_t *my_exchange_node; - int extra_rank, my_rank, pow_2; - volatile mca_bcol_basesmuma_ctl_struct_t *partner_ctl; - volatile mca_bcol_basesmuma_ctl_struct_t *my_ctl; - int64_t sequence_number; - bool found; - int buff_index, first_instance=0; - mca_bcol_basesmuma_module_t* bcol_module = - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; -#if 0 - fprintf(stderr,"Entering the sm rd barrier\n"); - fflush(stderr); -#endif - - /* get the pointer to the segment of control structures */ - my_exchange_node=&(bcol_module->recursive_doubling_tree); - my_rank=bcol_module->super.sbgp_partner_module->my_index; - pow_2=bcol_module->super.sbgp_partner_module->pow_2; - - /* figure out what instance of the basesmuma bcol I am */ - leading_dim=bcol_module->colls_no_user_data.size_of_group; - sequence_number=input_args->sequence_num - c_input_args->bcol_module->squence_number_offset; - - buff_index=sequence_number & (bcol_module->colls_no_user_data.mask); - - idx=SM_ARRAY_INDEX(leading_dim,buff_index,0); - ctl_structs=(mca_bcol_basesmuma_ctl_struct_t **) - bcol_module->colls_no_user_data.ctl_buffs+idx; - my_ctl=ctl_structs[my_rank]; - if( my_ctl->sequence_number < sequence_number ) { - first_instance=1; - } - - /* get the pool index */ - if( first_instance ) { - idx = -1; - while( idx == -1 ) { - - idx=bcol_basesmuma_get_buff_index( - &(bcol_module->colls_no_user_data),sequence_number); - } - if( -1 == idx ){ - return ORTE_ERR_TEMP_OUT_OF_RESOURCE; - } - my_ctl->index=1; - /* this does not need to use any flag values , so only need to - * set the value for subsequent values that may need this */ - my_ctl->starting_flag_value=0; - flag_offset=0; - } else { - /* only one thread at a time will be making progress on this - * collective, so no need to make this atomic */ - my_ctl->index++; - flag_offset=my_ctl->starting_flag_value; - } - - /* signal that I have arrived */ - my_ctl->flag = -1; - /* don't need to set this flag anymore */ - my_ctl->sequence_number = sequence_number; - /* opal_atomic_wmb ();*/ - - if(0 < my_exchange_node->n_extra_sources) { - if (EXCHANGE_NODE == my_exchange_node->node_type) { - volatile int64_t *partner_sn; - int cnt=0; - - /* I will participate in the exchange - wait for signal from extra - ** process */ - extra_rank = my_exchange_node->rank_extra_source; - partner_ctl=(volatile mca_bcol_basesmuma_ctl_struct_t *)ctl_structs[extra_rank]; - - /*partner_ctl=ctl_structs[extra_rank];*/ - partner_sn=(volatile int64_t *)&(partner_ctl->sequence_number); - - /* spin n iterations until partner registers */ - loop_cnt=0; - found=false; - while( !found ) - { - if( *partner_sn >= sequence_number ) { - found=true; - } - cnt++; - if( cnt == 1000 ) { - opal_progress(); - cnt=0; - } - } - - } else { - - /* Nothing to do, already registared that I am here */ - } - } - - 
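/*
 * Main recursive-doubling exchange. The pow_2 ranks that participate
 * finish in log_2(pow_2) rounds; in round `exchange` each rank pairs
 * with the rank whose address bit `exchange` differs. A minimal sketch
 * of one round, assuming SHIFT_UP expands to the left-shift operator:
 *
 *   pair_rank = my_rank ^ (1 << exchange);      (partner for this round)
 *   my_ctl->flag = flag_offset + exchange;      (announce my arrival)
 *   then spin on ctl_structs[pair_rank] until its sequence number and
 *   flag show it has reached this round, calling opal_progress() every
 *   1000 polls, exactly as the loop below does.
 */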
for(exchange = 0; exchange < my_exchange_node->n_exchanges; exchange++) { - - volatile int64_t *partner_sn; - volatile int *partner_flag; - int cnt=0; - - /* rank of exchange partner */ - pair_rank = my_rank ^ ( 1 SHIFT_UP exchange ); - partner_ctl=ctl_structs[pair_rank]; - partner_sn=(volatile int64_t *)&(partner_ctl->sequence_number); - partner_flag=(volatile int *)&(partner_ctl->flag); - - /* signal that I am at iteration exchange of the algorithm */ - flag_to_set=flag_offset+exchange; - my_ctl->flag = flag_to_set; - - /* check to see if the partner has arrived */ - - /* spin n iterations until partner registers */ - found=false; - while( !found ) - { - if( (*partner_sn > sequence_number) || - ( *partner_sn == sequence_number && - *partner_flag >= flag_to_set ) ) { - found=true; - } else { - cnt++; - if( cnt == 1000 ) { - opal_progress(); - cnt=0; - } - } - } - } - - if(0 < my_exchange_node->n_extra_sources) { - if ( EXTRA_NODE == my_exchange_node->node_type ) { - int cnt=0; - - /* I will not participate in the exchange - - * wait for signal from extra partner */ - extra_rank = my_exchange_node->rank_extra_source; - partner_ctl=ctl_structs[extra_rank]; - flag_to_set=flag_offset+my_exchange_node->log_2; - - /* spin n iterations until partner registers */ - found=false; - while( !found ) - { - if (IS_PEER_READY(partner_ctl, flag_to_set, sequence_number)){ - found=true; - } else { - cnt++; - if( cnt == 1000 ) { - opal_progress(); - cnt=0; - } - } - } - - } else { - - /* signal the extra rank that I am done with the recursive - * doubling phase. - */ - flag_to_set=flag_offset+my_exchange_node->log_2; - my_ctl->flag = flag_to_set; - - } - } - - /* if I am the last instance of a basesmuma function in this collective, - * release the resources */ - if (IS_LAST_BCOL_FUNC(c_input_args)){ - idx=bcol_basesmuma_free_buff( - &(bcol_module->colls_no_user_data), - sequence_number); - } else { - /* increment flag value - so next sm collective in the hierarchy - * will not collide with the current one, as they share the - * control structure */ - my_ctl->starting_flag_value+=(my_exchange_node->log_2+1); - } - - /* return */ - return ret; -} -#endif diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_nb_barrier.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_nb_barrier.c deleted file mode 100644 index 60be1a4364..0000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_nb_barrier.c +++ /dev/null @@ -1,462 +0,0 @@ -/* - * Copyright (c) 2009-2012 UT-Battelle, LLC. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -/* we need to clean up all of these includes START */ -#include -#include - -#include "ompi/constants.h" -#include "ompi/mca/bcol/bcol.h" -#include "bcol_basesmuma.h" -#include "opal/sys/atomic.h" -#include "ompi/patterns/net/netpatterns.h" -#include "ompi/mca/bcol/base/base.h" - -/* - * Initialize nonblocking barrier. This code is specific to handling - * the recycling of data, and uses only a single set of control buffers. - * It also assumes that for a given process, only a single outstanding - * barrier operation will occur for a given control structure, - * with the sequence number being used for potential overlap in time - * between successive barrier calls on different processes.
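 *
 * Unlike the blocking variant above, none of the waits here can spin
 * forever: each polls at most n_poll_loops times and, on a miss, caches
 * its position in the descriptor so the progress function can resume
 * where it left off. The descriptor steps through the phases used below:
 *
 *   NB_PRE_PHASE           waiting on the extra (non-power-of-two) rank
 *   NB_RECURSIVE_DOUBLING  mid-exchange; round cached in recursive_dbl_iteration
 *   NB_POST_PHASE          final handshake with the extra rank
 *   NB_BARRIER_DONE        complete; the bank generation counter may be bumped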
- */ -int bcol_basesmuma_rd_nb_barrier_init_admin( - sm_nbbar_desc_t *sm_desc) - -{ - /* local variables */ - int ret=OMPI_SUCCESS, idx, leading_dim, loop_cnt, exchange; - int pair_rank; - mca_bcol_basesmuma_ctl_struct_t **ctl_structs; - netpatterns_pair_exchange_node_t *my_exchange_node; - int extra_rank, my_rank; - mca_bcol_basesmuma_ctl_struct_t volatile *partner_ctl; - mca_bcol_basesmuma_ctl_struct_t volatile *my_ctl; - int64_t bank_genaration; - bool found; - int pool_index=sm_desc->pool_index; - mca_bcol_basesmuma_module_t *bcol_module=sm_desc->sm_module; - - /* get the pointer to the segment of control structures */ - idx=sm_desc->coll_buff->number_of_buffs+pool_index; - leading_dim=sm_desc->coll_buff->size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,idx,0); - ctl_structs=(mca_bcol_basesmuma_ctl_struct_t **) - sm_desc->coll_buff->ctl_buffs+idx; - bank_genaration= sm_desc->coll_buff->ctl_buffs_mgmt[pool_index].bank_gen_counter; - - my_exchange_node=&(bcol_module->recursive_doubling_tree); - my_rank=bcol_module->super.sbgp_partner_module->my_index; - my_ctl=ctl_structs[my_rank]; - /* debug print */ - /* - { - int ii; - for(ii = 0; ii < 6; ii++) { - fprintf(stderr,"UUU ctl_struct[%d] := %p\n",ii, - bcol_module->colls_no_user_data.ctl_buffs[ii]); - fflush(stderr); - } - } - */ - /* end debug */ - - /* signal that I have arrived */ - my_ctl->flag = -1; - - opal_atomic_wmb (); - - /* don't need to set this flag anymore */ - my_ctl->sequence_number = bank_genaration; - - if(0 < my_exchange_node->n_extra_sources) { - if (EXCHANGE_NODE == my_exchange_node->node_type) { - volatile int64_t *partner_sn; - /* I will participate in the exchange - wait for signal from extra - ** process */ - extra_rank = my_exchange_node->rank_extra_source; - partner_ctl=ctl_structs[extra_rank]; - partner_sn=(volatile int64_t *)&(partner_ctl->sequence_number); - - /* spin n iterations until partner registers */ - loop_cnt=0; - found=false; - while( loop_cnt < bcol_module->super.n_poll_loops ) - { - if( *partner_sn >= bank_genaration ) { - found=true; - break; - } - loop_cnt++; - } - if( !found ) { - /* set restart parameters */ - sm_desc->collective_phase=NB_PRE_PHASE; - return OMPI_SUCCESS; - } - - } else { - - /* Nothing to do, already registared that I am here */ - } - } - - for(exchange = 0; exchange < my_exchange_node->n_exchanges; exchange++) { - - volatile int64_t *partner_sn; - volatile int *partner_flag; - - /* rank of exchange partner */ - pair_rank = my_rank ^ ( 1 SHIFT_UP exchange ); - partner_ctl=ctl_structs[pair_rank]; - partner_sn=(volatile int64_t *)&(partner_ctl->sequence_number); - partner_flag=(volatile int *)&(partner_ctl->flag); - - /* signal that I am at iteration exchange of the algorithm */ - my_ctl->flag = exchange; - - /* check to see if the partner has arrived */ - - /* spin n iterations until partner registers */ - loop_cnt=0; - found=false; - while( loop_cnt < bcol_module->super.n_poll_loops ) - { - if( (*partner_sn > bank_genaration) || - ( *partner_sn == bank_genaration && - *partner_flag >= exchange ) ) { - found=true; - break; - } - - loop_cnt++; - - } - if( !found ) { - /* set restart parameters */ - sm_desc->collective_phase=NB_RECURSIVE_DOUBLING; - sm_desc->recursive_dbl_iteration=exchange; - return OMPI_SUCCESS; - } - - } - - if(0 < my_exchange_node->n_extra_sources) { - if ( EXTRA_NODE == my_exchange_node->node_type ) { - volatile int64_t *partner_sn; - volatile int *partner_flag; - - /* I will not participate in the exchange - - * wait for signal from extra partner */ - 
extra_rank = my_exchange_node->rank_extra_source; - partner_ctl=ctl_structs[extra_rank]; - partner_sn=(volatile int64_t *)&(partner_ctl->sequence_number); - partner_flag=(volatile int *)&(partner_ctl->flag); - - /* spin n iterations until partner registers */ - loop_cnt=0; - found=false; - while( loop_cnt < bcol_module->super.n_poll_loops ) - { - if( (*partner_sn > bank_genaration) || - ( (*partner_sn == bank_genaration) && - (*partner_flag == (my_exchange_node->log_2)) ) ) { - found=true; - break; - } - loop_cnt++; - } - if( !found ) { - /* set restart parameters */ - sm_desc->collective_phase=NB_POST_PHASE; - return OMPI_SUCCESS; - } - - } else { - - /* signal the extra rank that I am done with the recursive - * doubling phase. - */ - my_ctl->flag = my_exchange_node->n_exchanges; - - } - } - - /* set the barrier as complete */ - sm_desc->collective_phase=NB_BARRIER_DONE; - /* return */ - return ret; -} - -/* admin nonblocking barrier - progress function */ -int bcol_basesmuma_rd_nb_barrier_progress_admin( - sm_nbbar_desc_t *sm_desc) - -{ - /* local variables */ - int ret=OMPI_SUCCESS, idx, leading_dim, loop_cnt, exchange; - int pair_rank, start_index, restart_phase; - mca_bcol_basesmuma_ctl_struct_t **ctl_structs; - netpatterns_pair_exchange_node_t *my_exchange_node; - int extra_rank, my_rank; - mca_bcol_basesmuma_ctl_struct_t volatile *partner_ctl; - mca_bcol_basesmuma_ctl_struct_t volatile *my_ctl; - int64_t bank_genaration; - int pool_index=sm_desc->pool_index; - bool found; - mca_bcol_basesmuma_module_t *bcol_module=sm_desc->sm_module; - - /* get the pointer to the segment of control structures */ - idx = sm_desc->coll_buff->number_of_buffs+pool_index; - leading_dim = sm_desc->coll_buff->size_of_group; - idx = SM_ARRAY_INDEX(leading_dim,idx,0); - ctl_structs = (mca_bcol_basesmuma_ctl_struct_t **) - sm_desc->coll_buff->ctl_buffs+idx; - bank_genaration = sm_desc->coll_buff->ctl_buffs_mgmt[pool_index].bank_gen_counter; - - my_exchange_node=&(bcol_module->recursive_doubling_tree); - my_rank=bcol_module->super.sbgp_partner_module->my_index; - my_ctl=ctl_structs[my_rank]; - - /* check to make sure that this should be progressed */ - if( ( sm_desc->collective_phase == NB_BARRIER_INACTIVE ) || - ( sm_desc->collective_phase == NB_BARRIER_DONE ) ) - { - return OMPI_SUCCESS; - } - - /* set the restart up - and jump to the correct place in the algorithm */ - restart_phase=sm_desc->collective_phase; - if ( NB_PRE_PHASE == restart_phase ) { - start_index=0; - } else if ( NB_RECURSIVE_DOUBLING == restart_phase ) { - start_index=sm_desc->recursive_dbl_iteration; - goto Exchange_phase; - } else { - goto Post_phase; - } - - if(0 < my_exchange_node->n_extra_sources) { - if (EXCHANGE_NODE == my_exchange_node->node_type) { - volatile int64_t *partner_sn; - /* I will participate in the exchange - wait for signal from extra - ** process */ - extra_rank = my_exchange_node->rank_extra_source; - partner_ctl=ctl_structs[extra_rank]; - partner_sn=(volatile int64_t *)&(partner_ctl->sequence_number); - - /* spin n iterations until partner registers */ - loop_cnt=0; - while( loop_cnt < bcol_module->super.n_poll_loops ) - { - found=false; - if( *partner_sn >= bank_genaration ) { - found=true; - break; - } - loop_cnt++; - } - if( !found ) { - /* set restart parameters */ - sm_desc->collective_phase=NB_PRE_PHASE; - return OMPI_SUCCESS; - } - - } else { - - /* Nothing to do, already registared that I am here */ - } - } - -Exchange_phase: - - for(exchange = start_index; - exchange < my_exchange_node->n_exchanges; 
exchange++) { - - volatile int64_t *partner_sn; - volatile int *partner_flag; - - /* rank of exchange partner */ - pair_rank = my_rank ^ ( 1 SHIFT_UP exchange ); - partner_ctl=ctl_structs[pair_rank]; - partner_sn=(volatile int64_t *)&(partner_ctl->sequence_number); - partner_flag=(volatile int *)&(partner_ctl->flag); - - /* signal that I am at iteration exchange of the algorithm */ - my_ctl->flag = exchange; - - /* check to see if the partner has arrived */ - - /* spin n iterations until partner registers */ - loop_cnt=0; - found=false; - while( loop_cnt < bcol_module->super.n_poll_loops ) - { - if( (*partner_sn > bank_genaration) || - ( (*partner_sn == bank_genaration) && - (*partner_flag >= exchange) ) ) { - found=true; - break; - } - loop_cnt++; - } - if( !found ) { - /* set restart parameters */ - sm_desc->collective_phase=NB_RECURSIVE_DOUBLING; - sm_desc->recursive_dbl_iteration=exchange; - return OMPI_SUCCESS; - } - - } - -Post_phase: - if(0 < my_exchange_node->n_extra_sources) { - if ( EXTRA_NODE == my_exchange_node->node_type ) { - volatile int64_t *partner_sn; - volatile int *partner_flag; - - /* I will not participate in the exchange - - * wait for signal from extra partner */ - extra_rank = my_exchange_node->rank_extra_source; - partner_ctl=ctl_structs[extra_rank]; - partner_sn=(volatile int64_t *)&(partner_ctl->sequence_number); - partner_flag=(volatile int *)&(partner_ctl->flag); - - /* spin n iterations until partner registers */ - loop_cnt=0; - found=false; - while( loop_cnt < bcol_module->super.n_poll_loops ) - { - if( (*partner_sn > bank_genaration) || - ( *partner_sn == bank_genaration && - *partner_flag == (my_exchange_node->log_2) ) ) { - found=true; - break; - } - loop_cnt++; - } - if( !found ) { - /* set restart parameters */ - sm_desc->collective_phase=NB_POST_PHASE; - return OMPI_SUCCESS; - } - - } else { - - /* signal the extra rank that I am done with the recursive - * doubling phase. 
- */ - my_ctl->flag = my_exchange_node->n_exchanges; - - } - } - - /* set the barrier as complete */ - sm_desc->collective_phase=NB_BARRIER_DONE; - - /* return */ - return ret; -} - -static int bcol_basesmuma_memsync(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - int rc; - int memory_bank = input_args->root; - - mca_bcol_basesmuma_module_t* bcol_module = - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; - sm_buffer_mgmt *buff_block = &(bcol_module->colls_with_user_data); - sm_nbbar_desc_t *sm_desc = &(buff_block->ctl_buffs_mgmt[memory_bank].nb_barrier_desc); - - sm_desc->coll_buff = buff_block; - /* - printf("XXX SYNC call\n"); - */ - - rc = bcol_basesmuma_rd_nb_barrier_init_admin( - sm_desc); - if (OMPI_SUCCESS != rc) { - return rc; - } - - if (NB_BARRIER_DONE != sm_desc->collective_phase) { - mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component; - opal_list_t *list=&(cs->nb_admin_barriers); - opal_list_item_t *append_item; - - /* put this onto the progression list */ - OPAL_THREAD_LOCK(&(cs->nb_admin_barriers_mutex)); - append_item=(opal_list_item_t *) - &(buff_block->ctl_buffs_mgmt[memory_bank].nb_barrier_desc); - opal_list_append(list,append_item); - OPAL_THREAD_UNLOCK(&(cs->nb_admin_barriers_mutex)); - /* progress communications so that resources can be freed up */ - return BCOL_FN_STARTED; - } - - /* Done - bump the counter */ - (buff_block->ctl_buffs_mgmt[memory_bank].bank_gen_counter)++; - /* - printf("XXX SYNC call done \n"); - */ - return BCOL_FN_COMPLETE; -} - -static int bcol_basesmuma_memsync_progress(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - int memory_bank = input_args->root; - - mca_bcol_basesmuma_module_t* bcol_module = - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; - sm_buffer_mgmt *buff_block = &(bcol_module->colls_with_user_data); - sm_nbbar_desc_t *sm_desc = &(buff_block->ctl_buffs_mgmt[memory_bank].nb_barrier_desc); - - /* I do not have to do anything, since the - progress done by basesmuma progress engine */ - - if (NB_BARRIER_DONE != sm_desc->collective_phase) { - return BCOL_FN_STARTED; - } - - return BCOL_FN_COMPLETE; -} - -int bcol_basesmuma_memsync_init(mca_bcol_base_module_t *super) -{ - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - comm_attribs.bcoll_type = BCOL_SYNC; - - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - comm_attribs.data_src = DATA_SRC_KNOWN; - - mca_bcol_base_set_attributes(super, - &comm_attribs, &inv_attribs, - bcol_basesmuma_memsync, - bcol_basesmuma_memsync_progress); - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_reduce.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_reduce.c deleted file mode 100644 index 570280d084..0000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_reduce.c +++ /dev/null @@ -1,382 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/constants.h" -#include "ompi/op/op.h" -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/bcol/base/base.h" -#include "ompi/mca/bcol/bcol.h" - -#include "opal/include/opal_stdint.h" - -#include "bcol_basesmuma.h" -#include "bcol_basesmuma_reduce.h" -/** - * gvm - Shared memory reduce - */ - -static int bcol_basesmuma_reduce_intra_fanin_progress(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -int bcol_basesmuma_reduce_init(mca_bcol_base_module_t *super) -{ - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - comm_attribs.bcoll_type = BCOL_REDUCE; - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1048576; - comm_attribs.data_src = DATA_SRC_KNOWN; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; - inv_attribs.datatype_bitmap = 0x11111111; - inv_attribs.op_types_bitmap = 0x11111111; - - - /* Set attributes for fanin fanout algorithm */ - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, bcol_basesmuma_reduce_intra_fanin, - bcol_basesmuma_reduce_intra_fanin_progress); - - inv_attribs.bcol_msg_min = 10000000; - inv_attribs.bcol_msg_max = 10485760; /* range 4 */ - - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, NULL, NULL); - - return OMPI_SUCCESS; -} - -/* - * Small data fanin reduce - * ML buffers are used for both payload and control structures - * This functions works with hierarchical allreduce and - * progress engine - */ -static inline int reduce_children (mca_bcol_basesmuma_module_t *bcol_module, volatile void *rbuf, netpatterns_tree_node_t *my_reduction_node, - int *iteration, volatile mca_bcol_basesmuma_header_t *my_ctl_pointer, ompi_datatype_t *dtype, - volatile mca_bcol_basesmuma_payload_t *data_buffs, int count, struct ompi_op_t *op, int process_shift) { - volatile mca_bcol_basesmuma_header_t * child_ctl_pointer; - int bcol_id = (int) bcol_module->super.bcol_id; - int64_t sequence_number = my_ctl_pointer->sequence_number; - int8_t ready_flag = my_ctl_pointer->ready_flag; - int group_size = bcol_module->colls_no_user_data.size_of_group; - - if (LEAF_NODE != my_reduction_node->my_node_type) { - volatile char *child_data_pointer; - volatile void *child_rbuf; - - /* for each child */ - /* my_result_data = child_result_data (op) my_source_data */ - - for (int child = *iteration ; child < my_reduction_node->n_children ; ++child) { - int child_rank = my_reduction_node->children_ranks[child] + process_shift; - - if (group_size <= child_rank){ - child_rank -= group_size; - } - - child_ctl_pointer = data_buffs[child_rank].ctl_struct; - child_data_pointer = data_buffs[child_rank].payload; - - if (!IS_PEER_READY(child_ctl_pointer, ready_flag, sequence_number, REDUCE_FLAG, bcol_id)) { - *iteration = child; - return BCOL_FN_STARTED; - } - - child_rbuf = child_data_pointer + child_ctl_pointer->roffsets[bcol_id]; - - ompi_op_reduce(op,(void *)child_rbuf,(void *)rbuf, count, dtype); - } /* end child loop */ - } - - if (ROOT_NODE != my_reduction_node->my_node_type) { - opal_atomic_wmb (); - my_ctl_pointer->flags[REDUCE_FLAG][bcol_id] = ready_flag; - } - - return BCOL_FN_COMPLETE; -} - -static int bcol_basesmuma_reduce_intra_fanin_progress(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) 
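/*
 * Progress side of the nonblocking fan-in reduce: it reloads the child
 * index that was cached in ml_mem.nb_coll_desc[buffer_index].iteration
 * and re-enters reduce_children() until every child's contribution has
 * been reduced into rbuf.
 */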
-{ - mca_bcol_basesmuma_module_t* bcol_module = - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; - - netpatterns_tree_node_t *my_reduction_node; - int my_rank, my_node_index; - struct ompi_datatype_t *dtype = input_args->dtype; - int leading_dim, idx; - - /* Buffer index */ - int buff_idx = input_args->src_desc->buffer_index; - - int *iteration = &bcol_module->ml_mem.nb_coll_desc[buff_idx].iteration; - - volatile mca_bcol_basesmuma_payload_t *data_buffs; - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer; - void *data_addr = (void *)input_args->src_desc->data_addr; - volatile void *rbuf; - - /* get addressing information */ - my_rank = bcol_module->super.sbgp_partner_module->my_index; - leading_dim = bcol_module->colls_no_user_data.size_of_group; - idx = SM_ARRAY_INDEX(leading_dim, buff_idx, 0); - - data_buffs = (volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs + idx; - - /* Get control structure and payload buffer */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - - my_node_index = my_rank - input_args->root; - if (0 > my_node_index) { - int group_size = bcol_module->colls_no_user_data.size_of_group; - my_node_index += group_size; - } - - my_reduction_node = bcol_module->reduction_tree + my_node_index; - rbuf = (volatile void *)((uintptr_t) data_addr + input_args->rbuf_offset); - - return reduce_children (bcol_module, rbuf, my_reduction_node, iteration, my_ctl_pointer, dtype, - data_buffs, input_args->count, input_args->op, input_args->root); -} - -int bcol_basesmuma_reduce_intra_fanin(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - /* local variables */ - int rc=BCOL_FN_COMPLETE; - int my_rank,group_size,my_node_index; - mca_bcol_basesmuma_module_t* bcol_module = - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; - - netpatterns_tree_node_t *my_reduction_node; - volatile int8_t ready_flag; - int bcol_id = (int) bcol_module->super.bcol_id; - volatile void *sbuf,*rbuf; - int sbuf_offset,rbuf_offset; - int root,count; - int64_t sequence_number=input_args->sequence_num; - struct ompi_datatype_t *dtype; - int leading_dim,idx; - - /* Buffer index */ - int buff_idx = input_args->src_desc->buffer_index; - - int *iteration = &bcol_module->ml_mem.nb_coll_desc[buff_idx].iteration; - - volatile mca_bcol_basesmuma_payload_t *data_buffs; - volatile char * my_data_pointer; - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer; - void *data_addr = (void *)input_args->src_desc->data_addr; - -#if 0 - fprintf(stderr,"777 entering sm reduce \n"); -#endif - - /* get addressing information */ - my_rank=bcol_module->super.sbgp_partner_module->my_index; - group_size=bcol_module->colls_no_user_data.size_of_group; - leading_dim=bcol_module->colls_no_user_data.size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - - data_buffs = (volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs+idx; - /* fprintf(stderr,"AAA the devil!!\n"); */ - /* Get control structure and payload buffer */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - my_data_pointer = (volatile char *)data_addr; - - /* Align node index to around sbgp root */ - root = input_args->root; - my_node_index = my_rank - root; - if (0 > my_node_index) { - my_node_index += group_size; - } - - /* get arguments */ - sbuf_offset = input_args->sbuf_offset; - rbuf_offset = input_args->rbuf_offset; - sbuf = (volatile void *)(my_data_pointer + sbuf_offset); - data_buffs[my_rank].payload = (void*)sbuf; - rbuf = (volatile void 
*)(my_data_pointer + rbuf_offset); - count = input_args->count; - dtype = input_args->dtype; - - /* Cache my rbuf_offset */ - my_ctl_pointer->roffsets[bcol_id] = rbuf_offset; - - /* get my node for the reduction tree */ - my_reduction_node=&(bcol_module->reduction_tree[my_node_index]); - - /* init the header */ - BASESMUMA_HEADER_INIT(my_ctl_pointer, ready_flag, sequence_number, bcol_id); - - input_args->result_in_rbuf = (ROOT_NODE == my_reduction_node->my_node_type); - - /* set starting point for progress loop */ - *iteration = 0; - my_ctl_pointer->ready_flag = ready_flag; - - if (sbuf != rbuf) { - rc = ompi_datatype_copy_content_same_ddt(dtype, count, (char *)rbuf, - (char *)sbuf); - if( 0 != rc ) { - return OMPI_ERROR; - } - } - - rc = reduce_children (bcol_module, rbuf, my_reduction_node, iteration, my_ctl_pointer, dtype, - data_buffs, count, input_args->op, root); - - /* Flag value if other bcols are called */ - my_ctl_pointer->starting_flag_value[bcol_id]++; - - /* Recycle payload buffers */ - - return rc; -} - -/* Small data fanin reduce - * Uses SM buffer (backed by SM file) for both control structures and - * payload - * - * NTH: How does this differ from the new one? Can we replace this - * with a call to the new init then a call the new progress until - * complete? - */ -int bcol_basesmuma_reduce_intra_fanin_old(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args) -{ - /* local variables */ - int rc=OMPI_SUCCESS; - int my_rank,group_size,process_shift,my_node_index; - int n_children,child; - mca_bcol_basesmuma_module_t* bcol_module = - (mca_bcol_basesmuma_module_t *)c_input_args->bcol_module; - - netpatterns_tree_node_t *my_reduction_node; - volatile int8_t ready_flag; - volatile void *sbuf,*rbuf; - int sbuf_offset,rbuf_offset; - int root,count; - struct ompi_op_t *op; - int64_t sequence_number=input_args->sequence_num; - struct ompi_datatype_t *dtype; - int leading_dim,idx; - int buff_idx; - int child_rank; - int bcol_id = (int) bcol_module->super.bcol_id; - - volatile mca_bcol_basesmuma_payload_t *data_buffs; - volatile char * my_data_pointer; - volatile char * child_data_pointer; - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer; - volatile mca_bcol_basesmuma_header_t * child_ctl_pointer; - -#if 0 - fprintf(stderr,"Entering fanin reduce \n"); -#endif - - /* Buffer index */ - buff_idx = input_args->src_desc->buffer_index; - /* get addressing information */ - my_rank=bcol_module->super.sbgp_partner_module->my_index; - group_size=bcol_module->colls_no_user_data.size_of_group; - leading_dim=bcol_module->colls_no_user_data.size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - - /*ctl_structs=(mca_bcol_basesmuma_ctl_struct_t **) - bcol_module->colls_with_user_data.ctl_buffs+idx;*/ - data_buffs = (volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs+idx; - - /* Get control structure and payload buffer */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - my_data_pointer = (volatile char *) data_buffs[my_rank].payload; - - /* Align node index to around sbgp root */ - root = input_args->root; - process_shift = root; - my_node_index = my_rank - root; - if (0 > my_node_index ) { - my_node_index += group_size; - } - - /* get arguments */ - sbuf_offset = input_args->sbuf_offset; - rbuf_offset = input_args->rbuf_offset; - sbuf = (volatile void *)(my_data_pointer + sbuf_offset); - rbuf = (volatile void *)(my_data_pointer + rbuf_offset); - op = input_args->op; - count = input_args->count; - dtype = input_args->dtype; - 
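/*
 * The fan-in proper begins here. Schematically (a restatement of the
 * code below, not new logic):
 *
 *   copy sbuf into rbuf                            (own contribution)
 *   if I am not a leaf:
 *       for each child c of my reduction-tree node:
 *           spin until c's REDUCE_FLAG reaches ready_flag for this
 *           sequence number, calling opal_progress() while waiting
 *           ompi_op_reduce(op, child_rbuf, rbuf, count, dtype)
 *   if I am not the root:
 *       publish my own REDUCE_FLAG so my parent can consume rbuf
 */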
- /* get my node for the reduction tree */ - my_reduction_node=&(bcol_module->reduction_tree[my_node_index]); - n_children=my_reduction_node->n_children; - - /* init the header */ - BASESMUMA_HEADER_INIT(my_ctl_pointer, ready_flag, sequence_number, bcol_id); - - input_args->result_in_rbuf = (ROOT_NODE == my_reduction_node->my_node_type); - - rc = ompi_datatype_copy_content_same_ddt(dtype, count, (char *)rbuf, - (char *)sbuf); - if (0 != rc) { - return OMPI_ERROR; - } - - if (LEAF_NODE != my_reduction_node->my_node_type) { - volatile void *child_rbuf; - /* for each child */ - /* my_result_data = child_result_data (op) my_source_data */ - - for (child = 0 ; child < n_children ; ++child) { - child_rank = my_reduction_node->children_ranks[child]; - child_rank += process_shift; - - /* wrap around */ - if( group_size <= child_rank ){ - child_rank-=group_size; - } - - /*child_ctl_pointer = ctl_structs[child_rank];*/ - child_ctl_pointer = data_buffs[child_rank].ctl_struct; - child_data_pointer = data_buffs[child_rank].payload; - - child_rbuf = child_data_pointer + rbuf_offset; - /* wait until the child's data is ready for use */ - while (!IS_PEER_READY(child_ctl_pointer, ready_flag, sequence_number, REDUCE_FLAG, bcol_id)) { - opal_progress(); - } - - /* apply collective operation */ - ompi_op_reduce(op,(void *)child_rbuf,(void *)rbuf, count,dtype); - } /* end child loop */ - } - - if (ROOT_NODE != my_reduction_node->my_node_type) { - opal_atomic_wmb (); - my_ctl_pointer->flags[REDUCE_FLAG][bcol_id] = ready_flag; - } - - my_ctl_pointer->starting_flag_value[bcol_id]++; - - return rc; -} diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_reduce.h b/ompi/mca/bcol/basesmuma/bcol_basesmuma_reduce.h deleted file mode 100644 index 3d6f209446..0000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_reduce.h +++ /dev/null @@ -1,92 +0,0 @@ -#ifndef __BASESMUMA_REDUCE_H_ - -#define __BASESMUMA_REDUCE_H_ - -#include "ompi_config.h" -#include "ompi/mca/bcol/basesmuma/bcol_basesmuma.h" -#include "ompi/constants.h" -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/communicator/communicator.h" -#include "bcol_basesmuma_utils.h" -#include - -enum { - BLOCK_OFFSET = 0, - LOCAL_REDUCE_SEG_OFFSET, - BLOCK_COUNT, - SEG_SIZE, - NOFFSETS -}; - -int compute_knomial_reduce_offsets(int group_index, int count, struct - ompi_datatype_t *dtype,int k_radix,int n_exchanges, - int **offsets); - -int compute_knomial_reduce_offsets_reverse(int group_index, int count, struct - ompi_datatype_t *dtype,int k_radix,int n_exchanges, - int **offsets); - -int bcol_basesmuma_lmsg_reduce_recursivek_scatter_reduce(mca_bcol_basesmuma_module_t *sm_module, - const int buffer_index, void *sbuf, - void *rbuf, - struct ompi_op_t *op, - const int count, struct ompi_datatype_t *dtype, - const int relative_group_index, - const int padded_start_byte, - volatile int8_t ready_flag, - volatile mca_bcol_basesmuma_payload_t *data_buffs); - -int bcol_basesmuma_lmsg_reduce_knomial_gather(mca_bcol_basesmuma_module_t *basesmuma_module, - const int buffer_index, - void *sbuf,void *rbuf, int count, struct - ompi_datatype_t *dtype, - const int my_group_index, - const int padded_start_byte, - volatile int8_t rflag, - volatile mca_bcol_basesmuma_payload_t *data_buffs); - -int bcol_basesmuma_lmsg_reduce_extra_root(mca_bcol_basesmuma_module_t *sm_module, - const int buffer_index, void *sbuf, - void *rbuf, - struct ompi_op_t *op, - const int count, struct ompi_datatype_t *dtype, - const int relative_group_index, - const int padded_start_byte, - volatile
int8_t rflag, - volatile mca_bcol_basesmuma_payload_t *data_buffs); - - - -int bcol_basesmuma_lmsg_reduce_extra_non_root(mca_bcol_basesmuma_module_t *sm_module, - const int buffer_index, void *sbuf, - void *rbuf, - int root, - struct ompi_op_t *op, - const int count, struct ompi_datatype_t *dtype, - const int relative_group_index, - const int group_size, - const int padded_start_byte, - volatile int8_t rflag, - volatile mca_bcol_basesmuma_payload_t *data_buffs); - -int bcol_basesmuma_lmsg_reduce(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -int bcol_basesmuma_lmsg_reduce_extra(bcol_function_args_t *input_args, - mca_bcol_base_function_t *c_input_args); - -void basesmuma_reduce_recv(int my_group_index, int peer, - void *recv_buffer, - int recv_size, - volatile int8_t ready_flag_val, - volatile mca_bcol_basesmuma_payload_t *data_buffs); - -void basesmuma_reduce_send(int my_group_index, - int peer, - void *send_buffer, - int snd_size, - int send_offset, - volatile int8_t ready_flag_val, - volatile mca_bcol_basesmuma_payload_t *data_buffs); - -#endif diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_rk_barrier.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_rk_barrier.c deleted file mode 100644 index bd8e1ad2d0..0000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_rk_barrier.c +++ /dev/null @@ -1,442 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2016 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/include/ompi/constants.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/bcol/base/base.h" -#include "ompi/mca/bcol/basesmuma/bcol_basesmuma.h" - -/* -#define IS_BARRIER_READY(peer, my_flag, my_sequence_number)\ - (((peer)->sequence_number == (my_sequence_number) && \ - (peer)->flags[BARRIER_RKING_FLAG][bcol_id] >= (my_flag) \ - )? 
true : false ) -*/ - -#define CALC_ACTIVE_REQUESTS(active_requests,peers, tree_order) \ -do{ \ - for( j = 0; j < (tree_order - 1); j++){ \ - if( 0 > peers[j] ) { \ - /* set the bit */ \ - *active_requests ^= (1<bcol_module; - netpatterns_k_exchange_node_t *exchange_node = &bcol_module->knomial_allgather_tree; - mca_bcol_basesmuma_component_t *cm = &mca_bcol_basesmuma_component; - uint32_t buffer_index = input_args->buffer_index; - int *active_requests = - &(bcol_module->ml_mem.nb_coll_desc[buffer_index].active_requests); - - int *iteration = &bcol_module->ml_mem.nb_coll_desc[buffer_index].iteration; - int *status = &bcol_module->ml_mem.nb_coll_desc[buffer_index].status; - int leading_dim, buff_idx, idx; - int bcol_id = (int) bcol_module->super.bcol_id; - - int i, j, probe; - int src; - - int pow_k, tree_order; - int max_requests = 0; /* important to initialize this */ - - bool matched; - int64_t sequence_number=input_args->sequence_num; - int my_rank = bcol_module->super.sbgp_partner_module->my_index; - - volatile mca_bcol_basesmuma_payload_t *data_buffs; - - /* control structures */ - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer; - volatile mca_bcol_basesmuma_header_t *peer_ctl_pointer; -#if 0 - fprintf(stderr,"entering sm barrier sn = %d buff index = %d\n",sequence_number,input_args->buffer_index); -#endif - /* initialize the iteration counter */ - buff_idx = input_args->buffer_index; - leading_dim = bcol_module->colls_no_user_data.size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - data_buffs=(volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs+idx; - /* Set pointer to current proc ctrl region */ - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - - /* init the header */ - BASESMUMA_HEADER_INIT(my_ctl_pointer, ready_flag, sequence_number, bcol_id); - /* initialize these */ - *iteration = 0; - *active_requests = 0; - *status = 0; - - /* k-nomial parameters */ - tree_order = exchange_node->tree_order; - pow_k = exchange_node->log_tree_order; - - /* calculate the maximum number of requests - * at each level each rank communicates with - * at most (k - 1) peers - * so if we set k - 1 bit fields in "max_requests", then - * we have max_request == 2^(k - 1) -1 - */ - for(i = 0; i < (tree_order - 1); i++){ - max_requests ^= (1<node_type ) { - - /* then I will signal to my proxy rank*/ - - my_ctl_pointer->flags[BARRIER_RKING_FLAG][bcol_id] = ready_flag; - ready_flag = flag_offset + 1 + pow_k + 2; - /* now, poll for completion */ - - src = exchange_node->rank_extra_sources_array[0]; - peer_ctl_pointer = data_buffs[src].ctl_struct; - - for( i = 0; i < cm->num_to_probe ; i++ ) { - if(IS_PEER_READY(peer_ctl_pointer, ready_flag, sequence_number, BARRIER_RKING_FLAG, bcol_id)){ - goto FINISHED; - } - - } - - /* cache state and bail */ - *iteration = -1; - return BCOL_FN_STARTED; - - }else if ( 0 < exchange_node->n_extra_sources ) { - - /* I am a proxy for someone */ - src = exchange_node->rank_extra_sources_array[0]; - peer_ctl_pointer = data_buffs[src].ctl_struct; - - /* probe for extra rank's arrival */ - for( i = 0, matched = false ; i < cm->num_to_probe && !matched ; i++) { - if(IS_PEER_READY(peer_ctl_pointer,ready_flag, sequence_number, BARRIER_RKING_FLAG, bcol_id)){ - /* copy it in */ - matched = true; - break; - } - } - - if (!matched) { - *status = ready_flag; - *iteration = -1; - return BCOL_FN_STARTED; - } - } - - /* bump the ready flag */ - ready_flag++; - - /* we start the recursive k - ing phase */ - for( *iteration = 0; *iteration < 
pow_k; (*iteration)++) { - /* announce my arrival */ - my_ctl_pointer->flags[BARRIER_RKING_FLAG][bcol_id] = ready_flag; - /* calculate the number of active requests */ - CALC_ACTIVE_REQUESTS(active_requests,exchange_node->rank_exchanges[*iteration],tree_order); - /* Now post the recv's */ - for( j = 0; j < (tree_order - 1); j++ ) { - - /* recv phase */ - src = exchange_node->rank_exchanges[*iteration][j]; - if( src < 0 ) { - /* then not a valid rank, continue */ - continue; - } - - peer_ctl_pointer = data_buffs[src].ctl_struct; - if( !(*active_requests&(1<num_to_probe ; probe++){ - if(IS_PEER_READY(peer_ctl_pointer,ready_flag, sequence_number, BARRIER_RKING_FLAG, bcol_id)){ - /* set this request's bit */ - *active_requests ^= (1<flags[BARRIER_RKING_FLAG][bcol_id]; - return BCOL_FN_STARTED; - } - } - - /* bump the flag one more time for the extra rank */ - ready_flag = flag_offset + 1 + pow_k + 2; - - /* finish off the last piece, send the data back to the extra */ - if( 0 < exchange_node->n_extra_sources ) { - /* simply announce my arrival */ - my_ctl_pointer->flags[BARRIER_RKING_FLAG][bcol_id] = ready_flag; - - } - -FINISHED: - - - my_ctl_pointer->starting_flag_value[bcol_id]++; - return BCOL_FN_COMPLETE; -} - - -/* allgather progress function */ - -int bcol_basesmuma_k_nomial_barrier_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - - - /* local variables */ - int flag_offset; - volatile int8_t ready_flag; - mca_bcol_basesmuma_module_t *bcol_module = (mca_bcol_basesmuma_module_t *) const_args->bcol_module; - netpatterns_k_exchange_node_t *exchange_node = &bcol_module->knomial_allgather_tree; - mca_bcol_basesmuma_component_t *cm = &mca_bcol_basesmuma_component; - uint32_t buffer_index = input_args->buffer_index; - int *active_requests = - &(bcol_module->ml_mem.nb_coll_desc[buffer_index].active_requests); - - int *iteration = &bcol_module->ml_mem.nb_coll_desc[buffer_index].iteration; - int *status = &bcol_module->ml_mem.nb_coll_desc[buffer_index].status; - int *iter = iteration; /* double alias */ - int leading_dim, idx, buff_idx; - - int i, j, probe; - int src; - int max_requests = 0; /* critical to set this */ - int pow_k, tree_order; - int bcol_id = (int) bcol_module->super.bcol_id; - - bool matched; - int64_t sequence_number=input_args->sequence_num; - int my_rank = bcol_module->super.sbgp_partner_module->my_index; - - volatile mca_bcol_basesmuma_payload_t *data_buffs; - - /* control structures */ - volatile mca_bcol_basesmuma_header_t *my_ctl_pointer; - volatile mca_bcol_basesmuma_header_t *peer_ctl_pointer; -#if 0 - fprintf(stderr,"%d: entering sm allgather progress active requests %d iter %d ready_flag %d\n",my_rank, - *active_requests,*iter,*status); -#endif - buff_idx = buffer_index; - leading_dim=bcol_module->colls_no_user_data.size_of_group; - idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0); - - data_buffs=(volatile mca_bcol_basesmuma_payload_t *) - bcol_module->colls_with_user_data.data_buffs+idx; - my_ctl_pointer = data_buffs[my_rank].ctl_struct; - - /* increment the starting flag by one and return */ - flag_offset = my_ctl_pointer->starting_flag_value[bcol_id]; - ready_flag = *status; - /* k-nomial parameters */ - tree_order = exchange_node->tree_order; - pow_k = exchange_node->log_tree_order; - - /* calculate the maximum number of requests - * at each level each rank communicates with - * at most (k - 1) peers - * so if we set k - 1 bit fields in "max_requests", then - * we have max_request == 2^(k - 1) -1 - */ - for(i = 0; i < 
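/* How the cached per-buffer state drives re-entry, judging from the init
 * function above and the progress function below (an illustrative summary,
 * not part of the original file):
 *
 *   init bails out with:                 progress resumes with:
 *     *status    = ready_flag;             ready_flag = *status;
 *     *iteration = -1;  (handshake)        if (-1 == *iteration) redo handshake
 *     *iteration = i;   (k-ing level)      else restart the k-ing loop at *iter
 *     return BCOL_FN_STARTED;              ... returning BCOL_FN_COMPLETE only
 *                                          after all pow_k levels finish
 */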
(tree_order - 1); i++){ - max_requests ^= (1<node_type ) { - - /* If I'm in here, then I must be looking for data */ - ready_flag = flag_offset + 1 + pow_k + 2; - - src = exchange_node->rank_extra_sources_array[0]; - peer_ctl_pointer = data_buffs[src].ctl_struct; - - for( i = 0; i < cm->num_to_probe ; i++ ) { - if(IS_PEER_READY(peer_ctl_pointer, ready_flag, sequence_number, BARRIER_RKING_FLAG, bcol_id)){ - goto FINISHED; - } - - } - - /* haven't found it, state is cached, bail out */ - return BCOL_FN_STARTED; - - }else if ( ( -1 == *iteration ) && (0 < exchange_node->n_extra_sources) ) { - - /* I am a proxy for someone */ - src = exchange_node->rank_extra_sources_array[0]; - peer_ctl_pointer = data_buffs[src].ctl_struct; - - /* probe for extra rank's arrival */ - for( i = 0, matched = false ; i < cm->num_to_probe && !matched ; i++) { - if(IS_PEER_READY(peer_ctl_pointer,ready_flag, sequence_number, BARRIER_RKING_FLAG, bcol_id)){ - matched = true; - /* bump the flag */ - ready_flag++; - *iteration = 0; - break; - } - } - - if (!matched) { - return BCOL_FN_STARTED; - } - } - - /* start the recursive k - ing phase */ - for( *iter=*iteration; *iter < pow_k; (*iter)++) { - /* I am ready at this level */ - my_ctl_pointer->flags[BARRIER_RKING_FLAG][bcol_id] = ready_flag; - if( 0 == *active_requests ) { - /* flip some bits, if we don't have active requests from a previous visit */ - CALC_ACTIVE_REQUESTS(active_requests,exchange_node->rank_exchanges[*iter],tree_order); - } - for( j = 0; j < (tree_order - 1); j++ ) { - - /* recv phase */ - src = exchange_node->rank_exchanges[*iter][j]; - if( src < 0 ) { - /* then not a valid rank, continue - */ - continue; - } - - peer_ctl_pointer = data_buffs[src].ctl_struct; - if( !(*active_requests&(1<num_to_probe ; probe++){ - if(IS_PEER_READY(peer_ctl_pointer,ready_flag, sequence_number, BARRIER_RKING_FLAG, bcol_id)){ - /* flip the request's bit */ - *active_requests ^= (1<flags[BARRIER_RKING_FLAG][bcol_id]; - return BCOL_FN_STARTED; - } - } - /* bump the flag one more time for the extra rank */ - ready_flag = flag_offset + 1 + pow_k + 2; - - /* finish off the last piece, send the data back to the extra */ - if( 0 < exchange_node->n_extra_sources ) { - /* simply announce my arrival */ - my_ctl_pointer->flags[BARRIER_RKING_FLAG][bcol_id] = ready_flag; - - } - -FINISHED: - - my_ctl_pointer->starting_flag_value[bcol_id]++; - return BCOL_FN_COMPLETE; -} - -/* Register k-nomial barrier functions to the BCOL function table, - * so they can be selected - */ -int bcol_basesmuma_barrier_init(mca_bcol_base_module_t *super) -{ -mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - comm_attribs.bcoll_type = BCOL_BARRIER; - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - comm_attribs.data_src = DATA_SRC_KNOWN; - - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_basesmuma_k_nomial_barrier_init, - bcol_basesmuma_k_nomial_barrier_progress); - - return OMPI_SUCCESS; -} - diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_setup.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_setup.c deleted file mode 100644 index 435d6a6983..0000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_setup.c +++ /dev/null @@ -1,588 +0,0 @@ -/* -*- Mode: C; 
c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#include "ompi_config.h" -#include "mpi.h" -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "opal/mca/mpool/base/base.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/bcol/base/base.h" -#include "ompi/patterns/comm/coll_ops.h" - -#include "opal/class/opal_object.h" -#include "opal/dss/dss.h" - -#include "bcol_basesmuma.h" - -int base_bcol_basesmuma_setup_ctl_struct( - mca_bcol_basesmuma_module_t *sm_bcol_module, - mca_bcol_basesmuma_component_t *cs, - sm_buffer_mgmt *ctl_mgmt); - -/* this is the new version, which uses the pml allgather */ -int base_bcol_basesmuma_exchange_offsets( - mca_bcol_basesmuma_module_t *sm_bcol_module, - void **result_array, uint64_t mem_offset, int loop_limit, - int leading_dim) -{ - int ret=OMPI_SUCCESS,i; - int count; - int index_in_group; - char *send_buff; - char *recv_buff; - uint64_t rem_mem_offset; - - /* malloc some memory */ - count = sizeof(uint64_t) + sizeof(int); - send_buff = (char *) malloc(count); - recv_buff = (char *) malloc(count * - sm_bcol_module->super.sbgp_partner_module->group_size); - /* exchange the base pointer for the control structures - gather - * everyone else's information. - */ - - - /* pack the offset of the allocated region */ - memcpy((void *) send_buff, (void *) &(sm_bcol_module->super.sbgp_partner_module->my_index), sizeof(int)); - memcpy((void *) (send_buff+ sizeof(int)), (void *) &(mem_offset), sizeof(uint64_t)); - - /* get the offsets from all procs, so we can set up the control data - * structures.
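Since every process maps the backing file at a different virtual address, raw pointers are meaningless across ranks; only file-relative offsets are exchanged, and each receiver rebases them against its own mapping. A sketch of that translation on the receiving side (hypothetical variable names, not the module's code):

    // base address at which *this* process mapped peer i's backing file
    unsigned char *peer_base = ctl_backing_files_info[i]->sm_mmap->map_addr;
    // rem_mem_offset is the uint64_t that peer i contributed to the allgather
    void *peer_ctl = (void *)(peer_base + (uintptr_t) rem_mem_offset);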
- */ - - ret=comm_allgather_pml((void *) send_buff,(void *) recv_buff,count, - MPI_BYTE, - sm_bcol_module->super.sbgp_partner_module->my_index, - sm_bcol_module->super.sbgp_partner_module->group_size, - sm_bcol_module->super.sbgp_partner_module->group_list, - sm_bcol_module->super.sbgp_partner_module->group_comm); - if( OMPI_SUCCESS != ret ) { - goto exit_ERROR; - } - - /* get the control stucture offsets within the shared memory - * region and populate the control structures - we do not assume - * any symmetry in memory layout of each process - */ - - /* loop over the procs in the group */ - for(i = 0; i < sm_bcol_module->super.sbgp_partner_module->group_size; i++){ - int array_id; - /* get this peer's index in the group */ - memcpy((void *) &index_in_group, (void *) (recv_buff + i*count) , sizeof(int)); - - /* get the offset */ - memcpy((void *) &rem_mem_offset, (void *) (recv_buff + i*count + sizeof(int)), sizeof(uint64_t)); - - array_id=SM_ARRAY_INDEX(leading_dim,0,index_in_group); - result_array[array_id]=(void *)(uintptr_t)rem_mem_offset; - - } - -exit_ERROR: - /* clean up */ - if( NULL != send_buff ) { - free(send_buff); - send_buff = NULL; - } - if( NULL != recv_buff ) { - free(recv_buff); - recv_buff = NULL; - } - - return ret; - - -} - -#if 0 -int base_bcol_basesmuma_exchange_offsets( - mca_bcol_basesmuma_module_t *sm_bcol_module, - void **result_array, uint64_t mem_offset, int loop_limit, - int leading_dim) -{ - int ret=OMPI_SUCCESS,i,dummy; - int index_in_group, pcnt; - opal_list_t peers; - ompi_namelist_t *peer; - ompi_proc_t *proc_temp, *my_id; - opal_buffer_t *send_buffer = OBJ_NEW(opal_buffer_t); - opal_buffer_t *recv_buffer = OBJ_NEW(opal_buffer_t); - uint64_t rem_mem_offset; - - /* exchange the base pointer for the controls structures - gather - * every one else's infromation. - */ - /* get list of procs that will participate in the communication */ - OBJ_CONSTRUCT(&peers, opal_list_t); - for (i = 0; i < sm_bcol_module->super.sbgp_partner_module->group_size; i++) { - /* get the proc info */ - proc_temp = ompi_comm_peer_lookup( - sm_bcol_module->super.sbgp_partner_module->group_comm, - sm_bcol_module->super.sbgp_partner_module->group_list[i]); - peer = OBJ_NEW(ompi_namelist_t); - peer->name.jobid = proc_temp->proc_name.jobid; - peer->name.vpid = proc_temp->proc_name.vpid; - opal_list_append(&peers,&peer->super); /* this is with the new field called "super" in ompi_namelist_t struct */ - } - /* pack up the data into the allgather send buffer */ - if (NULL == send_buffer || NULL == recv_buffer) { - opal_output (ompi_bcol_base_framework.framework_output, "Cannot allocate memory for sbuffer or rbuffer\n"); - ret = OMPI_ERROR; - goto exit_ERROR; - } - - /* get my proc information */ - my_id = ompi_proc_local(); - - /* pack my information */ - ret = opal_dss.pack(send_buffer, - &(sm_bcol_module->super.sbgp_partner_module->my_index),1,OPAL_UINT32); - - if (OMPI_SUCCESS != ret) { - opal_output (ompi_bcol_base_framework.framework_output, "Error packing my_index!!\n"); - goto exit_ERROR; - } - - /* pack the offset of the allocated region */ - ret = opal_dss.pack(send_buffer,&(mem_offset),1,OPAL_UINT64); - if (OMPI_SUCCESS != ret) { - goto exit_ERROR; - } - - /* get the offsets from all procs, so can setup the control data - * structures. 
- */ - if (OMPI_SUCCESS != (ret = ompi_rte_allgather_list(&peers, send_buffer, recv_buffer))) { - opal_output (ompi_bcol_base_framework.framework_output, "ompi_rte_allgather_list returned error %d\n", ret); - goto exit_ERROR; - } - - /* unpack the dummy */ - pcnt=1; - ret = opal_dss.unpack(recv_buffer,&dummy, &pcnt, OPAL_INT32); - if (OMPI_SUCCESS != ret) { - opal_output (ompi_bcol_base_framework.framework_output, "unpack returned error %d for dummy\n",ret); - goto exit_ERROR; - } - - /* get the control stucture offsets within the shared memory - * region and populate the control structures - we do not assume - * any symmetry in memory layout of each process - */ - - /* loop over the procs in the group */ - for(i = 0; i < sm_bcol_module->super.sbgp_partner_module->group_size; i++){ - int array_id; - pcnt=1; - ret = opal_dss.unpack(recv_buffer,&index_in_group, &pcnt, OPAL_UINT32); - if (OMPI_SUCCESS != ret) { - opal_output (ompi_bcol_base_framework.framework_output, "unpack returned error %d for remote index_in_group\n",ret); - goto exit_ERROR; - } - - /* get the offset */ - pcnt=1; - ret = opal_dss.unpack(recv_buffer,&rem_mem_offset, &pcnt, OPAL_UINT64); - if (OMPI_SUCCESS != ret) { - opal_output (ompi_bcol_base_framework.framework_output, "unpack returned error %d for remote memory offset\n",ret); - goto exit_ERROR; - } - - array_id=SM_ARRAY_INDEX(leading_dim,0,index_in_group); - result_array[array_id]=(void *)rem_mem_offset; - - } - - /* clean up */ - peer=(ompi_namelist_t *)opal_list_remove_first(&peers); - while( NULL !=peer) { - OBJ_RELEASE(peer); - peer=(ompi_namelist_t *)opal_list_remove_first(&peers); - } - OBJ_DESTRUCT(&peers); - if( send_buffer ) { - OBJ_RELEASE(send_buffer); - } - if( recv_buffer ) { - OBJ_RELEASE(recv_buffer); - } - - return ret; - -exit_ERROR: - - /* free peer list */ - peer=(ompi_namelist_t *)opal_list_remove_first(&peers); - while( NULL !=peer) { - OBJ_RELEASE(peer); - peer=(ompi_namelist_t *)opal_list_remove_first(&peers); - } - OBJ_DESTRUCT(&peers); - if( send_buffer ) { - OBJ_RELEASE(send_buffer); - } - if( recv_buffer ) { - OBJ_RELEASE(recv_buffer); - } - return ret; -} -#endif - - -static int base_bcol_basesmuma_exchange_ctl_params( - mca_bcol_basesmuma_module_t *sm_bcol_module, - mca_bcol_basesmuma_component_t *cs, - sm_buffer_mgmt *ctl_mgmt, list_data_t *data_blk) -{ - int ret=OMPI_SUCCESS,i,loop_limit; - int leading_dim, buf_id; - void *mem_offset; - unsigned char *base_ptr; - mca_bcol_basesmuma_ctl_struct_t *ctl_ptr; - - /* data block base offset in the mapped file */ - mem_offset = (void *)((uintptr_t)data_blk->data - - (uintptr_t)cs->sm_ctl_structs->data_addr); - - /* number of buffers in data block */ - loop_limit=cs->basesmuma_num_mem_banks+ctl_mgmt->number_of_buffs; - leading_dim=ctl_mgmt->size_of_group; - ret=comm_allgather_pml(&mem_offset, ctl_mgmt->ctl_buffs, sizeof(void *), - MPI_BYTE, sm_bcol_module->super.sbgp_partner_module->my_index, - sm_bcol_module->super.sbgp_partner_module->group_size, - sm_bcol_module->super.sbgp_partner_module->group_list, - sm_bcol_module->super.sbgp_partner_module->group_comm); - if( OMPI_SUCCESS != ret ) { - goto exit_ERROR; - } - -#if 0 - ret=base_bcol_basesmuma_exchange_offsets( sm_bcol_module, - (void **)ctl_mgmt->ctl_buffs, mem_offset, loop_limit, leading_dim); - if( OMPI_SUCCESS != ret ) { - goto exit_ERROR; - } -#endif - - /* convert memory offset to virtual address in current rank */ - for (i=0;i< sm_bcol_module->super.sbgp_partner_module->group_size;i++) { - - /* get the base pointer */ - int 
array_id=SM_ARRAY_INDEX(leading_dim,0,i); - if( i == sm_bcol_module->super.sbgp_partner_module->my_index) { - /* me */ - base_ptr=cs->sm_ctl_structs->map_addr; - } else { - base_ptr=sm_bcol_module->ctl_backing_files_info[i]->sm_mmap->map_addr; - } - ctl_mgmt->ctl_buffs[array_id]=(void *) - (uintptr_t)(((uint64_t)(uintptr_t)ctl_mgmt->ctl_buffs[array_id])+(uint64_t)(uintptr_t)base_ptr); - for( buf_id = 1 ; buf_id < loop_limit ; buf_id++ ) { - int array_id_m1=SM_ARRAY_INDEX(leading_dim,(buf_id-1),i); - array_id=SM_ARRAY_INDEX(leading_dim,buf_id,i); - ctl_mgmt->ctl_buffs[array_id]=(void *) (uintptr_t)((uint64_t)(uintptr_t)(ctl_mgmt->ctl_buffs[array_id_m1])+ - (uint64_t)(uintptr_t)sizeof(mca_bcol_basesmuma_ctl_struct_t)); - } - } - /* initialize my control structures */ - for( buf_id = 0 ; buf_id < loop_limit ; buf_id++ ) { - - int my_idx=sm_bcol_module->super.sbgp_partner_module->my_index; - int array_id=SM_ARRAY_INDEX(leading_dim,buf_id,my_idx); - ctl_ptr = (mca_bcol_basesmuma_ctl_struct_t *) - ctl_mgmt->ctl_buffs[array_id]; - - /* initialize the data structures - RLG, this is only one data - * structure that needs to be initialized, more are missing */ - ctl_ptr->sequence_number=-1; - ctl_ptr->flag=-1; - ctl_ptr->index=0; - ctl_ptr->src_ptr = NULL; - } - - return ret; - -exit_ERROR: - - return ret; -} - -static int base_bcol_basesmuma_setup_ctl (mca_bcol_basesmuma_module_t *sm_bcol_module, - mca_bcol_basesmuma_component_t *cs) -{ - const int my_index = sm_bcol_module->super.sbgp_partner_module->my_index; - bcol_basesmuma_smcm_file_t input_file; - int ret; - - /* exchange remote addressing information if it has not already been done */ - if (NULL == sm_bcol_module->ctl_backing_files_info) { - input_file.file_name=cs->sm_ctl_structs->map_path; - input_file.size=cs->sm_ctl_structs->map_size; - input_file.size_ctl_structure=0; - input_file.data_seg_alignment=BASESMUMA_CACHE_LINE_SIZE; - input_file.mpool_size=cs->sm_ctl_structs->map_size; - ret = bcol_basesmuma_smcm_allgather_connection(sm_bcol_module, - sm_bcol_module->super.sbgp_partner_module, - &(cs->sm_connections_list), - &(sm_bcol_module->ctl_backing_files_info), - sm_bcol_module->super.sbgp_partner_module->group_comm, - input_file, cs->clt_base_fname, - false); - if (OMPI_SUCCESS != ret) { - return ret; - } - } - - /* fill in the pointers to the other ranks' scratch shared memory */ - if (NULL == sm_bcol_module->shared_memory_scratch_space) { - sm_bcol_module->shared_memory_scratch_space = - calloc (sm_bcol_module->super.sbgp_partner_module->group_size, sizeof (void *)); - if (!sm_bcol_module->shared_memory_scratch_space) { - opal_output (ompi_bcol_base_framework.framework_output, "Cannot allocate memory for shared_memory_scratch_space."); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - for (int i = 0 ; i < sm_bcol_module->super.sbgp_partner_module->group_size ; ++i) { - if (i == my_index) { - /* local file data is not cached in this list */ - continue; - } - - sm_bcol_module->shared_memory_scratch_space[i] = - (void *)((intptr_t) sm_bcol_module->ctl_backing_files_info[i]->sm_mmap + - cs->scratch_offset_from_base_ctl_file); - } - - sm_bcol_module->shared_memory_scratch_space[my_index] = - (void *)((intptr_t) cs->sm_ctl_structs->map_addr + cs->scratch_offset_from_base_ctl_file); - } - - return OMPI_SUCCESS; -} - -int base_bcol_basesmuma_setup_ctl_struct( - mca_bcol_basesmuma_module_t *sm_bcol_module, - mca_bcol_basesmuma_component_t *cs, - sm_buffer_mgmt *ctl_mgmt) -{ - int n_ctl, n_levels; - int n_ctl_structs; - size_t malloc_size; - - /* - * set my
no user-data control structures - */ - /* the number of banks and regions per bank are already a power of 2 */ - n_ctl_structs=cs->basesmuma_num_mem_banks* - cs->basesmuma_num_regions_per_bank; - - /* initialize the control structure management struct - - * for collectives without user data - *--------------------------------------------------------------- - */ - - ctl_mgmt->number_of_buffs=n_ctl_structs; - ctl_mgmt->num_mem_banks= - cs->basesmuma_num_mem_banks; - - ctl_mgmt->num_buffs_per_mem_bank= - cs->basesmuma_num_regions_per_bank; - ctl_mgmt->size_of_group= - sm_bcol_module->super.sbgp_partner_module->group_size; - ompi_roundup_to_power_radix(2,cs->basesmuma_num_regions_per_bank,&n_levels); - ctl_mgmt->log2_num_buffs_per_mem_bank=n_levels; - - ompi_roundup_to_power_radix(2,n_ctl_structs,&n_levels); - ctl_mgmt->log2_number_of_buffs=n_levels; - ctl_mgmt->mask=n_ctl_structs-1; - sm_bcol_module->super.n_poll_loops=cs->n_poll_loops; - - malloc_size= - (ctl_mgmt->number_of_buffs + - ctl_mgmt->num_mem_banks ) * - ctl_mgmt->size_of_group * - sizeof(void *); - ctl_mgmt->ctl_buffs = malloc(malloc_size); - if (!ctl_mgmt->ctl_buffs) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* - * set up the no-data buffer management data - */ - n_ctl = ctl_mgmt->num_mem_banks; - ctl_mgmt->ctl_buffs_mgmt = (mem_bank_management_t *) calloc (n_ctl, sizeof (mem_bank_management_t)); - if (!ctl_mgmt->ctl_buffs_mgmt) { - opal_output (ompi_bcol_base_framework.framework_output, "Cannot allocate memory for ctl_buffs_mgmt"); - free (ctl_mgmt->ctl_buffs); - ctl_mgmt->ctl_buffs = NULL; - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* initialize each individual element */ - for (int i = 0 ; i < n_ctl ; ++i) { - opal_list_item_t *item; - opal_mutex_t *mutex_ptr; - - ctl_mgmt->ctl_buffs_mgmt[i].available_buffers= - ctl_mgmt->num_buffs_per_mem_bank; - ctl_mgmt->ctl_buffs_mgmt[i].number_of_buffers= - ctl_mgmt->num_buffs_per_mem_bank; - mutex_ptr = &(ctl_mgmt->ctl_buffs_mgmt[i].mutex); - OBJ_CONSTRUCT(mutex_ptr, opal_mutex_t); - ctl_mgmt->ctl_buffs_mgmt[i].index_shared_mem_ctl_structs=i; - - item = (opal_list_item_t *)&(ctl_mgmt->ctl_buffs_mgmt[i].nb_barrier_desc); - OBJ_CONSTRUCT(item, opal_list_item_t); - ctl_mgmt->ctl_buffs_mgmt[i].nb_barrier_desc.sm_module = - sm_bcol_module; - ctl_mgmt->ctl_buffs_mgmt[i].nb_barrier_desc.pool_index = i; - /* get the sm_buffer_mgmt pointer for the control structures */ - ctl_mgmt->ctl_buffs_mgmt[i].nb_barrier_desc.coll_buff = ctl_mgmt; - } - - return OMPI_SUCCESS; -} - -/* - * This function initializes the internal scratch buffers and control - * structures that will be used by the module. It also initializes - * the payload buffer management structures.
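Keeping both the bank count and the buffers-per-bank a power of two is what makes the mask and log2 fields above useful: a running buffer counter can be reduced to a slot and a bank with shifts and masks instead of division. A sketch of the arithmetic this enables (hypothetical counter name, not the module's code):

    int n_buffs = num_mem_banks * num_buffs_per_mem_bank;   // power of two
    int mask    = n_buffs - 1;                              // e.g. 8 -> 0x7
    int slot    = (int)(buffer_counter & mask);             // cheap modulo
    int bank    = slot >> log2_num_buffs_per_mem_bank;      // owning bank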
- */ -int base_bcol_basesmuma_setup_library_buffers( - mca_bcol_basesmuma_module_t *sm_bcol_module, - mca_bcol_basesmuma_component_t *cs) -{ - int ret=OMPI_SUCCESS,i; - int n_ctl_structs; - size_t ctl_segement_size,total_memory; - int max_elements; - unsigned char *data_ptr; - - /* set up the control struct memory */ - if(!cs->sm_ctl_structs) { - ret = mca_bcol_basesmuma_allocate_sm_ctl_memory(cs); - if(OMPI_SUCCESS != ret) { - opal_output (ompi_bcol_base_framework.framework_output, "In bcol_comm_query mca_bcol_basesmuma_allocate_sm_ctl_memory failed\n"); - return ret; - } - /* - * put the memory onto the free list - we have worried about - * alignment in the mpool allocation, and assume that the - * ctl structures have the appropriate size to maintain alignment - */ - - /* figure out the segment size */ - n_ctl_structs=cs->basesmuma_num_mem_banks* - cs->basesmuma_num_regions_per_bank; - - /* add memory for the control structure used for recycling the banks */ - n_ctl_structs+=cs->basesmuma_num_mem_banks; - - ctl_segement_size=n_ctl_structs* - sizeof(mca_bcol_basesmuma_ctl_struct_t); - - total_memory=cs->sm_ctl_structs->map_size - ( - (char *)(cs->sm_ctl_structs->data_addr)- - (char *)(cs->sm_ctl_structs->map_addr)); - total_memory-=cs->my_scratch_shared_memory_size; - max_elements=total_memory/ctl_segement_size; - - /* populate the free list */ - data_ptr=cs->sm_ctl_structs->data_addr; - - for( i=0 ; i < max_elements ; i++ ) { - list_data_t *item = OBJ_NEW(list_data_t); - if( !item ) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - item->data=(void *)data_ptr; - opal_list_append(&(cs->ctl_structures),(opal_list_item_t *)item); - data_ptr+=ctl_segement_size; - } - /* set the scratch memory pointer and offset */ - cs->my_scratch_shared_memory=(char *)data_ptr; - cs->scratch_offset_from_base_ctl_file=(size_t) - ((char *)data_ptr-(char *)cs->sm_ctl_structs->map_addr); - - - /* At this stage the memory is mapped and ready for use by the local rank. - * However, the memory of other processes has not yet been mmapped into the - * memory of this process. - */ - } - - /* initialize no_userdata_ctl */ - sm_bcol_module->no_userdata_ctl=(list_data_t *) - opal_list_remove_last(&(cs->ctl_structures)); - if (!sm_bcol_module->no_userdata_ctl) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* initialize userdata_ctl */ - sm_bcol_module->userdata_ctl = (list_data_t *) - opal_list_remove_last(&(cs->ctl_structures)); - if (!sm_bcol_module->userdata_ctl) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - ret = base_bcol_basesmuma_setup_ctl (sm_bcol_module, cs); - if (OMPI_SUCCESS != ret) { - return ret; - } - - ret = base_bcol_basesmuma_setup_ctl_struct (sm_bcol_module, cs, &(sm_bcol_module->colls_no_user_data)); - if( OMPI_SUCCESS != ret ) { - return ret; - } - - ret = base_bcol_basesmuma_setup_ctl_struct (sm_bcol_module, cs, &(sm_bcol_module->colls_with_user_data)); - if( OMPI_SUCCESS != ret ) { - return ret; - } - - /* used for the blocking recursive doubling barrier */ - sm_bcol_module->index_blocking_barrier_memory_bank=0; - - /* gather the offsets of the control structs relative to the base - * of the shared memory file, and fill in the table with the - * addresses of all the control structures.
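The free-list population above is plain pointer carving: the data segment left after reserving the scratch area is cut into max_elements equal blocks, each holding n_ctl_structs control structures. A generic sketch of the pattern (free_list_append is a hypothetical helper):

    size_t elem_size = n_ctl_structs * sizeof(mca_bcol_basesmuma_ctl_struct_t);
    size_t max_elems = usable_bytes / elem_size;     // whole blocks only
    unsigned char *p = data_segment_start;
    for (size_t e = 0; e < max_elems; e++, p += elem_size) {
        free_list_append(p);                         // hypothetical list helper
    }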
- */ - ret = base_bcol_basesmuma_exchange_ctl_params(sm_bcol_module, cs, - &(sm_bcol_module->colls_no_user_data),sm_bcol_module->no_userdata_ctl); - if( OMPI_SUCCESS != ret ) { - return ret; - } - - ret = base_bcol_basesmuma_exchange_ctl_params(sm_bcol_module, cs, - &(sm_bcol_module->colls_with_user_data),sm_bcol_module->userdata_ctl); - if( OMPI_SUCCESS != ret ) { - return ret; - } - - return OMPI_SUCCESS; -} - -OBJ_CLASS_INSTANCE(list_data_t, - opal_list_item_t, NULL, NULL); diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_smcm.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_smcm.c deleted file mode 100644 index e0c23cae62..0000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_smcm.c +++ /dev/null @@ -1,460 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#include -#include -#include -#ifdef HAVE_STRINGS_H -#include -#endif - -#include "ompi/proc/proc.h" -#include "ompi/patterns/comm/coll_ops.h" -#include "opal/align.h" - -#include "opal/dss/dss.h" -#include "opal/util/error.h" -#include "opal/util/output.h" -#include "opal/class/opal_list.h" -#include "opal/class/opal_hash_table.h" - -#include "bcol_basesmuma.h" - - - -#define SM_BACKING_FILE_NAME_MAX_LEN 256 - -static bcol_basesmuma_smcm_mmap_t * bcol_basesmuma_smcm_reg_mmap(void *in_ptr, int fd, size_t length, - size_t addr_offset, size_t alignment, - char *file_name); - -struct file_info_t { - uint32_t vpid; - uint32_t jobid; - uint64_t file_size; - uint64_t size_ctl_structure; - uint64_t data_seg_alignment; - char file_name[SM_BACKING_FILE_NAME_MAX_LEN]; -}; - -/* need to allocate space for the peer */ -static void bcol_basesmuma_smcm_proc_item_t_construct (bcol_basesmuma_smcm_proc_item_t * item) -{ - memset ((char *) item + sizeof (item->item), 0, sizeof (*item) - sizeof (item->item)); -} - -/* need to free the space for the peer */ -static void bcol_basesmuma_smcm_proc_item_t_destruct (bcol_basesmuma_smcm_proc_item_t * item) -{ - if (item->sm_mmap) { - OBJ_RELEASE(item->sm_mmap); - } - - if (item->sm_file.file_name) { - free (item->sm_file.file_name); - item->sm_file.file_name = NULL; - } -} - -OBJ_CLASS_INSTANCE(bcol_basesmuma_smcm_proc_item_t, - opal_list_item_t, - bcol_basesmuma_smcm_proc_item_t_construct, - bcol_basesmuma_smcm_proc_item_t_destruct); - -static void bcol_basesmuma_smcm_mmap_construct (bcol_basesmuma_smcm_mmap_t *smcm_mmap) -{ - memset ((char *) smcm_mmap + sizeof (smcm_mmap->super), 0, sizeof (*smcm_mmap) - sizeof (smcm_mmap->super)); -} - -static void bcol_basesmuma_smcm_mmap_destruct (bcol_basesmuma_smcm_mmap_t *smcm_mmap) -{ - if (smcm_mmap->map_seg) { - munmap ((void *)smcm_mmap->map_seg, smcm_mmap->map_size); - smcm_mmap->map_seg = NULL; - } - - if (smcm_mmap->map_path) { - free (smcm_mmap->map_path); - smcm_mmap->map_path = NULL; - } -} - -OBJ_CLASS_INSTANCE(bcol_basesmuma_smcm_mmap_t, opal_list_item_t, - bcol_basesmuma_smcm_mmap_construct, - bcol_basesmuma_smcm_mmap_destruct); - - -/* smcm_allgather_connection: - This function is called when 
a shared memory subgroup wants to establish shared memory "connections" among - a group of processes. - - This function DOES NOT create any shared memory backing files, it only mmaps already existing files. Shared - memory files are created by the shared memory registration function - ----------------------------------------------------------------------------------------------------------- - Input params: - - - sbgp module The subgrouping module contains the list of ranks to wire up. - - - peer_list An opal list containing a list of bcol_basesmuma_smcm_proc_item_t types. This - contains a list of peers whose shared memory files I have already mapped. - Upon completion of the allgather exchange with all members of the group and depending on the - value of "map_all", my peers' shared memory files are mapped into my local virtual memory - space, with all pertinent information being stored in an bcol_basesmuma_smcm_proc_item_t which is - subsequently appended onto the "peer_list". - - - comm The ompi_communicator_t communicator. - - - input A data struct that caches the information about my shared memory file. - - - map_all Bool that determines whether or not to go ahead and map the files from all of the peers - defined in the sbgp-ing module. If map_all == true, then go ahead and mmap all of the files - obtained in the exchange and append the information to the "peer_list". If map_all == false - then make a check and only mmap those peers' files whose vpid/jobid/filename combination do - not already exist in the "peer_list". Once mapping is completed, append this peer's information - to the "peer_list". - ----------------------------------------------------------------------------------------------------------- - * - */ - - -int bcol_basesmuma_smcm_allgather_connection( - mca_bcol_basesmuma_module_t *sm_bcol_module, - mca_sbgp_base_module_t *module, - opal_list_t *peer_list, - bcol_basesmuma_smcm_proc_item_t ***back_files, - ompi_communicator_t *comm, - bcol_basesmuma_smcm_file_t input, - char *base_fname, - bool map_all) -{ - - /* define local variables */ - - int rc, i, fd; - ptrdiff_t mem_offset; - ompi_proc_t *proc_temp, *my_id; - bcol_basesmuma_smcm_proc_item_t *temp; - bcol_basesmuma_smcm_proc_item_t *item_ptr; - bcol_basesmuma_smcm_proc_item_t **backing_files; - struct file_info_t local_file; - struct file_info_t *all_files=NULL; - - /* sanity check */ - if (strlen(input.file_name) > SM_BACKING_FILE_NAME_MAX_LEN-1) { - opal_output (ompi_bcol_base_framework.framework_output, "backing file name too long: %s len :: %d", - input.file_name, (int) strlen(input.file_name)); - return OMPI_ERR_BAD_PARAM; - } - - backing_files = (bcol_basesmuma_smcm_proc_item_t **) - calloc(module->group_size, sizeof(bcol_basesmuma_smcm_proc_item_t *)); - if (!backing_files) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* FIXME *back_files might have been already allocated - * so free it in order to avoid a memory leak */ - if (NULL != *back_files) { - free (*back_files); - } - *back_files = backing_files; - - my_id = ompi_proc_local(); - - /* Phase One: - gather a list of processes that will participate in the allgather - I'm - preparing this list from the sbgp-ing module that was passed into the function */ - - /* fill in local file information */ - local_file.vpid = ((orte_process_name_t*)&my_id->super.proc_name)->vpid; - local_file.jobid = ((orte_process_name_t*)&my_id->super.proc_name)->jobid; - local_file.file_size=input.size; - local_file.size_ctl_structure=input.size_ctl_structure; - 
local_file.data_seg_alignment=input.data_seg_alignment; - - strcpy (local_file.file_name, input.file_name); - - /* we will exchange this data type as a string of characters - - * this routine is first called before MPI_init() completes - * and before error handling is set up, so we can't use the - * MPI data types to send this data */ - all_files = (struct file_info_t *) calloc(module->group_size, - sizeof (struct file_info_t)); - if (!all_files) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* exchange data */ - rc = comm_allgather_pml(&local_file,all_files,sizeof(struct file_info_t), MPI_CHAR, - sm_bcol_module->super.sbgp_partner_module->my_index, - sm_bcol_module->super.sbgp_partner_module->group_size, - sm_bcol_module->super.sbgp_partner_module->group_list, - sm_bcol_module->super.sbgp_partner_module->group_comm); - if( OMPI_SUCCESS != rc ) { - opal_output (ompi_bcol_base_framework.framework_output, "failed in comm_allgather_pml. Error code: %d", rc); - goto Error; - } - - /* Phase Four: - loop through the receive buffer, unpack the data received from remote peers */ - - for (i = 0; i < module->group_size; i++) { - struct file_info_t *rem_file = all_files + i; - - /* check if this is my index or if the file is already mapped (set above). there - * is no reason to look through the peer list again because no two members of - * the group will have the same vpid/jobid pair. ignore this previously found - * mapping if map_all was requested (NTH: not sure why exactly, since we re-map - * an already mapped file) */ - if (sm_bcol_module->super.sbgp_partner_module->my_index == i) { - continue; - } - - proc_temp = ompi_comm_peer_lookup(comm,module->group_list[i]); - - OPAL_LIST_FOREACH(item_ptr, peer_list, bcol_basesmuma_smcm_proc_item_t) { - /* if the vpid/jobid/filename combination already exists in the list, - then do not map this peer's file --- because you already have */ - if (0 == ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL, - OMPI_CAST_RTE_NAME(&proc_temp->super.proc_name), - &item_ptr->peer) && - 0 == strcmp (item_ptr->sm_file.file_name, rem_file->file_name)) { - ++item_ptr->refcnt; - /* record file data */ - backing_files[i] = item_ptr; - break; - } - } - - if (!map_all && backing_files[i]) { - continue; - } - - temp = OBJ_NEW(bcol_basesmuma_smcm_proc_item_t); - if (!temp) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto Error; - } - - temp->peer.vpid = rem_file->vpid; - temp->peer.jobid = rem_file->jobid; - - temp->sm_file.file_name = strdup (rem_file->file_name); - if (!temp->sm_file.file_name) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - OBJ_RELEASE(temp); - goto Error; - } - - temp->sm_file.size = (size_t) rem_file->file_size; - temp->sm_file.mpool_size = (size_t) rem_file->file_size; - temp->sm_file.size_ctl_structure = (size_t) rem_file->size_ctl_structure; - temp->sm_file.data_seg_alignment = (size_t) rem_file->data_seg_alignment; - temp->refcnt = 1; - - /* Phase Five: - If map_all == true, then we map every peer's file; - else we check to see if I have already mapped this - vpid/jobid/filename combination, and if I have, then - I do not mmap this peer's file. - * - */ - fd = open(temp->sm_file.file_name, O_RDWR, 0600); - if (0 > fd) { - opal_output (ompi_bcol_base_framework.framework_output, "SMCM Allgather failed to open sm backing file %s.
errno = %d", - temp->sm_file.file_name, errno); - rc = OMPI_ERROR; - goto Error; - } - - /* map the file */ - temp->sm_mmap = bcol_basesmuma_smcm_reg_mmap (NULL, fd, temp->sm_file.size, - temp->sm_file.size_ctl_structure, - temp->sm_file.data_seg_alignment, - temp->sm_file.file_name); - close (fd); - if (NULL == temp->sm_mmap) { - opal_output (ompi_bcol_base_framework.framework_output, "mmapping failed to map remote peer's file"); - OBJ_RELEASE(temp); - rc = OMPI_ERROR; - goto Error; - } - - /* compute memory offset */ - mem_offset = (ptrdiff_t) temp->sm_mmap->data_addr - - (ptrdiff_t) temp->sm_mmap->map_seg; - temp->sm_mmap->map_seg->seg_offset = mem_offset; - temp->sm_mmap->map_seg->seg_size = temp->sm_file.size - mem_offset; - /* more stuff to follow */ - - /* append this peer's info, including shared memory map addr, onto the - peer_list */ - - /* record file data */ - backing_files[i] = (bcol_basesmuma_smcm_proc_item_t *) temp; - - opal_list_append(peer_list, (opal_list_item_t*) temp); - } - - rc = OMPI_SUCCESS; - - Error: - - /* error clean-up and return */ - if (NULL != all_files) { - free(all_files); - } - - return rc; -} - -int bcol_basesmuma_smcm_release_connections (mca_bcol_basesmuma_module_t *sm_bcol_module, - mca_sbgp_base_module_t *sbgp_module, opal_list_t *peer_list, - bcol_basesmuma_smcm_proc_item_t ***back_files) -{ - bcol_basesmuma_smcm_proc_item_t **smcm_procs = *back_files; - - for (int i = 0 ; i < sbgp_module->group_size ; ++i) { - if (smcm_procs[i] && 0 == --smcm_procs[i]->refcnt) { - opal_list_remove_item (peer_list, (opal_list_item_t *) smcm_procs[i]); - OBJ_RELEASE(smcm_procs[i]); - } - } - - free (smcm_procs); - *back_files = NULL; - - return OMPI_SUCCESS; - } - - -/* - * mmap the specified file as a shared file. No information exchange with other - * processes takes place within this routine. - * This function assumes that the memory has already been allocated, and only the - * mmap needs to be done. - */ -bcol_basesmuma_smcm_mmap_t *bcol_basesmuma_smcm_mem_reg(void *in_ptr, - size_t length, - size_t alignment, - char* file_name) -{ - /* local variables */ - int fd = -1; - bcol_basesmuma_smcm_mmap_t *map = NULL; - int rc; - - /* if pointer is not allocated - return error. We have no clue how the user will allocate or - * free this memory. - */ - - /* open the shared memory backing file */ - - fd = open(file_name, O_CREAT|O_RDWR,0600); - if (fd < 0) { - opal_output (ompi_bcol_base_framework.framework_output, "basesmuma shared memory allocation open failed with errno: %d", - errno); - return NULL; - } - - if (0 != ftruncate(fd,length)) { - opal_output (ompi_bcol_base_framework.framework_output, "basesmuma shared memory allocation ftruncate failed with errno: %d", - errno); - } else { - /* ensure there is enough space for the backing store */ - rc = ftruncate (fd, length); - if (0 > rc) { - opal_output (ompi_bcol_base_framework.framework_output, "failed to truncate the file to be mapped. errno: %d", errno); - close(fd); - return NULL; - } - - map = bcol_basesmuma_smcm_reg_mmap(in_ptr, fd, length, 0, alignment, file_name); - if (NULL == map) { - close(fd); - return NULL; - } - } - /* no longer need this file descriptor. 
close it */ - close (fd); - - /* takes us to the top of the control structure */ - - return map; - -} - -static bcol_basesmuma_smcm_mmap_t * bcol_basesmuma_smcm_reg_mmap(void *in_ptr, int fd, size_t length, - size_t addr_offset, size_t alignment, - char *file_name) -{ - - /* local variables */ - bcol_basesmuma_smcm_mmap_t *map; - bcol_basesmuma_smcm_file_header_t *seg; - unsigned char* myaddr = NULL; - int flags = MAP_SHARED; - - /* set up the map object */ - map = OBJ_NEW(bcol_basesmuma_smcm_mmap_t); - if (OPAL_UNLIKELY(NULL == map)) { - return NULL; - } - - /* map the file and initialize the segment state */ - if (NULL != in_ptr) { - flags |= MAP_FIXED; - } - seg = (bcol_basesmuma_smcm_file_header_t *) - mmap(in_ptr, length, PROT_READ|PROT_WRITE, flags, fd, 0); - if((void*)-1 == seg) { - OBJ_RELEASE(map); - return NULL; - } - - map->map_path = strdup (file_name); - - /* the first entry in the file is the control structure. the first entry - in the control structure is an mca_common_sm_file_header_t element */ - map->map_seg = seg; - - myaddr = (unsigned char *) seg + addr_offset; - /* if we have a data segment (i.e. if 0 != data_seg_alignement) */ - - if (alignment) { - myaddr = OPAL_ALIGN_PTR(myaddr, alignment, unsigned char*); - - /* is addr past the end of the file? */ - if ((unsigned char *) seg+length < myaddr) { - opal_output (ompi_bcol_base_framework.framework_output, "mca_bcol_basesmuma_sm_alloc_mmap: memory region too small len %lu add %p", - (unsigned long) length, (void*)myaddr); - OBJ_RELEASE(map); - munmap ((void *)seg, length); - return NULL; - } - - } - - map->data_addr = (unsigned char*) myaddr; - map->map_addr = (unsigned char*) seg; - map->map_size = length; - - return map; -} diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_smcm.h b/ompi/mca/bcol/basesmuma/bcol_basesmuma_smcm.h deleted file mode 100644 index db0edd6e78..0000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_smcm.h +++ /dev/null @@ -1,105 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef BCOL_BASESMUMA_SMCM_H -#define BCOL_BASESMUMA_SMCM_H - -#include -#include - -#include "ompi_config.h" -#include "ompi/proc/proc.h" - -#include "opal/class/opal_object.h" -#include "opal/class/opal_list.h" -#include "opal/sys/atomic.h" - - - -typedef struct bcol_basesmuma_smcm_file_header_t { - /* lock to control atomic access */ - opal_atomic_lock_t seg_lock; - - /* is the segment ready for use */ - volatile int32_t seg_inited; - - /* offset to the next memory location available for allocation */ - size_t seg_offset; - - /* total size of the segment */ - size_t seg_size; -} bcol_basesmuma_smcm_file_header_t; - - -typedef struct bcol_basesmuma_smcm_mmap_t { - /* doubly linked list element */ - opal_list_item_t super; - /* pointer to the header embedded in the shared memory file */ - bcol_basesmuma_smcm_file_header_t *map_seg; - /* base address of the mmap'ed file */ - unsigned char *map_addr; - /* base address of data segment */ - unsigned char *data_addr; - /* How big it is (in bytes) */ - size_t map_size; - /* Filename */ - char *map_path; -} bcol_basesmuma_smcm_mmap_t; - -OBJ_CLASS_DECLARATION(bcol_basesmuma_smcm_mmap_t); - - -/* Struct that characterizes a shared memory file */ -struct bcol_basesmuma_smcm_file_t { - - char *file_name; - size_t size; - size_t size_ctl_structure; - size_t data_seg_alignment; - size_t mpool_size; - -}; -typedef struct bcol_basesmuma_smcm_file_t bcol_basesmuma_smcm_file_t; - - -struct bcol_basesmuma_smcm_proc_item_t { - opal_list_item_t item; /* can put me on a free list */ - int refcnt; - ompi_process_name_t peer; - bcol_basesmuma_smcm_file_t sm_file; - bcol_basesmuma_smcm_mmap_t *sm_mmap; /* Pointer to peer's sm file */ - -}; -typedef struct bcol_basesmuma_smcm_proc_item_t bcol_basesmuma_smcm_proc_item_t; - -OBJ_CLASS_DECLARATION(bcol_basesmuma_smcm_proc_item_t); - - -/* allocate shared memory file - * in_ptr - pointer to preallocated memory (if NULL, this will be mmapped) - * alignment - region memory alignment - * file_name - fully qualified backing file name -*/ - -OMPI_DECLSPEC extern bcol_basesmuma_smcm_mmap_t *bcol_basesmuma_smcm_mem_reg(void *in_ptr, - size_t length, - size_t alignment, - char* file_name); - -OMPI_DECLSPEC extern bcol_basesmuma_smcm_mmap_t* bcol_basesmuma_smcm_create_mmap(int fd, - size_t size, char *file_name, - size_t size_ctl_structure, - size_t data_seg_alignment); - -#endif diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_utils.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_utils.c deleted file mode 100644 index debe081913..0000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_utils.c +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "bcol_basesmuma_utils.h" - -/* - * Return the closest power of K that is either greater than - * or equal to the group size.
- */ -int pow_sm_k(int k, int number, int *pow_k) -{ - int power = 0; - int n = 1; - - if( 2 == k){ - while(n <= number){ - power++; - n <<= 1; - } - *pow_k = n >> 1; - - } else { - while (n <= number) { - n *= k; - power++; - } - *pow_k = n/k; - } - - - return (power-1); -} - - - -int get_k_nomial_src_list(int group_size, - int radix, int my_index, - int *src_list) { - - /* local variables */ - int radix_power; - int offset; - int kount = 0; - int src_temp; - - radix_power = 1; - offset = 1; - while(offset < group_size) { - if( offset % (radix * radix_power) ) { - src_temp = my_index - offset; - /* wrap around */ - if ( src_temp < 0 ) { - src_temp += group_size; - } - /* don't probe ghost nodes */ - if( src_temp < group_size ) { - src_list[kount] = src_temp; - kount++; - } - offset+=radix_power; - } else { - - radix_power *= radix; - } - - } - /* return the actual number of nodes to poll on */ - return kount; -} - -int get_k_nomial_dst_size(int group_size, int radix, int my_index) -{ - int dst_count = 0; - int radix_mask; - int k; - radix_mask = 1; - while (radix_mask < group_size) { - if (0 != my_index % (radix * radix_mask)) { - /* I found my level in the tree */ - break; - } - radix_mask *= radix; - } - radix_mask /= radix; - - while(radix_mask > 0) { - /* For each level of the tree, do sends */ - for (k = 1; - k < radix && my_index + radix_mask * k < group_size; - ++k) { - dst_count += 1 ; - } - radix_mask /= radix; - } - - return dst_count; -} diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_utils.h b/ompi/mca/bcol/basesmuma/bcol_basesmuma_utils.h deleted file mode 100644 index 738c6c62ed..0000000000 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_utils.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, LLC. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_BASESMUMA_UTILS_H -#define MCA_BCOL_BASESMUMA_UTILS_H - -#include "ompi_config.h" - -BEGIN_C_DECLS - -#define BASESMUMA_K_NOMIAL_SEND_CHILDREN(radix_mask,radix,relative_index, \ - my_group_index, group_size, ready_flag) \ -do { \ - int k, child; \ - while(radix_mask > 0){ \ - for(k = 1; k < radix && relative_index+radix_mask*k < group_size; k++){ \ - child = my_group_index+radix_mask*k; \ - if (child >= group_size) { \ - child -= group_size; \ - } \ - /*fprintf(stderr,"I am %d sending to child %d\n",my_group_index,child);*/ \ - child_ctl_pointer = data_buffs[child].ctl_struct; \ - child_ctl_pointer->src = my_group_index; \ - /* this can be improved to make better asynchronous progress, but it's - * fine for now. - */ \ - while(child_ctl_pointer->sequence_number != sequence_number ); \ - child_ctl_pointer->flags[BCAST_FLAG][bcol_id] = ready_flag; \ - } \ - radix_mask = radix_mask/radix; \ - } \ -} while( 0 ) - - - - -/* - * Return the closest power of K that is greater than or equal to "number". - */ -int pow_sm_k(int radix_k, int group_size, int *pow_k_group_size); - -/* - * Get the list of possible sources from which data may arrive, based on a K-nomial tree fan-out.
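Hand-tracing get_k_nomial_src_list() above gives a concrete picture of the fan-in; a usage sketch with worked values (computed by tracing the loop above, array size is illustrative):

    int src_list[8];   // worst case is roughly (radix - 1) * number_of_levels
    int n_src = get_k_nomial_src_list(12, 2, 5, src_list);
    // for group_size = 12, radix = 2, my_index = 5 this yields
    // n_src == 4 and src_list == {4, 3, 1, 9}: only these ranks can be
    // sources for rank 5, so only their control structures need polling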
- */ - -int get_k_nomial_src_list(int group_size, int radix, - int my_index, int *src_list); - - -int get_k_nomial_dst_size(int group_size, int radix, int my_index); - -END_C_DECLS - -#endif diff --git a/ompi/mca/bcol/basesmuma/owner.txt b/ompi/mca/bcol/basesmuma/owner.txt deleted file mode 100644 index 1c86df367b..0000000000 --- a/ompi/mca/bcol/basesmuma/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: ORNL -status: unmaintained diff --git a/ompi/mca/bcol/bcol.h b/ompi/mca/bcol/bcol.h deleted file mode 100644 index c06f9eb44c..0000000000 --- a/ompi/mca/bcol/bcol.h +++ /dev/null @@ -1,805 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_H -#define MCA_BCOL_H - -#include "ompi_config.h" -#include "opal/class/opal_list.h" -#include "ompi/mca/mca.h" -#include "ompi/mca/coll/coll.h" -#include "opal/mca/mpool/mpool.h" -#include "ompi/mca/sbgp/sbgp.h" -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/op/op.h" -#include "ompi/include/ompi/constants.h" -#include "ompi/patterns/net/netpatterns_knomial_tree.h" - -#include "opal/util/show_help.h" - -#include - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/* Forward declaration - please do not remove it */ -struct ml_buffers_t; - -struct mca_bcol_base_coll_fn_comm_attributes_t; -struct mca_bcol_base_coll_fn_invoke_attributes_t; -struct mca_bcol_base_coll_fn_desc_t; - -#define NUM_MSG_RANGES 5 -#define MSG_RANGE_INITIAL (1024)*12 -#define MSG_RANGE_INC 10 -#define BCOL_THRESHOLD_UNLIMITED (INT_MAX) -/* Maximum size of a bcol's header. This allows us to correctly calculate the message - * thresholds. If the header of any bcol exceeds this value then increase this one - * to match. 
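The two header constants defined below exist so message thresholds can be computed against the worst-case bcol header; a sketch of the presumed accounting (buffer_size is a hypothetical ML payload buffer size):

    // reserve the largest possible bcol header, rounded up to the
    // header alignment, and treat the remainder as usable payload
    size_t hdr    = (BCOL_HEADER_MAX + BCOL_HEAD_ALIGN - 1) & ~((size_t) BCOL_HEAD_ALIGN - 1);
    size_t usable = buffer_size - hdr;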
*/ -#define BCOL_HEADER_MAX 96 - -#define BCOL_HEAD_ALIGN 32 /* will turn into an MCA parameter after debug */ - -/* - * Functions supported - */ -enum bcol_coll { - /* blocking functions */ - BCOL_ALLGATHER, - BCOL_ALLGATHERV, - BCOL_ALLREDUCE, - BCOL_ALLTOALL, - BCOL_ALLTOALLV, - BCOL_ALLTOALLW, - BCOL_BARRIER, - BCOL_BCAST, - BCOL_EXSCAN, - BCOL_GATHER, - BCOL_GATHERV, - BCOL_REDUCE, - BCOL_REDUCE_SCATTER, - BCOL_SCAN, - BCOL_SCATTER, - BCOL_SCATTERV, - BCOL_FANIN, - BCOL_FANOUT, - - /* nonblocking functions */ - BCOL_IALLGATHER, - BCOL_IALLGATHERV, - BCOL_IALLREDUCE, - BCOL_IALLTOALL, - BCOL_IALLTOALLV, - BCOL_IALLTOALLW, - BCOL_IBARRIER, - BCOL_IBCAST, - BCOL_IEXSCAN, - BCOL_IGATHER, - BCOL_IGATHERV, - BCOL_IREDUCE, - BCOL_IREDUCE_SCATTER, - BCOL_ISCAN, - BCOL_ISCATTER, - BCOL_ISCATTERV, - BCOL_IFANIN, - BCOL_IFANOUT, - - BCOL_SYNC, - /* New function - needed for intermediate steps */ - BCOL_REDUCE_TO_LEADER, - BCOL_NUM_OF_FUNCTIONS -}; -typedef enum bcol_coll bcol_coll; - -typedef enum bcol_elem_type { - BCOL_SINGLE_ELEM_TYPE, - BCOL_MULTI_ELEM_TYPE, - BCOL_NUM_OF_ELEM_TYPES -} bcol_elem_type; - -typedef int (*mca_bcol_base_module_coll_support_all_types_fn_t)(bcol_coll coll_name); -typedef int (*mca_bcol_base_module_coll_support_fn_t)(int op, int dtype, bcol_elem_type elem_num); - -/* - * Collective function status - */ -enum { - BCOL_FN_NOT_STARTED = (OMPI_ERR_MAX - 1), - BCOL_FN_STARTED = (OMPI_ERR_MAX - 2), - BCOL_FN_COMPLETE = (OMPI_ERR_MAX - 3) -}; - - - -/** - * Collective component initialization - * - * Initialize the given collective component. This function should - * initialize any component-level. data. It will be called exactly - * once during MPI_INIT. - * - * @note The component framework is not lazily opened, so attempts - * should be made to minimze the amount of memory allocated during - * this function. - * - * @param[in] enable_progress_threads True if the component needs to - * support progress threads - * @param[in] enable_mpi_threads True if the component needs to - * support MPI_THREAD_MULTIPLE - * - * @retval OMPI_SUCCESS Component successfully initialized - * @retval ORTE_ERROR An unspecified error occurred - */ -typedef int (*mca_bcol_base_component_init_query_fn_t) - (bool enable_progress_threads, bool enable_mpi_threads); - -/** - * Query whether a component is available for the given sub-group - * - * Query whether the component is available for the given - * sub-group. If the component is available, an array of pointers should be - * allocated and returned (with refcount at 1). The module will not - * be used for collective operations until module_enable() is called - * on the module, but may be destroyed (via OBJ_RELEASE) either before - * or after module_enable() is called. If the module needs to release - * resources obtained during query(), it should do so in the module - * destructor. - * - * A component may provide NULL to this function to indicate it does - * not wish to run or return an error during module_enable(). - * - * @note The communicator is available for point-to-point - * communication, but other functionality is not available during this - * phase of initialization. - * - * @param[in] sbgp Pointer to sub-group module. - * @param[out] priority Priority setting for component on - * this communicator - * @param[out] num_modules Number of modules that where generated - * for the sub-group module. 
- * - * @returns An array of pointers to initialized module structures if the component can - * provide modules with the requested functionality, or NULL if the - * component should not be used on the given communicator. - */ -typedef struct mca_bcol_base_module_t **(*mca_bcol_base_component_comm_query_fn_t) - (mca_sbgp_base_module_t *sbgp, int *num_modules); - - -typedef int (*mca_bcol_barrier_init_fn_t)(struct mca_bcol_base_module_t *bcol_module, - mca_sbgp_base_module_t *sbgp_module); - - - -/* - * Macro for use in modules that are of type bcol v2.0.0 - */ -#define MCA_BCOL_BASE_VERSION_2_0_0 \ - OMPI_MCA_BASE_VERSION_2_1_0("bcol", 2, 0, 0) - - -/* This is really an abstraction violation, but it is the easiest way to get - * started. For memory management we need to know which bcol components - * have compatible memory management schemes. Such compatibility can - * be used to eliminate memory copies between levels in the collective - * operation hierarchy, by having the output buffer of one level be the - * input buffer to the next level - */ - -enum { - BCOL_SHARED_MEMORY_UMA=0, - BCOL_SHARED_MEMORY_SOCKET, - BCOL_POINT_TO_POINT, - BCOL_IB_OFFLOAD, - BCOL_SIZE -}; - -OMPI_DECLSPEC extern int bcol_mpool_compatibility[BCOL_SIZE][BCOL_SIZE]; -OMPI_DECLSPEC extern int bcol_mpool_index[BCOL_SIZE][BCOL_SIZE]; - -/* what are the input parameters? too many void * pointers here */ -typedef int (*bcol_register_mem_fn_t)(void *context_data, void *base, - size_t size, void **reg_desc); -/* deregistration function */ -typedef int (*bcol_deregister_mem_fn_t)(void *context_data, void *reg_desc); - -/* Bcol network context definition */ -struct bcol_base_network_context_t { - opal_object_t super; - /* Context id - defined by the upper layer, ML */ - int context_id; - /* Any context information that the bcol wants to use */ - void *context_data; - - /* registration function */ - bcol_register_mem_fn_t register_memory_fn; - /* deregistration function */ - bcol_deregister_mem_fn_t deregister_memory_fn; -}; -typedef struct bcol_base_network_context_t bcol_base_network_context_t; -OMPI_DECLSPEC OBJ_CLASS_DECLARATION(bcol_base_network_context_t); - -/* - * primitive function types - */ - -/* bcast */ -enum { - /* small data function */ - BCOL_BCAST_SMALL_DATA, - - /* small data - dynamic decision making supported */ - BCOL_BCAST_SMALL_DATA_DYNAMIC, - - /* number of functions */ - BCOL_NUM_BCAST_FUNCTIONS -}; - - -/** - * BCOL instance.
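A minimal sketch of what a comm_query hook returns under the contract documented above (hypothetical component, not from the original tree): allocate and return the module array, or return NULL to decline the subgroup.

    static struct mca_bcol_base_module_t **
    hypothetical_bcol_comm_query(mca_sbgp_base_module_t *sbgp, int *num_modules)
    {
        struct mca_bcol_base_module_t **modules = calloc(1, sizeof(*modules));
        if (NULL == modules) {
            *num_modules = 0;
            return NULL;                          // decline: component unused
        }
        modules[0] = OBJ_NEW(mca_bcol_base_module_t);
        modules[0]->sbgp_partner_module = sbgp;   // remember the subgroup
        *num_modules = 1;
        return modules;                           // module refcount starts at 1
    }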
- */ - -/* no limit on fragment size - this supports using user buffers rather - * than library buffers - */ -#define FRAG_SIZE_NO_LIMIT -1 - -/* forward declaration */ -struct coll_bcol_collective_description_t; - -struct mca_bcol_base_component_2_0_0_t { - - /** Base component description */ - mca_base_component_t bcol_version; - - /** Component initialization function */ - mca_bcol_base_component_init_query_fn_t collm_init_query; - - /** Query whether the component is usable for a given communicator */ - mca_bcol_base_component_comm_query_fn_t collm_comm_query; - - /** If the bcol supports all possible data types */ - mca_bcol_base_module_coll_support_fn_t coll_support; - - /** If the bcol supports all possible data types for a given collective operation */ - mca_bcol_base_module_coll_support_all_types_fn_t coll_support_all_types; - - /** Use this flag to prevent multiple init_query calls - in case we have the same bcol on more than a single level */ - bool init_done; - - /** If collective calls with bcols of this type need to be ordered */ - bool need_ordering; - - /** MCA parameter: Priority of this component */ - int priority; - - /** Bcast function pointers */ - struct coll_bcol_collective_description_t * - bcast_functions[BCOL_NUM_BCAST_FUNCTIONS]; - - /** Number of network contexts - need this for resource management */ - int n_net_contexts; - - /** List of network contexts */ - bcol_base_network_context_t **network_contexts; - - /* - * Fragmentation support - */ - - /** Minimum fragment size */ - int min_frag_size; - - /** Maximum fragment size */ - int max_frag_size; - - /** Supports direct use of user buffers */ - bool can_use_user_buffers; -}; -typedef struct mca_bcol_base_component_2_0_0_t mca_bcol_base_component_2_0_0_t; -typedef struct mca_bcol_base_component_2_0_0_t mca_bcol_base_component_t; -OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_bcol_base_component_t); - -/* forward declarations */ -struct mca_coll_ml_descriptor_t; -struct mca_bcol_base_payload_buffer_desc_t; -struct mca_bcol_base_route_info_t; - -typedef struct { - int order_num; /* Seq num of the collective fragment */ - int bcols_started; /* How many bcols that need ordering have been started */ - int n_fns_need_ordering; /* Number of functions called for bcols that need ordering */ -} mca_bcol_base_order_info_t; - -/* structure that encapsulates information propagated amongst multiple - * fragments, whereby completing the entire ensemble of fragments is - * necessary in order to complete the entire collective - */ -struct bcol_fragment_descriptor_t { - /* start iterator */ - int head; - /* end iterator */ - int tail; - /* current iteration */ - int start_iter; - /* number of full iterations in this frag */ - int num_iter; - /* end iter */ - int end_iter; -}; -typedef struct bcol_fragment_descriptor_t bcol_fragment_descriptor_t; - -struct bcol_function_args_t { - /* full message sequence number */ - int64_t sequence_num; - /* full message descriptor - single copy of fragment invariant - * parameters */ - /* Pasha: We don't need this one for the new flow - remove it */ - struct mca_coll_ml_descriptor_t *full_message_descriptor; - struct mca_bcol_base_route_info_t *root_route; - /* function status */ - int function_status; - /* root, for rooted operations */ - int root; - /* input buffer */ - const void *sbuf; - void *rbuf; - const void *userbuf; - struct mca_bcol_base_payload_buffer_desc_t *src_desc; - struct mca_bcol_base_payload_buffer_desc_t *dst_desc; - /* ml buffer size */ - uint32_t buffer_size; - /* index of the buffer in the ml payload cache */ -
-    int buffer_index;
-    int count;
-    struct ompi_datatype_t *dtype;
-    struct ompi_op_t *op;
-    int sbuf_offset;
-    int rbuf_offset;
-    /* for bcol opaque data */
-    void *bcol_opaque_data;
-    /* An output argument used by the BCOL function to tell ML that the result of the BCOL is in rbuf */
-    bool result_in_rbuf;
-    bool root_flag;       /* True if the rank is the root of the operation */
-    bool need_dt_support; /* will trigger alternate code path for some colls */
-    int status;           /* Used for non-blocking collective completion */
-    uint32_t frag_size;   /* fragment size for large messages */
-    int hier_factor;      /* factor used when bcast is invoked as a service function back down
-                           * the tree, in allgather for example; the pack_len is not the actual
-                           * length of the data needing bcasting
-                           */
-    mca_bcol_base_order_info_t order_info;
-    bcol_fragment_descriptor_t frag_info;
-
-};
-
-struct mca_bcol_base_route_info_t {
-    int level;
-    int rank;
-};
-typedef struct mca_bcol_base_route_info_t mca_bcol_base_route_info_t;
-
-struct mca_bcol_base_lmngr_block_t {
-    opal_list_item_t super;
-    struct mca_coll_ml_lmngr_t *lmngr;
-    void* base_addr;
-};
-typedef struct mca_bcol_base_lmngr_block_t mca_bcol_base_lmngr_block_t;
-OBJ_CLASS_DECLARATION(mca_bcol_base_lmngr_block_t);
-
-struct mca_bcol_base_memory_block_desc_t {
-
-    /* memory block for payload buffers */
-    struct mca_bcol_base_lmngr_block_t *block;
-
-    /* Address offset in bytes -- Indicates free memory in the block */
-    uint64_t block_addr_offset;
-
-    /* size of the memory block */
-    size_t size_block;
-
-    /* number of memory banks */
-    uint32_t num_banks;
-
-    /* number of buffers per bank */
-    uint32_t num_buffers_per_bank;
-
-    /* size of a payload buffer */
-    uint32_t size_buffer;
-
-    /* pointer to the initialized buffer descriptors */
-    struct mca_bcol_base_payload_buffer_desc_t *buffer_descs;
-
-    /* index of the next free buffer in the block */
-    uint64_t next_free_buffer;
-
-    uint32_t *bank_release_counters;
-
-    /* Counter that defines which bank should be synchronized next;
-     * since collectives could complete out of order, we have to make
-     * sure that memory synchronization collectives are started in order! */
-    int memsync_counter;
-
-    /* This array of flags is used to signal that the bank is ready for recycling */
-    bool *ready_for_memsync;
-
-    /* This flag monitors whether the bank is open for use. Usually we expect that
-     * the user will do the check only on buffer-zero allocation */
-    bool *bank_is_busy;
-
-};
-
-/* convenience typedef */
-typedef struct mca_bcol_base_memory_block_desc_t mca_bcol_base_memory_block_desc_t;
-
-typedef void (*mca_bcol_base_release_buff_fn_t)(struct mca_bcol_base_memory_block_desc_t *ml_memblock, uint32_t buff_id);
-
-struct mca_bcol_base_payload_buffer_desc_t {
-    void     *base_data_addr;   /* buffer address */
-    void     *data_addr;        /* buffer address + header offset */
-    uint64_t generation_number; /* my generation */
-    uint64_t bank_index;        /* my bank */
-    uint64_t buffer_index;      /* my buff index */
-};
-/* convenience typedef */
-typedef struct mca_bcol_base_payload_buffer_desc_t mca_bcol_base_payload_buffer_desc_t;
-
-typedef struct bcol_function_args_t bcol_function_args_t;
-
-
-/* The collective operation is defined by a series of collective operations
- * invoked through a function pointer. Each function may be different,
- * so we store the arguments in a struct, pass a pointer to the struct,
- * and use this as a way to hide the different function signatures.
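/* Illustrative sketch (editor's addition): one plausible way a flat buffer id
 * could map onto the (bank, buffer) layout described by
 * mca_bcol_base_memory_block_desc_t above. The row-major layout and the
 * function name are assumptions for illustration, not a statement about the
 * actual ML implementation; the caller is assumed to keep `flat` below
 * num_banks * num_buffers_per_bank. */
static inline mca_bcol_base_payload_buffer_desc_t *
example_desc_for_flat_index(mca_bcol_base_memory_block_desc_t *block,
                            uint64_t flat)
{
    uint64_t bank   = flat / block->num_buffers_per_bank; /* which bank */
    uint64_t buffer = flat % block->num_buffers_per_bank; /* slot in bank */

    return &block->buffer_descs[bank * block->num_buffers_per_bank + buffer];
}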
- *
- * @param[in]  input_args  Structure with function arguments
- * @param[in]  bcol_desc   Component-specific parameters
- * @param[out] status      Return status of the function
- *                         MCA_BCOL_COMPLETE    - function completed
- *                         MCA_BCOL_IN_PROGRESS - function incomplete
- *
- * @retval OMPI_SUCCESS successful completion
- * @retval OMPI_ERROR   function returned error
- */
-/* forward declaration */
-struct mca_bcol_base_module_t;
-
-/* collective function prototype - all functions have the same interface
- * so that we can call them via a function pointer */
-struct mca_bcol_base_function_t;
-typedef int (*mca_bcol_base_module_collective_fn_primitives_t)
-    (bcol_function_args_t *input_args, struct mca_bcol_base_function_t *const_args);
-
-typedef int (*mca_bcol_base_module_collective_init_fn_primitives_t)
-    (struct mca_bcol_base_module_t *bcol_module);
-
- /**
-  * function to query for collective function attributes
-  *
-  * @param attribute (IN)             the attribute of interest
-  * @param algorithm_parameters (OUT) the value of the attribute for this
-  *                                   function. If this attribute is not supported,
-  *                                   OMPI_ERR_NOT_FOUND is returned.
-  */
- typedef int (*mca_bcol_get_collective_attributes)(int attribute,
-                                                   void *algorithm_parameters);
-
-/* data structure for tracking the relevant data needed for ml level
- * algorithm construction (e.g., function selection), initialization, and
- * usage.
- */
-struct coll_bcol_collective_description_t {
-    /* collective initiation function - first function called */
-    mca_bcol_base_module_collective_fn_primitives_t coll_fn;
-
-    /* collective progress function - called to advance a started collective */
-    mca_bcol_base_module_collective_fn_primitives_t progress_fn;
-
-    /* attribute query function */
-    mca_bcol_get_collective_attributes get_attributes;
-
-    /* attributes supported - bit map */
-    uint64_t attribute;
-
-};
-typedef struct coll_bcol_collective_description_t
-coll_bcol_collective_description_t;
-
-/* collective operation attributes */
-enum {
-    /* supports dynamic decisions - e.g., the collective operation does not
-     * need to be fully defined before it can be started
-     */
-    BCOL_ATTRIBUTE_DYNAMIC,
-
-    /* number of attributes */
-    BCOL_NUM_ATTRIBUTES
-};
-
-/* For rooted collectives,
- * does the algorithm know its data source?
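/* Illustrative sketch (editor's addition): the shape of a collective entry
 * point matching mca_bcol_base_module_collective_fn_primitives_t. The body is
 * a hypothetical no-op, and BCOL_FN_COMPLETE is assumed to be the completion
 * status code from this header; real implementations post work and typically
 * return a started/in-progress status instead. */
static int example_bcast_small(bcol_function_args_t *input_args,
                               struct mca_bcol_base_function_t *const_args)
{
    /* the module that registered this function */
    struct mca_bcol_base_module_t *module = const_args->bcol_module;
    (void) module;

    if (input_args->root_flag) {
        /* the root would push input_args->sbuf toward the group here */
    }

    input_args->result_in_rbuf = false; /* nothing was produced in rbuf */
    return BCOL_FN_COMPLETE;            /* assumed completion code */
}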
- */ -enum { - DATA_SRC_KNOWN=0, - DATA_SRC_UNKNOWN, - DATA_SRC_TYPES -}; - -enum { - BLOCKING, - NON_BLOCKING -}; -/* gvm For selection logic */ -struct mca_bcol_base_coll_fn_comm_attributes_t { - int bcoll_type; - int comm_size_min; - int comm_size_max; - int data_src; - int waiting_semantics; -}; - -typedef struct mca_bcol_base_coll_fn_comm_attributes_t - mca_bcol_base_coll_fn_comm_attributes_t; - -struct mca_bcol_base_coll_fn_invoke_attributes_t { - int bcol_msg_min; - int bcol_msg_max; - uint64_t datatype_bitmap; /* Max is OMPI_DATATYPE_MAX_PREDEFINED defined to be 45 */ - uint32_t op_types_bitmap; /* bit map of optypes supported */ -}; - -typedef struct mca_bcol_base_coll_fn_invoke_attributes_t - mca_bcol_base_coll_fn_invoke_attributes_t; - -struct mca_bcol_base_coll_fn_desc_t { - opal_list_item_t super; - struct mca_bcol_base_coll_fn_comm_attributes_t *comm_attr; - struct mca_bcol_base_coll_fn_invoke_attributes_t *inv_attr; - mca_bcol_base_module_collective_fn_primitives_t coll_fn; - mca_bcol_base_module_collective_fn_primitives_t progress_fn; -}; - -typedef struct mca_bcol_base_coll_fn_desc_t mca_bcol_base_coll_fn_desc_t; -OBJ_CLASS_DECLARATION(mca_bcol_base_coll_fn_desc_t); - -/* end selection logic */ - -typedef int (*mca_bcol_base_module_collective_init_fn_t) - (struct mca_bcol_base_module_t *bcol_module, - mca_sbgp_base_module_t *sbgp_module); - - /* per communicator memory initialization function */ -typedef int (*mca_bcol_module_mem_init)(struct ml_buffers_t *registered_buffers, - mca_bcol_base_component_t *module); - -/* Initialize memory block - ml_memory_block initialization interface function - * - * Invoked at the ml level, used to pass bcol specific registration information - * for the "ml_memory_block" - * - * @param[in] ml_memory_block Pointer to the ml_memory_block. This struct - * contains bcol specific registration information and a call back function - * used for resource recycling. - * - * @param[in] reg_data bcol specific registration data. 
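/* Illustrative sketch (editor's addition): how the selection-logic attribute
 * structures above might be filled for a blocking small-message function.
 * BCOL_BCAST is assumed to be the bcast collective id from this framework's
 * function enum; the numeric limits and bitmaps are invented thresholds. */
static void example_fill_attributes(mca_bcol_base_coll_fn_comm_attributes_t *comm_attr,
                                    mca_bcol_base_coll_fn_invoke_attributes_t *inv_attr)
{
    comm_attr->bcoll_type        = BCOL_BCAST;       /* assumed collective id */
    comm_attr->comm_size_min     = 0;
    comm_attr->comm_size_max     = 1024;
    comm_attr->data_src          = DATA_SRC_KNOWN;
    comm_attr->waiting_semantics = BLOCKING;

    inv_attr->bcol_msg_min    = 0;
    inv_attr->bcol_msg_max    = 1024;          /* bytes, invented threshold */
    inv_attr->datatype_bitmap = 0xffffffffULL; /* all predefined datatypes */
    inv_attr->op_types_bitmap = 0xffffffff;    /* all op types */
}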
- *
- * @returns On Success: OMPI_SUCCESS
- *          On Failure: OMPI_ERROR
- *
- */
-/*typedef int (*mca_bcol_base_init_memory_fn_t)
-    (struct mca_bcol_base_memory_block_desc_t *ml_block, void *reg_data);*/
-
-typedef int (*mca_bcol_base_init_memory_fn_t)
-    (struct mca_bcol_base_memory_block_desc_t *payload_block,
-     uint32_t data_offset,
-     struct mca_bcol_base_module_t *bcol,
-     void *reg_data);
-
-typedef int (*mca_common_allgather_init_fn_t)
-    (struct mca_bcol_base_module_t *bcol_module);
-
-typedef void (*mca_bcol_base_set_thresholds_fn_t)
-    (struct mca_bcol_base_module_t *bcol_module);
-
-enum {
-    MCA_BCOL_BASE_ZERO_COPY = 1,
-    MCA_BCOL_BASE_NO_ML_BUFFER_FOR_LARGE_MSG = 1 << 1,
-    MCA_BCOL_BASE_NO_ML_BUFFER_FOR_BARRIER = 1 << 2
-};
-
-/* base module */
-struct mca_bcol_base_module_t {
-    /* base coll component */
-    opal_object_t super;
-
-    /* bcol component (Pasha: Do we really need to cache the component?) */
-    mca_bcol_base_component_t *bcol_component;
-
-    /* network context that is used by this bcol - only one context per bcol is allowed */
-    bcol_base_network_context_t *network_context;
-
-    /* We are going to use the context index a lot;
-       in order to decrease the number of dereferences
-       bcol->network_context->index
-       we cache the value on the bcol */
-    int context_index;
-
-    /* Set of flags that describe features supported by the bcol */
-    uint64_t supported_mode;
-
-    /* per communicator memory initialization function */
-    mca_bcol_module_mem_init init_module;
-
-    /* sub-grouping module partner */
-    mca_sbgp_base_module_t *sbgp_partner_module;
-
-    /* size of subgroup - cache this, so we still have access when
-     * sbgp_partner_module no longer exists */
-    int size_of_subgroup;
-
-    /* sequence number offset - want to make sure that we start
-     * id'ing collectives with id 0, so we can have simple
-     * resource management.
-     */
-    int64_t squence_number_offset;
-
-
-    /* number of times to poll for operation completion before
-     * breaking out of a non-blocking collective operation
-     */
-    int n_poll_loops;
-
-    /* size of header that will go in the data buff; should not include
-     * any info regarding alignment - let the ml level handle this
-     */
-    uint32_t header_size;
-
-
-    /* Each bcol is assigned a unique value;
-     * see if we can get away with a 16-bit id
-     */
-    int16_t bcol_id;
-
-    /* FIXME:
-     * Since mca_bcol_base_module_t is the only parameter which will be passed
-     * into bcol_basesmuma_bcast_init(), add the flag to indicate whether
-     * the hdl-based algorithms will get enabled.
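/* Illustrative sketch (editor's addition): supported_mode above is a bit mask
 * of the MCA_BCOL_BASE_* feature flags just defined; a caller could test it
 * as shown. The function name is hypothetical; bool is assumed to come from
 * ompi_config.h as elsewhere in this header. */
static inline bool
example_module_is_zero_copy(struct mca_bcol_base_module_t *module)
{
    return 0 != (module->supported_mode & MCA_BCOL_BASE_ZERO_COPY);
}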
- */
-    bool use_hdl;
-    /*
-     * Collective function pointers
-     */
-    /* changing function signature - will replace bcol_functions */
-    mca_bcol_base_module_collective_fn_primitives_t bcol_function_table[BCOL_NUM_OF_FUNCTIONS];
-
-    /* Tables hold pointers to functions */
-    mca_bcol_base_module_collective_init_fn_primitives_t bcol_function_init_table[BCOL_NUM_OF_FUNCTIONS];
-    opal_list_t bcol_fns_table[BCOL_NUM_OF_FUNCTIONS];
-    struct mca_bcol_base_coll_fn_desc_t*
-        filtered_fns_table[DATA_SRC_TYPES][2][BCOL_NUM_OF_FUNCTIONS][NUM_MSG_RANGES+1][OMPI_OP_NUM_OF_TYPES][OMPI_DATATYPE_MAX_PREDEFINED];
-
-    /*
-     * Bcol interface function to pass bcol specific
-     * info and the memory recycling call back
-     */
-    mca_bcol_base_init_memory_fn_t bcol_memory_init;
-
-    /*
-     * netpatterns interface function; we would like to invoke this
-     * on the ml level
-     */
-    mca_common_allgather_init_fn_t k_nomial_tree;
-    /* Each bcol caches a list which describes how many ranks
-     * are "below" each rank in this bcol
-     */
-    int *list_n_connected;
-
-    /* offsets for scatter/gather */
-    int hier_scather_offset;
-
-    /* Small message threshold for each collective */
-    int small_message_thresholds[BCOL_NUM_OF_FUNCTIONS];
-
-    /* Set small_message_thresholds array */
-    mca_bcol_base_set_thresholds_fn_t set_small_msg_thresholds;
-
-    /* Pointer to the order counter on the upper layer,
-       used if the bcol needs to be ordered */
-    int *next_inorder;
-};
-typedef struct mca_bcol_base_module_t mca_bcol_base_module_t;
-OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_bcol_base_module_t);
-
-/* function description */
-struct mca_bcol_base_function_t {
-    int fn_idx;
-    /* module */
-    struct mca_bcol_base_module_t *bcol_module;
-
-    /*
-     * The following two parameters are used by bcol modules
-     * that want to do some optimizations based on the fact that
-     * n functions from the same bcol module are called in a row.
-     * For example, in the iboffload case, on the first call one
-     * will want to initialize the MWR and start to instantiate
-     * it, but only post it at the end of the last call.
-     * The index of this function in a sequence of consecutive
-     * functions from the same bcol
-     */
-    int index_in_consecutive_same_bcol_calls;
-
-    /* number of times functions from this bcol are
-     * called in order
-     */
-    int n_of_this_type_in_a_row;
-
-    /*
-     * number of times functions from this module are called in the
-     * collective operation.
- */
-    int n_of_this_type_in_collective;
-    int index_of_this_type_in_collective;
-};
-typedef struct mca_bcol_base_function_t mca_bcol_base_function_t;
-
-
-struct mca_bcol_base_descriptor_t {
-    opal_free_list_item_t super;
-/* Vasily: will be described in the future */
-};
-typedef struct mca_bcol_base_descriptor_t mca_bcol_base_descriptor_t;
-
-static inline __opal_attribute_always_inline__ size_t
-    mca_bcol_base_get_buff_length(ompi_datatype_t *dtype, int count)
-{
-    ptrdiff_t lb, extent;
-    ompi_datatype_get_extent(dtype, &lb, &extent);
-
-    return (size_t) (extent * count);
-}
-
-#define MCA_BCOL_CHECK_ORDER(module, bcol_function_args)      \
-    do {                                                      \
-        if (*((module)->next_inorder) !=                      \
-            (bcol_function_args)->order_info.order_num) {     \
-            return BCOL_FN_NOT_STARTED;                       \
-        }                                                     \
-    } while (0)
-
-#define MCA_BCOL_UPDATE_ORDER_COUNTER(module, order_info)     \
-    do {                                                      \
-        (order_info)->bcols_started++;                        \
-        if ((order_info)->n_fns_need_ordering ==              \
-            (order_info)->bcols_started) {                    \
-            ++(*((module)->next_inorder));                    \
-        }                                                     \
-    } while (0)
-
-#if defined(c_plusplus) || defined(__cplusplus)
-}
-#endif
-#endif /* MCA_BCOL_H */
diff --git a/ompi/mca/bcol/iboffload/.opal_ignore b/ompi/mca/bcol/iboffload/.opal_ignore
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/ompi/mca/bcol/iboffload/Makefile.am b/ompi/mca/bcol/iboffload/Makefile.am
deleted file mode 100644
index 4e9dd0c966..0000000000
--- a/ompi/mca/bcol/iboffload/Makefile.am
+++ /dev/null
@@ -1,66 +0,0 @@
-#
-# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
-# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
-# Copyright (c) 2012-2015 Cisco Systems, Inc. All rights reserved.
-# $COPYRIGHT$
-#
-# Additional copyrights may follow
-#
-# $HEADER$
-#
-
-AM_CPPFLAGS = $(bcol_iboffload_CPPFLAGS) $(btl_openib_CPPFLAGS)
-
-sources = \
-    bcol_iboffload.h \
-    bcol_iboffload_device.h \
-    bcol_iboffload_module.c \
-    bcol_iboffload_mca.h \
-    bcol_iboffload_mca.c \
-    bcol_iboffload_endpoint.h \
-    bcol_iboffload_endpoint.c \
-    bcol_iboffload_frag.h \
-    bcol_iboffload_frag.c \
-    bcol_iboffload_collfrag.h \
-    bcol_iboffload_collfrag.c \
-    bcol_iboffload_task.h \
-    bcol_iboffload_task.c \
-    bcol_iboffload_component.c \
-    bcol_iboffload_barrier.c \
-    bcol_iboffload_bcast.h \
-    bcol_iboffload_bcast.c \
-    bcol_iboffload_allgather.c \
-    bcol_iboffload_collreq.h \
-    bcol_iboffload_collreq.c \
-    bcol_iboffload_qp_info.c \
-    bcol_iboffload_qp_info.h \
-    bcol_iboffload_fanin.c \
-    bcol_iboffload_fanout.c \
-    bcol_iboffload_allreduce.c
-
-# Make the output library in this directory, and name it either
-# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
-# (for static builds).
-
-component_noinst =
-component_install =
-if MCA_BUILD_ompi_bcol_iboffload_DSO
-component_install += mca_bcol_iboffload.la
-else
-component_noinst += libmca_bcol_iboffload.la
-endif
-
-# See ompi/mca/btl/sm/Makefile.am for an explanation of
-# libmca_common_sm.la.
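/* Illustrative sketch (editor's addition, referring back to the ordering
 * macros defined in bcol.h above, not to this Makefile): the intended call
 * pattern inside a collective entry point of a bcol that requires ordering.
 * The function name is hypothetical; BCOL_FN_STARTED is the started status
 * used elsewhere in this framework. */
static int example_ordered_collective(bcol_function_args_t *args,
                                      struct mca_bcol_base_function_t *const_args)
{
    struct mca_bcol_base_module_t *module = const_args->bcol_module;

    /* bail out (to be retried later) until it is this fragment's turn */
    MCA_BCOL_CHECK_ORDER(module, args);

    /* ... post the actual collective work here ... */

    /* advance the shared order counter once all ordered bcols have started */
    MCA_BCOL_UPDATE_ORDER_COUNTER(module, &args->order_info);
    return BCOL_FN_STARTED;
}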
- -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_bcol_iboffload_la_SOURCES = $(sources) -mca_bcol_iboffload_la_LDFLAGS = -module -avoid-version $(btl_openib_LDFLAGS) $(bcol_iboffload_LDFLAGS) -mca_bcol_iboffload_la_LIBADD = $(btl_openib_LIBS) $(bcol_iboffload_LIBS) \ - $(OMPI_TOP_BUILDDIR)/ompi/mca/common/ofacm/libmca_common_ofacm.la \ - $(OMPI_TOP_BUILDDIR)/ompi/mca/common/verbs/libmca_common_verbs.la - -noinst_LTLIBRARIES = $(component_noinst) -libmca_bcol_iboffload_la_SOURCES =$(sources) -libmca_bcol_iboffload_la_LDFLAGS = -module -avoid-version $(btl_openib_LDFLAGS) $(bcol_iboffload_LDFLAGS) diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload.h b/ompi/mca/bcol/iboffload/bcol_iboffload.h deleted file mode 100644 index 38f8ba3a31..0000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload.h +++ /dev/null @@ -1,765 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, LLC. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_IBOFFLOAD_H -#define MCA_BCOL_IBOFFLOAD_H - -#include "ompi_config.h" - -#include -#include - -#include -#include -#include - -#include "ompi/mca/mca.h" - -#include "ompi/op/op.h" -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/datatype/ompi_datatype_internal.h" - -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/bcol/base/base.h" - -#include "ompi/mca/sbgp/ibnet/sbgp_ibnet.h" - -#include "opal/mca/mpool/mpool.h" -#include "ompi/request/request.h" - -#include "ompi/mca/common/ofacm/connect.h" - -#include "bcol_iboffload_qp_info.h" - -BEGIN_C_DECLS - -#define IMM_RDMA 1 -#define INLINE 1 -#define NO_INLINE 0 - -#define MCA_IBOFFLOAD_CALC_SIZE_EXT 8 -#define MCA_IBOFFLOAD_IB_DRIVER_OPERAND_SIZE 8 -#define MCA_IBOFFLOAD_CACHE_LINE_SIZE 128 - -#if OPAL_HAVE_IBOFFLOAD_CALC_RDMA -#define MCA_BCOL_IBOFFLOAD_SEND_CALC IBV_M_WR_CALC_SEND -#else -#define MCA_BCOL_IBOFFLOAD_SEND_CALC IBV_M_WR_CALC -#endif - - -/* 0 - barrier rdma info - 1 - ML rdma info */ -#define MAX_REMOTE_RDMA_INFO 2 - -/* forward declarations */ -struct mca_bcol_iboffload_module_t; -struct mca_bcol_iboffload_collreq_t; -struct mca_bcol_iboffload_endpoint_t; -struct mca_bcol_iboffload_frag_t; -struct mca_bcol_iboffload_task_t; -struct mca_bcol_iboffload_qp_info_t; -struct mca_bcol_iboffload_collfrag_t; -struct mca_bcol_iboffload_algth_lst_t; -struct mca_bcol_iboffload_device_t; - -typedef int (*mca_bcol_iboffload_coll_algth_fn_t) ( - struct mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request); - -struct mca_bcol_iboffload_rdma_info_t { - uint64_t addr; - uint32_t rkey; - uint32_t lkey; -}; -typedef struct mca_bcol_iboffload_rdma_info_t mca_bcol_iboffload_rdma_info_t; - -struct mca_bcol_iboffload_rdma_buffer_desc_t { - void *data_addr; /* buffer address */ - uint64_t generation_number; /* my generation */ - uint64_t bank_index; /* my bank */ - uint64_t buffer_index; /* my buff index */ -}; -typedef struct mca_bcol_iboffload_rdma_buffer_desc_t mca_bcol_iboffload_rdma_buffer_desc_t; - -struct mca_bcol_iboffload_rdma_block_desc_t { - /* number of memory banks */ - uint32_t num_banks; - /* number of buffers per bank */ - uint32_t num_buffers_per_bank; - /* size of a payload buffer */ - uint32_t size_buffer; - /* data offset from ML */ - uint32_t data_offset; - /* pointer to buffer descriptors 
initialized */
-    mca_bcol_iboffload_rdma_buffer_desc_t *rdma_desc;
-};
-typedef struct mca_bcol_iboffload_rdma_block_desc_t mca_bcol_iboffload_rdma_block_desc_t;
-
-/* Information that we need to keep in order to access remote
-   memory. For each remote peer (endpoint) we will keep this
-   structure */
-struct mca_bcol_iboffload_rem_rdma_block_t {
-    /* IB related information first */
-    mca_bcol_iboffload_rdma_info_t ib_info;
-
-    mca_bcol_iboffload_rdma_buffer_desc_t *rdma_desc;
-};
-typedef struct mca_bcol_iboffload_rem_rdma_block_t mca_bcol_iboffload_rem_rdma_block_t;
-
-enum {
-    MCA_BCOL_IBOFFLOAD_BK_COUNTER_INDEX = 0,
-    MCA_BCOL_IBOFFLOAD_BK_SYNC_INDEX,
-    MCA_BCOL_IBOFFLOAD_BK_LAST
-};
-
-/* Information that we need to keep in order to access and
-   track local memory that is used as source and destination
-   for RDMA operations */
-struct mca_bcol_iboffload_local_rdma_block_t {
-    /* sync counter; keeps the id of the next bank to start */
-    int sync_counter;
-    /* Counter for released ml buffers */
-    int *bank_buffer_counter[MCA_BCOL_IBOFFLOAD_BK_LAST];
-    /* IB related information first */
-    struct mca_bcol_iboffload_rdma_info_t ib_info;
-    /* back pointer to original ML memory descriptor */
-    struct mca_bcol_base_memory_block_desc_t *ml_mem_desc;
-    /* Pasha: do we really need this one? */
-    /* caching ml memory descriptor configurations locally */
-    mca_bcol_iboffload_rdma_block_desc_t bdesc;
-};
-typedef struct mca_bcol_iboffload_local_rdma_block_t mca_bcol_iboffload_local_rdma_block_t;
-
-struct mca_bcol_iboffload_recv_wr_manager {
-    opal_mutex_t lock;
-    /** Array of ready-to-use receive work requests.
-      * It is a 2-dimensional array, since for each
-      * qp size we want to keep separate recv wr */
-    struct ibv_recv_wr **recv_work_requests;
-};
-typedef struct mca_bcol_iboffload_recv_wr_manager mca_bcol_iboffload_recv_wr_manager;
-
-/**
- * Structure to hold the iboffload coll component. First it holds the
- * base coll component, and then holds a bunch of
- * component-specific stuff (e.g., current MCA param
- * values).
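/* Illustrative sketch (editor's addition): computing the remote address and
 * rkey for an RDMA operation targeting one buffer of a peer's block, showing
 * how the ib_info and rdma_desc members of mca_bcol_iboffload_rem_rdma_block_t
 * above fit together. The function name is hypothetical, not code from the
 * original file. */
static inline void
example_remote_target(mca_bcol_iboffload_rem_rdma_block_t *rem,
                      uint64_t buffer_index,
                      uint64_t *remote_addr, uint32_t *rkey)
{
    /* per-buffer virtual address advertised by the peer */
    *remote_addr = (uint64_t) (uintptr_t) rem->rdma_desc[buffer_index].data_addr;
    /* one rkey covers the whole remote block */
    *rkey = rem->ib_info.rkey;
}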
- */
-struct mca_bcol_iboffload_component_t {
-    /** Base coll component */
-    mca_bcol_base_component_2_0_0_t super;
-    /** Enable/disable verbose mode */
-    int verbose;
-    int num_qps;
-    /** Whether we want a warning if a non-default GID prefix is not configured
-        on a multiport setup */
-    bool warn_default_gid_prefix;
-    /** Whether we want a warning if the user specifies a non-existent
-        device and/or port via bcol_iboffload_if_[in|ex]clude MCA params */
-    bool warn_nonexistent_if;
-    /** initial size of free lists */
-    int free_list_num;
-    /** maximum size of free lists */
-    int free_list_max;
-    /** number of elements to alloc when growing free lists */
-    int free_list_inc;
-    /** name of ib memory pool */
-    char* mpool_name;
-    /** max outstanding CQE on the CQ */
-    int cq_size;
-    /** Max size of inline data */
-    unsigned int max_inline_data;
-    /** IB partition definition */
-    uint32_t pkey_val;
-    /** Outstanding atomic reads */
-    unsigned int qp_ous_rd_atom;
-    /** IB MTU */
-    int mtu;
-    /** Recv not ready timer */
-    int min_rnr_timer;
-    /** IB timeout */
-    int timeout;
-    /** IB retry count */
-    int retry_count;
-    /** Recv not ready retry count */
-    int rnr_retry;
-    /** IB maximum pending RDMA */
-    int max_rdma_dst_ops;
-    /** IB Service level (QOS) */
-    int service_level;
-    /** Preferred communication buffer alignment in bytes (must be a power of two) */
-    int buffer_alignment;
-    /** Max number of tasks for the MQ */
-    int max_mqe_tasks;
-    /** Max MQ size */
-    int max_mq_size;
-    /** HCA/Port include/exclude lists */
-    char *if_include;
-    char **if_include_list;
-    char *if_exclude;
-    char **if_exclude_list;
-    /** Dummy argv-style list; a copy of names from the
-        if_[in|ex]clude list that we use for error checking (to ensure
-        that they all exist) */
-    char **if_list;
-    /** Array of ibv devices */
-    struct ibv_device **ib_devs;
-    /** devices count */
-    int num_devs;
-    /** MCA param bcol_iboffload_receive_queues */
-    char *receive_queues;
-    /** Common info about all kinds of QPs on each iboffload module */
-    struct mca_bcol_iboffload_qp_info_t qp_infos[MCA_BCOL_IBOFFLOAD_QP_LAST];
-    /** Array of iboffload devices */
-    opal_pointer_array_t devices;
-    /** Free list of collfrag descriptors */
-    ompi_free_list_t collfrags_free;
-    /** Free list of outstanding collective operations */
-    ompi_free_list_t collreqs_free;
-    /** Free list for free task operations */
-    ompi_free_list_t tasks_free;
-    /** Free list for free calc task operations */
-    ompi_free_list_t calc_tasks_free;
-    /** Free list of empty frags that do not keep any
-        registration information */
-    ompi_free_list_t ml_frags_free;
-    /** Recv work request manager */
-    mca_bcol_iboffload_recv_wr_manager recv_wrs;
-    /** We allocate some resources on the component
-     *  with the creation of the first iboffload module
-     *  and set this flag to true */
-    bool init_done;
-    /** Maximal number of fragments of the same collective request that can be sent in parallel */
-    unsigned int max_pipeline_depth;
-    /** array mapping Open MPI reduction operators to MVerbs reduction operators */
-    enum ibv_m_wr_calc_op map_ompi_to_ib_calcs[OMPI_OP_NUM_OF_TYPES];
-    /** array mapping Open MPI data types to MVerbs data types */
-    enum ibv_m_wr_data_type map_ompi_to_ib_dt[OMPI_DATATYPE_MPI_MAX_PREDEFINED];
-    /** The order of the exchange tree */
-    int exchange_tree_order;
-    /** Knomial tree order */
-    int knomial_tree_order;
-    /** K-nomial radix */
-    int k_nomial_radix;
-    /** Maximum number of pulls for completion check */
-    int max_progress_pull;
-    /** Barrier function selector */
-    int barrier_mode;
-    /** MCA params for selecting Bruck's alltoall algorithms */
-    int use_brucks_smsg_alltoall_rdma;
-    int use_brucks_smsg_alltoall_sr;
-    /** radix of the small-data alltoall Bruck-like algorithm */
-    int k_alltoall_bruck_radix;
-    /** alltoall small data buffer alignment */
-    int tmp_buf_alignment;
-};
-
-/**
- * Convenience typedef
- */
-typedef struct mca_bcol_iboffload_component_t mca_bcol_iboffload_component_t;
-
-/* List of all algorithms that we use */
-enum {
-    FANIN_ALG,
-    FANOUT_ALG,
-    RECURSIVE_DOUBLING_BARRIER_ALG,
-    RECURSIVE_KNOMIAL_BARRIER_ALG,
-    RECURSIVE_DOUBLING_ALLREDUCE_ALG,
-    RECURSIVE_DOUBLING_REDUCE_ALG,
-    RECURSIVE_DOUBLING_TREE_BCAST,
-    ALL_ENDPOINTS, /* connected to all peers */
-    ALLGATHER_KNOMIAL_ALG,
-    ALLGATHER_NEIGHBOR_ALG,
-    REMOTE_EXCHANGE_ALG,
-    LAST_ALG
-};
-
-struct mca_bcol_iboffload_port_t {
-    int id;             /** Port number on device: 1 or 2 */
-    int stat;           /** Port status - Active, Init, etc. */
-    enum ibv_mtu mtu;   /** MTU on this port */
-    uint64_t subnet_id; /** Subnet id for the port */
-    uint16_t lid;
-    uint16_t lmc;
-};
-typedef struct mca_bcol_iboffload_port_t mca_bcol_iboffload_port_t;
-
-enum {
-    COLL_MQ = 0,
-    SERVICE_MQ,
-    BCOL_IBOFFLOAD_MQ_NUM
-};
-
-struct mca_bcol_iboffload_module_t {
-    /* base structure */
-    mca_bcol_base_module_t super;
-
-    /* size */
-    int group_size;
-    int log_group_size;
-
-    /* size of each memory segment */
-    size_t segment_size;
-
-    /* collective tag */
-    long long collective_tag;
-
-    /* pointer to device */
-    struct mca_bcol_iboffload_device_t *device;
-
-    /* caching port number */
-    uint32_t port;
-
-    /* Connecting iboffload with ibnet module information */
-    /* pointer to sbgp ibnet */
-    mca_sbgp_ibnet_module_t *ibnet;
-
-    /* connection group index for the ibnet */
-    int cgroup_index;
-
-    /* array of endpoints */
-    struct mca_bcol_iboffload_endpoint_t **endpoints;
-
-    /* Size of the endpoints array */
-    int num_endpoints;
-
-    /* caching port subnet id and lid -
-     * the same information we have on the device */
-    uint64_t subnet_id;
-    uint16_t lid;
-
-    /* Pointer to management queue */
-    struct mqe_context *mq[BCOL_IBOFFLOAD_MQ_NUM];
-    int mq_credit[BCOL_IBOFFLOAD_MQ_NUM];
-
-    /* pending list of collfrags */
-    opal_list_t collfrag_pending;
-
-    /* recursive-doubling tree node */
-    netpatterns_pair_exchange_node_t recursive_doubling_tree;
-
-    /* N exchange tree */
-    netpatterns_pair_exchange_node_t n_exchange_tree;
-
-    /* Knomial exchange tree */
-    netpatterns_k_exchange_node_t knomial_exchange_tree;
-
-    /* Knomial allgather tree */
-    netpatterns_k_exchange_node_t knomial_allgather_tree;
-
-    /* This array keeps pre-calculated task consumption per
-     * algorithm
-     */
-    uint32_t alg_task_consump[LAST_ALG];
-
-    /* Pointer to a function that implements a barrier algorithm */
-    mca_bcol_iboffload_coll_algth_fn_t barrier_algth;
-
-    /* Pointer to a function that implements a fanin algorithm */
-    mca_bcol_iboffload_coll_algth_fn_t fanin_algth;
-
-    /* Pointer to a function that implements a fanout algorithm */
-    mca_bcol_iboffload_coll_algth_fn_t fanout_algth;
-
-    /* Pointer to a function that implements an allreduce algorithm */
-    mca_bcol_iboffload_coll_algth_fn_t allreduce_algth;
-
-    /* Pointer to a function that implements a non-blocking memory synchronization algorithm */
-    mca_bcol_iboffload_coll_algth_fn_t memsync_algth;
-
-    /* rdma block memory information */
-    mca_bcol_iboffload_local_rdma_block_t rdma_block;
-
-    /* The largest power of two for which 1 << power_of_2
-       is not larger than the group size */
-
int power_of_2; - - /* The largest power of two number which is not larger than the group size */ - int power_of_2_ranks; - - /* Connection status array */ - bool connection_status[LAST_ALG]; - - /* map from communicator ranks to ibsubnet */ - int *comm_to_ibnet_map; - - /* order preserving value */ - int64_t prev_sequence_num; - - /* Temp iovec to send the data fragments -- alltoall Brucks */ - struct iovec *alltoall_iovec; - struct iovec *alltoall_recv_iovec; - - /* tree radix for the knomial bruck small data alltoall */ - int k_alltoall_bruck_radix; - - /* Temp buffer alignment for knomial bruck small data alltoall */ - int tmp_buf_alignment; - - /* Free task list with sge's array */ - ompi_free_list_t iovec_tasks_free; -}; - -typedef struct mca_bcol_iboffload_module_t mca_bcol_iboffload_module_t; -OBJ_CLASS_DECLARATION(mca_bcol_iboffload_module_t); - -/** - * Global component instance - */ -OMPI_MODULE_DECLSPEC - extern mca_bcol_iboffload_component_t mca_bcol_iboffload_component; - -static inline int mca_bcol_iboffload_err(const char* fmt, ...) -{ - va_list list; - int ret; - - va_start(list, fmt); - ret = vfprintf(stderr, fmt, list); - va_end(list); - return ret; -} - -#define MCA_BCOL_IBOFFLOAD_ALLREDUCE_DO_CALC(ompi_op, c_type, l_operand, r_operand, result) \ -do { \ - switch (ompi_op) { \ - case OMPI_OP_MAX: \ - *((c_type *)&result) = ((*(c_type *)&(l_operand) > *(c_type *)&(r_operand)) ? \ - *(c_type *)&(l_operand) : *(c_type *)&(r_operand)); \ - break; \ - case OMPI_OP_MIN: \ - *((c_type *)&result) = ((*(c_type *)&(l_operand) < *(c_type *)&(r_operand)) ? \ - *(c_type *)&(l_operand) : *(c_type *)&(r_operand)); \ - break; \ - case OMPI_OP_SUM: \ - *((c_type *)&result) = (*((c_type *)&(l_operand)) + *((c_type *)&(r_operand))); \ - break; \ - default: \ - break; \ - } \ -} while (0); - -#define MCA_BCOL_IBOFFLOAD_PKEY_MASK 0x7fff -#define MCA_BCOL_IBOFFLOAD_DEFAULT_GID_PREFIX 0xfe80000000000000ll - -#define IBOFFLOAD_ERROR(args) \ - do { \ - mca_bcol_iboffload_err("[%s]%s[%s:%d:%s] IBOFFLOAD ", \ - ompi_process_info.nodename, \ - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \ - __FILE__, __LINE__, __func__); \ - mca_bcol_iboffload_err args; \ - mca_bcol_iboffload_err("\n"); \ - } while(0) - -#if OPAL_ENABLE_DEBUG -#define IBOFFLOAD_VERBOSE(level, args) \ - do { \ - if (mca_bcol_iboffload_component.verbose >= level) { \ - mca_bcol_iboffload_err("[%s]%s[%s:%d:%s] IBOFFLOAD ", \ - ompi_process_info.nodename, \ - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \ - __FILE__, __LINE__, __func__); \ - mca_bcol_iboffload_err args; \ - mca_bcol_iboffload_err("\n"); \ - } \ - } while(0) -#else -#define IBOFFLOAD_VERBOSE(level, args) -#endif - -#define MCA_BCOL_IBOFFLOAD_SET_COLL_REQ_LINKS(coll_req, coll_work_req) \ - do { \ - opal_list_append(&(coll_req)->work_requests, \ - (opal_list_item_t*) (coll_work_req)); \ - (coll_work_req)->coll_full_req = (coll_req); \ - } while(0) -/* Vasily: will be removed soon */ -#define APPEND_TO_TASKLIST(task_ptr_to_set, event, last_event_type) \ - do { \ - *task_ptr_to_set = &(event)->element; \ - last_event_type = &(event)->element; \ - task_ptr_to_set = &((event)->element.next); \ - } while(0) - -#define MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(task_ptr_to_set, task) \ - do { \ - *task_ptr_to_set = (task); \ - task_ptr_to_set = &((task)->next_task); \ - } while(0) - -#define MCA_BCOL_IBOFFLOAD_APPEND_MQ_TASK_TO_LIST(task_ptr_to_set, event) \ - do { \ - *task_ptr_to_set = &(event)->element; \ - task_ptr_to_set = &((event)->element.next); \ - } while(0) - -#define 
BCOL_IS_COMPLETED(req) (((req)->n_frag_mpi_complete == (req)->n_fragments) && \
-                                ((req)->n_fragments > 0))
-
-#define BCOL_AND_NET_ARE_COMPLETED(req) (BCOL_IS_COMPLETED(req) && \
-                                ((req)->n_frag_net_complete == (req)->n_fragments))
-
-/* Pasha: Need to add locks here */
-#define BCOL_IBOFFLOAD_MQ_HAVE_CREDITS(module, mq_index, num_of_credits) \
-    (((module)->mq_credit[mq_index] -= (num_of_credits)) < 0 ? false : true)
-/* Pasha: Need to add locks here */
-#define BCOL_IBOFFLOAD_MQ_RETURN_CREDITS(module, mq_index, num_of_credits) \
-    ((module)->mq_credit[mq_index] += (num_of_credits))
-
-#define BCOL_IBOFFLOAD_IS_FIRST_CALL(args) (0 == (args)->index_in_consecutive_same_bcol_calls)
-
-#define BCOL_IBOFFLOAD_IS_LAST_CALL(args) (((args)->n_of_this_type_in_collective - 1) == \
-                                            (args)->index_of_this_type_in_collective)
-
-#define BCOL_IBOFFLOAD_READY_TO_POST(args) (((args)->n_of_this_type_in_a_row - 1) == \
-                                             (args)->index_in_consecutive_same_bcol_calls)
-/*
- * bcol module functions
- */
-
-int mca_bcol_iboffload_rec_doubling_start_connections(struct mca_bcol_iboffload_module_t *iboffload);
-
-/* RDMA addr exchange with a remote proc */
-int mca_bcol_iboffload_exchange_rem_addr(struct mca_bcol_iboffload_endpoint_t *ep);
-
-/* Progress function */
-int mca_bcol_iboffload_component_progress(void);
-
-/* Register memory */
-int mca_bcol_iboffload_register_mr(void *reg_data, void * base, size_t size,
-                                   mca_mpool_base_registration_t *reg);
-
-/* Deregister memory */
-int mca_bcol_iboffload_deregister_mr(void *reg_data, mca_mpool_base_registration_t *reg);
-
-/*
- * This function is used to create a CQ for this module.
- */
-int mca_bcol_iboffload_adjust_cq(struct mca_bcol_iboffload_device_t *device,
-                                 struct ibv_cq **ib_cq);
-/*
- * Query to see if the component is available for use,
- * and can satisfy the thread and progress requirements
- */
-int mca_bcol_iboffload_init_query(bool enable_progress_threads,
-                                  bool enable_mpi_threads);
-
-
-/* Interface to setup the allgather tree */
-int mca_bcol_iboffload_setup_knomial_tree(mca_bcol_base_module_t *super);
-
-/*
- * Query to see if the module is available for use on
- * the given communicator, and if so, what its priority is.
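/* Illustrative sketch (editor's addition): the intended usage pattern of the
 * MQ credit macros above. BCOL_IBOFFLOAD_MQ_HAVE_CREDITS debits the credits
 * as a side effect, so on failure the same amount has to be returned before
 * the fragment is queued for retry - the same net effect the pending path in
 * the real code must achieve. The function name and credit count are
 * hypothetical. */
static int example_try_post(mca_bcol_iboffload_module_t *module, int mq_credits)
{
    if (false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS(module, COLL_MQ, mq_credits)) {
        /* HAVE_CREDITS already subtracted, so give the credits back */
        BCOL_IBOFFLOAD_MQ_RETURN_CREDITS(module, COLL_MQ, mq_credits);
        return OMPI_ERR_TEMP_OUT_OF_RESOURCE; /* caller moves frag to pending */
    }

    /* ... build and post the MQE task list here ... */
    return OMPI_SUCCESS;
}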
- */ -mca_bcol_base_module_t ** -mca_bcol_iboffload_comm_query(mca_sbgp_base_module_t *sbgp, int *num_modules); - -int -mca_bcol_iboffload_free_tasks_frags_resources( - struct mca_bcol_iboffload_collfrag_t *collfrag, - ompi_free_list_t *frags_free); - -/** - * Shared memory blocking barrier - */ - -int mca_bcol_iboffload_small_msg_bcast_intra(bcol_function_args_t *fn_arguments, - struct mca_bcol_base_function_t - *const_args); - -int mca_bcol_iboffload_barrier_intra_recursive_doubling_start( - struct mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request); - -int mca_bcol_iboffload_barrier_intra_recursive_knomial_start( - struct mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request); - -int mca_bcol_iboffload_barrier_intra_recursive_doubling( - mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request); - -int mca_bcol_iboffload_nb_memory_service_barrier_start( - struct mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request); - -int mca_bcol_iboffload_fanin_register(mca_bcol_base_module_t *super); -int mca_bcol_iboffload_fanout_register(mca_bcol_base_module_t *super); -int mca_bcol_iboffload_barrier_register(mca_bcol_base_module_t *super); -int mca_bcol_iboffload_memsync_register(mca_bcol_base_module_t *super); -int mca_bcol_iboffload_allreduce_register(mca_bcol_base_module_t *super); - -int mca_bcol_iboffload_new_style_fanin_first_call( - mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request); - -int mca_bcol_iboffload_new_style_fanout_first_call( - mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request); - -int mca_bcol_iboffload_nb_memory_service_barrier_intra(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - -int mca_bcol_iboffload_coll_support_all_types(bcol_coll coll_name); -int mca_bcol_iboffload_coll_supported(int op, int dtype, bcol_elem_type elem_type); - -static inline __opal_attribute_always_inline__ int - mca_bcol_iboffload_fls(int num) -{ - int i = 1; - int j = 0; - - if (0 == num) { - return 0; - } - - while (i < num) { - i <<= 1; - j++; - } - - if (i > num) { - j--; - } - - return j; -} - -#define BCOL_IBOFFLOAD_IS_EVEN(num) (!((num) & 1)) -static inline __opal_attribute_always_inline__ int - mca_bcol_iboffload_ffs(int num) -{ - int j = 0; - - if (0 == num) { - return 0; - } - - while (BCOL_IBOFFLOAD_IS_EVEN(num)) { - num >>= 1; - j++; - } - - return j; -} - -#if OPAL_ENABLE_DEBUG - -/* Post task list MQ */ -#define IS_IMM(a) (a & MQE_WR_FLAG_IMM_EXE) -#define IS_SIG(a) (a & MQE_WR_FLAG_SIGNAL) -#define IS_BLK(a) (a & MQE_WR_FLAG_BLOCK) - -int task_to_rank(mca_bcol_iboffload_module_t *iboffload, struct mqe_task *task); -int wait_to_rank(mca_bcol_iboffload_module_t *iboffload, struct mqe_task *task); - -#endif - -/* MQ posting function */ -static inline __opal_attribute_always_inline__ int - mca_bcol_iboffload_post_mqe_tasks( - mca_bcol_iboffload_module_t *iboffload, - struct mqe_task *head_mqe) -{ - int rc; - struct mqe_task *bad_mqe = NULL; - -#if OPAL_ENABLE_DEBUG /* debug code */ - - struct mqe_task *curr_mqe_task = NULL; - int send_count = 0, recv_count = 0, wait_count = 0; - - curr_mqe_task = head_mqe; - IBOFFLOAD_VERBOSE(10, ("Processing MQE Head with addr %p \n", - (uintptr_t) (void*) curr_mqe_task)); - - while (NULL != curr_mqe_task) { - switch(curr_mqe_task->opcode) { - case MQE_WR_SEND: - IBOFFLOAD_VERBOSE(10, 
("Posting task %p id 0x%x: send on QP 0x%x\n" - "rank %d, sg_entry: addr %p LEN %d lkey %u, flag[%d-%d-%d]\n", - (void*) curr_mqe_task, (uintptr_t) curr_mqe_task->wr_id, - curr_mqe_task->post.qp->qp_num, - task_to_rank(iboffload, curr_mqe_task), - curr_mqe_task->post.send_wr->sg_list->addr, - curr_mqe_task->post.send_wr->sg_list->length, - curr_mqe_task->post.send_wr->sg_list->lkey, - IS_IMM(curr_mqe_task->flags), IS_SIG(curr_mqe_task->flags), IS_BLK(curr_mqe_task->flags))); - - ++send_count; - break; - case MQE_WR_RECV: - IBOFFLOAD_VERBOSE(10, ("Posting task %p id 0x%x: recv on QP 0x%x rank %d flag[%d-%d-%d]\n", - (void*) curr_mqe_task, (uintptr_t) curr_mqe_task->wr_id, - curr_mqe_task->post.qp->qp_num, task_to_rank(iboffload, curr_mqe_task), - IS_IMM(curr_mqe_task->flags), IS_SIG(curr_mqe_task->flags), IS_BLK(curr_mqe_task->flags))); - - ++recv_count; - break; - case MQE_WR_CQE_WAIT: - - IBOFFLOAD_VERBOSE(10, ("Posting task %p id %x: wait on CQ %p for rank %d num of waits %d flag[%d-%d-%d]\n", - (void*) curr_mqe_task, (uintptr_t) curr_mqe_task->wr_id, - (void*) curr_mqe_task->wait.cq, wait_to_rank(iboffload, curr_mqe_task), - curr_mqe_task->wait.count, - IS_IMM(curr_mqe_task->flags), IS_SIG(curr_mqe_task->flags), IS_BLK(curr_mqe_task->flags))); - - wait_count += curr_mqe_task->wait.count; - break; - default: - IBOFFLOAD_ERROR(("Fatal error, unknow packet type %d\n", - curr_mqe_task->opcode)); - return OMPI_ERROR; - } - - /* pointer to next task */ - curr_mqe_task = curr_mqe_task->next; - } - - IBOFFLOAD_VERBOSE(10, ("wait[%d] send[%d] recv[%d]\n", - wait_count, send_count, recv_count)); -#endif - - IBOFFLOAD_VERBOSE(10, ("Posting MQ %p \n", (uintptr_t) head_mqe->wr_id)); - - rc = mqe_post_task(iboffload->mq[0], head_mqe, &bad_mqe); - if (OPAL_UNLIKELY(0 != rc)) { - IBOFFLOAD_ERROR(("ibv_post_mqe failed, errno says: %s," - " the return code is [%d]\n", - strerror(errno), rc)); - - return OMPI_ERROR; - } - - return OMPI_SUCCESS; -} - -static inline __opal_attribute_always_inline__ - int lognum(int n) { - int count = 1, lognum = 0; - - while (count < n) { - count = count << 1; - lognum++; - } - - return lognum; -} - -END_C_DECLS - -#endif /* MCA_BCOL_IBOFFLOAD_H */ - diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_allgather.c b/ompi/mca/bcol/iboffload/bcol_iboffload_allgather.c deleted file mode 100644 index 28140e5bb7..0000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_allgather.c +++ /dev/null @@ -1,1388 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#include -#include -#include -#include -#include "opal_stdint.h" - -#include "bcol_iboffload.h" -#include "bcol_iboffload_alltoall.h" -#include "bcol_iboffload_bcast.h" -#include "bcol_iboffload_frag.h" -#include "bcol_iboffload_task.h" -#include "bcol_iboffload_collreq.h" -#include "bcol_iboffload_collfrag.h" -#include "bcol_iboffload_endpoint.h" - -#include "opal/include/opal/types.h" - -static int mca_bcol_iboffload_allgather_init( - bcol_function_args_t *fn_arguments, - mca_bcol_iboffload_module_t *iboffload_module, - mca_bcol_iboffload_collreq_t **coll_request, - bool if_bcol_last, int mq_credits, - collective_message_progress_function progress_fn) -{ - int rc; - - ompi_free_list_item_t *item; - mca_bcol_iboffload_collfrag_t *coll_fragment; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - OMPI_FREE_LIST_WAIT(&cm->collreqs_free, item, rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_ERROR(("Wait for free list failed.\n")); - return rc; - } - /* setup call request */ - (*coll_request) = (mca_bcol_iboffload_collreq_t *) item; - - (*coll_request)->n_fragments = 0; - (*coll_request)->n_frags_sent = 0; - (*coll_request)->n_frag_mpi_complete = 0; - (*coll_request)->n_frag_net_complete = 0; - (*coll_request)->if_bcol_last = if_bcol_last; - (*coll_request)->ml_buffer_index = fn_arguments->buffer_index; - (*coll_request)->completion_cb_fn = NULL; - (*coll_request)->buffer_info[SBUF].buf = (void *) ( - (unsigned char *)fn_arguments->sbuf + - fn_arguments->sbuf_offset); - (*coll_request)->buffer_info[RBUF].buf = (void *) ( - (unsigned char *)fn_arguments->rbuf + - fn_arguments->rbuf_offset); - (*coll_request)->buffer_info[SBUF].offset = fn_arguments->sbuf_offset; - (*coll_request)->buffer_info[RBUF].offset = fn_arguments->rbuf_offset; - /* seems like we should initialize the memory registration pointer to NULL here */ - (*coll_request)->buffer_info[SBUF].iboffload_reg = NULL; - (*coll_request)->buffer_info[RBUF].iboffload_reg = NULL; - (*coll_request)->dtype = fn_arguments->dtype; - (*coll_request)->count = fn_arguments->count; - (*coll_request)->module = iboffload_module; - /* TODO Pasha: we need it for pending quque. Set it later. */ - (*coll_request)->progress_fn = progress_fn; - /* TODO Pasha: fix it later */ - (*coll_request)->qp_index = MCA_BCOL_IBOFFLOAD_QP_BARRIER; - - (*coll_request)->order_info = &fn_arguments->order_info; - - coll_fragment = &((*coll_request)->first_collfrag); - mca_bcol_iboffload_collfrag_init(coll_fragment); - - /** Vasily ????? */ - /* mq_credits = (*coll_request)->total_tasks_num; */ - coll_fragment->mq_credits = mq_credits; - coll_fragment->mq_index = COLL_MQ; - /* pasha: just set it to zero */ - coll_fragment->last_wait_num = 0; - coll_fragment->alg = -2; /* used only for debug */ - /* - if (my_rank == algthm_ptr->root) { - coll_fragment->last_wait_num = 0; - } else { - coll_fragment->last_wait_num = algth_lst->last_wait_num; - } - */ - /* Pasha: we have nothing to unpack */ - coll_fragment->unpack_size = 0; - /* coll_fragment->unpack_size = pack_len; */ - /* coll_fragment->alg = RECURSIVE_DOUBLING_TREE_BCAST; */ - - /* set pointers for (coll frag) <-> (coll full request) */ - (*coll_request)->user_handle_freed = false; - - fn_arguments->bcol_opaque_data = (void *) (*coll_request); - /* We don't have root.. 
- if (true == fn_arguments->root_flag) { - (*coll_request)->root = my_group_index; - } else { - (*coll_request)->root = fn_arguments->root_route->rank; - } - */ - - MCA_BCOL_IBOFFLOAD_SET_COLL_REQ_LINKS((*coll_request), coll_fragment); - return OMPI_SUCCESS; -} - -#if 1 -static inline void bcol_iboffload_setup_allgather_endpoints_connection(mca_bcol_iboffload_module_t *iboffload) -{ - int i, j; - /*Seems that we don't require this*/ - netpatterns_k_exchange_node_t *exchange_node = &iboffload->knomial_allgather_tree; - - mca_bcol_iboffload_endpoint_t *ep; - - IBOFFLOAD_VERBOSE(10, ("Open connections.\n")); -#if 0 - fprintf(stderr,"Entering Open Connections\n"); -#endif - - /* start with extras and proxy connections */ - if(exchange_node->n_extra_sources > 0) { - /* connect to endpoint */ - /*ep = iboffload->endpoints[comm_to_ibnet[exchange_node->rank_extra_sources_array[0]]];*/ - ep = iboffload->endpoints[exchange_node->rank_extra_sources_array[0]]; - while (OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - } - /* now move through the recursive k-ing exchanges */ - if(NULL != exchange_node->rank_exchanges) { - for( i = 0; i < exchange_node->log_tree_order; i++) { - for( j = 0; j < ( exchange_node->tree_order - 1 ); j++) { - if( exchange_node->rank_exchanges[i][j] < 0 ){ - continue; - } - /* connect to endpoint */ - /*ep = iboffload->endpoints[comm_to_ibnet[exchange_node->rank_exchanges[i][j]]];*/ - ep = iboffload->endpoints[exchange_node->rank_exchanges[i][j]]; - if (iboffload->ibnet->super.my_index < ep->index) { - while(0 == (ep)->remote_zero_rdma_addr.addr) { - opal_progress(); - } - } else { - IBOFFLOAD_VERBOSE(10, ("Trying to connect - %d", ep->index)); - while (OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - } - - } - } - } - - /* set the connection status to connected */ - iboffload->connection_status[ALLGATHER_KNOMIAL_ALG] = true; -} -#endif - - -static inline void bcol_iboffload_setup_allgather_ring_endpoints_connection(mca_bcol_iboffload_module_t *iboffload) -{ - int i; - const int group_size = iboffload->ibnet->super.group_size; - mca_bcol_iboffload_endpoint_t *ep; - - IBOFFLOAD_VERBOSE(10, ("Open connections.\n")); - - /* this is algorithm specific - need to move through the algorithm here basically to set up connections, should be - * - */ - - /* I'm going to leave this alone for now, because I'm - * not sure how these endpoints map back to ibnet. Is it mapped to ibnet ids or to communicator ids? 
- */ - for (i = 0; i < group_size; i++) { - ep = iboffload->endpoints[i]; - while (OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - } - - /* set the connection status to connected */ - - /*JSL - change this macro */ - iboffload->connection_status[ALLGATHER_NEIGHBOR_ALG] = true; -} - -#if 0 -/* allgather neighbor exchange algorithm N/2 communication steps, 2 connections */ -static int mca_bcol_iboffload_neighbor_allgather_userbuffer_exec(mca_bcol_iboffload_module_t *iboffload_module, - mca_bcol_iboffload_collreq_t *coll_request) -{ - int rc, - src, dst; - - uint32_t pack_len; - int my_group_index = iboffload_module->super.sbgp_partner_module->my_index; - int group_size = iboffload_module->group_size; - int step, roffset, soffset; - int neighbor[2], offset_at_step[2], recv_data_from[2], send_data_from; - int even_rank; - int parity; - - struct mqe_task *last_send = NULL, - *last_wait = NULL; - mca_bcol_iboffload_collfrag_t *coll_fragment = &coll_request->first_collfrag; - -#if 0 - fprintf(stderr,"entering large msg neighbor exchange allgather\n"); -#endif - IBOFFLOAD_VERBOSE(10,("Entering large msg iboffload allgather")); - if (OPAL_UNLIKELY(!iboffload_module->connection_status[ALLGATHER_NEIGHBOR_ALG])) { - IBOFFLOAD_VERBOSE(10,("Allgather open new connection ")); - bcol_iboffload_setup_allgather_ring_endpoints_connection(iboffload_module); - } - - pack_len = coll_request->count * coll_request->dtype->super.size; - IBOFFLOAD_VERBOSE(10,("My packet length %d pack_len frag_count %d dtype size %d ", - pack_len, - coll_request->count, - coll_request->dtype->super.size)); - - /* register send and receive sides */ - /* send side, only sending pack_len data */ - - /* I think that probably I will only register the rbuf */ - /* on receive side I need to register pack_len*group_size data */ - rc = mca_bcol_iboffload_prepare_buffer(coll_request->buffer_info[RBUF].buf, pack_len * group_size, - &coll_request->buffer_info[RBUF].iboffload_reg, iboffload_module); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_ERROR(("Cannot register memory: " - "addr - %p, %d bytes.\n", - coll_request->buffer_info[RBUF].buf, pack_len)); - return OMPI_ERROR; - } - coll_request->buffer_info[RBUF].lkey = coll_request->buffer_info[RBUF].iboffload_reg->mr->lkey; - - /* it is estimated mq consumption... 
*/ - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload_module, coll_fragment->mq_index, coll_fragment->mq_credits))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - goto out_of_resources; - } - - coll_fragment->tail_next = &coll_fragment->to_post; - - - /* start the neighbor exchange */ - - even_rank = !(my_group_index % 2); - if (even_rank) { - neighbor[0] = (my_group_index + 1) % group_size; - neighbor[1] = (my_group_index - 1 + group_size) % group_size; - recv_data_from[0] = my_group_index; - recv_data_from[1] = my_group_index; - offset_at_step[0] = (+2); - offset_at_step[1] = (-2); - } else { - neighbor[0] = (my_group_index - 1 + group_size) % group_size; - neighbor[1] = (my_group_index + 1) % group_size; - recv_data_from[0] = neighbor[0]; - recv_data_from[1] = neighbor[0]; - offset_at_step[0] = (-2); - offset_at_step[1] = (+2); - } - - /* first step is special step, only send one block */ - roffset = neighbor[0]*pack_len; - soffset = my_group_index*pack_len; - /* send receive this */ - - dst = neighbor[0]; - src = neighbor[0]; - - rc = mca_bcol_iboffload_send_rtr_setup(&last_send, - src, iboffload_module, - coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to mca_bcol_iboffload_send_rtr_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - - - rc = mca_bcol_iboffload_recv_rtr_setup( - &last_wait, dst, iboffload_module, coll_fragment); - /* send the data */ - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - "mca_bcol_iboffload_recv_rtr_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - - rc = mca_bcol_iboffload_send_large_buff_setup( - &last_send, RBUF, - coll_request->buffer_info[RBUF].offset + - soffset/* offset calc */ , - pack_len, dst, - iboffload_module, coll_fragment); - - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - "mca_bcol_iboffload_send_large_buff_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - /* send is done */ - - - - rc = mca_bcol_iboffload_recv_large_buff_setup(&last_wait, RBUF, - coll_request->buffer_info[RBUF].offset + - roffset, - pack_len, src, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to mca_bcol_iboffload_recv_large_buff_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - - /* now for the actual neighbor exchange algorithm */ - - - /* determine initial send location */ - if(even_rank) { - send_data_from = my_group_index; - }else { - send_data_from = recv_data_from[0]; - } - for( step = 1; step < (group_size/2); step++) { - - parity = step % 2; - recv_data_from[parity] = - (recv_data_from[parity] + offset_at_step[parity] + group_size) % group_size; - src = neighbor[parity]; - dst = src; - - roffset = recv_data_from[parity] * pack_len; - soffset = send_data_from * pack_len; - - /* post send rtr and recev rtr together */ - if( 1 == step ){ - rc = mca_bcol_iboffload_send_rtr_setup(&last_send, - src, iboffload_module, - coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to mca_bcol_iboffload_send_rtr_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - - rc = mca_bcol_iboffload_recv_rtr_setup( - &last_wait, dst, 
iboffload_module, coll_fragment); - /* send the data */ - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - "mca_bcol_iboffload_recv_rtr_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - } - - - /* I'm using the hierarchy offset used in the k-nomial allgather */ - /* this won't work...*/ - rc = mca_bcol_iboffload_send_large_buff_setup( - &last_send, RBUF, - coll_request->buffer_info[RBUF].offset + - soffset/* offset calc */ , - 2 * pack_len, dst, - iboffload_module, coll_fragment); - - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - "mca_bcol_iboffload_send_large_buff_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - /* send is done */ - - - rc = mca_bcol_iboffload_recv_large_buff_setup(&last_wait, RBUF, - coll_request->buffer_info[RBUF].offset + - roffset, - 2 * pack_len, src, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to mca_bcol_iboffload_recv_large_buff_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - send_data_from = recv_data_from[parity]; - - } - - /* end of list */ - *coll_fragment->tail_next = NULL; - - /* finish initializing full message descriptor */ - (coll_request)->n_fragments = 1; - (coll_request)->n_frags_sent = 1; - - assert(NULL != last_wait); - last_wait->flags |= MQE_WR_FLAG_SIGNAL; - coll_fragment->signal_task_wr_id = last_wait->wr_id; - last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment; - - assert(MCA_COLL_ML_NO_BUFFER == coll_request->ml_buffer_index); - /* post the mwr */ - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload_module, coll_fragment->to_post); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload_module->super, coll_request->order_info); - - IBOFFLOAD_VERBOSE(10, ("Return success.\n")); - return BCOL_FN_STARTED; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Allgather, adding collfrag to collfrag_pending.\n")); - rc = - mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload_module); - return (OMPI_SUCCESS != rc) ? BCOL_FN_NOT_STARTED : BCOL_FN_STARTED; -} -#endif - -#if 0 -/* debug connection routine */ -static inline void bcol_iboffload_setup_allgather_endpoints_connection(mca_bcol_iboffload_module_t *iboffload) -{ - int i; - const int group_size = iboffload->ibnet->super.group_size; - mca_bcol_iboffload_endpoint_t *ep; - - IBOFFLOAD_VERBOSE(10, ("Open connections.\n")); - - /* this is algorithm specific - need to move through the algorithm here basically to set up connections, should be - * - */ - - /* I'm going to leave this alone for now, because I'm - * not sure how these endpoints map back to ibnet. Is it mapped to ibnet ids or to communicator ids? 
- */ - for (i = 0; i < group_size; i++) { - ep = iboffload->endpoints[i]; - while (OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - } - - /* set the connection status to connected */ - - /*JSL - change this macro */ - iboffload->connection_status[ALLGATHER_KNOMIAL_ALG] = true; -} -#endif - -static int mca_bcol_iboffload_k_nomial_allgather_userbuffer_exec(mca_bcol_iboffload_module_t *iboffload_module, - mca_bcol_iboffload_collreq_t *coll_request) -{ - int rc, - src, dst, comm_dst, comm_src; - int tree_order, pow_k, i, j; - - uint32_t pack_len; - int my_group_index = iboffload_module->super.sbgp_partner_module->my_index; - int group_size = iboffload_module->group_size; - int *group_list = iboffload_module->super.sbgp_partner_module->group_list; - int my_comm_index = group_list[my_group_index]; - - netpatterns_k_exchange_node_t *exchange_node = &iboffload_module->knomial_allgather_tree; - - struct mqe_task *last_send = NULL, - *last_wait = NULL; - mca_bcol_iboffload_collfrag_t *coll_fragment = &coll_request->first_collfrag; - -#if 0 - fprintf(stderr,"entering large msg allgather\n"); -#endif - IBOFFLOAD_VERBOSE(10,("Entering large msg iboffload allgather")); - if (OPAL_UNLIKELY(!iboffload_module->connection_status[ALLGATHER_KNOMIAL_ALG])) { - IBOFFLOAD_VERBOSE(10,("Allgather open new connection ")); - bcol_iboffload_setup_allgather_endpoints_connection(iboffload_module); - } - - pack_len = coll_request->count * coll_request->dtype->super.size; - IBOFFLOAD_VERBOSE(10,("My packet length %d pack_len frag_count %d dtype size %d ", - pack_len, - coll_request->count, - coll_request->dtype->super.size)); - - /* register send and receive sides */ - /* send side, only sending pack_len data */ - - /* I think that probably I will only register the rbuf */ - /* on receive side I need to register pack_len*group_size data */ - - rc = mca_bcol_iboffload_prepare_buffer(coll_request->buffer_info[RBUF].buf, pack_len * group_size, - &coll_request->buffer_info[RBUF].iboffload_reg, iboffload_module); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_ERROR(("Cannot register memory: " - "addr - %p, %d bytes.\n", - coll_request->buffer_info[RBUF].buf, pack_len)); - return OMPI_ERROR; - } - coll_request->buffer_info[RBUF].lkey = coll_request->buffer_info[RBUF].iboffload_reg->mr->lkey; - - /* it is estimated mq consumption... 
*/ - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload_module, coll_fragment->mq_index, coll_fragment->mq_credits))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - goto out_of_resources; - } - - coll_fragment->tail_next = &coll_fragment->to_post; - - /* start with the extra / proxy phase */ - if( EXTRA_NODE == exchange_node->node_type ) { - - - /* send pack_len data to proxy */ - comm_dst = exchange_node->rank_extra_sources_array[0]; - /* get ib subnet id */ - dst = comm_dst; /* comm_to_ibnet[comm_dst];*/ - /* post ready-to-receive receive on sender's side */ - rc = mca_bcol_iboffload_recv_rtr_setup( - &last_wait, dst, iboffload_module, coll_fragment); - - /* send the data */ - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - "mca_bcol_iboffload_recv_rtr_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - - rc = mca_bcol_iboffload_send_large_buff_setup( - &last_send, RBUF, coll_request->buffer_info[RBUF].offset + my_comm_index*pack_len, - pack_len, dst, - iboffload_module, coll_fragment); - - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - "mca_bcol_iboffload_send_large_buff_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - /* send is done */ - - /* post the receive */ - comm_src = comm_dst; - src = dst; - /* Sending this results in a race condition where if the rtr send bypasses - the large msg receive on proxy's side, then it triggers the start of the - recurssive k-ing phase prematurely causing random data corruption. - */ - /* - rc = mca_bcol_iboffload_send_rtr_setup(&last_send, - src, iboffload_module, - coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to mca_bcol_iboffload_send_rtr_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - */ - rc = mca_bcol_iboffload_recv_large_buff_setup(&last_wait, - RBUF, coll_request->buffer_info[RBUF].offset, - pack_len*group_size, src, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to mca_bcol_iboffload_recv_large_buff_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - - goto FINISHED; - - - } else if( 0 < exchange_node->n_extra_sources ) { - - /* am a proxy, receive pack_len data from extra */ - comm_src = exchange_node->rank_extra_sources_array[0]; - /* get ib subnet */ - src = comm_src; /*comm_to_ibnet[comm_src];*/ - - rc = mca_bcol_iboffload_send_rtr_setup(&last_send, - src, iboffload_module, - coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to mca_bcol_iboffload_send_rtr_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - - - rc = mca_bcol_iboffload_recv_large_buff_setup(&last_wait, - RBUF, coll_request->buffer_info[RBUF].offset + pack_len*comm_src, - pack_len, src, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to mca_bcol_iboffload_recv_large_buff_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - - } - - /* start recursive k - ing */ - tree_order = exchange_node->tree_order; - pow_k = exchange_node->log_tree_order; - for( i = 0; i < pow_k; i++) { - - - /* Post ready-to-recv 
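The commented-out send_rtr_setup above records a genuine ordering hazard: if the extra rank's RTR overtakes the proxy's posted large-message receive, the proxy's recursive k-ing phase starts prematurely and reads stale data. The surviving protocol therefore always arms the receive before releasing the sender. A schematic of that ordering, with hypothetical helpers in place of the iboffload task builders (the real code expresses the same constraint as MQE task dependencies):

#include <stddef.h>

/* Hypothetical helpers standing in for the iboffload task builders. */
extern void post_large_recv(void *buf, size_t len, int peer);
extern void send_rtr(int peer);
extern void wait_rtr(int peer);
extern void send_large(const void *buf, size_t len, int peer);

static void receiver_side(void *rbuf, size_t len, int peer)
{
    post_large_recv(rbuf, len, peer);  /* arm the receive first       */
    send_rtr(peer);                    /* ...only then release sender */
}

static void sender_side(const void *sbuf, size_t len, int peer)
{
    wait_rtr(peer);                    /* never send before the RTR   */
    send_large(sbuf, len, peer);
}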
messages - I am here */ - for( j = 0; j <( tree_order - 1); j++) { - comm_src = exchange_node->rank_exchanges[i][j]; - if( comm_src < 0 ){ - continue; - } - /* get ib subnet */ - src = comm_src; /*comm_to_ibnet[comm_src];*/ - - rc = mca_bcol_iboffload_send_rtr_setup(&last_send, - src, iboffload_module, - coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to mca_bcol_iboffload_send_rtr_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - } - - /* Post receive ready-to-recev message - I can send to you */ - for( j = 0; j < (tree_order - 1); j++) { - /* recev ready-to-receive message */ - comm_dst = exchange_node->rank_exchanges[i][j]; - /* remember, if we have extra ranks, then we won't participate - * with a least one peer. Make a check: - */ - if( comm_dst < 0 ){ - continue; - } - - /* get ib subnet id */ - dst = comm_dst; /*comm_to_ibnet[comm_dst];*/ - /* post ready-to-receive receive on sender's side */ - rc = mca_bcol_iboffload_recv_rtr_setup( - &last_wait, dst, iboffload_module, coll_fragment); - /* send the data */ - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - "mca_bcol_iboffload_recv_rtr_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - } - - - /* (k-1) sends */ - for( j = 0; j < (tree_order - 1); j++ ) { - - /* send phase - */ - comm_dst = exchange_node->rank_exchanges[i][j]; - /* remember, if we have extra ranks, then we won't participate - * with a least one peer. Make a check - */ - if( comm_dst < 0 ){ - continue; - } - - /* get ib subnet id */ - dst = comm_dst; /*comm_to_ibnet[comm_dst];*/ - rc = mca_bcol_iboffload_send_large_buff_setup( - &last_send, RBUF, - coll_request->buffer_info[RBUF].offset + pack_len*exchange_node->payload_info[i][j].s_offset/* offset calc */ , - exchange_node->payload_info[i][j].s_len*pack_len, dst, - iboffload_module, coll_fragment); - - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - "mca_bcol_iboffload_send_large_buff_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - /* send is done */ - - } - - /* we post receives after all sends in order to achieve concurrent - * sends as well as assuring blocking until completely receiving - * all data at level k before starting level k+1 sends - */ - /* (k-1) receives - these are blocking */ - for( j = 0; j < (tree_order - 1); j++) { - /*recv phase */ - comm_src = exchange_node->rank_exchanges[i][j]; - if( comm_src < 0 ){ - continue; - } - /* get ib subnet */ - src = comm_src; /*comm_to_ibnet[comm_src];*/ - - rc = mca_bcol_iboffload_recv_large_buff_setup(&last_wait, RBUF, - coll_request->buffer_info[RBUF].offset + pack_len*exchange_node->payload_info[i][j].r_offset, - exchange_node->payload_info[i][j].r_len*pack_len, src, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to mca_bcol_iboffload_recv_large_buff_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - - - - } - - - } - - /* last step, just send it back to the extra if I have one */ - if( 0 < exchange_node->n_extra_sources ) { - - comm_dst = exchange_node->rank_extra_sources_array[0]; - - /* get ib subnet id */ - dst = comm_dst; /*comm_to_ibnet[comm_dst];*/ - /* - rc = mca_bcol_iboffload_recv_rtr_setup( - &last_wait, dst, iboffload_module, coll_fragment); - 
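Each pass of the recursive k-ing loop above talks to the k-1 peers whose base-k digit at the current level differs from this rank's, over pow_k = log_k(group size) levels. The ranks come precomputed in exchange_node->rank_exchanges; the sketch below derives the same schedule from first principles for a power-of-k group (non-powers of k are handled by the extra/proxy phase):

#include <stdio.h>

/* Partners of `rank` at a given level of a radix-k exchange over k^m
 * ranks: the k-1 ranks whose base-k digit at that level differs.    */
static void level_partners(int rank, int k, int level, int *partners)
{
    int stride = 1;
    for (int i = 0; i < level; i++) {
        stride *= k;                       /* k^level                */
    }
    int digit = (rank / stride) % k;       /* my digit at this level */
    int base  = rank - digit * stride;     /* rank with digit zeroed */
    int idx   = 0;
    for (int d = 0; d < k; d++) {
        if (d != digit) {
            partners[idx++] = base + d * stride;
        }
    }
}

int main(void)
{
    int partners[2];                       /* k - 1 entries, k == 3  */
    level_partners(4, 3, 1, partners);     /* rank 4 of 9, level 1   */
    printf("%d %d\n", partners[0], partners[1]);  /* prints: 1 7     */
    return 0;
}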
- // send the data - we are already guaranteed that extra rank is waiting - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - "mca_bcol_iboffload_recv_rtr_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - */ - - rc = mca_bcol_iboffload_send_large_buff_setup( - &last_send, RBUF, coll_request->buffer_info[RBUF].offset, - pack_len*group_size, dst, - iboffload_module, coll_fragment); - - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - "mca_bcol_iboffload_send_large_buff_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - /* send is done */ - - } - -FINISHED: - - /* end of list */ - *coll_fragment->tail_next = NULL; - - /* finish initializing full message descriptor */ - (coll_request)->n_fragments = 1; - (coll_request)->n_frags_sent = 1; - - assert(NULL != last_wait); - last_wait->flags |= MQE_WR_FLAG_SIGNAL; - coll_fragment->signal_task_wr_id = last_wait->wr_id; - last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment; - - assert(MCA_COLL_ML_NO_BUFFER == coll_request->ml_buffer_index); - /* post the mwr */ - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload_module, coll_fragment->to_post); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload_module->super, coll_request->order_info); - - IBOFFLOAD_VERBOSE(10, ("Return success.\n")); - return BCOL_FN_STARTED; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Allgather, adding collfrag to collfrag_pending.\n")); - rc = - mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload_module); - return (OMPI_SUCCESS != rc) ? BCOL_FN_NOT_STARTED : BCOL_FN_STARTED; -} - -static int mca_bcol_iboffload_k_nomial_allgather_mlbuffer_exec(mca_bcol_iboffload_module_t *iboffload_module, - mca_bcol_iboffload_collreq_t *coll_request) -{ - int rc, - src, dst, comm_dst, comm_src, i, j; - int tree_order, pow_k, knt; - uint32_t pack_len; - int my_group_index = iboffload_module->super.sbgp_partner_module->my_index; - int group_size = iboffload_module->group_size; - netpatterns_k_exchange_node_t *exchange_node = - &iboffload_module->knomial_allgather_tree; - - struct mqe_task *last_send = NULL, - *last_wait = NULL; - mca_bcol_iboffload_collfrag_t *coll_fragment = &coll_request->first_collfrag; - int *list_connected = iboffload_module->super.list_n_connected; - - /* test test */ - int buff_offset = iboffload_module->super.hier_scather_offset; - - IBOFFLOAD_VERBOSE(10,("Entering small msg iboffload bcast")); - - - if (OPAL_UNLIKELY(!iboffload_module->connection_status[ALLGATHER_KNOMIAL_ALG])) { - IBOFFLOAD_VERBOSE(10,("Bcast open new connection ")); - bcol_iboffload_setup_allgather_endpoints_connection(iboffload_module); - } - - pack_len = coll_request->count * coll_request->dtype->super.size; - IBOFFLOAD_VERBOSE(10,("My packet length %d pack_len frag_count %d dtype size %d ", - pack_len, - coll_request->count, - coll_request->dtype->super.size)); - - /* now we calculate the actual buff_offset */ - buff_offset = buff_offset*pack_len; - - /* it is estimated mq consumption... 
*/ - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload_module, coll_fragment->mq_index, coll_fragment->mq_credits))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - goto out_of_resources; - } - - coll_fragment->tail_next = &coll_fragment->to_post; - /* we put this in to propagate the lkey into this local data structure */ - coll_request->buffer_info[SBUF].lkey = iboffload_module->rdma_block.ib_info.lkey; - /* end hack */ - if( EXTRA_NODE == exchange_node->node_type ) { - /* setup the rdma "send" pack_len data to proxy rank */ - comm_dst = exchange_node->rank_extra_sources_array[0]; - /* get ib subnet id */ - dst = comm_dst; - /* now I need to calculate my own offset info */ - knt = 0; - for( i = 0; i < my_group_index; i++){ - knt += list_connected[i]; - } - - rc = mca_bcol_iboffload_rdma_write_imm_small_buff_setup( - &last_send, pack_len*list_connected[my_group_index], pack_len*knt /* source offset */, - pack_len*knt /* destination offset */, dst, - iboffload_module, coll_fragment); -#if 0 - rc = mca_bcol_iboffload_rdma_write_imm_small_buff_setup( - &last_send, pack_len, pack_len*group_list[my_group_index] /* source offset */, - pack_len*group_list[my_group_index] /* destination offset */, dst, - iboffload_module, coll_fragment); -#endif - /* old flow with ml offset */ -#if 0 - rc = mca_bcol_iboffload_rdma_write_imm_small_buff_setup( - &last_send, pack_len, pack_len*group_list[my_group_index] /* source offset */, - coll_request->buffer_info[RBUF].offset + pack_len*group_list[my_group_index] /* destination offset */, dst, - iboffload_module, coll_fragment); -#endif - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - " mca_bcol_iboffload_send_small_buff_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - /* send is done */ - - /* setup the rdma "receive" from proxy */ - comm_src = comm_dst; - src = dst; - /* more general is the number connected */ - knt = 0; - for( i = 0; i < group_size; i++) { - knt += list_connected[i]; - } - - - rc = mca_bcol_iboffload_recv_small_buff_setup(&last_wait, - pack_len*knt, src, - iboffload_module, coll_fragment); - - /* - rc = mca_bcol_iboffload_recv_small_buff_setup(&last_wait, - pack_len*group_size, src, - iboffload_module, coll_fragment); - */ - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to setup data receive")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - - goto FINISHED; - } else if( 0 < exchange_node->n_extra_sources ) { - - /* am a proxy, receive pack_len data from extra */ - comm_src = exchange_node->rank_extra_sources_array[0]; - /* get ib subnet */ - src = comm_src; - rc = mca_bcol_iboffload_recv_small_buff_setup(&last_wait, - pack_len*list_connected[src], src, - iboffload_module, coll_fragment); - /* - rc = mca_bcol_iboffload_recv_small_buff_setup(&last_wait, - pack_len, src, - iboffload_module, coll_fragment); - */ - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to setup data receive")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - - - } - - /* start recursive k - ing */ - tree_order = exchange_node->tree_order; - pow_k = exchange_node->log_tree_order; - /*fprintf(stderr,"tree order %d pow_k %d\n",tree_order,pow_k);*/ - for( i = 0; i < pow_k; i++) { - for( j = 0; j < (tree_order - 1); j++ ) { - /* send phase - */ - comm_dst = 
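Because subgroup members can front different numbers of connected processes, a rank's slot in the ML buffer is the prefix sum of list_n_connected rather than rank * pack_len; that is what the knt accumulations above compute. In sketch form:

#include <stddef.h>

/* Byte offset of my_index when rank i contributes list_connected[i]
 * blocks of pack_len bytes; mirrors the knt accumulations above.    */
static size_t my_byte_offset(const int *list_connected, int my_index,
                             size_t pack_len)
{
    size_t blocks = 0;
    for (int i = 0; i < my_index; i++) {
        blocks += (size_t) list_connected[i];
    }
    return blocks * pack_len;
}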
exchange_node->rank_exchanges[i][j]; - /* remember, if we have extra ranks, then we won't participate - * with a least one peer. Make a check - */ - /*fprintf(stderr,"AAA my index %d comm_dst %d\n",my_group_index,comm_dst);*/ - if( comm_dst < 0 ){ - continue; - } - - /* get ib subnet id */ - /* again, don't think we need this */ - /*dst = ibnet_map[comm_dst];*/ - dst = comm_dst; - /* - fprintf(stderr,"BBB my index %d dst %d pack len %d s_len %d src offset %d r_len %d \n",my_group_index,dst, - pack_len,exchange_node->payload_info[i][j].s_len,exchange_node->payload_info[i][j].s_offset, - exchange_node->payload_info[i][j].r_len); - */ - /* rdma "send" setup */ - - - rc = mca_bcol_iboffload_rdma_write_imm_small_buff_setup( - &last_send, exchange_node->payload_info[i][j].s_len * pack_len, - exchange_node->payload_info[i][j].s_offset * pack_len /* source offset */, - exchange_node->payload_info[i][j].s_offset * pack_len /* destination offset */, dst, - iboffload_module, coll_fragment); - -#if 0 - rc = mca_bcol_iboffload_rdma_write_imm_small_buff_setup( - &last_send, exchange_node->payload_info[i][j].s_len * pack_len, - exchange_node->payload_info[i][j].s_offset * exchange_node->payload_info[i][j].s_len*pack_len /* source offset */, - exchange_node->payload_info[i][j].s_offset * exchange_node->payload_info[i][j].s_len*pack_len /* destination offset */, dst, - iboffload_module, coll_fragment); -#endif - -#if 0 - rc = mca_bcol_iboffload_rdma_write_imm_small_buff_setup( - &last_send, exchange_node->payload_info[i][j].s_len * pack_len, - exchange_node->payload_info[i][j].s_offset * pack_len /* source offset */, - exchange_node->payload_info[i][j].s_offset * pack_len /* destination offset */, dst, - iboffload_module, coll_fragment); -#endif -#if 0 - rc = mca_bcol_iboffload_rdma_write_imm_small_buff_setup( - &last_send, exchange_node->payload_info[i][j].s_len * pack_len, - coll_request->buffer_info[SBUF].offset + exchange_node->payload_info[i][j].s_offset * pack_len /* source offset */, - coll_request->buffer_info[SBUF].offset + exchange_node->payload_info[i][j].s_offset * pack_len /* destination offset */, dst, - iboffload_module, coll_fragment); -#endif - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - " mca_bcol_iboffload_send_small_buff_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - - /* send is done */ - } - - for( j = 0; j < (tree_order - 1); j++) { - - /* rdma "recv" phase */ - comm_src = exchange_node->rank_exchanges[i][j]; - /* remember, if we have extra ranks, then we won't participate - * with a least one peer. 
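The small-message path pushes data straight into the mirrored offset of the peer's ML buffer, so no RTR handshake is needed; the immediate field generates a completion on the target that doubles as the doorbell. Roughly, the verb underneath mca_bcol_iboffload_rdma_write_imm_small_buff_setup() looks like this (queue pair, keys and the remote address are assumed to come from the module's registered rdma_block):

#include <infiniband/verbs.h>
#include <stdint.h>
#include <string.h>

static int post_rdma_write_imm(struct ibv_qp *qp,
                               void *src, uint32_t len, uint32_t lkey,
                               uint64_t remote_addr, uint32_t rkey)
{
    struct ibv_sge sge = {
        .addr   = (uint64_t) (uintptr_t) src,
        .length = len,
        .lkey   = lkey,
    };
    struct ibv_send_wr wr, *bad_wr = NULL;

    memset(&wr, 0, sizeof(wr));
    wr.opcode              = IBV_WR_RDMA_WRITE_WITH_IMM; /* data + doorbell */
    wr.sg_list             = &sge;
    wr.num_sge             = 1;
    wr.send_flags          = IBV_SEND_SIGNALED;
    wr.imm_data            = 0;            /* surfaces in the peer's CQE */
    wr.wr.rdma.remote_addr = remote_addr;  /* mirrored ML buffer offset  */
    wr.wr.rdma.rkey        = rkey;

    return ibv_post_send(qp, &wr, &bad_wr);
}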
Make a check - */ - if( comm_src < 0 ){ - continue; - } - - /* get ib subnet id */ - /* shouldn't need this */ - src = comm_src; - - rc = mca_bcol_iboffload_recv_small_buff_setup(&last_wait, - exchange_node->payload_info[i][j].r_len * pack_len, src, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to setup data receive")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - } - return OMPI_ERROR; - } - - } - } - - /* last step, proxies send full data back to the extra ranks */ - if( 0 < exchange_node->n_extra_sources ) { - /* send pack_len data to proxy */ - comm_dst = exchange_node->rank_extra_sources_array[0]; - /* get ibnet id */ - dst = comm_dst; - - knt = 0; - for( i = 0; i < group_size; i++){ - knt += list_connected[i]; - } - - rc = mca_bcol_iboffload_rdma_write_imm_small_buff_setup( - &last_send, pack_len*knt, 0 /* source offset */, - 0 /* destination offset */, dst, - iboffload_module, coll_fragment); -#if 0 - rc = mca_bcol_iboffload_rdma_write_imm_small_buff_setup( - &last_send, pack_len*group_size, 0 /* source offset */, - 0 /* destination offset */, dst, - iboffload_module, coll_fragment); -#endif -#if 0 - rc = mca_bcol_iboffload_rdma_write_imm_small_buff_setup( - &last_send, pack_len*group_size, coll_request->buffer_info[RBUF].offset /* source offset */, - coll_request->buffer_info[SBUF].offset /* destination offset */, dst, - iboffload_module, coll_fragment); -#endif - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - " mca_bcol_iboffload_send_small_buff_setup")); - if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == rc){ - goto out_of_resources; - fprintf(stderr,"I'm out of resources \n"); - } - return OMPI_ERROR; - } - /* send is done */ - - } - -FINISHED: - - /* end of list */ - *coll_fragment->tail_next = NULL; - - /* finish initializing full message descriptor */ - (coll_request)->n_fragments = 1; - (coll_request)->n_frags_sent = 1; - - assert(NULL != last_wait); - last_wait->flags |= MQE_WR_FLAG_SIGNAL; - coll_fragment->signal_task_wr_id = last_wait->wr_id; - last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment; - - assert(MCA_COLL_ML_NO_BUFFER != coll_request->ml_buffer_index); - /* post the mwr */ - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload_module, coll_fragment->to_post); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload_module->super, coll_request->order_info); - - IBOFFLOAD_VERBOSE(10, ("Return success.\n")); - return BCOL_FN_STARTED; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Allgather, adding collfrag to collfrag_pending.\n")); - rc = - mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload_module); - return (OMPI_SUCCESS != rc) ? 
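All of the builders in this file funnel transient resource exhaustion, whether MQ credits, fragments or tasks, to out_of_resources, which tears down the partial fragment and parks it on a pending list. Note the return convention: a successful move to pending is reported upward as BCOL_FN_STARTED, since the fragment will be resumed from progress, and only a failed move yields BCOL_FN_NOT_STARTED. A compact sketch, with stand-ins for the real helpers:

/* Retry convention sketch; the callbacks stand in for the fragment
 * builder and mca_bcol_iboffload_free_resources_and_move_to_pending(). */
enum { FN_STARTED, FN_NOT_STARTED };

static int start_or_defer(int have_credits,
                          int (*build_and_post)(void),
                          int (*move_to_pending)(void))
{
    if (!have_credits || 0 != build_and_post()) {
        /* Queued for retry counts as started; only a failed move to
         * the pending list is a real refusal.                        */
        return (0 != move_to_pending()) ? FN_NOT_STARTED : FN_STARTED;
    }
    return FN_STARTED;
}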
BCOL_FN_NOT_STARTED : BCOL_FN_STARTED; -} - -#if 0 -static int mca_bcol_iboffload_neighbor_allgather_userbuffer_intra( - bcol_function_args_t *fn_arguments, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_iboffload_module_t *iboffload_module = - (mca_bcol_iboffload_module_t *)const_args->bcol_module; - - int rc; - int mq_credits = iboffload_module->group_size * 2 * 2; /* large message protocol consumes - * twice as many mq credits - */ - - bool if_bcol_last = BCOL_IBOFFLOAD_IS_LAST_CALL(const_args); - mca_bcol_iboffload_collreq_t *coll_request; - - MCA_BCOL_CHECK_ORDER(const_args->bcol_module, fn_arguments); - - rc = mca_bcol_iboffload_allgather_init(fn_arguments, iboffload_module, - &coll_request, if_bcol_last, mq_credits, - mca_bcol_iboffload_neighbor_allgather_userbuffer_exec); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return rc; - } - - rc = coll_request->progress_fn(iboffload_module, coll_request); - - IBOFFLOAD_VERBOSE(10, ("mca_bcol_iboffload_k_nomial_allgather_userbuffer_intra was started [%d]\n", rc)); - return rc; -} -#endif - -#if 1 -static int mca_bcol_iboffload_k_nomial_allgather_userbuffer_intra(bcol_function_args_t *fn_arguments, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_iboffload_module_t *iboffload_module = - (mca_bcol_iboffload_module_t *)const_args->bcol_module; - - int rc; - int mq_credits = ((iboffload_module->knomial_allgather_tree.tree_order - 1)* - iboffload_module->knomial_allgather_tree.log_tree_order + 1) * 2 * 2; /* large message protocol - * consumes twice as much - */ - - bool if_bcol_last = BCOL_IBOFFLOAD_IS_LAST_CALL(const_args); - mca_bcol_iboffload_collreq_t *coll_request; - - MCA_BCOL_CHECK_ORDER(const_args->bcol_module, fn_arguments); - - rc = mca_bcol_iboffload_allgather_init(fn_arguments, iboffload_module, - &coll_request, if_bcol_last, mq_credits, - mca_bcol_iboffload_k_nomial_allgather_userbuffer_exec); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return rc; - } - - rc = coll_request->progress_fn(iboffload_module, coll_request); - - IBOFFLOAD_VERBOSE(10, ("mca_bcol_iboffload_k_nomial_allgather_userbuffer_intra was started [%d]\n", rc)); - return rc; -} -#endif - -static int mca_bcol_iboffload_k_nomial_allgather_mlbuffer_intra(bcol_function_args_t *fn_arguments, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_iboffload_module_t *iboffload_module = - (mca_bcol_iboffload_module_t *)const_args->bcol_module; - - int rc; - - /* I'll add one for everyone, since nobody wants to feel left out */ - int mq_credits = ((iboffload_module->knomial_allgather_tree.tree_order - 1)* - iboffload_module->knomial_allgather_tree.log_tree_order + 1) * 2 ; - bool if_bcol_last = BCOL_IBOFFLOAD_IS_LAST_CALL(const_args); - mca_bcol_iboffload_collreq_t *coll_request; - - MCA_BCOL_CHECK_ORDER(const_args->bcol_module, fn_arguments); - - rc = mca_bcol_iboffload_allgather_init(fn_arguments, iboffload_module, - &coll_request, if_bcol_last, mq_credits, - mca_bcol_iboffload_k_nomial_allgather_mlbuffer_exec); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return rc; - } - - rc = coll_request->progress_fn(iboffload_module, coll_request); - - IBOFFLOAD_VERBOSE(10, ("mca_bcol_iboffload_small_msg_bcast_intra was started [%d]\n", rc)); - return rc; -} - - -/* these progress engines are shared between alltoall and allgather and exist in both files, - * should be moved to a common .h file - */ -static int mca_bcol_iboffload_collreq_mlbuffer_progress( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - int i; - 
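The mq_credits expressions above size the credit check from the schedule itself: up to (tree_order - 1) exchanges per level across log_tree_order levels, plus one slot for the proxy step, doubled for the send/wait pair of each exchange, and doubled again when the large-message RTR handshake is in play. As arithmetic:

/* Credit sizing that mirrors the expressions above.                 */
static int knomial_mq_credits(int tree_order, int log_tree_order,
                              int large_message)
{
    int tasks   = (tree_order - 1) * log_tree_order + 1; /* + proxy  */
    int credits = tasks * 2;           /* send + wait per exchange   */

    if (large_message) {
        credits *= 2;                  /* RTR handshake doubles it   */
    }
    return credits;
}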
mca_bcol_iboffload_collreq_t *coll_request = - (mca_bcol_iboffload_collreq_t *) - input_args->bcol_opaque_data; - IBOFFLOAD_VERBOSE(10, ("Run progress (ml buffer).\n")); - for (i = 0; i < mca_bcol_iboffload_component.max_progress_pull; i++) { - if (BCOL_IS_COMPLETED(coll_request)) { - - coll_request->user_handle_freed = true; - - if (COLLREQ_IS_DONE(coll_request)) { - IBOFFLOAD_VERBOSE(10, ("Coll request already done.\n")); - RELEASE_COLLREQ(coll_request); - } - IBOFFLOAD_VERBOSE(10, ("Collective finished (ml buffer).\n")); - - return BCOL_FN_COMPLETE; - } - } - IBOFFLOAD_VERBOSE(10, ("Collective not finished (ml buffer).\n")); - return BCOL_FN_STARTED; -} - - -static int mca_bcol_iboffload_collreq_userbuffer_progress( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - int i; - mca_bcol_iboffload_collreq_t *coll_request = - (mca_bcol_iboffload_collreq_t *) - input_args->bcol_opaque_data; - - IBOFFLOAD_VERBOSE(10, ("Run progress (user buffer)\n")); - - /* Complete the allgather - progress releases full request descriptors */ - - for (i = 0; i < mca_bcol_iboffload_component.max_progress_pull; i++) { - if (coll_request->n_frag_mpi_complete == coll_request->n_fragments && - coll_request->n_frag_net_complete == coll_request->n_fragments) { - - IBOFFLOAD_VERBOSE(10, ("Deregister user buff.\n")); - - if (NULL != coll_request->buffer_info[SBUF].iboffload_reg) { - coll_request->module->device->mpool->mpool_deregister( - coll_request->module->device->mpool, - (mca_mpool_base_registration_t *) coll_request->buffer_info[SBUF].iboffload_reg); - coll_request->buffer_info[SBUF].iboffload_reg = NULL; - } - - - if (NULL != coll_request->buffer_info[RBUF].iboffload_reg) { - coll_request->module->device->mpool->mpool_deregister( - coll_request->module->device->mpool, - (mca_mpool_base_registration_t *) coll_request->buffer_info[RBUF].iboffload_reg); - coll_request->buffer_info[RBUF].iboffload_reg = NULL; - } - - RELEASE_COLLREQ(coll_request); - IBOFFLOAD_VERBOSE(10, ("New bcast done !!!")); - return BCOL_FN_COMPLETE; - } - } - - IBOFFLOAD_VERBOSE(10, ("Collective finished (user buffer).\n")); - - /* We are not done */ - return BCOL_FN_STARTED; -} - -int mca_bcol_iboffload_allgather_register(mca_bcol_base_module_t *super) -{ - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - IBOFFLOAD_VERBOSE(10, ("Register iboffload Allgather.\n")); - comm_attribs.bcoll_type = BCOL_ALLGATHER; - - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - comm_attribs.data_src = DATA_SRC_KNOWN; - - mca_bcol_base_set_attributes(super, - &comm_attribs, &inv_attribs, - mca_bcol_iboffload_k_nomial_allgather_mlbuffer_intra, - mca_bcol_iboffload_collreq_mlbuffer_progress); - - inv_attribs.bcol_msg_min = 10000000; - inv_attribs.bcol_msg_max = 10485760; /* range 4 */ - - - /* zero-copy k-nomial algorithm */ -#if 1 - mca_bcol_base_set_attributes(super, - &comm_attribs, &inv_attribs, - mca_bcol_iboffload_k_nomial_allgather_userbuffer_intra, - mca_bcol_iboffload_collreq_userbuffer_progress); -#endif - /* zero-copy neighbor exchange algorithm */ -#if 0 - mca_bcol_base_set_attributes(super, - &comm_attribs, &inv_attribs, - mca_bcol_iboffload_neighbor_allgather_userbuffer_intra, - 
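Both progress routines above share one shape: poll at most max_progress_pull times, return BCOL_FN_COMPLETE once the request's completion predicate holds (the user-buffer variant first deregistering both buffer registrations), otherwise report BCOL_FN_STARTED so the caller polls again later. Distilled, with done() and release() standing in for BCOL_IS_COMPLETED and the deregister-and-release epilogue:

/* Bounded-poll progress, as in both routines above.                 */
enum { FN_STARTED, FN_COMPLETE };

static int progress_poll(int max_progress_pull, void *req,
                         int (*done)(void *), void (*release)(void *))
{
    for (int i = 0; i < max_progress_pull; i++) {
        if (done(req)) {
            release(req);    /* deregister buffers, free the request */
            return FN_COMPLETE;
        }
    }
    return FN_STARTED;       /* still in flight; poll again later    */
}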
mca_bcol_iboffload_collreq_userbuffer_progress); -#endif - return OMPI_SUCCESS; -} diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_allreduce.c b/ompi/mca/bcol/iboffload/bcol_iboffload_allreduce.c deleted file mode 100644 index 406442ff7c..0000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_allreduce.c +++ /dev/null @@ -1,1418 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/* - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file */ - -#include "ompi_config.h" - -#include -#include -#include -#include -#include -#include "opal_stdint.h" - -#include "bcol_iboffload.h" -#include "bcol_iboffload_frag.h" -#include "bcol_iboffload_task.h" -#include "bcol_iboffload_collfrag.h" -#include "bcol_iboffload_endpoint.h" - -#include "opal/include/opal/types.h" - -static int mca_bcol_iboffload_calc_res_to_user(void *callback_data) -{ - int rc; - uint64_t result = 0; - - uint64_t l_operand = 0; - uint64_t r_operand = 0; - - mca_bcol_iboffload_collfrag_t *coll_frag = - (mca_bcol_iboffload_collfrag_t *) callback_data; - - mca_bcol_iboffload_collreq_t *coll_request = coll_frag->coll_full_req; - - ompi_op_t *op = coll_request->op; - ompi_datatype_t *dtype = coll_request->dtype; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - struct ibv_context *ib_dev_context = coll_request->module->device->dev.ib_dev_context; - - IBOFFLOAD_VERBOSE(10, ("Start calculating.\n")); - - rc = unpack_data_from_calc(ib_dev_context, - cm->map_ompi_to_ib_calcs[op->op_type], - cm->map_ompi_to_ib_dt[dtype->id], false, - (void *) (uintptr_t) coll_request->l_operand, - NULL, (void *) &l_operand); - if (0 != rc) { - IBOFFLOAD_VERBOSE(10, ("unpack_data_from_calc for l_operand failed: op %s, type %s\n", - op->o_name, dtype->name)); - return OMPI_ERROR; - } - - rc = unpack_data_from_calc(ib_dev_context, - cm->map_ompi_to_ib_calcs[op->op_type], - cm->map_ompi_to_ib_dt[dtype->id], false, - (void *) (uintptr_t) coll_request->r_operand, - NULL, (void *) &r_operand); - if (0 != rc) { - IBOFFLOAD_VERBOSE(10, ("unpack_data_from_calc for r_operand failed: op %s, type %s\n", - op->o_name, dtype->name)); - return OMPI_ERROR; - } - - switch (op->op_type) { - case OMPI_OP_PROD: - break; /* ronni todo - ????? 
*/ - case OMPI_OP_LAND: - result = l_operand && r_operand; - break; - case OMPI_OP_BAND: - result = l_operand & r_operand; - break; - case OMPI_OP_LOR: - result = l_operand || r_operand; - break; - case OMPI_OP_BOR: - result = l_operand | r_operand; - break; - case OMPI_OP_LXOR: - result = ((l_operand && !r_operand) || (!l_operand && r_operand)); - break; - case OMPI_OP_BXOR: - result = l_operand ^ r_operand; - break; - case OMPI_OP_MAXLOC: - case OMPI_OP_MINLOC: - break; - case OMPI_OP_MAX: - case OMPI_OP_MIN: - case OMPI_OP_SUM: - switch (cm->map_ompi_to_ib_dt[dtype->id]) { - case IBV_M_DATA_TYPE_INT8: - MCA_BCOL_IBOFFLOAD_ALLREDUCE_DO_CALC(coll_request->op->op_type, char, l_operand, r_operand, result); - break; - case IBV_M_DATA_TYPE_INT16: - MCA_BCOL_IBOFFLOAD_ALLREDUCE_DO_CALC(coll_request->op->op_type, int16_t, l_operand, r_operand, result); - break; - case IBV_M_DATA_TYPE_INT32: - MCA_BCOL_IBOFFLOAD_ALLREDUCE_DO_CALC(coll_request->op->op_type, int32_t, l_operand, r_operand, result); - break; - case IBV_M_DATA_TYPE_INT64: - MCA_BCOL_IBOFFLOAD_ALLREDUCE_DO_CALC(coll_request->op->op_type, int64_t, l_operand, r_operand, result); - break; - case IBV_M_DATA_TYPE_FLOAT32: - MCA_BCOL_IBOFFLOAD_ALLREDUCE_DO_CALC(coll_request->op->op_type, float, l_operand, r_operand, result); - break; - case IBV_M_DATA_TYPE_FLOAT64: - MCA_BCOL_IBOFFLOAD_ALLREDUCE_DO_CALC(coll_request->op->op_type, double, l_operand, r_operand, result); - break; - default: - IBOFFLOAD_VERBOSE(10, ("Unsupported data type: %s.\n", dtype->name)); - return OMPI_ERROR; - } - - break; - - default: - IBOFFLOAD_VERBOSE(10, ("Unsupported op: %s.\n", coll_request->op->o_name)); - return OMPI_ERROR; - } - - memcpy(coll_request->buffer_info[RBUF].buf, &result, coll_frag->unpack_size); - IBOFFLOAD_VERBOSE(10, ("The output data after calc is %lf, result %lf, l_operand %lf, r_operand %lf: " - "sbuf addr %p, rbuf addr %p.\n", - *(double *) coll_request->buffer_info[RBUF].buf, *(double *) &result, - *(double *) &l_operand, *(double *) &r_operand, - coll_request->buffer_info[SBUF].buf, - coll_request->buffer_info[RBUF].buf)); - - return OMPI_SUCCESS; -} - -static int mca_bcol_iboffload_unpack_res_to_user(void *callback_data) -{ - int rc; - - mca_bcol_iboffload_collfrag_t *coll_frag = - (mca_bcol_iboffload_collfrag_t *) callback_data; - - mca_bcol_iboffload_collreq_t *coll_request = coll_frag->coll_full_req; - mca_bcol_iboffload_task_t *task = (mca_bcol_iboffload_task_t *) coll_frag->signal_task_wr_id; - - mca_bcol_iboffload_frag_t *recv_frag = task->frag; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - struct ibv_context *ib_dev_context = coll_request->module->device->dev.ib_dev_context; - - rc = unpack_data_from_calc(ib_dev_context, - cm->map_ompi_to_ib_calcs[coll_request->op->op_type], - cm->map_ompi_to_ib_dt[coll_request->dtype->id], - false, (void*) (uintptr_t) recv_frag->sg_entry.addr, - NULL, coll_request->buffer_info[RBUF].buf); - if (0 != rc) { - IBOFFLOAD_VERBOSE(10, ("unpack_data_from_calc is failed: op %s, type %s\n", - coll_request->op->o_name, coll_request->dtype->name)); - return OMPI_ERROR; - } - - IBOFFLOAD_VERBOSE(10, ("The naitive output data is %" PRId64 ".\n" - "The output data is %" PRId64 ".\n", - *(uint64_t *) recv_frag->sg_entry.addr, - *(uint64_t *) coll_request->buffer_info[RBUF].buf)); - - return OMPI_SUCCESS; -} - -static int -allreduce_extra_node(mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collreq_t *coll_request) -/* (EXTRA_NODE == my_exchange_node->node_type) */ -{ - /* 
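The switch above has to reinterpret raw 64-bit operand images at the correct width before applying MAX, MIN or SUM, which is what the per-type MCA_BCOL_IBOFFLOAD_ALLREDUCE_DO_CALC expansions do. A self-contained sketch of the same width dispatch, shown for SUM only since the macro body itself is not part of this hunk:

#include <stdint.h>
#include <string.h>

/* Reinterpret the 64-bit operand images at the right type, combine,
 * and store the result back into a 64-bit image.                    */
#define DO_SUM(type, l, r, out)                 \
    do {                                        \
        type lv_, rv_, res_;                    \
        memcpy(&lv_, &(l), sizeof(type));       \
        memcpy(&rv_, &(r), sizeof(type));       \
        res_ = lv_ + rv_;                       \
        memcpy(&(out), &res_, sizeof(type));    \
    } while (0)

static uint64_t sum64(uint64_t l, uint64_t r, int is_float64)
{
    uint64_t result = 0;
    if (is_float64) {
        DO_SUM(double, l, r, result);
    } else {
        DO_SUM(int64_t, l, r, result);
    }
    return result;
}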
local variables */ - int rc, extra_rank; - - mca_bcol_iboffload_frag_t *send_fragment, - *preposted_recv_frag; - - mca_bcol_iboffload_task_t *send_task, - *wait_task; - - struct mqe_task *last_wait, /* we need ask from completion on last wait */ - *last_send; - - netpatterns_pair_exchange_node_t *my_exchange_node = - &iboffload->recursive_doubling_tree; - - struct mqe_task **mqe_ptr_to_set; - mca_bcol_iboffload_collfrag_t *coll_fragment = (mca_bcol_iboffload_collfrag_t *) - opal_list_get_last(&coll_request->work_requests); - - mqe_ptr_to_set = &coll_fragment->to_post; - - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload, coll_fragment->mq_index, coll_fragment->mq_credits))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - /* I will NOT participate in the exchange - so just "register" as here */ - extra_rank = my_exchange_node->rank_extra_source; - - send_fragment = mca_bcol_iboffload_get_send_frag(coll_request, - extra_rank, coll_request->qp_index, - MCA_IBOFFLOAD_IB_DRIVER_OPERAND_SIZE, 0, - SBUF, - MCA_BCOL_IBOFFLOAD_SEND_FRAG_ML_CALC); - - if (OPAL_UNLIKELY(NULL == send_fragment)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting and packing send frag.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - /* send my operand to EXCHANGE NODE */ - send_task = mca_bcol_iboffload_get_send_task(iboffload, extra_rank, - coll_request->qp_index, send_fragment, coll_fragment, INLINE); - if (OPAL_UNLIKELY(NULL == send_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting send task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, send_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task); - - preposted_recv_frag = - mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, extra_rank, coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - /* RLG need cleanup */ - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - /* Wait for final result from EXCHANGE NODE */ - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, extra_rank, 1, - preposted_recv_frag, coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting wait task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - - *mqe_ptr_to_set = NULL; - - /* finish initializing full message descriptor */ - coll_request->n_fragments = 1; - coll_request->n_frags_sent = 1; - - /* Pasha: need to set to true in upper layer */ - coll_request->user_handle_freed = false; - - last_wait->flags |= MQE_WR_FLAG_SIGNAL; - - coll_fragment->signal_task_wr_id = last_wait->wr_id; - last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment; - - /* post the mwr */ - IBOFFLOAD_VERBOSE(10, ("Post tasks.\n")); - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload, coll_fragment->to_post); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_ERROR(("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload->super, coll_request->order_info); - - return OMPI_SUCCESS; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Allreduce: adding collfrag to collfrag_pending.\n")); - return 
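Stripped of task plumbing, an EXTRA node's whole allreduce is the two tasks built above: ship the local operand to the proxy, then wait for the fully reduced result to come back. In outline, with hypothetical helpers in place of the send/wait task builders:

#include <stddef.h>

/* Hypothetical helpers in place of the send/wait task builders.     */
extern int send_to(int peer, const void *buf, size_t len);
extern int wait_from(int peer, void *buf, size_t len);

static int allreduce_extra(int proxy, const void *my_operand,
                           void *result, size_t len)
{
    if (0 != send_to(proxy, my_operand, len)) {  /* operand out       */
        return -1;
    }
    return wait_from(proxy, result, len);        /* reduced result in */
}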
mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload); -} - -/** - * Start allreduce - */ -static int do_exchange(mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collreq_t *coll_request, - struct mqe_task ***mqe_ptr_to_set, - struct mqe_task **last_wait, - struct ibv_sge **l_operand, - struct ibv_sge **r_operand) -{ - int rc = OMPI_SUCCESS, exchange, pair_rank, - my_rank = ((mca_sbgp_base_module_t *) iboffload->ibnet)->my_index; - - mca_bcol_iboffload_frag_t *preposted_recv_frag; - - mca_bcol_iboffload_task_t *wait_task, - *calc_task; - - struct mqe_task *last_send; - netpatterns_pair_exchange_node_t *my_exchange_node = - &iboffload->recursive_doubling_tree; - - mca_bcol_iboffload_collfrag_t *coll_fragment = (mca_bcol_iboffload_collfrag_t *) - opal_list_get_last(&coll_request->work_requests); - - size_t calc_size = MCA_IBOFFLOAD_IB_DRIVER_OPERAND_SIZE + MCA_IBOFFLOAD_CALC_SIZE_EXT; - - pair_rank = my_exchange_node->rank_exchanges[0]; - preposted_recv_frag = - mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, pair_rank, coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - /* RLG need cleanup */ - IBOFFLOAD_VERBOSE(10, ("Get prepost recv fag fail.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - /* Wait for send from first algorithm partner */ - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, pair_rank, 1, - preposted_recv_frag, coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting wait task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST((*mqe_ptr_to_set), wait_task, (*last_wait)); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - - (*l_operand)->length = calc_size; - for (exchange = 1; exchange < my_exchange_node->n_exchanges; ++exchange) { - pair_rank = my_exchange_node->rank_exchanges[exchange]; - - (*r_operand) = &preposted_recv_frag->sg_entry; - (*r_operand)->length = calc_size; - - /* Calc and send the result to the partner */ - calc_task = mca_bcol_iboffload_get_calc_task(iboffload, - pair_rank, coll_request->qp_index, NULL, - *l_operand, *r_operand, - coll_request, NO_INLINE); - if (OPAL_UNLIKELY(NULL == calc_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting calc task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST((*mqe_ptr_to_set), calc_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, calc_task); - - /* Calc and send the result to myself */ - calc_task = mca_bcol_iboffload_get_calc_task(iboffload, - my_rank, coll_request->qp_index, NULL, - *l_operand, *r_operand, coll_request, NO_INLINE); - if (OPAL_UNLIKELY(NULL == calc_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting calc task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST((*mqe_ptr_to_set), calc_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, calc_task); - - preposted_recv_frag = - mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, my_rank, coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - /* RLG need cleanup */ - IBOFFLOAD_VERBOSE(10, ("Get prepost recv fag fail.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - /* Wait for calc from myself */ - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, my_rank, 1, - preposted_recv_frag, coll_request->qp_index, NULL); - if 
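do_exchange() walks the classic recursive-doubling schedule: on exchange e the partner is the rank whose bit e differs, so log2(group size) rounds cover a power-of-two group. The ranks arrive precomputed in the recursive_doubling_tree's rank_exchanges array, but the underlying rule is a single XOR:

#include <stdio.h>

int main(void)
{
    int my_rank = 5, n_exchanges = 3;            /* group of 8       */

    for (int e = 0; e < n_exchanges; e++) {
        printf("exchange %d: partner %d\n", e, my_rank ^ (1 << e));
    }                                            /* partners 4, 7, 1 */
    return 0;
}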
(NULL == wait_task) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting wait task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST((*mqe_ptr_to_set), wait_task, (*last_wait)); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - - (*l_operand) = &preposted_recv_frag->sg_entry; - (*l_operand)->length = calc_size; - - preposted_recv_frag = - mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, pair_rank, coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - /* RLG need cleanup */ - IBOFFLOAD_VERBOSE(10, ("Get prepost recv fag fail.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - /* Wait for calc from the current algorithm partner */ - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, pair_rank, 1, - preposted_recv_frag, coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting wait task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST((*mqe_ptr_to_set), wait_task, (*last_wait)); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - - } - - (*r_operand) = &preposted_recv_frag->sg_entry; - (*r_operand)->length = calc_size; - - return OMPI_SUCCESS; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Adding collfrag to collfrag_pending")); - return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload); -} - -/* Power of 2 case */ -static int -pure_recursive_doubling(mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collreq_t *coll_request) -{ - /* local variables */ - int rc = OMPI_SUCCESS, pair_rank, - my_rank = ((mca_sbgp_base_module_t *) iboffload->ibnet)->my_index; - - struct mqe_task *last_send, - *last_wait; - - mca_bcol_iboffload_task_t *send_task, - *wait_task, - *calc_task; - - mca_bcol_iboffload_frag_t *send_fragment, - *preposted_recv_frag; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - netpatterns_pair_exchange_node_t *my_exchange_node = - &iboffload->recursive_doubling_tree; - - struct ibv_sge *r_operand = NULL, - *l_operand = NULL; - - struct mqe_task **mqe_ptr_to_set; - mca_bcol_iboffload_collfrag_t *coll_fragment = (mca_bcol_iboffload_collfrag_t *) - opal_list_get_last(&coll_request->work_requests); - - mqe_ptr_to_set = &coll_fragment->to_post; - - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload, coll_fragment->mq_index, coll_fragment->mq_credits))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - IBOFFLOAD_VERBOSE(10, ("Allreduce starting: type %d op %d, " - "n_extra_sources - %d.\n", cm->map_ompi_to_ib_dt[coll_request->dtype->id], - cm->map_ompi_to_ib_calcs[coll_request->op->op_type], - my_exchange_node->n_extra_sources)); - - pair_rank = my_exchange_node->rank_exchanges[0]; - - send_fragment = mca_bcol_iboffload_get_send_frag(coll_request, - pair_rank, coll_request->qp_index, - (MCA_IBOFFLOAD_IB_DRIVER_OPERAND_SIZE + MCA_IBOFFLOAD_CALC_SIZE_EXT), 0, - SBUF, - MCA_BCOL_IBOFFLOAD_SEND_FRAG_ML_CALC); - if (OPAL_UNLIKELY(NULL == send_fragment)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting and packing send frag.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } -/* Vasily: NO_INLINE ????? 
*/ - /* send my operand to the first algorithm partner */ - send_task = mca_bcol_iboffload_get_send_task(iboffload, pair_rank, - coll_request->qp_index, send_fragment, coll_fragment, NO_INLINE); - if (OPAL_UNLIKELY(NULL == send_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting send task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, send_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task); - - l_operand = &send_fragment->sg_entry; - /* Recursive-doubling exchange */ - rc = do_exchange(iboffload, coll_request, &mqe_ptr_to_set, - &last_wait, &l_operand, &r_operand); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return rc; - } - if (false == coll_request->do_calc_in_cpu) { - /* Calc and send the result to myself */ - calc_task = mca_bcol_iboffload_get_calc_task(iboffload, - my_rank, coll_request->qp_index, NULL, - l_operand, - r_operand, coll_request, NO_INLINE); - if (OPAL_UNLIKELY(NULL == calc_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting calc task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, calc_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, calc_task); - - preposted_recv_frag = - mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, my_rank, coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - /* RLG need cleanup */ - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - /* Wait for calc from myself */ - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, my_rank, 1, - preposted_recv_frag, coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting wait task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - } else { - coll_request->l_operand = l_operand->addr; - coll_request->r_operand = r_operand->addr; - } - - *mqe_ptr_to_set = NULL; -/* Vasily: TODO with MACRO */ - /* finish initializing full message descriptor */ - coll_request->n_fragments = 1; - coll_request->n_frags_sent = 1; - - /* Pasha: need to set to true in upper layer */ - coll_request->user_handle_freed = false; - - last_wait->flags |= MQE_WR_FLAG_SIGNAL; - - coll_fragment->signal_task_wr_id = last_wait->wr_id; - last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment; - - /* post the mwr */ - IBOFFLOAD_VERBOSE(10, ("Post tasks.\n")); - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload, coll_fragment->to_post); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_ERROR(("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload->super, coll_request->order_info); - - return OMPI_SUCCESS; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Adding collfrag to collfrag_pending")); - return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload); -} - -static int rdma_do_exchange(mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collreq_t *coll_request, - struct mqe_task ***mqe_ptr_to_set, - struct mqe_task **last_wait, - struct ibv_sge **l_operand, - struct ibv_sge **r_operand) -{ - int rc = OMPI_SUCCESS, exchange, pair_rank, - my_rank = ((mca_sbgp_base_module_t *) iboffload->ibnet)->my_index; - - mca_bcol_iboffload_frag_t 
*preposted_recv_frag; - - mca_bcol_iboffload_task_t *wait_task, - *calc_task; - - struct mqe_task *last_send; - netpatterns_pair_exchange_node_t *my_exchange_node = - &iboffload->recursive_doubling_tree; - - mca_bcol_iboffload_collfrag_t *coll_fragment = (mca_bcol_iboffload_collfrag_t *) - opal_list_get_last(&coll_request->work_requests); - - const size_t calc_size = MCA_IBOFFLOAD_IB_DRIVER_OPERAND_SIZE + MCA_IBOFFLOAD_CALC_SIZE_EXT; - size_t remote_offset = calc_size; - size_t self_offset = 0; - - pair_rank = my_exchange_node->rank_exchanges[0]; - preposted_recv_frag = - mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, pair_rank, coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - /* RLG need cleanup */ - IBOFFLOAD_VERBOSE(10, ("Get prepost recv fag fail.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - /* Wait for send from first algorithm partner */ - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, pair_rank, 1, - preposted_recv_frag, coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting wait task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST((*mqe_ptr_to_set), wait_task, (*last_wait)); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - - (*l_operand)->length = 2 * calc_size ; - for (exchange = 1; exchange < my_exchange_node->n_exchanges; ++exchange) { - pair_rank = my_exchange_node->rank_exchanges[exchange]; - /* Pasha: Not used - (*r_operand) = &preposted_recv_frag->sg_entry; - (*r_operand)->length = calc_size; - */ - - remote_offset += 2 * calc_size; - self_offset += 2 * calc_size; - - /* Calc and send the result to the partner */ - /* - calc_task = mca_bcol_iboffload_get_calc_task(iboffload, - pair_rank, coll_request->qp_index, NULL, - *l_operand, *r_operand, - coll_request, NO_INLINE); - */ - calc_task = mca_bcol_iboffload_get_rdma_calc_task(iboffload, - pair_rank, coll_request->qp_index, NULL, - *l_operand, NULL, - coll_request, remote_offset); - if (OPAL_UNLIKELY(NULL == calc_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting calc task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST((*mqe_ptr_to_set), calc_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, calc_task); - - /* Calc and send the result to myself */ - /* - calc_task = mca_bcol_iboffload_get_calc_task(iboffload, - my_rank, coll_request->qp_index, NULL, - *l_operand, NULL, - coll_request, NO_INLINE); - */ - calc_task = mca_bcol_iboffload_get_rdma_calc_task(iboffload, - my_rank, coll_request->qp_index, NULL, - *l_operand, NULL, - coll_request, self_offset); - if (OPAL_UNLIKELY(NULL == calc_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting calc task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST((*mqe_ptr_to_set), calc_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, calc_task); - - preposted_recv_frag = - mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, my_rank, coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - /* RLG need cleanup */ - IBOFFLOAD_VERBOSE(10, ("Get prepost recv fag fail.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - /* Wait for calc from myself */ - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, my_rank, 1, - preposted_recv_frag, coll_request->qp_index, NULL); - if (NULL == 
wait_task) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting wait task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST((*mqe_ptr_to_set), wait_task, (*last_wait)); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - - /* - (*l_operand) = &preposted_recv_frag->sg_entry; - */ - - /* (*l_operand)->length = 2 * calc_size; */ - (*l_operand)->addr = (uint64_t) (uintptr_t) ((unsigned char *) (*l_operand)->addr + 2 * calc_size); - - preposted_recv_frag = - mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, pair_rank, coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - /* RLG need cleanup */ - IBOFFLOAD_VERBOSE(10, ("Get prepost recv fag fail.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - /* Wait for calc from the current algorithm partner */ - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, pair_rank, 1, - preposted_recv_frag, coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting wait task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST((*mqe_ptr_to_set), wait_task, (*last_wait)); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - - } - /* Pasha: not used - (*r_operand) = &preposted_recv_frag->sg_entry; - (*r_operand)->length = calc_size; - */ - - return OMPI_SUCCESS; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Adding collfrag to collfrag_pending")); - return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload); -} - -#define ALLREDUCE_BASE_OFFSET (MCA_IBOFFLOAD_IB_DRIVER_OPERAND_SIZE + MCA_IBOFFLOAD_CALC_SIZE_EXT) - -/* RDMA Recursive doubling + cache friendly version */ -static int -rdma_pure_recursive_doubling(mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collreq_t *coll_request) -{ - /* local variables */ - int rc = OMPI_SUCCESS, pair_rank, - my_rank = ((mca_sbgp_base_module_t *) iboffload->ibnet)->my_index; - - struct mqe_task *last_send, - *last_wait; - - mca_bcol_iboffload_task_t *send_task, - *wait_task, - *calc_task; - - mca_bcol_iboffload_frag_t *send_fragment, - *preposted_recv_frag; - struct ibv_sge operand; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - netpatterns_pair_exchange_node_t *my_exchange_node = - &iboffload->recursive_doubling_tree; - - struct ibv_sge *r_operand = NULL, - *l_operand = NULL; - - struct mqe_task **mqe_ptr_to_set; - mca_bcol_iboffload_collfrag_t *coll_fragment = (mca_bcol_iboffload_collfrag_t *) - opal_list_get_last(&coll_request->work_requests); - - mqe_ptr_to_set = &coll_fragment->to_post; - - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload, coll_fragment->mq_index, coll_fragment->mq_credits))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - IBOFFLOAD_VERBOSE(10, ("Allreduce starting: type %d op %d, " - "n_extra_sources - %d.\n", cm->map_ompi_to_ib_dt[coll_request->dtype->id], - cm->map_ompi_to_ib_calcs[coll_request->op->op_type], - my_exchange_node->n_extra_sources)); - - pair_rank = my_exchange_node->rank_exchanges[0]; - - send_fragment = mca_bcol_iboffload_get_send_frag(coll_request, - pair_rank, coll_request->qp_index, - (MCA_IBOFFLOAD_IB_DRIVER_OPERAND_SIZE + MCA_IBOFFLOAD_CALC_SIZE_EXT), - 0, - SBUF, - MCA_BCOL_IBOFFLOAD_SEND_FRAG_ML_CALC); - if (OPAL_UNLIKELY(NULL == 
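The cache-friendly RDMA variant above never reuses a landing slot: every exchange advances both the remote target offset and the local operand window by two calc-size slots (one for the partner's result, one for the local copy), so each round reads freshly written memory instead of depending on overwrite ordering. The walk, with an illustrative slot size:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    const uint64_t calc_size = 16;     /* illustrative slot size     */
    uint64_t remote = calc_size;       /* peers start one slot apart */
    uint64_t self   = 0;

    for (int exchange = 1; exchange < 4; exchange++) {
        remote += 2 * calc_size;       /* partner's landing slot     */
        self   += 2 * calc_size;       /* my own copy's slot         */
        printf("exchange %d: remote %llu self %llu\n", exchange,
               (unsigned long long) remote, (unsigned long long) self);
    }
    return 0;
}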
send_fragment)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting and packing send frag.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - /* Vasily: NO_INLINE ????? */ - /* send my operand to the first algorithm partner */ - /* send_task = mca_bcol_iboffload_get_send_task(iboffload, pair_rank, - coll_request->qp_index, send_fragment, coll_fragment, NO_INLINE); */ - - send_task = mca_bcol_iboffload_get_rdma_task( - pair_rank, ALLREDUCE_BASE_OFFSET, - send_fragment, iboffload, coll_fragment); - if (OPAL_UNLIKELY(NULL == send_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting send task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - /* Pasha: ugly but faster, set inline on first send */ - SENDWR(send_task)->send_flags |= IBV_SEND_INLINE; - - - APPEND_TO_TASKLIST(mqe_ptr_to_set, send_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task); - - /* l_operand = &send_fragment->sg_entry; */ - operand = send_fragment->sg_entry; - l_operand = &operand; - - /* Recursive-doubling exchange */ - rc = rdma_do_exchange(iboffload, coll_request, &mqe_ptr_to_set, - &last_wait, &l_operand, &r_operand); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return rc; - } - - /* Pasha: This flow is broken, print error */ - if (false == coll_request->do_calc_in_cpu) { - ML_ERROR(("Calc in CPU must be enabled !!!")); - /* Calc and send the result to myself */ - calc_task = mca_bcol_iboffload_get_calc_task(iboffload, - my_rank, coll_request->qp_index, NULL, - l_operand, - r_operand, coll_request, NO_INLINE); - if (OPAL_UNLIKELY(NULL == calc_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting calc task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, calc_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, calc_task); - - preposted_recv_frag = - mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, my_rank, coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - /* RLG need cleanup */ - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - /* Wait for calc from myself */ - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, my_rank, 1, - preposted_recv_frag, coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting wait task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - } else { - coll_request->l_operand = (uint64_t) (uintptr_t) - ((unsigned char *)l_operand->addr); - coll_request->r_operand = (uint64_t) (uintptr_t) - ((unsigned char *) (coll_request->l_operand) + ALLREDUCE_BASE_OFFSET); - } - - *mqe_ptr_to_set = NULL; -/* Vasily: TODO with MACRO */ - /* finish initializing full message descriptor */ - coll_request->n_fragments = 1; - coll_request->n_frags_sent = 1; - - /* Pasha: need to set to true in upper layer */ - coll_request->user_handle_freed = false; - - last_wait->flags |= MQE_WR_FLAG_SIGNAL; - - coll_fragment->signal_task_wr_id = last_wait->wr_id; - last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment; - - /* post the mwr */ - IBOFFLOAD_VERBOSE(10, ("Post tasks.\n")); - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload, coll_fragment->to_post); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_ERROR(("MQE task posting failing.\n")); - /* Note: need to clean up */ - return 
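Setting IBV_SEND_INLINE on that first send lets the HCA copy the small operand out of the work request itself: the source buffer is reusable as soon as ibv_post_send() returns and no lkey lookup is performed. It is only legal up to the QP's negotiated max_inline_data. At the verbs level:

#include <infiniband/verbs.h>
#include <stdint.h>
#include <string.h>

static int post_small_inline(struct ibv_qp *qp, void *buf, uint32_t len)
{
    struct ibv_sge sge = {
        .addr   = (uint64_t) (uintptr_t) buf,
        .length = len,
        .lkey   = 0,               /* lkey is ignored for inline data */
    };
    struct ibv_send_wr wr, *bad_wr = NULL;

    memset(&wr, 0, sizeof(wr));
    wr.opcode     = IBV_WR_SEND;
    wr.sg_list    = &sge;
    wr.num_sge    = 1;
    wr.send_flags = IBV_SEND_INLINE | IBV_SEND_SIGNALED;

    return ibv_post_send(qp, &wr, &bad_wr); /* buf reusable on return */
}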
rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload->super, coll_request->order_info); - - return OMPI_SUCCESS; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Adding collfrag to collfrag_pending")); - return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload); -} -/* - * non power of 2 & EXCHANGE_NODE case, - * need to wait for message from "extra" proc. - */ -static int -non_pure_recursive_doubling(mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collreq_t *coll_request) -{ - /* local variables */ - int rc = OMPI_SUCCESS, extra_rank, pair_rank, - my_rank = ((mca_sbgp_base_module_t *) iboffload->ibnet)->my_index; - - mca_bcol_iboffload_frag_t *calc_fragment, - *preposted_recv_frag; - - mca_bcol_iboffload_task_t *wait_task, - *calc_task; - - struct ibv_sge *r_operand = NULL, - *l_operand = NULL; - - struct mqe_task *last_wait, /* we need ask from completion on last wait */ - *last_send; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - netpatterns_pair_exchange_node_t *my_exchange_node = - &iboffload->recursive_doubling_tree; - - struct mqe_task **mqe_ptr_to_set; - mca_bcol_iboffload_collfrag_t *coll_fragment = (mca_bcol_iboffload_collfrag_t *) - opal_list_get_last(&coll_request->work_requests); - - mqe_ptr_to_set = &coll_fragment->to_post; - - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload, coll_fragment->mq_index, coll_fragment->mq_credits))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - IBOFFLOAD_VERBOSE(10, ("Allreduce starting: type %d op %d, " - "n_extra_sources - %d.\n", cm->map_ompi_to_ib_dt[coll_request->dtype->id], - cm->map_ompi_to_ib_calcs[coll_request->op->op_type], - my_exchange_node->n_extra_sources)); - - extra_rank = my_exchange_node->rank_extra_source; - - preposted_recv_frag = - mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, extra_rank, coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - /* RLG need cleanup */ - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - /* Wait for data from extra node */ - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, extra_rank, 1, - preposted_recv_frag, coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting wait task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - - pair_rank = my_exchange_node->rank_exchanges[0]; - - calc_fragment = mca_bcol_iboffload_get_send_frag(coll_request, - pair_rank, coll_request->qp_index, - MCA_IBOFFLOAD_IB_DRIVER_OPERAND_SIZE + - MCA_IBOFFLOAD_CALC_SIZE_EXT, 0, - SBUF, - MCA_BCOL_IBOFFLOAD_SEND_FRAG_ML_CALC); - if (OPAL_UNLIKELY(NULL == calc_fragment)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting and packing send frag.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - /* Calc extra node operand with mine and send the result - to the first algorithm partner */ - preposted_recv_frag->sg_entry.length = MCA_IBOFFLOAD_IB_DRIVER_OPERAND_SIZE + - MCA_IBOFFLOAD_CALC_SIZE_EXT; - calc_task = mca_bcol_iboffload_get_calc_task(iboffload, - pair_rank, coll_request->qp_index, calc_fragment, - &preposted_recv_frag->sg_entry, - &calc_fragment->sg_entry, coll_request, NO_INLINE); - if (OPAL_UNLIKELY(NULL == calc_task)) { - 
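/*
 * [Editorial sketch] The exchange schedule consumed here (rank_exchanges[],
 * n_extra_sources, rank_extra_source) comes from the netpatterns
 * recursive-doubling tree. A minimal, self-contained sketch of the same
 * pairing rule, assuming only standard C -- the sketch_* name is
 * hypothetical and is not the netpatterns API:
 */
static int sketch_is_extra_node(int my_rank, int group_size, int *partner)
{
    int pow2 = 1;

    while ((pow2 << 1) <= group_size) {
        pow2 <<= 1;                  /* largest power of two <= group_size */
    }
    if (my_rank >= pow2) {
        *partner = my_rank - pow2;   /* EXTRA node: fold operand into this
                                      * EXCHANGE rank, then wait for the
                                      * final result from it */
        return 1;
    }
    /* EXCHANGE node: pairs with (my_rank + pow2) if that rank exists,
     * then runs log2(pow2) rounds with pair_rank = my_rank ^ (1 << round) */
    *partner = (my_rank + pow2 < group_size) ? my_rank + pow2 : -1;
    return 0;
}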
IBOFFLOAD_VERBOSE(10, ("Failing for getting calc task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, calc_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, calc_task); - - /* Calc extra node operand with mine and store the result on my buff */ - calc_task = mca_bcol_iboffload_get_calc_task(iboffload, - my_rank, coll_request->qp_index, NULL, - &preposted_recv_frag->sg_entry, - &calc_fragment->sg_entry, coll_request, NO_INLINE); - if (OPAL_UNLIKELY(NULL == calc_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting calc task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, calc_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, calc_task); - - preposted_recv_frag = - mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, my_rank, coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - /* RLG need cleanup */ - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - /* Wait for calc from myself */ - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, my_rank, 1, - preposted_recv_frag, coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting wait task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - - l_operand = &preposted_recv_frag->sg_entry; - l_operand->length = MCA_IBOFFLOAD_IB_DRIVER_OPERAND_SIZE + - MCA_IBOFFLOAD_CALC_SIZE_EXT; - /* Recursive-doubling exchange */ - rc = do_exchange(iboffload, coll_request, &mqe_ptr_to_set, - &last_wait, &l_operand, &r_operand); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return rc; - } - - /* Need to send message to "extra" proc => - one more final result calc for extra node */ - calc_task = mca_bcol_iboffload_get_calc_task(iboffload, - extra_rank, coll_request->qp_index, NULL, - l_operand, - r_operand, coll_request, NO_INLINE); - if (OPAL_UNLIKELY(NULL == calc_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting calc task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, calc_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, calc_task); - - if (false == coll_request->do_calc_in_cpu) { - /* Calc and send the result to myself */ - calc_task = mca_bcol_iboffload_get_calc_task(iboffload, - my_rank, coll_request->qp_index, NULL, - l_operand, - r_operand, coll_request, NO_INLINE); - if (OPAL_UNLIKELY(NULL == calc_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting calc task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, calc_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, calc_task); - - preposted_recv_frag = - mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, my_rank, coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - /* RLG need cleanup */ - rc = OMPI_ERR_RESOURCE_BUSY; - goto out_of_resources; - } - - /* Wait for calc from myself */ - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, my_rank, 1, - preposted_recv_frag, coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting wait task.\n")); - rc = OMPI_ERR_RESOURCE_BUSY; - goto 
out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - } else { - coll_request->l_operand = l_operand->addr; - coll_request->r_operand = r_operand->addr; - } - - *mqe_ptr_to_set = NULL; - - /* finish initializing full message descriptor */ - coll_request->n_fragments = 1; - coll_request->n_frags_sent = 1; - - assert(NULL != last_wait); - - last_wait->flags |= MQE_WR_FLAG_SIGNAL; - coll_fragment->signal_task_wr_id = last_wait->wr_id; - last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment; - - /* post the mwr */ - IBOFFLOAD_VERBOSE(10, ("Post tasks.\n")); - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload, coll_fragment->to_post); - if(OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_ERROR(("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload->super, coll_request->order_info); - - return OMPI_SUCCESS; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Adding collfrag to collfrag_pending")); - return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload); -} - -static int mca_bcol_iboffload_allreduce_init( - bcol_function_args_t *fn_arguments, - mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t **coll_request, - bool if_bcol_last) -{ - int rc; - - bool exclude_case; - ompi_free_list_item_t *item; - mca_bcol_iboffload_collfrag_t *coll_fragment; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - IBOFFLOAD_VERBOSE(10, ("Calling for mca_bcol_iboffload_allreduce_init.\n")); - - OMPI_FREE_LIST_WAIT(&cm->collreqs_free, item, rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failing for coll request free list waiting.\n")); - return rc; - } - - (*coll_request) = (mca_bcol_iboffload_collreq_t *) item; - (*coll_request)->progress_fn = iboffload->allreduce_algth; - - (*coll_request)->if_bcol_last = if_bcol_last; - - exclude_case = (non_pure_recursive_doubling == iboffload->allreduce_algth && - (OMPI_OP_SUM == fn_arguments->op->op_type && - OMPI_DATATYPE_MPI_DOUBLE == fn_arguments->dtype->id)); - - (*coll_request)->do_calc_in_cpu = cm->last_calc_in_cpu && !exclude_case; - - if (false == (*coll_request)->do_calc_in_cpu || - allreduce_extra_node == iboffload->allreduce_algth) { - (*coll_request)->do_calc_in_cpu = false; /* Relevant for extra node only */ - (*coll_request)->completion_cb_fn = - mca_bcol_iboffload_unpack_res_to_user; - } else { - (*coll_request)->completion_cb_fn = - mca_bcol_iboffload_calc_res_to_user; - } - - (*coll_request)->module = iboffload; - (*coll_request)->op = fn_arguments->op; - - (*coll_request)->dtype = fn_arguments->dtype; - (*coll_request)->count = fn_arguments->count; - - (*coll_request)->ml_buffer_index = fn_arguments->buffer_index; - (*coll_request)->buffer_info[SBUF].lkey = iboffload->rdma_block.ib_info.lkey; - - (*coll_request)->order_info = &fn_arguments->order_info; - - /* ML buffer was provided, no need to pack the data. 
- * It is few assumption here: - * we CAN touch and change ML buffer - */ - (*coll_request)->buffer_info[SBUF].buf = (void *) ( - (unsigned char *) fn_arguments->sbuf + - (size_t) fn_arguments->sbuf_offset); - - (*coll_request)->buffer_info[SBUF].offset = fn_arguments->sbuf_offset; - - (*coll_request)->buffer_info[RBUF].buf = (void *) ( - (unsigned char *) fn_arguments->rbuf + - (size_t) fn_arguments->rbuf_offset); - - (*coll_request)->buffer_info[RBUF].offset = fn_arguments->rbuf_offset; - - if(mca_bcol_iboffload_component.enable_rdma_calc) { - (*coll_request)->qp_index = MCA_BCOL_IBOFFLOAD_QP_BARRIER; - } else { - (*coll_request)->qp_index = MCA_BCOL_IBOFFLOAD_QP_REGULAR; - } - - (*coll_request)->n_frag_mpi_complete = 0; - (*coll_request)->n_frag_net_complete = 0; - - fn_arguments->bcol_opaque_data = (void *) (*coll_request); - - /* - * setup collective work request - */ - - /* get collective frag */ - coll_fragment = &((*coll_request)->first_collfrag); - mca_bcol_iboffload_collfrag_init(coll_fragment); - - coll_fragment->mq_index = COLL_MQ; - coll_fragment->alg = RECURSIVE_DOUBLING_ALLREDUCE_ALG; - - coll_fragment->mq_credits = - iboffload->alg_task_consump[RECURSIVE_DOUBLING_ALLREDUCE_ALG]; - - /* set pointers for (coll frag) <-> (coll full request) */ - MCA_BCOL_IBOFFLOAD_SET_COLL_REQ_LINKS(*coll_request, coll_fragment); - - coll_fragment->unpack_size = - mca_bcol_base_get_buff_length(fn_arguments->dtype, fn_arguments->count); - - IBOFFLOAD_VERBOSE(10, ("The input data is %lf", *(double *) (*coll_request)->buffer_info[SBUF].buf)); - - return OMPI_SUCCESS; -} - -static int mca_bcol_iboffload_allreduce_intra(bcol_function_args_t *fn_arguments, - struct mca_bcol_base_function_t *const_args) -{ - /* local variables */ - int rc; - - mca_bcol_iboffload_collreq_t *coll_request = NULL; - mca_bcol_iboffload_module_t *iboffload = - (mca_bcol_iboffload_module_t *) const_args->bcol_module; - - /* Pasha: please do not touch this line, it used for ML buffer recycling barrier call */ - bool if_bcol_last = ((const_args->index_of_this_type_in_collective + 1) == - const_args->n_of_this_type_in_collective); - - MCA_BCOL_CHECK_ORDER(const_args->bcol_module, fn_arguments); - - IBOFFLOAD_VERBOSE(10, ("n_of_this_type_in_a_row %d, index_in_consecutive_same_bcol_calls %d", - const_args->n_of_this_type_in_a_row, - const_args->index_in_consecutive_same_bcol_calls + 1)); - - IBOFFLOAD_VERBOSE(10, ("Allreduce started.\n")); - fn_arguments->result_in_rbuf = true; - - rc = mca_bcol_iboffload_allreduce_init(fn_arguments, iboffload, - &coll_request, if_bcol_last); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Get error from mca_bcol_iboffload_allreduce_init.\n")); - return rc; - } - - /* Allreduce starting */ - rc = iboffload->allreduce_algth(iboffload, coll_request); - if (OPAL_UNLIKELY(OMPI_ERROR == rc)) { - return BCOL_FN_NOT_STARTED; - } - - IBOFFLOAD_VERBOSE(10, ("Wait for completions.\n")); - - /* done */ - return BCOL_FN_STARTED; -} - -static int mca_bcol_iboffload_allreduce_progress( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_iboffload_collreq_t *coll_request = - (mca_bcol_iboffload_collreq_t *) - input_args->bcol_opaque_data; - - if (BCOL_IS_COMPLETED(coll_request)) { - coll_request->user_handle_freed = true; - if (COLLREQ_IS_DONE(coll_request)) { - IBOFFLOAD_VERBOSE(10, ("Coll request already done.\n")); - RELEASE_COLLREQ(coll_request); - } - - IBOFFLOAD_VERBOSE(10, ("Allreduce already done.\n")); - return BCOL_FN_COMPLETE; - } - - 
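/*
 * [Editorial sketch] How a caller is expected to drive the init/progress
 * split used throughout this file: the _intra() entry point posts the MQE
 * task list and returns BCOL_FN_STARTED, and the upper layer then polls the
 * matching _progress() function until the signalled WAIT task completes.
 * A minimal driver loop under those assumptions (sketch_* is a hypothetical
 * helper, not part of the component):
 */
static int sketch_drive_allreduce(bcol_function_args_t *args,
                                  struct mca_bcol_base_function_t *fn)
{
    int ret = mca_bcol_iboffload_allreduce_intra(args, fn);

    if (BCOL_FN_STARTED != ret) {
        return ret;                  /* not started: caller may retry later */
    }
    while (BCOL_FN_COMPLETE !=
           (ret = mca_bcol_iboffload_allreduce_progress(args, fn))) {
        opal_progress();             /* drain network completions */
    }
    return ret;
}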
return BCOL_FN_STARTED; -} - -int mca_bcol_iboffload_allreduce_first_call(mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collreq_t *coll_request) -{ - netpatterns_pair_exchange_node_t *my_exchange_node = - &iboffload->recursive_doubling_tree; - - int i = 0, my_rank = iboffload->ibnet->super.my_index, - n_exchanges = my_exchange_node->n_exchanges, - *exchanges = my_exchange_node->rank_exchanges, - n_extra_src = my_exchange_node->n_extra_sources, - rank_extra_src = my_exchange_node->rank_extra_source; - - mca_bcol_iboffload_endpoint_t *ep = iboffload->endpoints[my_rank]; - - /* Connecting to myself */ - while (OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - - iboffload->alg_task_consump[RECURSIVE_DOUBLING_ALLREDUCE_ALG] = 0; - - if (0 < n_extra_src) { - iboffload->alg_task_consump[RECURSIVE_DOUBLING_ALLREDUCE_ALG] += 4; /* Two CALCs and two WAITs tasks */ - ep = iboffload->endpoints[rank_extra_src]; - while (OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - } - - for (i = 0; i < n_exchanges; ++i) { - iboffload->alg_task_consump[RECURSIVE_DOUBLING_ALLREDUCE_ALG] += 4; /* Two CALCs and two WAITs tasks */ - ep = iboffload->endpoints[exchanges[i]]; - - while (OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - } - - iboffload->alg_task_consump[RECURSIVE_DOUBLING_ALLREDUCE_ALG] += 4; /* Two CALCs and two WAITs tasks */ - - if (0 < my_exchange_node->n_extra_sources) { - iboffload->allreduce_algth = - (EXTRA_NODE == my_exchange_node->node_type)? - allreduce_extra_node: - non_pure_recursive_doubling; - } else { - if(mca_bcol_iboffload_component.enable_rdma_calc) { - iboffload->allreduce_algth = - rdma_pure_recursive_doubling; - } else { - iboffload->allreduce_algth = - pure_recursive_doubling; - } - } - - return iboffload->allreduce_algth(iboffload, coll_request); -} - -int mca_bcol_iboffload_allreduce_register(mca_bcol_base_module_t *super) -{ - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - IBOFFLOAD_VERBOSE(10, ("Register iboffload Allreduce.\n")); - - comm_attribs.bcoll_type = BCOL_ALLREDUCE; - - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - comm_attribs.data_src = DATA_SRC_KNOWN; - - mca_bcol_base_set_attributes(super, - &comm_attribs, &inv_attribs, - mca_bcol_iboffload_allreduce_intra, - mca_bcol_iboffload_allreduce_progress); - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_barrier.c b/ompi/mca/bcol/iboffload/bcol_iboffload_barrier.c deleted file mode 100644 index 1eb47f5921..0000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_barrier.c +++ /dev/null @@ -1,934 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#include -#include -#include -#include - -#include "bcol_iboffload.h" -#include "bcol_iboffload_frag.h" -#include "bcol_iboffload_task.h" -#include "bcol_iboffload_collreq.h" -#include "bcol_iboffload_collfrag.h" -#include "bcol_iboffload_endpoint.h" -#include "ompi/mca/coll/ml/coll_ml_allocation.h" - -static int mca_bcol_iboffload_barrier_init( - bcol_function_args_t *input_args, - mca_bcol_iboffload_module_t *iboffload, - collective_message_completion_callback_function cb_fn, - struct mca_bcol_iboffload_collreq_t **coll_request); - -/** - * Start barrier - */ - -int mca_bcol_iboffload_barrier_intra_recursive_doubling( - mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request) -{ - /* local variables */ - mca_bcol_iboffload_task_t *send_task = NULL, - *wait_task = NULL; - - struct mqe_task **mqe_ptr_to_set = NULL; - mca_bcol_iboffload_collfrag_t *coll_fragment = NULL; - - struct mqe_task *last_wait = NULL, /* we need ask from completion on last wait */ - *last_send = NULL; /* If it no wait, we need ask for completion on last send */ - - int rc, exchange, extra_rank, pair_rank; - - - mca_bcol_iboffload_frag_t *send_fragment = NULL, - *preposted_recv_frag = NULL; - - netpatterns_pair_exchange_node_t *my_exchange_node = - &iboffload->recursive_doubling_tree; - - IBOFFLOAD_VERBOSE(10, ("Calling for mca_bcol_iboffload_barrier_intra_recursive_doubling.\n")); - - coll_fragment = (mca_bcol_iboffload_collfrag_t *) - opal_list_get_last(&coll_request->work_requests); - /* Set mq credits */ - coll_fragment->mq_credits = iboffload->alg_task_consump[RECURSIVE_DOUBLING_BARRIER_ALG]; - - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload, coll_fragment->mq_index, coll_fragment->mq_credits))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - - goto out_of_resources; - } - - coll_fragment->alg = RECURSIVE_DOUBLING_BARRIER_ALG; - - /* - * NOTE: need to generate template, if this will be a multiple fragment - * message. This way we can progress the collective w/o knowing it's - * type - actually, this is not the case for barrier, but just a note - * to remind us that we need to generalize this. - */ - - mqe_ptr_to_set = &coll_fragment->to_post; - - /* - * Fill in the communication pattern - */ - - /* - * If non power of 2, may need to wait for message from "extra" proc. 
- */ - - if (0 < my_exchange_node->n_extra_sources) { - if (EXCHANGE_NODE == my_exchange_node->node_type) { - /* I will participate in the exchange (of the algorithm) - - * wait for signal from extra process */ - extra_rank = my_exchange_node->rank_extra_source; - preposted_recv_frag = mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, extra_rank, coll_request->qp_index); - - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - IBOFFLOAD_VERBOSE(10, ("Non power of 2 case: " - "Failing for getting prepost recv frag.\n")); - goto out_of_resources; - } - - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, - extra_rank, 1, preposted_recv_frag, coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Non power of 2 case: " - "Failing for getting wait task.\n")); - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - } else { - /* I will not participate in the exchange - so just "register" as here */ - extra_rank = my_exchange_node->rank_extra_source; - /* send - no need to send any data, in-order delivery */ - send_fragment = mca_bcol_iboffload_get_send_frag(coll_request, - extra_rank, coll_request->qp_index, 0, - 0, SBUF,MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY); - - send_task = mca_bcol_iboffload_get_send_task(iboffload, extra_rank, - coll_request->qp_index, send_fragment, coll_fragment, INLINE); - if (OPAL_UNLIKELY(NULL == send_task)) { - IBOFFLOAD_VERBOSE(10, ("Non power of 2 case: " - "Failing for getting send task.\n")); - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, send_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task); - } - } - - /* loop over exchange send/recv pairs */ - for (exchange = 0; exchange < my_exchange_node->n_exchanges; ++exchange) { - /* rank of exchange partner */ - pair_rank = my_exchange_node->rank_exchanges[exchange]; - /* post send */ - send_fragment = mca_bcol_iboffload_get_send_frag(coll_request, - pair_rank, coll_request->qp_index, 0, - 0, SBUF, MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY); - - assert(NULL != send_fragment); - - send_task = mca_bcol_iboffload_get_send_task(iboffload, pair_rank, - coll_request->qp_index, - send_fragment, coll_fragment, INLINE); - if (OPAL_UNLIKELY(NULL == send_task)) { - IBOFFLOAD_VERBOSE(10, ("Exchaging: " - "Failing for getting send task.\n")); - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, send_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task); - - /* post wait */ - preposted_recv_frag = mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, pair_rank, coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - IBOFFLOAD_VERBOSE(10, ("Exchaging: " - "Failing for getting prepost recv frag.\n")); - goto out_of_resources; - } - - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, pair_rank, 1, - preposted_recv_frag, - coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Exchaging: " - "Failing for getting wait task.\n")); - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - } - - /* if non power of 2, may need to send message to "extra" proc */ - if (0 < my_exchange_node->n_extra_sources) { - if (EXTRA_NODE == my_exchange_node->node_type) { - /* I will not 
participate in the exchange - - * wait for signal from exchange process */ - extra_rank = my_exchange_node->rank_extra_source; - /* post wait */ - preposted_recv_frag = - mca_bcol_iboffload_get_preposted_recv_frag(iboffload, extra_rank, - coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - IBOFFLOAD_VERBOSE(10, ("Sending to 'extra' node: " - "Failing for getting prepost recv frag.\n")); - goto out_of_resources; - } - - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, extra_rank, 1, - preposted_recv_frag, - coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Sending to 'extra' node: " - "Failing for getting wait task.\n")); - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - - } else { - /* I will participate in the exchange - - * send signal to extra process */ - extra_rank = my_exchange_node->rank_extra_source; - send_fragment = mca_bcol_iboffload_get_send_frag(coll_request, - extra_rank, coll_request->qp_index, 0, - 0, SBUF, MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY); - - send_task = mca_bcol_iboffload_get_send_task( - iboffload, extra_rank, - coll_request->qp_index, - send_fragment, coll_fragment, INLINE); - if (OPAL_UNLIKELY(NULL == send_task)) { - IBOFFLOAD_VERBOSE(10, ("Sending to 'extra' node: " - "Failing for getting send task.\n")); - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, send_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task); - } - } - - /* Fill in the the rest of the coll_fragment */ - IBOFFLOAD_VERBOSE(10, ("Fill in the the rest of the coll_fragment.\n")); - /* end of list */ - *mqe_ptr_to_set = NULL; - - /* finish initializing full message descriptor */ - coll_request->n_fragments = 1; - coll_request->n_frags_sent = 1; - - coll_request->n_frag_mpi_complete = 0; - coll_request->n_frag_net_complete = 0; - - coll_request->user_handle_freed = false; - - last_wait->flags |= MQE_WR_FLAG_SIGNAL; - coll_fragment->signal_task_wr_id = last_wait->wr_id; - last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment; - - /* post the mwr */ - if (MCA_BCOL_IBOFFLOAD_QP_SYNC != coll_request->qp_index) { - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload, coll_fragment->to_post); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload->super, coll_request->order_info); - } else { - /* Special flow for ML service barrier , only this function supposed to - post service requests */ - struct mqe_task *bad_mqe = NULL; - assert (MCA_BCOL_IBOFFLOAD_QP_SYNC == coll_request->qp_index ); - /* Post to special service MQ - 1 */ - rc = mqe_post_task(iboffload->mq[1], coll_fragment->to_post, &bad_mqe); - if (OPAL_UNLIKELY(0 != rc)) { - IBOFFLOAD_ERROR(("ibv_post_mqe failed on device (%s), errno says: %s," - " the return code is [%d]\n", - ibv_get_device_name(iboffload->device->dev.ib_dev), - strerror(errno), rc)); - return OMPI_ERROR; - } - } - - IBOFFLOAD_VERBOSE(10, ("Return success.\n")); - return OMPI_SUCCESS; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Barrier, adding collfrag to collfrag_pending.\n")); - return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload); -} - -int mca_bcol_iboffload_barrier_intra_recursive_doubling_start( - struct 
mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request) -{ - int rc; - - rc = mca_bcol_iboffload_rec_doubling_start_connections(iboffload); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return rc; - } - - iboffload->barrier_algth = - mca_bcol_iboffload_barrier_intra_recursive_doubling; - return - mca_bcol_iboffload_barrier_intra_recursive_doubling(iboffload, coll_request); -} - -int mca_bcol_iboffload_nb_memory_service_barrier_start( - struct mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request) -{ - int rc; - - rc = mca_bcol_iboffload_rec_doubling_start_connections(iboffload); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return rc; - } - - iboffload->memsync_algth = - mca_bcol_iboffload_barrier_intra_recursive_doubling; - - return - mca_bcol_iboffload_barrier_intra_recursive_doubling - (iboffload, coll_request); -} - -int mca_bcol_iboffload_nb_memory_service_barrier_intra(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - - /* local variables */ - int rc; - mca_bcol_iboffload_collreq_t *coll_request; - mca_bcol_iboffload_module_t *iboffload = - (mca_bcol_iboffload_module_t *) const_args->bcol_module; - /* - * recursive doubling - */ - - - IBOFFLOAD_VERBOSE(10, ("Memory syncranization barrier was started\n")); - - /* init barrier collective request */ - rc = mca_bcol_iboffload_barrier_init(input_args, iboffload, NULL, &coll_request); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_ERROR(("Get error from mca_bcol_iboffload_barrier_init")); - return rc; - } - - /* set the qp index to special qp that is used only for synchronization */ - coll_request->qp_index = MCA_BCOL_IBOFFLOAD_QP_SYNC; - /* overwrite mq index to run over service setup */ - coll_request->first_collfrag.mq_index = SERVICE_MQ; - - /* start the barrier */ - rc = iboffload->memsync_algth(iboffload, coll_request); - if (OPAL_UNLIKELY(OMPI_ERROR == rc)) { - return rc; - } - - /* complete the barrier - progress releases full request descriptors */ - IBOFFLOAD_VERBOSE(10, ("Memory syncranization barrier was started\n")); - - /* done */ - return BCOL_FN_STARTED; -} - -/* Recursive K - ing*/ -static int recursive_knomial_start_connections(struct mca_bcol_iboffload_module_t *iboffload) -{ - netpatterns_k_exchange_node_t *my_exchange_node = - &iboffload->knomial_exchange_tree; - int k, i, n_exchanges = my_exchange_node->n_exchanges, - **exchanges = my_exchange_node->rank_exchanges, - n_extra_src = my_exchange_node->n_extra_sources, - tree_order = my_exchange_node->tree_order - 1, - rank_extra_src; - - mca_bcol_iboffload_endpoint_t *ep; - - iboffload->alg_task_consump[RECURSIVE_KNOMIAL_BARRIER_ALG] += 0; - - IBOFFLOAD_VERBOSE(10, ("\nMy sbgp rank (index) - %d, " - "num of endpoints = %d, iboffload module - %p" - " extra n %d, n_exchanges %d", - iboffload->ibnet->super.my_index, iboffload->num_endpoints, iboffload, - n_extra_src, n_exchanges)); - if (0 < n_extra_src) { - for (k = 0; k < n_extra_src; k++) { - iboffload->alg_task_consump[RECURSIVE_KNOMIAL_BARRIER_ALG] += 2; /* One send task one wait */ - rank_extra_src = my_exchange_node->rank_extra_sources_array[k]; - ep = iboffload->endpoints[rank_extra_src]; - if (iboffload->ibnet->super.my_index < ep->index) { - while(0 == (ep)->remote_zero_rdma_addr.addr) { - opal_progress(); - } - } else { - IBOFFLOAD_VERBOSE(10, ("Trying to connect - %d", ep->index)); - while (OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - } - } - } - - for (i = 0; 
i < n_exchanges; ++i) { - for (k = 0; k < tree_order; k++) { - iboffload->alg_task_consump[RECURSIVE_KNOMIAL_BARRIER_ALG] += 2; /* One send task one wait */ - ep = iboffload->endpoints[exchanges[i][k]]; - - IBOFFLOAD_VERBOSE(10, ("Trying to connect - %d", ep->index)); - if (iboffload->ibnet->super.my_index < ep->index) { - while(0 == (ep)->remote_zero_rdma_addr.addr) { - opal_progress(); - } - } else { - while (OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - } - } - } - - return OMPI_SUCCESS; -} - -static int mca_bcol_iboffload_barrier_intra_recursive_knomial( - mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request) -{ - /* local variables */ - mca_bcol_iboffload_task_t *send_task = NULL, - *wait_task = NULL; - - struct mqe_task **mqe_ptr_to_set = NULL; - mca_bcol_iboffload_collfrag_t *coll_fragment = NULL; - - struct mqe_task *last_wait = NULL, /* we need ask from completion on last wait */ - *last_send = NULL; /* If it no wait, we need ask for completion on last send */ - - int rc, exchange, extra_rank, pair_rank, k; - - - mca_bcol_iboffload_frag_t *send_fragment = NULL, - *preposted_recv_frag = NULL; - - netpatterns_k_exchange_node_t *my_exchange_node = - &iboffload->knomial_exchange_tree; - IBOFFLOAD_VERBOSE(10, ("Calling for mca_bcol_iboffload_barrier_intra_recursive_knomial. Node type %d\n", my_exchange_node->node_type)); - - coll_fragment = (mca_bcol_iboffload_collfrag_t *) - opal_list_get_last(&coll_request->work_requests); - - /* Set mq credits */ - coll_fragment->mq_credits = iboffload->alg_task_consump[RECURSIVE_KNOMIAL_BARRIER_ALG]; - - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload, coll_fragment->mq_index, coll_fragment->mq_credits))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - - goto out_of_resources; - } - - coll_fragment->alg = RECURSIVE_KNOMIAL_BARRIER_ALG; - - /* - * NOTE: need to generate template, if this will be a multiple fragment - * message. This way we can progress the collective w/o knowing it's - * type - actually, this is not the case for barrier, but just a note - * to remind us that we need to generalize this. - */ - - mqe_ptr_to_set = &coll_fragment->to_post; - - /* - * Fill in the communication pattern - */ - - /* - * If non power of 2, may need to wait for message from "extra" proc. 
- */ - - if (0 < my_exchange_node->n_extra_sources) { - if (EXCHANGE_NODE == my_exchange_node->node_type) { - /* I will participate in the exchange (of the algorithm) - - * wait for signal from extra process */ - for (k = 0; k < my_exchange_node->n_extra_sources; k++) { - extra_rank = my_exchange_node->rank_extra_sources_array[k]; - IBOFFLOAD_VERBOSE(10,("Exchange [ %d ] extra get %d", k, extra_rank)); - - preposted_recv_frag = mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, extra_rank, coll_request->qp_index); - - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - IBOFFLOAD_VERBOSE(10, ("Non power of 2 case: " - "Failing for getting prepost recv frag.\n")); - goto out_of_resources; - } - - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, - extra_rank, 1, preposted_recv_frag, coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Non power of 2 case: " - "Failing for getting wait task.\n")); - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - } - } else { - /* I will not participate in the exchange - so just "register" as here */ - extra_rank = my_exchange_node->rank_extra_sources_array[0]; - IBOFFLOAD_VERBOSE(10,("Send to proxy %d", extra_rank)); - /* send - no need to send any data, in-order delivery */ - send_fragment = mca_bcol_iboffload_get_send_frag(coll_request, - extra_rank, coll_request->qp_index, 0, - 0, SBUF, MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY); - - send_task = mca_bcol_iboffload_get_send_task(iboffload, extra_rank, - coll_request->qp_index, send_fragment, coll_fragment, INLINE); - if (OPAL_UNLIKELY(NULL == send_task)) { - IBOFFLOAD_VERBOSE(10, ("Non power of 2 case: " - "Failing for getting send task.\n")); - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, send_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task); - } - } - - /* loop over exchange send/recv pairs */ - for (exchange = 0; exchange < my_exchange_node->n_exchanges; ++exchange) { - for (k = 0; k < my_exchange_node->tree_order - 1; k++) { - /* rank of exchange partner */ - pair_rank = my_exchange_node->rank_exchanges[exchange][k]; - IBOFFLOAD_VERBOSE(10,("Exchange [ %d ,%d ] send to %d", exchange, k, pair_rank)); - /* post send */ - send_fragment = mca_bcol_iboffload_get_send_frag(coll_request, - pair_rank, coll_request->qp_index, 0, - 0, SBUF, MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY); - - send_task = mca_bcol_iboffload_get_send_task(iboffload, pair_rank, - coll_request->qp_index, - send_fragment, coll_fragment, INLINE); - if (OPAL_UNLIKELY(NULL == send_task)) { - IBOFFLOAD_VERBOSE(10, ("Exchaging: " - "Failing for getting send task.\n")); - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, send_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task); - } - - for (k = 0; k < my_exchange_node->tree_order - 1; k++) { - - pair_rank = my_exchange_node->rank_exchanges[exchange][k]; - IBOFFLOAD_VERBOSE(10,("Exchange [ %d ,%d ] recv %d", exchange, k, pair_rank)); - /* post wait */ - preposted_recv_frag = mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, pair_rank, coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - IBOFFLOAD_VERBOSE(10, ("Exchaging: " - "Failing for getting prepost recv frag.\n")); - goto out_of_resources; - } - - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, pair_rank, 1, - 
preposted_recv_frag, coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Exchaging: " - "Failing for getting wait task.\n")); - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - } - } - - /* if non power of 2, may need to send message to "extra" proc */ - if (0 < my_exchange_node->n_extra_sources) { - if (EXTRA_NODE == my_exchange_node->node_type) { - /* I will not participate in the exchange - - * wait for signal from exchange process */ - extra_rank = my_exchange_node->rank_extra_sources_array[0]; - IBOFFLOAD_VERBOSE(10,("Wait from proxy %d", extra_rank)); - /* post wait */ - preposted_recv_frag = - mca_bcol_iboffload_get_preposted_recv_frag(iboffload, extra_rank, - coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - IBOFFLOAD_VERBOSE(10, ("Sending to 'extra' node: " - "Failing for getting prepost recv frag.\n")); - goto out_of_resources; - } - - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, extra_rank, 1, - preposted_recv_frag, - coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Sending to 'extra' node: " - "Failing for getting wait task.\n")); - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - - } else { - /* I will participate in the exchange - - * send signal to extra process */ - for (k = 0; k < my_exchange_node->n_extra_sources; k++) { - extra_rank = my_exchange_node->rank_extra_sources_array[k]; - IBOFFLOAD_VERBOSE(10,("Exchange [ %d ] extra release %d", k, extra_rank)); - - send_fragment = mca_bcol_iboffload_get_send_frag(coll_request, - extra_rank, coll_request->qp_index, 0, - 0, SBUF, MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY); - - send_task = mca_bcol_iboffload_get_send_task( - iboffload, extra_rank, - coll_request->qp_index, - send_fragment, coll_fragment, INLINE); - if (OPAL_UNLIKELY(NULL == send_task)) { - IBOFFLOAD_VERBOSE(10, ("Sending to 'extra' node: " - "Failing for getting send task.\n")); - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, send_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task); - } - } - } - - /* Fill in the the rest of the coll_fragment */ - IBOFFLOAD_VERBOSE(10, ("Fill in the the rest of the coll_fragment.\n")); - /* end of list */ - *mqe_ptr_to_set = NULL; - - /* finish initializing full message descriptor */ - coll_request->n_fragments = 1; - coll_request->n_frags_sent = 1; - - coll_request->n_frag_mpi_complete = 0; - coll_request->n_frag_net_complete = 0; - - coll_request->user_handle_freed = false; - - last_wait->flags |= MQE_WR_FLAG_SIGNAL; - coll_fragment->signal_task_wr_id = last_wait->wr_id; - last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment; - - /* post the mwr */ - if (MCA_BCOL_IBOFFLOAD_QP_SYNC != coll_request->qp_index) { - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload, coll_fragment->to_post); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload->super, coll_request->order_info); - } else { - /* Special flow for ML service barrier , only this function supposed to - post service requests */ - struct mqe_task *bad_mqe = NULL; - assert (MCA_BCOL_IBOFFLOAD_QP_SYNC == 
coll_request->qp_index ); - /* Post to special service MQ - 1 */ - rc = mqe_post_task(iboffload->mq[1], coll_fragment->to_post, &bad_mqe); - if (OPAL_UNLIKELY(0 != rc)) { - IBOFFLOAD_ERROR(("ibv_post_mqe failed on device (%s), errno says: %s," - " the return code is [%d]\n", - ibv_get_device_name(iboffload->device->dev.ib_dev), - strerror(errno), rc)); - return OMPI_ERROR; - } - } - - IBOFFLOAD_VERBOSE(10, ("Return success.\n")); - return OMPI_SUCCESS; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Barrier, adding collfrag to collfrag_pending.\n")); - return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload); -} - -int mca_bcol_iboffload_barrier_intra_recursive_knomial_start( - struct mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request) -{ - int rc; - - rc = recursive_knomial_start_connections(iboffload); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return rc; - } - - iboffload->barrier_algth = - mca_bcol_iboffload_barrier_intra_recursive_knomial; - return - mca_bcol_iboffload_barrier_intra_recursive_knomial(iboffload, coll_request); -} - -int mca_bcol_iboffload_rec_doubling_start_connections(mca_bcol_iboffload_module_t *iboffload) -{ - netpatterns_pair_exchange_node_t *my_exchange_node = - &iboffload->recursive_doubling_tree; - - int i, n_exchanges = my_exchange_node->n_exchanges, - *exchanges = my_exchange_node->rank_exchanges, - n_extra_src = my_exchange_node->n_extra_sources, - rank_extra_src = my_exchange_node->rank_extra_source; - - mca_bcol_iboffload_endpoint_t *ep; - - IBOFFLOAD_VERBOSE(10, ("\nMy sbgp rank (index) - %d, " - "num of endpoints = %d, iboffload module - %p\n", - iboffload->ibnet->super.my_index, iboffload->num_endpoints, iboffload)); - if (0 < n_extra_src) { - iboffload->alg_task_consump[RECURSIVE_DOUBLING_BARRIER_ALG] += 2; /* One send task one wait */ - ep = iboffload->endpoints[rank_extra_src]; - - if (iboffload->ibnet->super.my_index < ep->index) { - while(0 == (ep)->remote_zero_rdma_addr.addr) { - opal_progress(); - } - } else { - IBOFFLOAD_VERBOSE(10, ("Trying to connect - %d", ep->index)); - while (OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - } - } - - for (i = 0; i < n_exchanges; ++i) { - iboffload->alg_task_consump[RECURSIVE_DOUBLING_BARRIER_ALG] += 2; /* One send task one wait */ - ep = iboffload->endpoints[exchanges[i]]; - - if (iboffload->ibnet->super.my_index < ep->index) { - while(0 == (ep)->remote_zero_rdma_addr.addr) { - opal_progress(); - } - } else { - IBOFFLOAD_VERBOSE(10, ("Trying to connect - %d", ep->index)); - while (OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - } - } - - return OMPI_SUCCESS; -} - -static int mca_bcol_iboffload_barrier_init( - bcol_function_args_t *input_args, - mca_bcol_iboffload_module_t *iboffload, - collective_message_completion_callback_function cb_fn, - struct mca_bcol_iboffload_collreq_t **coll_request) -{ - ompi_free_list_item_t *item; - mca_bcol_iboffload_collfrag_t *coll_fragment; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - IBOFFLOAD_VERBOSE(10, ("Calling for mca_bcol_iboffload_barrier_init")); - - OMPI_FREE_LIST_WAIT_MT(&cm->collreqs_free, item); - if (OPAL_UNLIKELY(NULL == item)) { - IBOFFLOAD_VERBOSE(10, ("Failing for coll request free list waiting.\n")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - (*coll_request) = (mca_bcol_iboffload_collreq_t *) item; - (*coll_request)->progress_fn = iboffload->barrier_algth; - 
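/*
 * [Editorial sketch] The cb_fn slot filled in just below is NULL for the
 * plain barrier; the comment that follows notes that a memory-service
 * barrier needs extra work at completion time. A hypothetical callback of
 * that flavor -- the real typedef,
 * collective_message_completion_callback_function, is defined elsewhere in
 * this component, so the signature below is assumed for illustration only:
 */
static int sketch_memsync_completion_cb(struct mca_bcol_iboffload_collreq_t *req)
{
    /* runs once the request is network-complete: e.g. recycle the ML
     * buffer recorded on the request before it is released */
    IBOFFLOAD_VERBOSE(10, ("sync barrier done, ml buffer index %d",
                           req->ml_buffer_index));
    return OMPI_SUCCESS;
}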
- /* - * For usual barrier it is null. For memory - * service barrier we need some work to do - */ - (*coll_request)->completion_cb_fn = cb_fn; - (*coll_request)->order_info = &input_args->order_info; - - (*coll_request)->module = iboffload; - (*coll_request)->ml_buffer_index = input_args->buffer_index; - (*coll_request)->buffer_info[SBUF].offset = 0; - (*coll_request)->buffer_info[RBUF].offset = 0; - (*coll_request)->qp_index = MCA_BCOL_IBOFFLOAD_QP_BARRIER; - - input_args->bcol_opaque_data = (void *) (*coll_request); - - /* - * setup collective work request - */ - - /* get collective frag */ - coll_fragment = &(*coll_request)->first_collfrag; - mca_bcol_iboffload_collfrag_init(coll_fragment); - - coll_fragment->mq_index = COLL_MQ; - - /* set pointers for (coll frag) <-> (coll full request) */ - MCA_BCOL_IBOFFLOAD_SET_COLL_REQ_LINKS(*coll_request, coll_fragment); - - return OMPI_SUCCESS; -} - -/************************************************************************ - ************************ New style Barrier ***************************** - ***********************************************************************/ - -static int mca_bcol_iboffload_new_style_barrier_progress( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_iboffload_collreq_t *coll_request = - (mca_bcol_iboffload_collreq_t *) - input_args->bcol_opaque_data; - - if (BCOL_IS_COMPLETED(coll_request)) { - coll_request->user_handle_freed = true; - if (COLLREQ_IS_DONE(coll_request)) { - IBOFFLOAD_VERBOSE(10, ("Coll request already done.\n")); - RELEASE_COLLREQ(coll_request); - } - - IBOFFLOAD_VERBOSE(10, ("Barrier already done.\n")); - return BCOL_FN_COMPLETE; - } - - return BCOL_FN_STARTED; -} - -static int mca_bcol_iboffload_new_style_barrier_intra( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - /* local variables */ - int rc; - mca_bcol_iboffload_collreq_t *coll_request; - mca_bcol_iboffload_module_t *iboffload = - (mca_bcol_iboffload_module_t *) const_args->bcol_module; - - /* check for ordering */ - MCA_BCOL_CHECK_ORDER(const_args->bcol_module, input_args); - - /* - * recursive doubling - */ - - - IBOFFLOAD_VERBOSE(10, ("Barrier starts.\n")); - - /* init barrier collective reqeust */ - rc = mca_bcol_iboffload_barrier_init(input_args, iboffload, NULL, &coll_request); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_ERROR(("Get error from mca_bcol_iboffload_barrier_init")); - return rc; - } - - /* start the barrier */ - rc = iboffload->barrier_algth(iboffload, coll_request); - if (OPAL_UNLIKELY(OMPI_ERROR == rc)) { - return BCOL_FN_NOT_STARTED; - } - - /* done */ - return BCOL_FN_STARTED; -} - -int mca_bcol_iboffload_barrier_register(mca_bcol_base_module_t *super) -{ - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - IBOFFLOAD_VERBOSE(10, ("Register iboffload Barrier.\n")); - - comm_attribs.bcoll_type = BCOL_BARRIER; - - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - comm_attribs.data_src = DATA_SRC_KNOWN; - - mca_bcol_base_set_attributes(super, - &comm_attribs, &inv_attribs, - mca_bcol_iboffload_new_style_barrier_intra, - mca_bcol_iboffload_new_style_barrier_progress); - - return OMPI_SUCCESS; -} - -int 
mca_bcol_iboffload_memsync_register(mca_bcol_base_module_t *super) -{ - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - IBOFFLOAD_VERBOSE(10, ("Register sync function\n")); - - comm_attribs.bcoll_type = BCOL_SYNC; - - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - comm_attribs.data_src = DATA_SRC_KNOWN; - - mca_bcol_base_set_attributes(super, - &comm_attribs, &inv_attribs, - mca_bcol_iboffload_nb_memory_service_barrier_intra, - mca_bcol_iboffload_new_style_barrier_progress); - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_bcast.c b/ompi/mca/bcol/iboffload/bcol_iboffload_bcast.c deleted file mode 100644 index dd392117ed..0000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_bcast.c +++ /dev/null @@ -1,1065 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#include -#include -#include -#include -#include "opal_stdint.h" - -#include "bcol_iboffload.h" -#include "bcol_iboffload_bcast.h" -#include "bcol_iboffload_frag.h" -#include "bcol_iboffload_task.h" -#include "bcol_iboffload_collreq.h" -#include "bcol_iboffload_collfrag.h" -#include "bcol_iboffload_endpoint.h" - -#include "opal/include/opal/types.h" - -static int mca_bcol_iboffload_bcast_init( - bcol_function_args_t *fn_arguments, - mca_bcol_iboffload_module_t *iboffload_module, - mca_bcol_iboffload_collreq_t **coll_request, - bool if_bcol_last, int mq_credits, - collective_message_progress_function progress_fn) -{ - ompi_free_list_item_t *item; - mca_bcol_iboffload_collfrag_t *coll_fragment; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - int my_group_index = iboffload_module->super.sbgp_partner_module->my_index; - - OMPI_FREE_LIST_WAIT_MT(&cm->collreqs_free, item); - if (OPAL_UNLIKELY(NULL == item)) { - IBOFFLOAD_ERROR(("Wait for free list failed.\n")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - /* setup call request */ - (*coll_request) = (mca_bcol_iboffload_collreq_t *) item; - - (*coll_request)->n_fragments = 0; - (*coll_request)->n_frags_sent = 0; - (*coll_request)->n_frag_mpi_complete = 0; - (*coll_request)->n_frag_net_complete = 0; - (*coll_request)->if_bcol_last = if_bcol_last; - (*coll_request)->ml_buffer_index = fn_arguments->buffer_index; - (*coll_request)->completion_cb_fn = NULL; - (*coll_request)->buffer_info[SBUF].buf = (void *) ( - (unsigned char *)fn_arguments->sbuf + - fn_arguments->sbuf_offset); - (*coll_request)->buffer_info[SBUF].offset = fn_arguments->sbuf_offset; - (*coll_request)->buffer_info[RBUF].offset = fn_arguments->rbuf_offset; - - (*coll_request)->dtype = fn_arguments->dtype; - (*coll_request)->count = fn_arguments->count; - (*coll_request)->module = iboffload_module; - /* TODO Pasha: we need it for pending quque. Set it later. 
*/ - (*coll_request)->progress_fn = progress_fn; - /* TODO Pasha: fix it later */ - (*coll_request)->qp_index = MCA_BCOL_IBOFFLOAD_QP_REGULAR; - - (*coll_request)->order_info = &fn_arguments->order_info; - - coll_fragment = &((*coll_request)->first_collfrag); - mca_bcol_iboffload_collfrag_init(coll_fragment); - - /** Vasily ????? */ - /* mq_credits = (*coll_request)->total_tasks_num; */ - coll_fragment->mq_credits = mq_credits; - coll_fragment->mq_index = COLL_MQ; - /* Pasha: just set it to zero */ - coll_fragment->last_wait_num = 0; - coll_fragment->alg = -2; /* used only for debug */ - /* - if (my_rank == algthm_ptr->root) { - coll_fragment->last_wait_num = 0; - } else { - coll_fragment->last_wait_num = algth_lst->last_wait_num; - } - */ - /* Pasha: we have nothing to unpack */ - coll_fragment->unpack_size = 0; - /* coll_fragment->unpack_size = pack_len; */ - /* coll_fragment->alg = RECURSIVE_DOUBLING_TREE_BCAST; */ - - /* set pointers for (coll frag) <-> (coll full request) */ - (*coll_request)->user_handle_freed = false; - - fn_arguments->bcol_opaque_data = (void *) (*coll_request); - - if (true == fn_arguments->root_flag) { - (*coll_request)->root = my_group_index; - } else { - (*coll_request)->root = fn_arguments->root_route->rank; - } - - MCA_BCOL_IBOFFLOAD_SET_COLL_REQ_LINKS((*coll_request), coll_fragment); - return OMPI_SUCCESS; -} -static inline __opal_attribute_always_inline__ int -binomial_scatter_smsg( - mca_bcol_iboffload_module_t *iboffload_module, - mca_bcol_iboffload_collfrag_t *coll_fragment, - struct mqe_task **last_send, - int radix_mask_pow, - uint32_t my_group_index, - size_t send_size - ) -{ - int rc, dst; - int radix_mask = radix_mask_pow >= 0 ? 1 << radix_mask_pow : 0; - - while(radix_mask > 0) { - /* For each level of tree, do sends */ - dst = my_group_index ^ radix_mask; - rc = mca_bcol_iboffload_send_small_buff_setup( - last_send, send_size, dst, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to isend data")); - return rc; - } - - radix_mask >>= 1; - } - - return OMPI_SUCCESS; -} - -#define BINOMIAL_SMALL_SCATTER( \ - iboffload_module, coll_fragment, \ - last_wait, last_send, \ - distance, \ - my_group_index, \ - segment_size \ - ) \ -do { \ - int rc = OMPI_SUCCESS; \ - int dst; \ - int send_size; \ - int dst_boundary_rank; \ - int radix_mask_pow = distance; \ - int radix_mask = (distance) >= 0 ? 
1 << (distance) : 0; \ - IBOFFLOAD_VERBOSE(10, ("BCAST SCATTER %d %d", radix_mask, distance)); \ - \ - while(radix_mask > 0) { \ - /* For each level of tree, do sends */ \ - dst = my_group_index ^ radix_mask; \ - dst_boundary_rank = dst & ((~(int)0) << (radix_mask_pow)); \ - \ - IBOFFLOAD_VERBOSE(10, ("Scatter data to %d , len %d offset %d", dst, send_size, send_offset)); \ - \ - rc = mca_bcol_iboffload_send_small_buff_setup( \ - &last_send, send_size, dst, \ - iboffload_module, coll_fragment); \ - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { \ - IBOFFLOAD_VERBOSE(10, ("Failed to isend data")); \ - return rc; \ - } \ - radix_mask >>= 1; \ - /* radix_mask_pow--; */ \ - } \ -} while(0) - - -int mca_bcol_iboffload_small_msg_bcast_progress( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_iboffload_collreq_t *coll_request = - (mca_bcol_iboffload_collreq_t *) - input_args->bcol_opaque_data; - - IBOFFLOAD_VERBOSE(10, ("Run progress.\n")); - - /* We should send the data to our children in the tree before - the upper layer will start with buffers recycling */ - if (BCOL_AND_NET_ARE_COMPLETED(coll_request)) { - coll_request->user_handle_freed = true; - if (COLLREQ_IS_DONE(coll_request)) { - IBOFFLOAD_VERBOSE(10, ("Coll request already done.\n")); - RELEASE_COLLREQ(coll_request); - } - - IBOFFLOAD_VERBOSE(10, ("New bcast done !!!")); - return BCOL_FN_COMPLETE; - } - - return BCOL_FN_STARTED; -} - -static int mca_bcol_iboffload_small_msg_bcast_exec(mca_bcol_iboffload_module_t *iboffload_module, - mca_bcol_iboffload_collreq_t *coll_request) -{ - netpatterns_pair_exchange_node_t *recursive_doubling_tree = - &iboffload_module->recursive_doubling_tree; - - int rc, - distance_mask_pow , dst, - group_src, power_of_2_distance; - - uint32_t pack_len; - int my_group_index = iboffload_module->super.sbgp_partner_module->my_index; - - struct mqe_task *last_send = NULL, - *last_wait = NULL; - mca_bcol_iboffload_collfrag_t *coll_fragment = &coll_request->first_collfrag; - - IBOFFLOAD_VERBOSE(10,("Entering small msg iboffload bcast")); - - if (OPAL_UNLIKELY(!iboffload_module->connection_status[RECURSIVE_DOUBLING_TREE_BCAST])) { - IBOFFLOAD_VERBOSE(10,("Bcast open new connection ")); - bcol_iboffload_setup_binomial_connection(iboffload_module); - } - - pack_len = coll_request->count * coll_request->dtype->super.size; - IBOFFLOAD_VERBOSE(10,("My packet length %d pack_len frag_count %d dtype size %d ", - pack_len, - coll_request->count, - coll_request->dtype->super.size)); - - /* it is estimated mq consumption... 
*/ - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload_module, coll_fragment->mq_index, coll_fragment->mq_credits))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - goto out_of_resources; - } - - coll_fragment->tail_next = &coll_fragment->to_post; - coll_request->buffer_info[SBUF].lkey = iboffload_module->rdma_block.ib_info.lkey; - - if (coll_request->root == my_group_index) { - IBOFFLOAD_VERBOSE(10, ("I'm root of the data")); - - /* Send data to the extra peer */ - if (recursive_doubling_tree->n_extra_sources > 0) { - /* send the all data to your extra peer */ - dst = recursive_doubling_tree->rank_extra_source; - IBOFFLOAD_VERBOSE(10,("Sending the dat to Dst %d",dst)); - rc = mca_bcol_iboffload_send_small_buff_setup( - &last_send, pack_len, dst, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - " mca_bcol_iboffload_send_large_buff_setup")); - goto out_of_resources; - } - } - - distance_mask_pow = - iboffload_module->power_of_2 - 1; - - rc = binomial_scatter_smsg(iboffload_module, coll_fragment, - &last_send, distance_mask_pow, - my_group_index, pack_len); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to binomial_scatter_smsg")); - goto out_of_resources; - } - - goto finalize; - } - - /* prepare and post recv operation */ - group_src = bcol_iboffload_binomial_root_to_src(coll_request->root, - my_group_index, iboffload_module->power_of_2_ranks, - iboffload_module->group_size, &power_of_2_distance); - assert(group_src >= 0); - - if (0 > power_of_2_distance) { - /* the rank is virtual root for this group, receive the data - and scatter gather as root */ - IBOFFLOAD_VERBOSE(10,("Virtual root distance_mask_pow %d ",iboffload_module->power_of_2)); - distance_mask_pow = iboffload_module->power_of_2 - 1; - } else { - distance_mask_pow = power_of_2_distance - 1; - } - - IBOFFLOAD_VERBOSE(10, ("Bcast, receive data from %d[%d], count %d, offset %d", - group_src)); - - rc = mca_bcol_iboffload_recv_small_buff_setup(&last_wait, - pack_len, group_src, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to setup data receive")); - goto out_of_resources; - } - - rc = binomial_scatter_smsg(iboffload_module, coll_fragment, - &last_send, distance_mask_pow, - my_group_index, pack_len); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to binomial_scatter_smsg")); - goto out_of_resources; - } - - if (recursive_doubling_tree->n_extra_sources > 0 && - iboffload_module->power_of_2 - 1 != distance_mask_pow) { -/* - - if ((recursive_doubling_tree->n_extra_sources > 0) && - ((my_group_index + iboffload_module->power_of_2_ranks ) < - iboffload_module->group_size) ) { - */ - dst = recursive_doubling_tree->rank_extra_source; - /* - dst = my_group_index + iboffload_module->power_of_2_ranks; - */ - - rc = mca_bcol_iboffload_send_small_buff_setup( - &last_send, pack_len, dst, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - " mca_bcol_iboffload_send_small_buff_setup")); - goto out_of_resources; - } - } - -finalize: - /* end of list */ - *coll_fragment->tail_next = NULL; - - /* finish initializing full message descriptor */ - (coll_request)->n_fragments += 1; - (coll_request)->n_frags_sent += 1; - - if (NULL != last_wait) { - last_wait->flags |= MQE_WR_FLAG_SIGNAL; - coll_fragment->signal_task_wr_id = last_wait->wr_id; - 
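/*
 * [Editorial sketch] The two assignments around this point implement a
 * wr_id swap: the hardware-visible wr_id of the signalled task is first
 * saved in the collfrag, then overwritten with the collfrag pointer, so the
 * completion path can recover the fragment from the completed work request.
 * A minimal sketch of the reverse operation (sketch_* is a hypothetical
 * name; the real unpacking lives in the component's progress code):
 */
static mca_bcol_iboffload_collfrag_t *
sketch_collfrag_from_completion(uint64_t completed_wr_id)
{
    mca_bcol_iboffload_collfrag_t *frag =
        (mca_bcol_iboffload_collfrag_t *) (uintptr_t) completed_wr_id;

    /* the original wr_id stashed before posting is still available in
     * frag->signal_task_wr_id; the caller can then mark the fragment
     * complete and update the parent request's completion counters */
    (void) frag->signal_task_wr_id;
    return frag;
}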
last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment; - } else { - last_send->flags |= MQE_WR_FLAG_SIGNAL; - coll_fragment->signal_task_wr_id = last_send->wr_id; - last_send->wr_id = (uint64_t) (uintptr_t) coll_fragment; - } - /* post the mwr */ - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload_module, coll_fragment->to_post); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload_module->super, coll_request->order_info); - - IBOFFLOAD_VERBOSE(10, ("Return success.\n")); - return BCOL_FN_STARTED; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Barrier, adding collfrag to collfrag_pending.\n")); - rc = - mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload_module); - return (OMPI_SUCCESS != rc) ? BCOL_FN_NOT_STARTED : BCOL_FN_STARTED; -} - -int mca_bcol_iboffload_small_msg_bcast_intra(bcol_function_args_t *fn_arguments, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_iboffload_module_t *iboffload_module = - (mca_bcol_iboffload_module_t *) const_args->bcol_module; - - int rc; - int mq_credits = iboffload_module->power_of_2 + 2; - bool if_bcol_last = BCOL_IBOFFLOAD_IS_LAST_CALL(const_args); - mca_bcol_iboffload_collreq_t *coll_request; - - MCA_BCOL_CHECK_ORDER(const_args->bcol_module, fn_arguments); - - rc = mca_bcol_iboffload_bcast_init(fn_arguments, iboffload_module, - &coll_request, if_bcol_last, mq_credits, - mca_bcol_iboffload_small_msg_bcast_exec); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return rc; - } - - rc = coll_request->progress_fn(iboffload_module, coll_request); - - IBOFFLOAD_VERBOSE(10, ("mca_bcol_iboffload_small_msg_bcast_intra was started [%d]\n", rc)); - return rc; -} - -static int mca_bcol_iboffload_small_msg_bcast_extra_exec(mca_bcol_iboffload_module_t *iboffload_module, - mca_bcol_iboffload_collreq_t *coll_request) -{ - netpatterns_pair_exchange_node_t *recursive_doubling_tree = - &iboffload_module->recursive_doubling_tree; - - int rc, - dst; - int my_group_index = iboffload_module->super.sbgp_partner_module->my_index; - uint32_t pack_len; - - struct mqe_task *last_send = NULL, - *last_wait = NULL; - mca_bcol_iboffload_collfrag_t *coll_fragment = &coll_request->first_collfrag; - - IBOFFLOAD_VERBOSE(10,("Entering small msg extra iboffload bcast")); - - if (OPAL_UNLIKELY(!iboffload_module->connection_status[RECURSIVE_DOUBLING_TREE_BCAST])) { - IBOFFLOAD_VERBOSE(10,("Bcast open new connection ")); - bcol_iboffload_setup_binomial_connection(iboffload_module); - } - - - pack_len = coll_request->count * coll_request->dtype->super.size; - coll_request->buffer_info[SBUF].lkey = iboffload_module->rdma_block.ib_info.lkey; - - IBOFFLOAD_VERBOSE(10,("My packet length %d pack_len frag_count %d dtype size %d ", - pack_len, - coll_request->count, - coll_request->dtype->super.size)); - - /* it is estimated mq consumption... 
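(the extra-rank path queues at most two tasks) */

/* Annotation, not part of the original source: ranks outside the power-of-two
 * set participate only through a single partner exchange, which is why the
 * extra entry point below asks for just mq_credits = 2.  A sketch of the
 * pairing this assumes -- the helper is illustrative, not component API: */
static inline int extra_rank_partner(int my_rank, int pow2_ranks)
{
    /* extra ranks (>= pow2_ranks) pair downward into the power-of-two set;
       a pow2 rank pairs upward to its extra peer, when one exists */
    return (my_rank >= pow2_ranks) ? my_rank - pow2_ranks
                                   : my_rank + pow2_ranks;
}
/* credit check for the two queued tasks: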
*/ - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload_module, - coll_fragment->mq_index, coll_fragment->mq_credits))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - goto out_of_resources; - } - - coll_fragment->tail_next = &coll_fragment->to_post; - - - if (coll_request->root == my_group_index) { - IBOFFLOAD_VERBOSE(10, ("I'm root of the data %d", iboffload_module->power_of_2)); - /* send the all data to your extra peer */ - - dst = recursive_doubling_tree->rank_extra_source; - IBOFFLOAD_VERBOSE(10,("Im extra root sending data to %d \n",dst)); - rc = mca_bcol_iboffload_send_small_buff_setup( - &last_send, pack_len, dst, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - " mca_bcol_iboffload_send_small_buff_setup")); - goto out_of_resources; - } - } else { - /* Not root case */ - dst = recursive_doubling_tree->rank_extra_source; - rc = mca_bcol_iboffload_recv_small_buff_setup(&last_wait, - pack_len, dst, - iboffload_module, coll_fragment); - - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to setup data receive")); - return OMPI_ERROR; - } - } - - /* end of list */ - *coll_fragment->tail_next = NULL; - - /* finish initializing full message descriptor */ - (coll_request)->n_fragments = 1; - (coll_request)->n_frags_sent = 1; - - if (NULL != last_wait) { - last_wait->flags |= MQE_WR_FLAG_SIGNAL; - coll_fragment->signal_task_wr_id = last_wait->wr_id; - last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment; - } else { - last_send->flags |= MQE_WR_FLAG_SIGNAL; - coll_fragment->signal_task_wr_id = last_send->wr_id; - last_send->wr_id = (uint64_t) (uintptr_t) coll_fragment; - } - /* post the mwr */ - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload_module, coll_fragment->to_post); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload_module->super, coll_request->order_info); - - IBOFFLOAD_VERBOSE(10, ("Return success.\n")); - return BCOL_FN_STARTED; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Barrier, adding collfrag to collfrag_pending.\n")); - rc = - mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload_module); - return (OMPI_SUCCESS != rc) ? 
BCOL_FN_NOT_STARTED : BCOL_FN_STARTED; -} - -int mca_bcol_iboffload_small_msg_bcast_extra_intra(bcol_function_args_t *fn_arguments, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_iboffload_module_t *iboffload_module = - (mca_bcol_iboffload_module_t *)const_args->bcol_module; - - int rc; - int mq_credits = 2; - bool if_bcol_last = BCOL_IBOFFLOAD_IS_LAST_CALL(const_args); - mca_bcol_iboffload_collreq_t *coll_request; - - MCA_BCOL_CHECK_ORDER(const_args->bcol_module, fn_arguments); - - rc = mca_bcol_iboffload_bcast_init(fn_arguments, iboffload_module, - &coll_request, if_bcol_last, mq_credits, - mca_bcol_iboffload_small_msg_bcast_extra_exec); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return rc; - } - - rc = coll_request->progress_fn(iboffload_module, coll_request); - - IBOFFLOAD_VERBOSE(10, ("mca_bcol_iboffload_small_msg_bcast_extra_exec was started [%d]\n", rc)); - return rc; -} - -/* Large message scatter-allgather with zero copy */ -int mca_bcol_iboffload_zero_copy_progress(bcol_function_args_t *fn_arguments, - struct mca_bcol_base_function_t *const_args) -{ - int i; - mca_bcol_iboffload_collreq_t *coll_request = - (mca_bcol_iboffload_collreq_t *)fn_arguments->bcol_opaque_data; - - /* IBOFFLOAD_VERBOSE(10, ("Run general progress. %d == %d * %d == %d", - coll_request->n_frag_mpi_complete, coll_request->n_fragments, - coll_request->n_frag_net_complete, coll_request->n_fragments)); */ - - /* Complete the bcast - progress releases full request descriptors */ - for (i = 0; i < mca_bcol_iboffload_component.max_progress_pull; i++) { - if (coll_request->n_frag_mpi_complete == coll_request->n_fragments && - coll_request->n_frag_net_complete == coll_request->n_fragments) { - - IBOFFLOAD_VERBOSE(10, ("Deregister user buff.\n")); - coll_request->module->device->mpool->mpool_deregister( - coll_request->module->device->mpool, - (mca_mpool_base_registration_t *) coll_request->buffer_info[SBUF].iboffload_reg); - coll_request->buffer_info[SBUF].iboffload_reg = NULL; - - RELEASE_COLLREQ(coll_request); - IBOFFLOAD_VERBOSE(10, ("New bcast done !!!")); - return BCOL_FN_COMPLETE; - } - } - - /* IBOFFLOAD_VERBOSE(10, ("Bcast general progress done")); */ - - /* done */ - return BCOL_FN_STARTED; -} -/* Pasha: I have to move it to static inline later, it looks too ugly for macro */ -#define BINOMIAL_SCATTER( \ - iboffload_module, coll_fragment, \ - last_wait, last_send, \ - distance, \ - my_group_index, \ - segment_size, count \ - ) \ -do { \ - int rc = OMPI_SUCCESS; \ - int dst; \ - int send_size; \ - int send_offset; \ - int delta; \ - int dst_boundary_rank; \ - int radix_mask_pow = distance; \ - int radix_mask = (distance) >= 0 ? 1 << (distance) : 0; \ - IBOFFLOAD_VERBOSE(10, ("BCAST SCATTER %d %d", radix_mask, distance)); \ - \ - while(radix_mask > 0) { \ - /* For each level of tree, do sends */ \ - dst = my_group_index ^ radix_mask; \ - dst_boundary_rank = dst & ((~(int)0) << (radix_mask_pow)); \ - send_offset = segment_size * dst_boundary_rank; \ - /* Pasha: make sure that we handle the corner cases */ \ - delta = count - send_offset; \ - if (OPAL_UNLIKELY(delta <= 0)) { \ - radix_mask >>= 1; \ - radix_mask_pow--; \ - continue; /* we have to send something, other way it will hang */ \ - } else { \ - /* the tail case */ \ - send_size = (int) \ - (delta - (int)segment_size * radix_mask) < 0 ? 
delta : \ - (int)segment_size * radix_mask; \ - } \ - IBOFFLOAD_VERBOSE(10, ("Scatter data to %d , len %d offset %d", dst, send_size, send_offset)); \ - rc = mca_bcol_iboffload_recv_rtr_setup( \ - &last_wait, dst, iboffload_module, coll_fragment); \ - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { \ - IBOFFLOAD_VERBOSE(10, ("Failed to isend data")); \ - return OMPI_ERROR; \ - } \ - rc = mca_bcol_iboffload_send_large_buff_setup( \ - &last_send, SBUF, send_offset, send_size, dst, \ - iboffload_module, coll_fragment); \ - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { \ - IBOFFLOAD_VERBOSE(10, ("Failed to isend data")); \ - return OMPI_ERROR; \ - } \ - radix_mask >>= 1; \ - radix_mask_pow--; \ - } \ -} while(0) - -static int mca_bcol_iboffload_bcast_scatter_allgather_exec(mca_bcol_iboffload_module_t *iboffload_module, - mca_bcol_iboffload_collreq_t *coll_request) -{ - netpatterns_pair_exchange_node_t *recursive_doubling_tree = - &iboffload_module->recursive_doubling_tree; - - int rc, - dst, - group_src, power_of_2_distance, - recv_count; - size_t offset; - int count = coll_request->count * coll_request->dtype->super.size; - int my_group_index = iboffload_module->ibnet->super.my_index; - size_t base_block_size = - (count + iboffload_module->power_of_2_ranks - 1) / - iboffload_module->power_of_2_ranks; - - struct mqe_task *last_send = NULL, - *last_wait = NULL; - mca_bcol_iboffload_collfrag_t *coll_fragment = &coll_request->first_collfrag; - - if (OPAL_UNLIKELY(!iboffload_module->connection_status[RECURSIVE_DOUBLING_TREE_BCAST])) { - bcol_iboffload_setup_binomial_connection(iboffload_module); - } - - /* register memory in mpool/rcache */ - rc = mca_bcol_iboffload_prepare_buffer(coll_request->buffer_info[SBUF].buf, count, - &coll_request->buffer_info[SBUF].iboffload_reg, iboffload_module); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_ERROR(("Cannot register memory: " - "addr - %p, %d bytes.\n", - coll_request->buffer_info[SBUF].buf, count)); - return OMPI_ERROR; - } - - coll_request->buffer_info[SBUF].lkey = coll_request->buffer_info[SBUF].iboffload_reg->mr->lkey; - - /* it is estimated mq consumption... 
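(the scatter phase posts an RTR wait plus a data send per tree level) */

/* Annotation, not part of the original source: a sketch of the block
 * arithmetic the scatter uses.  base_block_size above is a ceiling division,
 * so the trailing block may be short, and every send window is clipped
 * against the total byte count -- mirroring the BINOMIAL_SCATTER macro.
 * Helper names are illustrative. */
static inline size_t sg_base_block(size_t count_bytes, int pow2_ranks)
{
    /* ceiling division: each rank owns at most this many bytes */
    return (count_bytes + (size_t) pow2_ranks - 1) / (size_t) pow2_ranks;
}

static inline size_t sg_send_offset(size_t base_block, int dst, int radix_mask_pow)
{
    /* a destination owns the sub-tree that starts at its left boundary rank */
    int dst_boundary_rank = dst & ((~(int) 0) << radix_mask_pow);
    return base_block * (size_t) dst_boundary_rank;
}
/* resource check before any task is queued: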
*/ - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload_module, coll_fragment->mq_index, coll_fragment->mq_credits) || - false == opal_list_is_empty(&iboffload_module->collfrag_pending))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - goto out_of_resources; - } - - coll_fragment->tail_next = &coll_fragment->to_post; - - if (coll_request->root == my_group_index) { - IBOFFLOAD_VERBOSE(10, ("I'm root of the data %d %d", - iboffload_module->power_of_2, recursive_doubling_tree->n_extra_sources )); - /* for proxy we have little bit more work to do */ - if (recursive_doubling_tree->n_extra_sources > 0) { - /* send the all data to your extra peer */ - dst = recursive_doubling_tree->rank_extra_source; - rc = mca_bcol_iboffload_recv_rtr_setup( - &last_wait, dst, iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - " mca_bcol_iboffload_recv_rtr_setup")); - return OMPI_ERROR; - } - rc = mca_bcol_iboffload_send_large_buff_setup( - &last_send, SBUF, 0, count, dst, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - " mca_bcol_iboffload_send_large_buff_setup")); - return OMPI_ERROR; - } - } - power_of_2_distance = iboffload_module->power_of_2; - - BINOMIAL_SCATTER(iboffload_module, coll_fragment, - last_wait, last_send, power_of_2_distance - 1, - my_group_index, base_block_size, count - ); - /* EXIT OR GO TO Gather */ - goto GATHER; - } - - /* prepare and post recv operation */ - group_src = bcol_iboffload_binomial_root_to_src(coll_request->root, - my_group_index, iboffload_module->power_of_2_ranks, - iboffload_module->group_size, &power_of_2_distance); - - IBOFFLOAD_VERBOSE(10, ("SRC %d DIST %d ranks %d gsize %d root %d my rank %d", - group_src, power_of_2_distance, iboffload_module->power_of_2_ranks, - iboffload_module->group_size, - coll_request->root, my_group_index)); - assert(group_src >= 0); - - if (0 > power_of_2_distance) { - /* the rank is virtual root for this group, receive the data - and scatter gather as root */ - power_of_2_distance = - iboffload_module->power_of_2; - offset = 0; - recv_count = count; - IBOFFLOAD_VERBOSE(10, ("Virtual root %d , set mask to %d", - my_group_index, power_of_2_distance)); - } else { - int my_left_boundary_rank; - int delta; - recv_count = base_block_size * (1 << power_of_2_distance); /* we may receive larger data */ - my_left_boundary_rank = my_group_index & ((~(int)0) << power_of_2_distance ); - offset = (size_t) (base_block_size * my_left_boundary_rank); - delta = count - offset; - if (OPAL_UNLIKELY(delta <= 0)) { - /* no data to recv */ - goto GATHER; - } else { - recv_count = (delta < recv_count) ? 
delta : recv_count; - } - - IBOFFLOAD_VERBOSE(10, ("Recv data set mask to %d", - power_of_2_distance)); - } - - IBOFFLOAD_VERBOSE(10, ("Bcast, receive data from %d[%d], count %d, offset %d", - group_src, recv_count, offset)); - - /* Receive data to user buffer */ - rc = mca_bcol_iboffload_send_rtr_setup(&last_send, - group_src, iboffload_module, - coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to setup send rtr")); - return OMPI_ERROR; - } - - rc = mca_bcol_iboffload_recv_large_buff_setup(&last_wait, - SBUF, offset, recv_count, group_src, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to setup data receive")); - return OMPI_ERROR; - } - - BINOMIAL_SCATTER(iboffload_module, coll_fragment, - last_wait, last_send, power_of_2_distance - 1, - my_group_index, base_block_size, count); - -GATHER: - rc = bcol_iboffload_bcast_binomial_gather(iboffload_module, - &last_send, &last_wait, coll_fragment, - count, base_block_size, power_of_2_distance); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to setup gather. Return %d", rc)); - return rc; - } - - if (recursive_doubling_tree->n_extra_sources > 0 && - iboffload_module->power_of_2 != power_of_2_distance) { - dst = recursive_doubling_tree->rank_extra_source; - - rc = mca_bcol_iboffload_recv_rtr_setup( - &last_wait, dst, iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - " mca_bcol_iboffload_recv_rtr_setup")); - return OMPI_ERROR; - } - - rc = mca_bcol_iboffload_send_large_buff_setup( - &last_send, SBUF, 0, count, dst, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - " mca_bcol_iboffload_send_large_buff_setup")); - return OMPI_ERROR; - } - } - - IBOFFLOAD_VERBOSE(10, ("Fill in the the rest of the coll_fragment.\n")); - - /* end of list */ - *coll_fragment->tail_next = NULL; - - /* finish initializing full message descriptor */ - coll_request->n_fragments += 1; - coll_request->n_frags_sent += 1; - - if (NULL != last_wait) { - last_wait->flags |= MQE_WR_FLAG_SIGNAL; - coll_fragment->signal_task_wr_id = last_wait->wr_id; - last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment; - } else { - last_send->flags |= MQE_WR_FLAG_SIGNAL; - coll_fragment->signal_task_wr_id = last_send->wr_id; - last_send->wr_id = (uint64_t) (uintptr_t) coll_fragment; - } - - /* post the mwr */ - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload_module, coll_fragment->to_post); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload_module->super, coll_request->order_info); - - IBOFFLOAD_VERBOSE(10, ("Return success.\n")); - - return BCOL_FN_STARTED; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Barrier, adding collfrag to collfrag_pending.\n")); - rc = - mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload_module); - return (OMPI_SUCCESS != rc) ? 
BCOL_FN_NOT_STARTED : BCOL_FN_STARTED; -} - -int mca_bcol_iboffload_bcast_scatter_allgather_intra(bcol_function_args_t *fn_arguments, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_iboffload_module_t *iboffload_module = - (mca_bcol_iboffload_module_t *) const_args->bcol_module; - - int rc; - int mq_credits = iboffload_module->power_of_2 * 3 + 4; - bool if_bcol_last = BCOL_IBOFFLOAD_IS_LAST_CALL(const_args); - mca_bcol_iboffload_collreq_t *coll_request; - - MCA_BCOL_CHECK_ORDER(const_args->bcol_module, fn_arguments); - - rc = mca_bcol_iboffload_bcast_init(fn_arguments, iboffload_module, - &coll_request, if_bcol_last, mq_credits, - mca_bcol_iboffload_bcast_scatter_allgather_exec); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return rc; - } - - rc = coll_request->progress_fn(iboffload_module, coll_request); - - IBOFFLOAD_VERBOSE(10, ("mca_bcol_iboffload_bcast_scatter_allgather_intra was started [%d]\n", rc)); - return rc; -} - -static int mca_bcol_iboffload_bcast_scatter_allgather_extra_exec(mca_bcol_iboffload_module_t *iboffload_module, - mca_bcol_iboffload_collreq_t *coll_request) -{ - netpatterns_pair_exchange_node_t *recursive_doubling_tree = - &iboffload_module->recursive_doubling_tree; - - int rc, dst; - int count = coll_request->count * coll_request->dtype->super.size; - int my_group_index = iboffload_module->ibnet->super.my_index; - struct mqe_task *last_send = NULL, - *last_wait = NULL; - mca_bcol_iboffload_collfrag_t *coll_fragment = &coll_request->first_collfrag; - - if (OPAL_UNLIKELY(!iboffload_module->connection_status[RECURSIVE_DOUBLING_TREE_BCAST])) { - bcol_iboffload_setup_binomial_connection(iboffload_module); - } - - /* register memory in mpool/rcache */ - rc = mca_bcol_iboffload_prepare_buffer(coll_request->buffer_info[SBUF].buf, count, - &coll_request->buffer_info[SBUF].iboffload_reg, iboffload_module); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_ERROR(("Cannot register memory: " - "addr - %p, %d bytes.\n", - coll_request->buffer_info[SBUF].buf, count)); - return OMPI_ERROR; - } - - coll_request->buffer_info[SBUF].lkey = coll_request->buffer_info[SBUF].iboffload_reg->mr->lkey; - - /* it is estimated mq consumption... 
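(at most one RTR/data task pair is queued here) */

/* Annotation, not part of the original source: the large-buffer exchange
 * below uses a ready-to-receive (RTR) handshake so that the zero-copy RDMA
 * always lands in a buffer that is already posted and registered.  In
 * outline, using the setup calls defined in bcol_iboffload_bcast.h:
 *
 *   receiver:  mca_bcol_iboffload_send_rtr_setup(...);        announce buffer
 *              mca_bcol_iboffload_recv_large_buff_setup(...); wait for payload
 *   sender:    mca_bcol_iboffload_recv_rtr_setup(...);        wait for credit
 *              mca_bcol_iboffload_send_large_buff_setup(...); push payload
 */
/* credit check: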
*/ - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload_module, coll_fragment->mq_index, coll_fragment->mq_credits) || - false == opal_list_is_empty(&iboffload_module->collfrag_pending))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - goto out_of_resources; - } - - coll_fragment->tail_next = &coll_fragment->to_post; - - /* send or recv the data */ - - if (coll_request->root == my_group_index) { - IBOFFLOAD_VERBOSE(10, ("I'm root of the data %d", iboffload_module->power_of_2)); - /* send the all data to your extra peer */ - dst = recursive_doubling_tree->rank_extra_source; - rc = mca_bcol_iboffload_recv_rtr_setup( - &last_wait, dst, iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - " mca_bcol_iboffload_recv_rtr_setup")); - return OMPI_ERROR; - } - rc = mca_bcol_iboffload_send_large_buff_setup( - &last_send, SBUF, 0, count, dst, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to" - " mca_bcol_iboffload_send_large_buff_setup")); - return OMPI_ERROR; - } - } else { - /* Not root case */ - dst = recursive_doubling_tree->rank_extra_source; - rc = mca_bcol_iboffload_send_rtr_setup(&last_send, - dst, iboffload_module, - coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to setup send rtr")); - return OMPI_ERROR; - } - - rc = mca_bcol_iboffload_recv_large_buff_setup(&last_wait, - SBUF, 0, count, dst, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to setup data receive")); - return OMPI_ERROR; - } - } - - IBOFFLOAD_VERBOSE(10, ("Fill in the the rest of the coll_fragment.\n")); - - /* end of list */ - *coll_fragment->tail_next = NULL; - - /* finish initializing full message descriptor */ - coll_request->n_fragments += 1; - coll_request->n_frags_sent += 1; - - if (NULL != last_wait) { - last_wait->flags |= MQE_WR_FLAG_SIGNAL; - coll_fragment->signal_task_wr_id = last_wait->wr_id; - last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment; - } else { - last_send->flags |= MQE_WR_FLAG_SIGNAL; - coll_fragment->signal_task_wr_id = last_send->wr_id; - last_send->wr_id = (uint64_t) (uintptr_t) coll_fragment; - } - - /* post the mwr */ - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload_module, coll_fragment->to_post); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload_module->super, coll_request->order_info); - - IBOFFLOAD_VERBOSE(10, ("Return success.\n")); - - return BCOL_FN_STARTED; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Barrier, adding collfrag to collfrag_pending.\n")); - rc = - mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload_module); - return (OMPI_SUCCESS != rc) ? 
BCOL_FN_NOT_STARTED : BCOL_FN_STARTED; -} - -int mca_bcol_iboffload_bcast_scatter_allgather_extra_intra(bcol_function_args_t *fn_arguments, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_iboffload_module_t *iboffload_module = - (mca_bcol_iboffload_module_t *) const_args->bcol_module; - - int rc; - int mq_credits = iboffload_module->power_of_2 * 3 + 4; - bool if_bcol_last = BCOL_IBOFFLOAD_IS_LAST_CALL(const_args); - mca_bcol_iboffload_collreq_t *coll_request; - - MCA_BCOL_CHECK_ORDER(const_args->bcol_module, fn_arguments); - - rc = mca_bcol_iboffload_bcast_init(fn_arguments, iboffload_module, - &coll_request, if_bcol_last, mq_credits, - mca_bcol_iboffload_bcast_scatter_allgather_extra_exec); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return rc; - } - - rc = coll_request->progress_fn(iboffload_module, coll_request); - - IBOFFLOAD_VERBOSE(10, ("mca_bcol_iboffload_bcast_scatter_allgather_extra_intra was started [%d]\n", rc)); - return rc; -} - -int mca_bcol_iboffload_bcast_register(mca_bcol_base_module_t *super) -{ - mca_bcol_iboffload_module_t *iboffload_module = - (mca_bcol_iboffload_module_t *) super; - - int my_group_index = iboffload_module->ibnet->super.my_index; - - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - IBOFFLOAD_VERBOSE(10, ("Register iboffload Bcast.\n")); - - comm_attribs.bcoll_type = BCOL_BCAST; - - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - comm_attribs.data_src = DATA_SRC_KNOWN; - - if (my_group_index < iboffload_module->power_of_2_ranks) { - mca_bcol_base_set_attributes(super, - &comm_attribs, &inv_attribs, - mca_bcol_iboffload_small_msg_bcast_intra, - mca_bcol_iboffload_small_msg_bcast_progress); - - inv_attribs.bcol_msg_min = 10000000; - inv_attribs.bcol_msg_max = 10485760; /* range 4 */ - - mca_bcol_base_set_attributes(super, - &comm_attribs, &inv_attribs, - mca_bcol_iboffload_bcast_scatter_allgather_intra, - mca_bcol_iboffload_zero_copy_progress); - - } else { - mca_bcol_base_set_attributes(super, - &comm_attribs, &inv_attribs, - mca_bcol_iboffload_small_msg_bcast_extra_intra, - mca_bcol_iboffload_small_msg_bcast_progress); - - inv_attribs.bcol_msg_min = 10000000; - inv_attribs.bcol_msg_max = 10485760; /* range 4 */ - - mca_bcol_base_set_attributes(super, - &comm_attribs, &inv_attribs, - mca_bcol_iboffload_bcast_scatter_allgather_extra_intra, - mca_bcol_iboffload_zero_copy_progress); - - } - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_bcast.h b/ompi/mca/bcol/iboffload/bcol_iboffload_bcast.h deleted file mode 100644 index f283ab65e9..0000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_bcast.h +++ /dev/null @@ -1,606 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_IBOFFLOAD_BCAST_H -#define MCA_BCOL_IBOFFLOAD_BCAST_H - -#include "ompi_config.h" - -#include "bcol_iboffload.h" -#include "bcol_iboffload_frag.h" -#include "bcol_iboffload_task.h" -#include "bcol_iboffload_collreq.h" -#include "bcol_iboffload_collfrag.h" -#include "bcol_iboffload_endpoint.h" - -#include "opal/include/opal/types.h" - -BEGIN_C_DECLS - -int mca_bcol_iboffload_small_msg_bcast_progress( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); -int mca_bcol_iboffload_small_msg_bcast_extra_intra(bcol_function_args_t *fn_arguments, - struct mca_bcol_base_function_t *const_args); -int mca_bcol_iboffload_small_msg_bcast_intra(bcol_function_args_t *fn_arguments, - struct mca_bcol_base_function_t *const_args); -int mca_bcol_iboffload_bcast_scatter_allgather_intra(bcol_function_args_t *fn_arguments, - struct mca_bcol_base_function_t *const_args); -int mca_bcol_iboffload_zero_copy_progress(bcol_function_args_t *fn_arguments, - struct mca_bcol_base_function_t *const_args); -int mca_bcol_iboffload_bcast_scatter_allgather_extra_intra(bcol_function_args_t *fn_arguments, - struct mca_bcol_base_function_t *const_args); -int mca_bcol_iboffload_bcast_register(mca_bcol_base_module_t *super); - -static inline __opal_attribute_always_inline__ int -mca_bcol_iboffload_recv_rtr_setup( - struct mqe_task **last_wait, - uint32_t dest_rank, - mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collfrag_t *coll_fragment) -{ - mca_bcol_iboffload_task_t *task; - mca_bcol_iboffload_frag_t *fragment; - - /* Wait for RTR message over credit QP */ - fragment = mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, dest_rank, - MCA_BCOL_IBOFFLOAD_QP_CREDIT); - if (OPAL_UNLIKELY(NULL == fragment)) { - IBOFFLOAD_VERBOSE(10, ("Failed to get recv frag.\n")); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - task = mca_bcol_iboffload_get_wait_task( - iboffload, dest_rank, 1, fragment, MCA_BCOL_IBOFFLOAD_QP_CREDIT, - iboffload->endpoints[dest_rank]->qps[MCA_BCOL_IBOFFLOAD_QP_LARGE_BUFF].qp->lcl_qp); - if (OPAL_UNLIKELY(NULL == task)) { - IBOFFLOAD_VERBOSE(10, ("Failed to get wait task.\n")); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, task); - MCA_BCOL_IBOFFLOAD_APPEND_MQ_TASK_TO_LIST(coll_fragment->tail_next, task); - - return OMPI_SUCCESS; -} - -static inline __opal_attribute_always_inline__ int -mca_bcol_iboffload_send_small_buff_setup( - struct mqe_task **last_send, - size_t len, uint32_t dest_rank, - mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collfrag_t *coll_fragment) -{ - mca_bcol_iboffload_task_t *task; - mca_bcol_iboffload_frag_t *fragment; - - mca_bcol_iboffload_collreq_t *coll_request = - coll_fragment->coll_full_req; - - IBOFFLOAD_VERBOSE(10,("Get ml frag that I will send dest rank %d, len %d, lkey %d", - dest_rank, len, iboffload->rdma_block.ib_info.lkey)); - - fragment = mca_bcol_iboffload_get_send_frag(coll_request, dest_rank, - coll_request->qp_index, len, 0, - SBUF, /* this could be problematic */ - MCA_BCOL_IBOFFLOAD_SEND_FRAG_ML); - if (OPAL_UNLIKELY(NULL == fragment)) { - IBOFFLOAD_VERBOSE(10, ("Failed to get frag.\n")); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - IBOFFLOAD_VERBOSE(10,("Get an rdma task for dest %d for packet size %d", - dest_rank,len)); - task = mca_bcol_iboffload_get_rdma_task( - dest_rank, 0, - fragment, iboffload, coll_fragment); - - if 
(OPAL_UNLIKELY(NULL == task)) { - IBOFFLOAD_VERBOSE(10, ("Failed to get send task.\n")); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - *last_send = &task->element; - - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, task); - MCA_BCOL_IBOFFLOAD_APPEND_MQ_TASK_TO_LIST(coll_fragment->tail_next, task); - - return OMPI_SUCCESS; -} - -static inline __opal_attribute_always_inline__ int -mca_bcol_iboffload_send_large_buff_setup( - struct mqe_task **last_send, - int buf_index, int offset, - size_t len, uint32_t dest_rank, - mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collfrag_t *coll_fragment) -{ - mca_bcol_iboffload_task_t *task; - mca_bcol_iboffload_frag_t *fragment; - - mca_bcol_iboffload_collreq_t *coll_request = - coll_fragment->coll_full_req; - - fragment = mca_bcol_iboffload_get_send_frag(coll_request, dest_rank, - MCA_BCOL_IBOFFLOAD_QP_LARGE_BUFF, - len, - offset, buf_index, MCA_BCOL_IBOFFLOAD_SEND_FRAG_ML); - if (OPAL_UNLIKELY(NULL == fragment)) { - IBOFFLOAD_VERBOSE(10, ("Failed to get frag.\n")); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - task = mca_bcol_iboffload_get_send_task( - iboffload, dest_rank, - MCA_BCOL_IBOFFLOAD_QP_LARGE_BUFF, - fragment, coll_fragment, NO_INLINE); - - if (OPAL_UNLIKELY(NULL == task)) { - IBOFFLOAD_VERBOSE(10, ("Failed to get send task.\n")); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - *last_send = &task->element; - - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, task); - MCA_BCOL_IBOFFLOAD_APPEND_MQ_TASK_TO_LIST(coll_fragment->tail_next, task); - - return OMPI_SUCCESS; -} - -static inline __opal_attribute_always_inline__ int -mca_bcol_iboffload_send_rtr_setup( - struct mqe_task **last_send, - uint32_t dest_rank, - mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collfrag_t *coll_fragment) -{ - mca_bcol_iboffload_task_t *task; - mca_bcol_iboffload_frag_t *fragment; - - /* Recv is ready , Send RTR message */ - fragment = mca_bcol_iboffload_get_send_frag(coll_fragment->coll_full_req, - dest_rank, MCA_BCOL_IBOFFLOAD_QP_CREDIT, 0, - 0, RBUF, MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY); - if (OPAL_UNLIKELY(NULL == fragment)) { - IBOFFLOAD_VERBOSE(10, ("Failed to get frag.\n")); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - task = mca_bcol_iboffload_get_send_task(iboffload, dest_rank, - MCA_BCOL_IBOFFLOAD_QP_CREDIT, - fragment, coll_fragment, INLINE); - if (OPAL_UNLIKELY(NULL == task)) { - IBOFFLOAD_VERBOSE(10, ("Failed to get send task.\n")); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - IBOFFLOAD_VERBOSE(10, ("dest_rank - %d. 
qp index - %d.\n", - dest_rank, MCA_BCOL_IBOFFLOAD_QP_CREDIT)); - - *last_send = &task->element; - - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, task); - MCA_BCOL_IBOFFLOAD_APPEND_MQ_TASK_TO_LIST(coll_fragment->tail_next, task); - - return OMPI_SUCCESS; -} - -static inline __opal_attribute_always_inline__ int -mca_bcol_iboffload_recv_small_preposted_buff_setup( - struct mqe_task **last_wait, - size_t len, uint32_t dest_rank, - int qp_index, - int nwaits, - mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collfrag_t *coll_fragment) -{ - mca_bcol_iboffload_task_t *task; - mca_bcol_iboffload_frag_t *fragment; - - IBOFFLOAD_VERBOSE(10,("Get preposted recv from rank %d", dest_rank)); - - fragment = mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, dest_rank, - qp_index); - if (OPAL_UNLIKELY(NULL == fragment)) { - IBOFFLOAD_VERBOSE(10, ("Failed to get recv frag.\n")); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - task = mca_bcol_iboffload_get_wait_task(iboffload, dest_rank, nwaits, - fragment, qp_index, NULL); - if (OPAL_UNLIKELY(NULL == task)) { - IBOFFLOAD_VERBOSE(10, ("Failed to get wait task.\n")); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - *last_wait = &task->element; - - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, task); - MCA_BCOL_IBOFFLOAD_APPEND_MQ_TASK_TO_LIST(coll_fragment->tail_next, task); - - return OMPI_SUCCESS; -} - -static inline __opal_attribute_always_inline__ int -mca_bcol_iboffload_recv_small_buff_setup( - struct mqe_task **last_wait, - size_t len, uint32_t dest_rank, - mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collfrag_t *coll_fragment) -{ - mca_bcol_iboffload_task_t *task; - mca_bcol_iboffload_frag_t *fragment; - - mca_bcol_iboffload_collreq_t *coll_request = - coll_fragment->coll_full_req; - - IBOFFLOAD_VERBOSE(10, ("Get preposted recv from rank %d", dest_rank)); - - fragment = mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, dest_rank, - coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == fragment)) { - IBOFFLOAD_VERBOSE(10, ("Failed to get recv frag.\n")); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - task = mca_bcol_iboffload_get_wait_task(iboffload, dest_rank, 1, - fragment, coll_request->qp_index, NULL); - if (OPAL_UNLIKELY(NULL == task)) { - IBOFFLOAD_VERBOSE(10, ("Failed to get wait task.\n")); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - *last_wait = &task->element; - - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, task); - MCA_BCOL_IBOFFLOAD_APPEND_MQ_TASK_TO_LIST(coll_fragment->tail_next, task); - - return OMPI_SUCCESS; -} - -static inline __opal_attribute_always_inline__ int -mca_bcol_iboffload_recv_large_buff_setup( - struct mqe_task **last_wait, - int buf_index, int offset, - size_t len, uint32_t dest_rank, - mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collfrag_t *coll_fragment) -{ - int num_preposted; - - mca_bcol_iboffload_task_t *task; - mca_bcol_iboffload_frag_t *fragment; - - mca_bcol_iboffload_collreq_t *coll_request = coll_fragment->coll_full_req; - - /* Post message to recv queue for large messages */ - fragment = mca_bcol_iboffload_get_ml_frag( - iboffload, MCA_BCOL_IBOFFLOAD_QP_LARGE_BUFF, len, - coll_request->buffer_info[buf_index].iboffload_reg->mr->lkey, - (uint64_t)((unsigned char *)coll_request->buffer_info[buf_index].buf + offset)); - if (OPAL_UNLIKELY(NULL == fragment)) { - IBOFFLOAD_VERBOSE(10, ("Failed to get recv frag.\n")); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - num_preposted = 
mca_bcol_iboffload_prepost_ml_recv_frag( - MCA_BCOL_IBOFFLOAD_QP_LARGE_BUFF, - dest_rank, fragment, iboffload); - if (0 >= num_preposted) { - IBOFFLOAD_ERROR(("Failed to prepost recv fragments " - "return code - %d; dest_rank - %d", - num_preposted, dest_rank)); - - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - task = mca_bcol_iboffload_get_wait_task(iboffload, dest_rank, 1, - fragment, MCA_BCOL_IBOFFLOAD_QP_LARGE_BUFF, NULL); - if (OPAL_UNLIKELY(NULL == task)) { - IBOFFLOAD_VERBOSE(10, ("Failed to get wait task.\n")); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - *last_wait = &task->element; - - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, task); - MCA_BCOL_IBOFFLOAD_APPEND_MQ_TASK_TO_LIST(coll_fragment->tail_next, task); - - return OMPI_SUCCESS; -} - -static inline __opal_attribute_always_inline__ -int bcol_iboffload_binomial_root_to_src(int group_root, int my_rank, - int pow2_size, int group_size, int *distance) -{ - int root, relative_rank, src, - pow2_distance = 0, i; - - if (group_root < pow2_size) { - root = group_root; - } else { - /* the source of the data is extra node, - the real root it represented by some rank from - pow2 group */ - root = group_root - pow2_size; - /* shortcut for the case when my rank is root for the group */ - if (my_rank == root) { - *distance = -1; - return group_root; - } - } - - relative_rank = (my_rank - root) < 0 ? my_rank - root + pow2_size : - my_rank - root; - - for (i = 1; i < pow2_size; i<<=1, pow2_distance++) { - if (relative_rank & i) { - src = my_rank ^ i; - if (src >= pow2_size) - src -= pow2_size; - - *distance = pow2_distance; - IBOFFLOAD_VERBOSE(10, ("AAAAA d %d rel %d it %d root %d my %d", *distance, relative_rank, i, root, my_rank)); - return src; - } - } - - /* error case */ - *distance = -1; - return -1; -} - -static inline void bcol_iboffload_setup_binomial_connection(mca_bcol_iboffload_module_t *iboffload) -{ - netpatterns_pair_exchange_node_t *my_exchange_node = - &iboffload->recursive_doubling_tree; - - int i, n_exchanges = my_exchange_node->n_exchanges, - *exchanges = my_exchange_node->rank_exchanges, - n_extra_src = my_exchange_node->n_extra_sources, - my_rank = iboffload->ibnet->super.my_index, - rank_extra_src = my_exchange_node->rank_extra_source; - - mca_bcol_iboffload_endpoint_t *ep; - - IBOFFLOAD_VERBOSE(10, ("Open connections.\n")); - - if (0 < n_extra_src) { - ep = iboffload->endpoints[rank_extra_src]; - while (OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - -#if OPAL_ENABLE_DEBUG - { - int qp_index, num_qps = mca_bcol_iboffload_component.num_qps; - for (qp_index = 0; qp_index < num_qps; ++qp_index) { - assert(NULL != ep->qps[qp_index].qp->lcl_qp); - IBOFFLOAD_VERBOSE(10, ("Endpoint - %p, QP index - %d: qp num - %x.", - ep, qp_index, ep->qps[qp_index].qp->lcl_qp->qp_num)); - } - } -#endif - - /* Connect to all extra nodes */ - if (EXTRA_NODE == my_exchange_node->node_type) { - for (i = iboffload->power_of_2_ranks; - i < iboffload->num_endpoints; ++i) { - if (i != my_rank) { - ep = iboffload->endpoints[i]; - - IBOFFLOAD_VERBOSE(10, ("subgroup rank %d: Connect to rank %d.\n", my_rank, i)); - - while (OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - -#if OPAL_ENABLE_DEBUG - { - int qp_index, num_qps = mca_bcol_iboffload_component.num_qps; - for (qp_index = 0; qp_index < num_qps; ++qp_index) { - assert(NULL != ep->qps[qp_index].qp->lcl_qp); - IBOFFLOAD_VERBOSE(10, ("Endpoint - %p, QP index - %d: qp num - %x.", - ep, qp_index, 
ep->qps[qp_index].qp->lcl_qp->qp_num)); - } - } -#endif - } - } - } - } - - for (i = 0; i < n_exchanges; ++i) { - ep = iboffload->endpoints[exchanges[i]]; - - while (OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - -#if OPAL_ENABLE_DEBUG - { - int qp_index, num_qps = mca_bcol_iboffload_component.num_qps; - for (qp_index = 0; qp_index < num_qps; ++qp_index) { - assert(NULL != ep->qps[qp_index].qp->lcl_qp); - IBOFFLOAD_VERBOSE(10, ("Endpoint - %p, QP index - %d: qp num - %x.", - ep, qp_index, ep->qps[qp_index].qp->lcl_qp->qp_num)); - } - } -#endif - } - /* set the connection status to connected */ - iboffload->connection_status[RECURSIVE_DOUBLING_TREE_BCAST] = true; -} - -static inline __opal_attribute_always_inline__ -int bcol_iboffload_bcast_binomial_gather(mca_bcol_iboffload_module_t *iboffload_module, - struct mqe_task **last_send, struct mqe_task **last_wait, - mca_bcol_iboffload_collfrag_t *coll_fragment, - int count, int base_block_size, int radix_mask_pow) -{ - int rc; - int i; - int my_group_index = iboffload_module->ibnet->super.my_index; - int delta, rdelta; - - IBOFFLOAD_VERBOSE(10, ("bcol_iboffload_bcast_binomial_gather %d %d", - radix_mask_pow, my_group_index)); - - /* we assume the iteration #iteration already was completed with probe */ - for (i = 0; i < iboffload_module->power_of_2; i++) { - int pow2 = 1 << i; - int peer_index = my_group_index ^ pow2; - int slen, rlen, - send_offset, - recv_offset; - - if (i > radix_mask_pow) { - slen = rlen = pow2 * base_block_size; - send_offset = base_block_size * ((my_group_index) & ((~(int)0) << i)); - recv_offset = base_block_size * ((peer_index) & ((~(int)0) << i)); - - rdelta = count - recv_offset; - if (rdelta > 0) { - IBOFFLOAD_VERBOSE(10, ("Recv1 [ pow2 %d, radix %d ] offset %d , len %d , dest %d", - pow2, 1 << iboffload_module->power_of_2, - recv_offset, rlen, peer_index)); - - rc = mca_bcol_iboffload_send_rtr_setup(last_send, - peer_index, iboffload_module, - coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to setup send rtr")); - return OMPI_ERROR; - } - } - - delta = count - send_offset; - if (delta > 0) { - if (delta < slen) { - /* recv the tail */ - slen = delta; - } - - IBOFFLOAD_VERBOSE(10, ("Send1 [ pow2 %d, radix %d ] offset %d , len %d , dest %d", - pow2, 1 << iboffload_module->power_of_2, - send_offset, slen, peer_index)); - rc = mca_bcol_iboffload_recv_rtr_setup(last_wait, peer_index, iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to isend data")); - return OMPI_ERROR; - } - - rc = mca_bcol_iboffload_send_large_buff_setup(last_send, SBUF, send_offset, slen, peer_index, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to isend data")); - return OMPI_ERROR; - } - } - - if (rdelta > 0) { - if (rdelta < rlen) { - /* recv the tail */ - rlen = rdelta; - } - - rc = mca_bcol_iboffload_recv_large_buff_setup(last_wait, - SBUF, recv_offset, rlen, peer_index, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to setup data receive")); - return OMPI_ERROR; - } - } - - } else if (i == radix_mask_pow) { - /* only receive data */ - rlen = pow2 * base_block_size; - recv_offset = base_block_size * ((peer_index) & ((~(int)0) << i)); - delta = count - recv_offset; - if (0 >= delta) { - /* we have nothing to send, skip the iteration */ - continue; - } - if (delta < rlen) { - /* 
recv the tail */ - rlen = delta; - } - /* receive data from the peer */ - IBOFFLOAD_VERBOSE(10, ("Recv2 [ pow2 %d, radix %d ] offset %d , len %d , dest %d", - pow2, - 1 << iboffload_module->power_of_2, - recv_offset, - rlen, peer_index)); - rc = mca_bcol_iboffload_send_rtr_setup(last_send, - peer_index, iboffload_module, - coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to setup send rtr")); - return OMPI_ERROR; - } - - rc = mca_bcol_iboffload_recv_large_buff_setup(last_wait, - SBUF, recv_offset, rlen, peer_index, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to setup data receive")); - return OMPI_ERROR; - } - } else if (i < radix_mask_pow) { - /* Only send data */ - slen = pow2 * base_block_size; - send_offset = base_block_size * ((my_group_index) & ((~(int)0) << i)); - delta = count - send_offset; - if (0 >= delta) { - /* we have nothing to send, skip the iteration */ - continue; - } - - if (delta < slen) { - slen = delta; - } - - IBOFFLOAD_VERBOSE(10, ("Send2 [ pow2 %d, radix %d ] offset %d , len %d , dest %d", - pow2, - 1 << iboffload_module->power_of_2, - send_offset, - slen, - peer_index)); - - rc = mca_bcol_iboffload_recv_rtr_setup(last_wait, peer_index, iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to isend data")); - return OMPI_ERROR; - } - - rc = mca_bcol_iboffload_send_large_buff_setup(last_send, SBUF, send_offset, slen, peer_index, - iboffload_module, coll_fragment); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Failed to isend data")); - return OMPI_ERROR; - } - } - } - - return OMPI_SUCCESS; -} - -END_C_DECLS - -#endif diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_collfrag.c b/ompi/mca/bcol/iboffload/bcol_iboffload_collfrag.c deleted file mode 100644 index dc447d879d..0000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_collfrag.c +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include - -#include "bcol_iboffload_collreq.h" -#include "bcol_iboffload_collfrag.h" - -static void -collfrag_constructor(struct mca_bcol_iboffload_collfrag_t *collfrag) -{ - collfrag->n_sends = 0; - collfrag->n_sends_completed = 0; - - memset(collfrag->pre_posted_recvs, 0, - sizeof(struct mca_bcol_iboffload_task_t *) * MAX_MQE_TASKS); - - collfrag->signal_task_wr_id = (uint64_t) 0; - collfrag->complete = false; - - collfrag->seq_n = -1; - collfrag->coll_full_req = NULL; - - collfrag->unpack_size = 0; - - collfrag->tasks_posted = 0; - collfrag->to_post = NULL; - collfrag->task_next = NULL; - collfrag->tasks_to_release = NULL; - - collfrag->in_pending_list = false; -} - -static void -collfrag_destruct(struct mca_bcol_iboffload_collfrag_t *collfrag) -{ -} - -OBJ_CLASS_INSTANCE(mca_bcol_iboffload_collfrag_t, - ompi_free_list_item_t, - collfrag_constructor, - collfrag_destruct); diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_collfrag.h b/ompi/mca/bcol/iboffload/bcol_iboffload_collfrag.h deleted file mode 100644 index 3be53aacaa..0000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_collfrag.h +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. 
 - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_IBOFFLOAD_COLLFRAG_H -#define MCA_BCOL_IBOFFLOAD_COLLFRAG_H - -#include "ompi_config.h" - -#include -#include -#include - -#include "bcol_iboffload.h" - -#include "opal/class/ompi_free_list.h" - -BEGIN_C_DECLS - -#define MAX_MQE_TASKS 128 /* Pasha - do we want to make it dynamic? */ - -struct mca_bcol_iboffload_task_t; -struct mca_bcol_iboffload_collreq_t; - -/* collective fragment descriptor */ -struct mca_bcol_iboffload_collfrag_t { - ompi_free_list_item_t super; - - /* number of asynchronous sends scheduled */ - uint32_t n_sends; - - /* number of sends completed */ - uint32_t n_sends_completed; - - /* Algorithm ID that was used for this fragment */ - int32_t alg; - - /* pre-posted receive sources */ - struct mca_bcol_iboffload_task_t *pre_posted_recvs[MAX_MQE_TASKS]; - - /* cache here pointer to signaled task */ - uint64_t signal_task_wr_id; - - /* mwr completion from the mcq */ - volatile bool complete; - - /* sequence number - we use it for - correct ordering of resource release */ - uint32_t seq_n; - - /* pointer to the full collective request descriptor */ - struct mca_bcol_iboffload_collreq_t *coll_full_req; - - size_t unpack_size; - - bool in_pending_list; - - /* Num of posted tasks */ - int tasks_posted; - - /* Pointer to head of the not-yet-posted elements list */ - struct mqe_task *to_post; - - /* Pointer to tail next */ - struct mqe_task **tail_next; - - /* List of all the tasks of this coll frag */ - struct mca_bcol_iboffload_task_t *tasks_to_release; - - /* Pointer to the next elem in the all-tasks list */ - struct mca_bcol_iboffload_task_t **task_next; - - /* Num of needed mq credits */ - int mq_credits; - - /* MQ index used for this frag */ - int mq_index; - - /* - * Last wait sequence number; zero means
- * there isn't any wait in the coll request - */ - int32_t last_wait_num; - /* fragment descriptor for non contiguous data */ - bcol_fragment_descriptor_t *bcol_frag_info; - /* frag-len of ml buffer */ - int frag_len; -}; -typedef struct mca_bcol_iboffload_collfrag_t mca_bcol_iboffload_collfrag_t; -OBJ_CLASS_DECLARATION(mca_bcol_iboffload_collfrag_t); - -static inline __opal_attribute_always_inline__ - void mca_bcol_iboffload_collfrag_init( - mca_bcol_iboffload_collfrag_t *cf) -{ - /* init the request */ - cf->n_sends = 0; - cf->complete = false; - cf->n_sends_completed = 0; - cf->alg = -1; - cf->in_pending_list = false; - cf->tail_next = NULL; - cf->tasks_posted = 0; - cf->to_post = NULL; - cf->mq_credits = 0; - cf->mq_index = 0; - cf->tasks_to_release = NULL; - cf->task_next = &cf->tasks_to_release; - cf->last_wait_num = 0; -} - -static inline __opal_attribute_always_inline__ - struct mca_bcol_iboffload_collfrag_t * - mca_bcol_iboffload_get_collfrag(void) -{ - ompi_free_list_item_t *item; - mca_bcol_iboffload_collfrag_t *cf; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - /* blocking allocation for collectives fragment */ - OMPI_FREE_LIST_GET_MT(&cm->collfrags_free, item); - if (OPAL_UNLIKELY(NULL == item)) { - IBOFFLOAD_ERROR(("Failed to allocated collfrag.\n")); - return NULL; - } - - cf = (mca_bcol_iboffload_collfrag_t*) item; - mca_bcol_iboffload_collfrag_init(cf); - - return cf; -} - -END_C_DECLS - -#endif diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_collreq.c b/ompi/mca/bcol/iboffload/bcol_iboffload_collreq.c deleted file mode 100644 index bae677bb51..0000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_collreq.c +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "bcol_iboffload_collreq.h" - -static void -collreq_construct(struct mca_bcol_iboffload_collreq_t *collreq) -{ - int i; - collreq->n_fragments = 0; - collreq->n_frag_mpi_complete = 0; - collreq->n_frag_net_complete = 0; - collreq->user_handle_freed = false; - - for (i = 0; i < BCOL_IBOFFLOAD_BUFFERS; i++) { - collreq->buffer_info[i].buf = NULL; - collreq->buffer_info[i].offset = 0; - collreq->buffer_info[i].iboffload_reg = NULL; - } - - OBJ_CONSTRUCT(&collreq->work_requests, opal_list_t); - OBJ_CONSTRUCT(&collreq->first_collfrag, mca_bcol_iboffload_collfrag_t); - - OBJ_CONSTRUCT(&collreq->send_convertor, opal_convertor_t); - OBJ_CONSTRUCT(&collreq->recv_convertor, opal_convertor_t); -} - -static void -collreq_destruct(struct mca_bcol_iboffload_collreq_t *collreq) -{ - OBJ_DESTRUCT(&collreq->work_requests); - OBJ_DESTRUCT(&collreq->first_collfrag); - - OBJ_DESTRUCT(&collreq->send_convertor); - OBJ_DESTRUCT(&collreq->recv_convertor); -} - -OBJ_CLASS_INSTANCE(mca_bcol_iboffload_collreq_t, - ompi_request_t, - collreq_construct, - collreq_destruct); diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_collreq.h b/ompi/mca/bcol/iboffload/bcol_iboffload_collreq.h deleted file mode 100644 index 31344009d3..0000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_collreq.h +++ /dev/null @@ -1,273 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_IBOFFLOAD_COLLREQ_H -#define MCA_BCOL_IBOFFLOAD_COLLREQ_H - -#include "ompi_config.h" - -#include -#include -#include - -#include "opal/class/ompi_free_list.h" - -#include "bcol_iboffload.h" -#include "bcol_iboffload_device.h" -#include "bcol_iboffload_collfrag.h" - -#define SBUF 0 -#define RBUF 1 - -#define BCOL_IBOFFLOAD_BUFFERS 2 - -BEGIN_C_DECLS - -struct mca_bcol_iboffload_reg_t; - -/* - * collective progress function - */ -typedef int (*collective_message_progress_function)( - struct mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *full_message_descriptor); -/* - * callback function to be called after the collective work request - * completes. This is invoked in user-space, and is typically where - * data may be copied out of library buffers, or when any other user- - * level protocol may be completed - * - * input: - * callback data: typically, this may be the work request just finished - */ -typedef int (*collective_message_completion_callback_function)( - void *callback_data); - -struct mca_bcol_iboffload_buff_info { - void *buf; - size_t offset; - uint32_t lkey; - struct mca_bcol_iboffload_reg_t *iboffload_reg; -}; -typedef struct mca_bcol_iboffload_buff_info mca_bcol_iboffload_buff_info; - -/* - * Collective message descriptor - * the mca_bcol_iboffload_message_desc_t was replaced with mca_bcol_iboffload_collreq_t - * ************************************************************************************************* - * - * Brief description of iboffload collective request dependencies: - * - * mca_bcol_iboffload_collreq_t <----<< Full coll request - * | - * --(0)-- mca_bcol_iboffload_collfrag_t <----<< Fragment of coll request ( for example - * | | 10MB Bcast maybe split to 2MB fragments ) - * | | - * | --(0)-- mca_bcol_iboffload_task_t---mqe_task - * | | | - * | | ---mca_bcol_iboffload_frag_t---ibv_sge - * | --(1)-- mca_bcol_iboffload_task_t---mqe_task - * | | | - * | | ---mca_bcol_iboffload_frag_t---ibv_sge - * | ..(M).. - * | - * --(1)-- mca_bcol_iboffload_collfrag_t - * | - * ..(N).. 
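 *
 * (annotation, not in the original: one collreq fans out into N collfrags,
 *  each collfrag chains M mqe_task work elements, and every task points at
 *  one fragment/ibv_sge pair that the HCA consumes)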
 - * - * ************************************************************************************************* - */ - -struct mca_bcol_iboffload_collreq_t { - ompi_request_t super; - - /* op type */ - struct ompi_op_t *op; - - /* Sometimes the operation that should be performed - by the IB is different from the mpi_op and is then set - by the pack_data_for_calc function */ - enum ibv_m_wr_calc_op actual_ib_op; - - /* Sometimes the data type that should be used by the IB - to perform the calc is different from the mpi dtype, - and is then set by the pack_data_for_calc function */ - enum ibv_m_wr_data_type actual_ib_dtype; - - /* data type */ - struct ompi_datatype_t *dtype; - - /* convertor for send operation */ - opal_convertor_t send_conv; - - /* convertor for recv operation */ - opal_convertor_t recv_conv; - - /* - * count (in data type units) - */ - uint64_t count; - - /* - * root of collective operation - */ - int root; - - /* number of message fragments */ - int n_fragments; - - /* number of fragments sent - all resources for a fragment are allocated - * or none at all are - */ - int n_frags_sent; - - /* number of fragments completed from the MPI perspective */ - int n_frag_mpi_complete; - - /* number of fragments completed from a network perspective */ - int n_frag_net_complete; - - /* collective is free and may be released - message complete from the - ** MPI perspective, the network perspective, and the user is done - ** with the message handle */ - volatile bool user_handle_freed; - - /* list of collective fragments - only 1 for now */ - opal_list_t work_requests; - - /* message progress function */ - collective_message_progress_function progress_fn; - - /* work request completion callback function */ - collective_message_completion_callback_function completion_cb_fn; - - /* index of qp with enough length of buffs for this collective */ - int qp_index; - - bool if_bcol_last; - - /* The flag is used for the last bcol to indicate if the calculation should be done by the cpu */ - bool do_calc_in_cpu; - - /* in Allreduce case, if (true == do_calc_in_cpu) => - the final result will be calculated on the local CPU */ - uint64_t l_operand; - uint64_t r_operand; - - /* caching ML-rdma buffer descriptor */ - mca_bcol_iboffload_rdma_buffer_desc_t *ml_rdma_desc; - - /* ML buffer index code */ - int ml_buffer_index; - - /* In the current implementation the coll request is connected to a single - iboffload module */ - struct mca_bcol_iboffload_module_t *module; - - mca_bcol_iboffload_collfrag_t first_collfrag; - - /* Send/recv buff info - user buffer registration, if needed, etc.
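(one entry per direction, indexed by SBUF / RBUF) */

/* Annotation, not part of the original source: a minimal sketch of how an
 * effective address would be derived from a buff_info entry; the helper is
 * hypothetical, not component API. */
static inline void *buff_info_addr(const mca_bcol_iboffload_buff_info *bi,
                                   size_t extra_offset)
{
    /* user base pointer + cached offset + caller-supplied offset */
    return (char *) bi->buf + bi->offset + extra_offset;
}
/* the array itself: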
*/ - mca_bcol_iboffload_buff_info buffer_info[BCOL_IBOFFLOAD_BUFFERS]; - - /* My bi nominal tree children in this collective */ - int *bi_nominal_tree_children; - - /* Convertors for send/recv if needed */ - opal_convertor_t send_convertor; - opal_convertor_t recv_convertor; - - /* Order info from upper layer */ - mca_bcol_base_order_info_t *order_info; -}; -typedef struct mca_bcol_iboffload_collreq_t mca_bcol_iboffload_collreq_t; -OBJ_CLASS_DECLARATION(mca_bcol_iboffload_collreq_t); - -#define COLLREQ_IS_DONE(cr) (cr->user_handle_freed && \ - (cr->n_frag_mpi_complete == cr->n_fragments) && \ - (cr->n_frag_net_complete == cr->n_fragments)) - -#define RELEASE_COLLREQ(cr) \ -do { \ - (cr)->user_handle_freed = false; \ - OMPI_FREE_LIST_RETURN_MT(&mca_bcol_iboffload_component.collreqs_free, \ - (ompi_free_list_item_t *) (cr)); \ -} while (0) - -static inline __opal_attribute_always_inline__ - int mca_bcol_iboffload_free_resources_and_move_to_pending( - mca_bcol_iboffload_collfrag_t *coll_fragment, - mca_bcol_iboffload_module_t *iboffload) -{ - int rc = mca_bcol_iboffload_free_tasks_frags_resources(coll_fragment, - iboffload->device->frags_free); - - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return rc; - } - - IBOFFLOAD_VERBOSE(10, ("iboffload - %p, coll_fragment - %p, " - "coll frag in_pending_list ? - %d, pending_list size - %d.\n", - iboffload, coll_fragment, coll_fragment->in_pending_list, - opal_list_get_size(&iboffload->collfrag_pending))); - - BCOL_IBOFFLOAD_MQ_RETURN_CREDITS(iboffload, coll_fragment->mq_index, coll_fragment->mq_credits); - - /* Remove coll frag from coll request opal_list */ - opal_list_remove_item(&coll_fragment->coll_full_req->work_requests, - (opal_list_item_t *) coll_fragment); - - if (false == coll_fragment->in_pending_list) { - /* Put the collfrag on pending list */ - coll_fragment->in_pending_list = true; - opal_list_append(&iboffload->collfrag_pending, - (opal_list_item_t *) coll_fragment); - } else { - /* The item is already on pending list => - insert it first that not break order - between frags on the list */ - opal_list_prepend(&iboffload->collfrag_pending, - (opal_list_item_t *) coll_fragment); - } - - return OMPI_SUCCESS; -} - -/* Forward declaration */ -struct mca_bcol_iboffload_reg_t; -static inline __opal_attribute_always_inline__ - int mca_bcol_iboffload_prepare_buffer( - void *buffer, - size_t size, - struct mca_bcol_iboffload_reg_t **registration_handler, - mca_bcol_iboffload_module_t *iboffload) -{ - int rc; - mca_mpool_base_registration_t *reg = NULL; - - assert(size > 0); - rc = iboffload->device->mpool->mpool_register( - iboffload->device->mpool, - buffer, size, - (uint32_t) 0 /* flags */, - ®); - - *registration_handler = - (struct mca_bcol_iboffload_reg_t *) reg; - - return rc; -} - -int mca_bcol_iboffload_coll_req_implement( - mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collreq_t *coll_request); - -END_C_DECLS - -#endif diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_component.c b/ompi/mca/bcol/iboffload/bcol_iboffload_component.c deleted file mode 100644 index 01cee48fe7..0000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_component.c +++ /dev/null @@ -1,1075 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. 
- * Copyright (c) 2014 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#include -#include -#include - -#include - -#include "ompi/constants.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/bcol/base/base.h" -#include "opal/mca/mpool/base/base.h" -#include "ompi/mca/common/ofacm/connect.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/common/ofacm/base.h" -#include "ompi/mca/common/verbs/common_verbs.h" - -#include "opal/util/argv.h" -#include "opal/include/opal/types.h" - -#include "bcol_iboffload_mca.h" -#include "bcol_iboffload_frag.h" -#include "bcol_iboffload_task.h" -#include "bcol_iboffload_device.h" -#include "bcol_iboffload_qp_info.h" -#include "bcol_iboffload_collreq.h" -#include "bcol_iboffload_collfrag.h" - -/* - * Public string showing the bcol ompi_sm V2 component version number - */ -const char *mca_bcol_iboffload_component_version_string = - "Open MPI bcol - iboffload collective MCA component version " OMPI_VERSION; - -/* - * Local functions - */ - -static int setup_qps(void); -static int iboffload_open(void); -static int iboffload_close(void); - -#define GET_IB_DTYPE_BY_CTYPE(ctype, is_int, ib_dtype) \ -do { \ - switch (sizeof(ctype)) { \ - case 1: \ - ib_dtype = ((is_int) ? IBV_M_DATA_TYPE_INT8 : IBV_M_DATA_TYPE_INVALID); \ - break; \ - case 2: \ - ib_dtype = ((is_int) ? IBV_M_DATA_TYPE_INT16 : IBV_M_DATA_TYPE_INVALID); \ - break; \ - case 4: \ - ib_dtype = ((is_int) ? IBV_M_DATA_TYPE_INT32 : IBV_M_DATA_TYPE_FLOAT32); \ - break; \ - case 8: \ - ib_dtype = ((is_int) ? 
IBV_M_DATA_TYPE_INT64 : IBV_M_DATA_TYPE_FLOAT64); \ - break; \ - default: \ - ib_dtype = IBV_M_DATA_TYPE_INVALID; \ - } \ -} while (0) - -/* - * Instantiate the public struct with all of our public information - * and pointers to our public functions in it - */ -mca_bcol_iboffload_component_t mca_bcol_iboffload_component = { - - /* First, fill in the super */ - - .super = { - /* First, the mca_component_t struct containing meta - information about the component itself */ - - .bcol_version = { - MCA_BCOL_BASE_VERSION_2_0_0, - - /* Component name and version */ - - .mca_component_name = "iboffload", - MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION), - - /* Component open and close functions */ - - .mca_open_component = iboffload_open, - .mca_close_component = iboffload_close, - .mca_register_component_params = mca_bcol_iboffload_register_params, - }, - - .collm_init_query = mca_bcol_iboffload_init_query, - .collm_comm_query = mca_bcol_iboffload_comm_query, - .coll_support = mca_bcol_iboffload_coll_supported, - .coll_support_all_types = mca_bcol_iboffload_coll_support_all_types, - .init_done = false, - .need_ordering = true, /* collective calls with iboffload should to be ordered */ - }, - /* iboffload-component specifc information */ - .verbose = 0, /* verbose */ - .num_qps = 0, /* number of qps to use */ - .warn_default_gid_prefix = false, /* warn_default_gid_prefix */ - .warn_nonexistent_if = false, /* warn_nonexistent_if */ - .free_list_num = 0, /* free_list_num */ - .free_list_max = 0, /* free_list_max */ - .free_list_inc = 0, /* free_list_inc */ - .mpool_name = NULL, /* mpool_name */ - .cq_size = 0, /* cq_size */ - .max_inline_data = 0, /* max_inline_data */ - .pkey_val = 0, /* pkey_val */ - .qp_ous_rd_atom = 0, /* qp_ous_rd_atom */ - .mtu = 0, /* mtu */ - .min_rnr_timer = 0, /* min_rnr_timer */ - .timeout = 0, /* timeout */ - .retry_count = 0, /* retry_count */ - .rnr_retry = 0, /* rnr_retry */ - .max_rdma_dst_ops = 0, /* max_rdma_dst_ops */ - .service_level = 0, /* service_level */ - .buffer_alignment = 0, /* buffer_alignment */ - .max_mqe_tasks = 0, /* max_mqe_tasks */ - .max_mq_size = 0, /* max_mq_size */ - .if_include = NULL, /* if_include */ - .if_include_list = NULL, /* if_include_list */ - .if_exclude = NULL, /* if_exclude */ - .if_exclude_list = NULL, /* if_exclude_list */ - .if_list = NULL, /* if_list */ - .ib_devs = NULL, /* ib_devs */ - .num_devs = 0, /* num_devs */ - .receive_queues = NULL, /* receive_queues */ -}; - -static int mca_bcol_iboffload_dummy_init_query( - bool enable_progress_threads, bool enable_mpi_threads) -{ - return OMPI_SUCCESS; -} - -static void mca_bcol_iboffload_device_constructor - (mca_bcol_iboffload_device_t *device) -{ - /* Init OFACM stuf */ - device->dev.ib_dev = NULL; - device->dev.ib_dev_context = NULL; - device->dev.capabilities = 0; - /* device->dev.type = MCA_COMMON_OFACM_COLL;*/ - /* Init other stuff */ - device->ib_pd = NULL; - device->ib_cq = NULL; - device->ports = NULL; - - device->mpool = NULL; - device->ib_mq_cq = NULL; - device->frags_free = NULL; - - device->activated = false; - device->num_act_ports = 0; - - memset(&device->ib_dev_attr, 0, sizeof(struct ibv_device_attr)); - memset(&device->dummy_reg, 0, sizeof( mca_bcol_iboffload_reg_t)); -} - -static void mca_bcol_iboffload_device_destructor - (mca_bcol_iboffload_device_t *device) -{ - int qp_index, num_qps = mca_bcol_iboffload_component.num_qps; - - IBOFFLOAD_VERBOSE(10, ("Device %s will be destroyed.\n", - 
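The GET_IB_DTYPE_BY_CTYPE macro above selects the IB calc data type from sizeof(ctype) rather than from the type's name, so a mapping such as 'long' stays correct on both LP64 and ILP32 ABIs. Here is a compilable sketch of the same sizeof-driven dispatch, with a made-up wire_dtype enum standing in for the vendor's ibv_m_wr_data_type.

    #include <stdio.h>

    /* Hypothetical stand-in for the vendor's calc data-type enum. */
    enum wire_dtype { WIRE_INVALID, WIRE_INT8, WIRE_INT16, WIRE_INT32,
                      WIRE_INT64, WIRE_FLOAT32, WIRE_FLOAT64 };

    /* Same idea as GET_IB_DTYPE_BY_CTYPE: the C type's size, not its
     * name, picks the wire encoding. */
    #define WIRE_DTYPE_BY_CTYPE(ctype, is_int)                          \
        (sizeof(ctype) == 1 ? ((is_int) ? WIRE_INT8  : WIRE_INVALID) :  \
         sizeof(ctype) == 2 ? ((is_int) ? WIRE_INT16 : WIRE_INVALID) :  \
         sizeof(ctype) == 4 ? ((is_int) ? WIRE_INT32 : WIRE_FLOAT32) :  \
         sizeof(ctype) == 8 ? ((is_int) ? WIRE_INT64 : WIRE_FLOAT64) :  \
                              WIRE_INVALID)

    int main(void)
    {
        printf("int    -> %d\n", WIRE_DTYPE_BY_CTYPE(int, 1));    /* 4 bytes on mainstream ABIs */
        printf("long   -> %d\n", WIRE_DTYPE_BY_CTYPE(long, 1));   /* size-dependent             */
        printf("double -> %d\n", WIRE_DTYPE_BY_CTYPE(double, 0)); /* WIRE_FLOAT64               */
        return 0;
    }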
ibv_get_device_name(device->dev.ib_dev))); - - if (NULL != device->frags_free) { - for (qp_index = 0; qp_index < num_qps; ++qp_index) { - mca_bcol_iboffload_dealloc_qps_resource_fn_t dealloc_resource = - mca_bcol_iboffload_component.qp_infos[qp_index].dealloc_resource; - if (NULL != dealloc_resource) { - dealloc_resource(qp_index, device); - } - } - - free(device->frags_free); - } - - if (NULL != device->mpool) { - IBOFFLOAD_VERBOSE(10, ("Mpool destroy - %p.\n", device->mpool)); - if (OMPI_SUCCESS != mca_mpool_base_module_destroy(device->mpool)) { - IBOFFLOAD_ERROR(("Device %s, failed to destroy mpool", - ibv_get_device_name(device->dev.ib_dev))); - } - } - - if (NULL != device->dummy_reg.mr) { - IBOFFLOAD_VERBOSE(10, ("Dummy memory MR unregister - %p.\n", device->dummy_reg.mr)); - if (OMPI_SUCCESS != - mca_bcol_iboffload_deregister_mr((void *) device, &device->dummy_reg.base)) { - IBOFFLOAD_ERROR(("Device %s: failed to unregister dummy memory MR.", - ibv_get_device_name(device->dev.ib_dev))); - } - } - - if (NULL != device->ib_cq) { - if (ibv_destroy_cq(device->ib_cq)) { - IBOFFLOAD_ERROR(("Device %s, failed to destroy CQ, errno says %s", - ibv_get_device_name(device->dev.ib_dev), strerror(errno))); - } - } - - if (NULL != device->ib_mq_cq) { - if (ibv_destroy_cq(device->ib_mq_cq)) { - IBOFFLOAD_ERROR(("Device %s, failed to destroy mq CQ, errno says %s", - ibv_get_device_name(device->dev.ib_dev), strerror(errno))); - } - } - - /* Release IB PD if we have one */ - if (NULL != device->ib_pd) { - if(ibv_dealloc_pd(device->ib_pd)){ - IBOFFLOAD_ERROR(("Device %s, failed to release PD, errno says %s", - ibv_get_device_name(device->dev.ib_dev), strerror(errno))); - } - } - - /* close the device */ - if (NULL != device->dev.ib_dev_context) { - if (ibv_close_device(device->dev.ib_dev_context)) { - IBOFFLOAD_ERROR(("Device %s " - ", failed to close the device, errno says %s", - ibv_get_device_name(device->dev.ib_dev), strerror(errno))); - } - } - - /* release memory */ - if (NULL != device->ports) { - free(device->ports); - } -} - -OBJ_CLASS_INSTANCE(mca_bcol_iboffload_device_t, - opal_list_item_t, - mca_bcol_iboffload_device_constructor, - mca_bcol_iboffload_device_destructor); - -int mca_bcol_iboffload_coll_supported(int op, int dtype, bcol_elem_type elem_type) -{ - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - return (IBV_M_DATA_TYPE_INVALID != cm->map_ompi_to_ib_dt[dtype]) && - (IBV_M_CALC_OP_INVALID != cm->map_ompi_to_ib_calcs[op]) && - (BCOL_SINGLE_ELEM_TYPE == elem_type); -} - -int mca_bcol_iboffload_coll_support_all_types(bcol_coll coll_name) -{ - return BCOL_ALLREDUCE ^ coll_name; -} - -/* Unload devices */ -static int iboffload_release_devices(void) -{ - int i; - mca_bcol_iboffload_device_t *device = NULL; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - opal_pointer_array_t *devs = &cm->devices; - - IBOFFLOAD_VERBOSE(10, ("Destroy all devices.\n")); - - for (i = 0; i < cm->num_devs; i++) { - device = opal_pointer_array_get_item(devs, i); - - IBOFFLOAD_VERBOSE(10, ("Device %s with index %d will be destroyed.\n", - ibv_get_device_name(device->dev.ib_dev), i)); - if (NULL != device) { - OBJ_RELEASE(device); - } - } - - IBOFFLOAD_VERBOSE(10, ("All devices were destroyed.\n")); - - opal_pointer_array_remove_all(devs); - OBJ_DESTRUCT(devs); - - /* release device list */ - /*ibv_free_device_list_compat(cm->ib_devs);*/ - ompi_ibv_free_device_list(cm->ib_devs); - cm->ib_devs = NULL; - - IBOFFLOAD_VERBOSE(10, ("All devices destroyed.\n")); - - 
return OMPI_SUCCESS; -} - -/* Create list of IB HCA that have active port */ -static int iboffload_load_devices(void) -{ - int num_devs = 0, i; - mca_bcol_iboffload_device_t *device = NULL; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - IBOFFLOAD_VERBOSE(10, ("Entering to iboffload_load_devices")); - - /* Get list of devices */ - /*cm->ib_devs = ibv_get_device_list_compat(&num_devs);*/ - cm->ib_devs = ompi_ibv_get_device_list(&num_devs); - if (0 == num_devs || NULL == cm->ib_devs) { - IBOFFLOAD_ERROR(("No IB devices found")); - /* No hca error*/ - opal_show_help("help-mpi-btl-base.txt", "btl:no-nics", true); - return OMPI_ERROR; - } - - cm->num_devs = num_devs; - - for (i = 0; i < num_devs; i++) { - device = OBJ_NEW(mca_bcol_iboffload_device_t); - if (NULL != device) { - opal_pointer_array_set_item(&cm->devices, i, (void *) device); - device->dev.ib_dev = cm->ib_devs[i]; - - IBOFFLOAD_VERBOSE(10, ("Device %s with index %d was appended.\n", - ibv_get_device_name(device->dev.ib_dev), i)); - } - } - - if (0 == opal_pointer_array_get_size(&cm->devices)) { - /* No relevand devices were found, return error */ - IBOFFLOAD_ERROR(("No active devices found.\n")); - - return OMPI_ERROR; - } - - return OMPI_SUCCESS; -} - -static void map_ompi_to_ib_dtype(void) -{ - int dt; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - for (dt = 0; dt < OMPI_DATATYPE_MAX_PREDEFINED; ++dt) { - cm->map_ompi_to_ib_dt[dt] = IBV_M_DATA_TYPE_INVALID; - } - - GET_IB_DTYPE_BY_CTYPE(char, true, cm->map_ompi_to_ib_dt[OMPI_DATATYPE_MPI_SIGNED_CHAR]); - - GET_IB_DTYPE_BY_CTYPE(short, true, cm->map_ompi_to_ib_dt[OMPI_DATATYPE_MPI_SHORT]); - GET_IB_DTYPE_BY_CTYPE(int, true, cm->map_ompi_to_ib_dt[OMPI_DATATYPE_MPI_INT]); - GET_IB_DTYPE_BY_CTYPE(long, true, cm->map_ompi_to_ib_dt[OMPI_DATATYPE_MPI_LONG]); - GET_IB_DTYPE_BY_CTYPE(long long, true, cm->map_ompi_to_ib_dt[OMPI_DATATYPE_MPI_LONG_LONG]); - GET_IB_DTYPE_BY_CTYPE(float, false, cm->map_ompi_to_ib_dt[OMPI_DATATYPE_MPI_FLOAT]); - GET_IB_DTYPE_BY_CTYPE(double, false, cm->map_ompi_to_ib_dt[OMPI_DATATYPE_MPI_DOUBLE]); - - /* Check (only in DEBUG mode) if size of double equal to 64 bit */ - assert(8 == sizeof(double)); -} - -static void map_ompi_to_ib_op_type(void) -{ - int op; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - for (op = 0; op < OMPI_OP_NUM_OF_TYPES; ++op) { - cm->map_ompi_to_ib_calcs[op] = IBV_M_CALC_OP_INVALID; - } - - cm->map_ompi_to_ib_calcs[OMPI_OP_MAX] = IBV_M_CALC_OP_MAX; - cm->map_ompi_to_ib_calcs[OMPI_OP_MIN] = IBV_M_CALC_OP_MIN; - cm->map_ompi_to_ib_calcs[OMPI_OP_SUM] = IBV_M_CALC_OP_ADD; - - cm->map_ompi_to_ib_calcs[OMPI_OP_LAND] = IBV_M_CALC_OP_LAND; - cm->map_ompi_to_ib_calcs[OMPI_OP_BAND] = IBV_M_CALC_OP_BAND; - cm->map_ompi_to_ib_calcs[OMPI_OP_LOR] = IBV_M_CALC_OP_LOR; - cm->map_ompi_to_ib_calcs[OMPI_OP_BOR] = IBV_M_CALC_OP_BOR; - cm->map_ompi_to_ib_calcs[OMPI_OP_LXOR] = IBV_M_CALC_OP_LXOR; - cm->map_ompi_to_ib_calcs[OMPI_OP_BXOR] = IBV_M_CALC_OP_BXOR; -} - -/* - * Open the component - */ -static int iboffload_open(void) -{ - int rc; - - /* local variables */ - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - IBOFFLOAD_VERBOSE(10, ("Open Iboffload component.\n")); - - (void) mca_bcol_iboffload_verify_params(); - - cm->super.priority = 100; - cm->super.n_net_contexts = 0; - cm->super.network_contexts = NULL; - - OBJ_CONSTRUCT(&cm->recv_wrs.lock, opal_mutex_t); - - /* construct lists */ - OBJ_CONSTRUCT(&cm->devices, opal_pointer_array_t); - rc = 
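map_ompi_to_ib_dtype() and map_ompi_to_ib_op_type() above first default every table slot to INVALID and then whitelist the handful of supported entries, which reduces the later support query to two O(1) table probes. A reduced sketch under hypothetical enums and table sizes:

    #include <stdbool.h>

    enum { DT_TABLE = 8, OP_TABLE = 8 };       /* hypothetical table sizes */

    enum wire_dt { WIRE_DT_INVALID = 0, WIRE_DT_INT32, WIRE_DT_FLOAT64 };
    enum wire_op { WIRE_OP_INVALID = 0, WIRE_OP_ADD, WIRE_OP_MAX };

    /* Stand-ins for OMPI's predefined datatype/op indexes. */
    enum { DT_INT = 1, DT_DOUBLE = 2, OP_SUM = 1, OP_MAXIMUM = 2 };

    static enum wire_dt dt_map[DT_TABLE];      /* zero-init: all invalid */
    static enum wire_op op_map[OP_TABLE];

    static void build_maps(void)
    {
        /* Whitelist only what the offload engine can compute. */
        dt_map[DT_INT]     = WIRE_DT_INT32;
        dt_map[DT_DOUBLE]  = WIRE_DT_FLOAT64;
        op_map[OP_SUM]     = WIRE_OP_ADD;
        op_map[OP_MAXIMUM] = WIRE_OP_MAX;
    }

    /* Same shape as mca_bcol_iboffload_coll_supported(): two probes. */
    static bool coll_supported(int dt, int op)
    {
        return WIRE_DT_INVALID != dt_map[dt] &&
               WIRE_OP_INVALID != op_map[op];
    }

    int main(void)
    {
        build_maps();
        return coll_supported(DT_INT, OP_SUM) ? 0 : 1;
    }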
opal_pointer_array_init(&cm->devices, 10, INT_MAX, 10); - if (OMPI_SUCCESS != rc) { - goto close_device; - } - - /* Check MCA parameters */ - if (0 != (mca_bcol_iboffload_component.exchange_tree_order & (mca_bcol_iboffload_component.exchange_tree_order - 1))) { - IBOFFLOAD_ERROR(("Warning: ibcol_iboffload_exchange_tree_order is %d which is not a power of 2, setting it to 2", - mca_bcol_iboffload_component.exchange_tree_order)); - mca_bcol_iboffload_component.exchange_tree_order = 2; - } - - /* Pasha: Since we do not have max inline check like in openib, - I will put some dummy check here. All mlnx devices support at least 512b */ - if (mca_bcol_iboffload_component.max_inline_data > 512) { - IBOFFLOAD_ERROR(("Warning the inline %d, is to big and unsupported", - mca_bcol_iboffload_component.max_inline_data)); - rc = OMPI_ERROR; - goto close_device; - } - - /* Register the progress function */ - rc = opal_progress_register(mca_bcol_iboffload_component_progress); - if (OMPI_SUCCESS != rc) { - IBOFFLOAD_ERROR(("Failed to register the progress function" - " for iboffload component.\n")); - goto close_device; - } - - map_ompi_to_ib_dtype(); - map_ompi_to_ib_op_type(); - - /* The init_done set to true on first component usage */ - cm->init_done = false; - - return OMPI_SUCCESS; - -close_device: - OBJ_DESTRUCT(&cm->devices); - OBJ_DESTRUCT(&cm->recv_wrs.lock); - return rc; -} - -/* - * Close the component - */ -static int iboffload_close(void) -{ - int rc; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - IBOFFLOAD_VERBOSE(10, ("Destroy component free lists.\n")); - - if (true == cm->init_done) { - OBJ_DESTRUCT(&cm->tasks_free); - OBJ_DESTRUCT(&cm->collreqs_free); - OBJ_DESTRUCT(&cm->collfrags_free); - OBJ_DESTRUCT(&cm->calc_tasks_free); - } - - /* Unregister the progress function */ - rc = opal_progress_unregister(mca_bcol_iboffload_component_progress); - if (OMPI_SUCCESS != rc) { - IBOFFLOAD_ERROR(("Failed to unregister the progress function" - " for iboffload component.\n")); - } - - rc = iboffload_release_devices(); - if (OMPI_SUCCESS != rc) { - return rc; - } - - if (NULL != cm->receive_queues) { - free(cm->receive_queues); - } - - OBJ_DESTRUCT(&cm->recv_wrs.lock); - - IBOFFLOAD_VERBOSE(10, ("The component closed.\n")); - - return OMPI_SUCCESS; -} - -/* query to see if the component is available for use, and can - * satisfy the thread and progress requirements - */ -int mca_bcol_iboffload_init_query(bool enable_progress_threads, - bool enable_mpi_threads) -{ - int rc; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - IBOFFLOAD_VERBOSE(10, ("Init Iboffload component.\n")); - - /* Get list of HCAs and ports */ - rc = iboffload_load_devices(); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_ERROR(("Load devices error.\n")); - goto unload_devices; - } - - /* Setup the BSRQ QP's based on the final value of - mca_bcol_iboffload_component.receive_queues. */ - rc = setup_qps(); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_ERROR(("QPs setup error.\n")); - goto unload_devices; - } - - cm->super.collm_init_query = mca_bcol_iboffload_dummy_init_query; - - return OMPI_SUCCESS; - - /* done */ -unload_devices: - IBOFFLOAD_ERROR(("Release devices: an error occured.\n")); - - iboffload_release_devices(); - - return rc; -} - -static int32_t atoi_param(char *param, int32_t dflt) -{ - if (NULL == param || '\0' == param[0]) { - return dflt ? 
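The exchange_tree_order check above uses the classic x & (x - 1) trick: the expression is zero exactly when x has a single bit set. A small self-contained version of the same sanitize-or-fall-back pattern:

    #include <stdio.h>

    static int is_power_of_two(unsigned x)
    {
        return x != 0 && 0 == (x & (x - 1)); /* exactly one bit set */
    }

    /* Mirror of the MCA-parameter fixup: warn about a degenerate value
     * and fall back to a safe default instead of aborting. */
    static unsigned sanitize_tree_order(unsigned requested, unsigned dflt)
    {
        if (!is_power_of_two(requested)) {
            fprintf(stderr, "tree order %u is not a power of 2, using %u\n",
                    requested, dflt);
            return dflt;
        }
        return requested;
    }

    int main(void)
    {
        printf("%u\n", sanitize_tree_order(6, 2)); /* warns, prints 2 */
        printf("%u\n", sanitize_tree_order(8, 2)); /* prints 8        */
        return 0;
    }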
dflt : 1; - } - - return atoi(param); -} - -static int setup_qps(void) -{ - int ret = OMPI_SUCCESS, qp = 0; - int rd_num = 0, rd_low = 0, size = 0, - rd_win = 0, rd_rsv = 0, sd_max = 0; - - mca_bcol_iboffload_qp_type_t type = 0; - - char **queues = NULL, **params = NULL; - - queues = opal_argv_split(mca_bcol_iboffload_component.receive_queues, ':'); - if (0 == opal_argv_count(queues)) { - opal_show_help("help-mpi-btl-openib.txt", - "no qps in receive_queues", true, - ompi_process_info.nodename, - mca_bcol_iboffload_component.receive_queues); - - ret = OMPI_ERROR; - - goto exit; - } - - while (queues[qp] != NULL) { - if (0 == strncmp("P,", queues[qp], 2)) { - type = MCA_BCOL_IBOFFLOAD_PP_QP; - } else if (0 == strncmp("S,", queues[qp], 2)) { - type = MCA_BCOL_IBOFFLOAD_SRQ_QP; - } else if (0 == strncmp("X,", queues[qp], 2)) { -#if HAVE_XRC - type = MCA_BCOL_IBOFFLOAD_XRC_QP; -#else - opal_show_help("help-mpi-btl-openib.txt", "No XRC support", true, - ompi_process_info.nodename, - mca_bcol_iboffload_component.receive_queues); - ret = OMPI_ERR_NOT_AVAILABLE; - goto exit; -#endif - } else { - opal_show_help("help-mpi-btl-openib.txt", - "invalid qp type in receive_queues", true, - ompi_process_info.nodename, - mca_bcol_iboffload_component.receive_queues, - queues[qp]); - - ret = OMPI_ERR_BAD_PARAM; - - goto exit; - } - - ++qp; - } - - mca_bcol_iboffload_component.num_qps = MCA_BCOL_IBOFFLOAD_QP_LAST; - - qp = 0; -#define P(N) (((N) > count) ? NULL : params[(N)]) - while (NULL != queues[qp]) { - int count; - - params = opal_argv_split_with_empty(queues[qp], ','); - count = opal_argv_count(params); - - if ('P' == params[0][0]) { - if (count < 3 || count > 6) { - opal_show_help("help-mpi-btl-openib.txt", - "invalid pp qp specification", true, - ompi_process_info.nodename, queues[qp]); - - ret = OMPI_ERR_BAD_PARAM; - - goto exit; - } - - size = atoi_param(P(1), 0); - - rd_num = atoi_param(P(2), 256); - - /* by default set rd_low to be 3/4 of rd_num */ - rd_low = atoi_param(P(3), rd_num - (rd_num / 4)); - rd_win = atoi_param(P(4), (rd_num - rd_low) * 2); - rd_rsv = atoi_param(P(5), (rd_num * 2) / rd_win); - - - if ((rd_num - rd_low) > rd_win) { - opal_show_help("help-mpi-btl-openib.txt", "non optimal rd_win", - true, rd_win, rd_num - rd_low); - } - } else { - if (count < 3 || count > 5) { - opal_show_help("help-mpi-btl-openib.txt", - "invalid srq specification", true, - ompi_process_info.nodename, queues[qp]); - - ret = OMPI_ERR_BAD_PARAM; - - goto exit; - } - - size = atoi_param(P(1), 0); - rd_num = atoi_param(P(2), 256); - - /* by default set rd_low to be 3/4 of rd_num */ - rd_low = atoi_param(P(3), rd_num - (rd_num / 4)); - sd_max = atoi_param(P(4), rd_low / 4); - - IBOFFLOAD_VERBOSE(10, ("srq: rd_num is %d rd_low is %d sd_max is %d", - rd_num, rd_low, sd_max)); - - } - - if (rd_num <= rd_low) { - opal_show_help("help-mpi-btl-openib.txt", "rd_num must be > rd_low", - true, ompi_process_info.nodename, queues[qp]); - ret = OMPI_ERR_BAD_PARAM; - - goto exit; - } - - opal_argv_free(params); - - ++qp; - } - - params = NULL; - - for (qp = 0; qp < MCA_BCOL_IBOFFLOAD_QP_LAST; ++qp) { - mca_bcol_iboffload_component.qp_infos[qp].qp_index = qp; - - mca_bcol_iboffload_component.qp_infos[qp].type = type; - mca_bcol_iboffload_component.qp_infos[qp].size = size; - - mca_bcol_iboffload_component.qp_infos[qp].rd_num = rd_num; - mca_bcol_iboffload_component.qp_infos[qp].rd_low = rd_low; - - mca_bcol_iboffload_component.qp_infos[qp].rd_pp_win = rd_num - rd_low; - - if (MCA_BCOL_IBOFFLOAD_PP_QP == type) { - 
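The parsing pass above splits receive_queues twice: once on ':' to get one spec per QP, then each spec on ',' with atoi_param() supplying defaults for missing trailing fields. The sketch below reproduces that two-level parse with strtok_r; note that, unlike opal_argv_split_with_empty, strtok_r skips genuinely empty fields, so this is only an approximation, and the spec string is a hypothetical example.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Default-aware atoi, like atoi_param(): missing fields keep dflt. */
    static int field_or_default(const char *s, int dflt)
    {
        return (NULL == s || '\0' == s[0]) ? dflt : atoi(s);
    }

    int main(void)
    {
        char spec[] = "P,65536,256:S,128000,256,192";
        char *sp1, *qp = strtok_r(spec, ":", &sp1);

        while (NULL != qp) {
            char *sp2;
            char type = qp[0];
            (void) strtok_r(qp, ",", &sp2);                  /* skip type tag */
            int size   = field_or_default(strtok_r(NULL, ",", &sp2), 0);
            int rd_num = field_or_default(strtok_r(NULL, ",", &sp2), 256);
            int rd_low = field_or_default(strtok_r(NULL, ",", &sp2),
                                          rd_num - rd_num / 4); /* 3/4 rule  */

            printf("type=%c size=%d rd_num=%d rd_low=%d\n",
                   type, size, rd_num, rd_low);
            qp = strtok_r(NULL, ":", &sp1);
        }
        return 0;
    }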
mca_bcol_iboffload_component.qp_infos[qp].u.pp_qp.rd_win = rd_win; - mca_bcol_iboffload_component.qp_infos[qp].u.pp_qp.rd_rsv = rd_rsv; - } else { - mca_bcol_iboffload_component.qp_infos[qp].u.srq_qp.sd_max = sd_max; - } - - if (NULL != setup_qps_fn[qp]) { - setup_qps_fn[qp](&mca_bcol_iboffload_component.qp_infos[qp]); - } - } - -exit: - if (NULL != params) { - opal_argv_free(params); - } - - if (NULL != queues) { - opal_argv_free(queues); - } - - return ret; -} - -static int progress_pending_collfrags(mca_bcol_iboffload_module_t *iboffload) -{ - mca_bcol_iboffload_collfrag_t *pending_collfrag; - int rc, size = opal_list_get_size(&iboffload->collfrag_pending); - - IBOFFLOAD_VERBOSE(10, ("Calling progress_pending_collfrags")); - - do { - pending_collfrag = (mca_bcol_iboffload_collfrag_t *) - opal_list_remove_first(&iboffload->collfrag_pending); - - IBOFFLOAD_VERBOSE(10, ("Get pending_collfrag - %p, iboffload - %p, " - "pending list size - %d.", pending_collfrag, iboffload, - opal_list_get_size(&iboffload->collfrag_pending))); - - /* Return back coll frag to coll request opal_list */ - opal_list_append(&pending_collfrag->coll_full_req->work_requests, - (opal_list_item_t *) pending_collfrag); - - rc = pending_collfrag->coll_full_req->progress_fn - (iboffload, pending_collfrag->coll_full_req); - if (OPAL_UNLIKELY(BCOL_FN_STARTED != rc && OMPI_SUCCESS != rc)) { - return OMPI_ERROR; - } - } while (--size > 0); - - return OMPI_SUCCESS; -} - - -/** - * Test - if we finished with the coll fragment descriptor, - * and free all resouces if so. - **/ -int -mca_bcol_iboffload_free_tasks_frags_resources( - mca_bcol_iboffload_collfrag_t *collfrag, - ompi_free_list_t *frags_free) -{ - int rc; - - mca_bcol_iboffload_task_t *task = collfrag->tasks_to_release; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - /* Support for multiple frags we will add later - * n_outstanding_frags = coll_req->n_frags_sent - coll_req->n_frag_net_complete; */ - - while (NULL != task) { - /* Return frag (is the reference counter is zero)*/ - rc = release_frags_on_task(task, frags_free); - if (OMPI_SUCCESS != rc) { - return OMPI_ERROR; - } - - /* Return task: if the pointer is NULL => we assume the task - is a member of the common task list (tasks_free) */ - if (NULL == task->task_list) { - OMPI_FREE_LIST_RETURN_MT(&cm->tasks_free, - (ompi_free_list_item_t *) task); - } else { - OMPI_FREE_LIST_RETURN_MT(task->task_list, - (ompi_free_list_item_t *) task); - } - - task = task->next_task; - } - - return OMPI_SUCCESS; -} - -static void fatal_error(char *mesg) -{ - IBOFFLOAD_ERROR(("FATAL ERROR: %s", mesg)); - ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_INTERN); -} - -#define RELEASE_COLLFRAG(cf) \ - do { \ - opal_list_remove_item(&(cf)->coll_full_req->work_requests, \ - (opal_list_item_t *) (cf)); \ - if (&(cf)->coll_full_req->first_collfrag != (cf)) { \ - OMPI_FREE_LIST_RETURN_MT(&mca_bcol_iboffload_component.collfrags_free, \ - (ompi_free_list_item_t *) (cf)); \ - } \ - } while (0) - -#define COLLFRAG_IS_DONE(cf) ((cf)->complete && (cf)->n_sends_completed == (cf)->n_sends) - -/* Pasha: Need to modify the code to progress pending queue only if relevant -* resource was released */ -#define PROGRESS_PENDING_COLLFRAG(cf) \ - if (OPAL_UNLIKELY(opal_list_get_size(&(cf)->coll_full_req->module->collfrag_pending) > 0)) { \ - int rc; \ - IBOFFLOAD_VERBOSE(10, ("Calling for PROGRESS_PENDING_COLLFRAG")); \ - rc = progress_pending_collfrags((cf)->coll_full_req->module); \ - if (OPAL_UNLIKELY(OMPI_ERROR == rc)) { \ 
- fatal_error("failed to progress_pending_collfrags\n"); \ - return 0; \ - } \ - } - - -static inline __opal_attribute_always_inline__ int - handle_collfrag_done(mca_bcol_iboffload_collfrag_t *coll_frag, - mca_bcol_iboffload_collreq_t *coll_request, - mca_bcol_iboffload_device_t *device) -{ - int rc; - - if (COLLFRAG_IS_DONE(coll_frag)) { - IBOFFLOAD_VERBOSE(10, ("Coll frag - %p already done.\n", coll_frag)); - - coll_request->n_frag_net_complete++; - IBOFFLOAD_VERBOSE(10, ("Free tasks resourse.\n")); - /* Check if we are done with this coll_frag and release resources if so. */ - rc = mca_bcol_iboffload_free_tasks_frags_resources(coll_frag, device->frags_free); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_ERROR(("mca_bcol_iboffload_free_tasks_frags_resources FAILED")); - fatal_error("Failed to mca_bcol_iboffload_free_tasks_frags_resources\n"); - return -1; - } - - BCOL_IBOFFLOAD_MQ_RETURN_CREDITS(coll_request->module, coll_frag->mq_index, coll_frag->mq_credits); - - RELEASE_COLLFRAG(coll_frag); - - PROGRESS_PENDING_COLLFRAG(coll_frag); - - IBOFFLOAD_VERBOSE(10, ("Alg %d: user_handle_freed - %d, n_frag_mpi_complete - %d, " - "n_fragments- %d, n_frag_net_complete - %d, n_fragments - %d.\n", - coll_frag->alg, - coll_request->user_handle_freed, - coll_request->n_frag_mpi_complete, - coll_request->n_fragments, - coll_request->n_frag_net_complete, - coll_request->n_fragments)); - - /* check for full message completion */ - if (COLLREQ_IS_DONE(coll_request)) { - IBOFFLOAD_VERBOSE(10, ("Coll request already done.\n")); - RELEASE_COLLREQ(coll_request); - } - } - - IBOFFLOAD_VERBOSE(10, ("Exit with success.\n")); - - return 0; -} - -/* - * IBOFFLOAD component progress. - */ - -static int progress_one_device(mca_bcol_iboffload_device_t *device) -{ - int ne, rc, count = 0; - - mca_bcol_iboffload_collfrag_t *coll_frag; - mca_bcol_iboffload_collreq_t *coll_request; - - struct ibv_wc wc; - memset(&wc, 0, sizeof(struct ibv_wc)); - - /* - * poll for collective completion - does not mean resources can - * be freed, as incomplete network level sends may still be pending - */ - - /* Poll for completion on completion on wait MQEs */ - if(0 != (ne = ibv_poll_cq(device->ib_mq_cq, 1, &wc))) { - do { - if (OPAL_UNLIKELY(0 > ne)) { - IBOFFLOAD_ERROR(("Device %s: " - "failed to poll MQ completion queue\n", - ibv_get_device_name(device->dev.ib_dev))); - fatal_error("failed to poll MQ completion queue\n"); - return count; - } - - if (OPAL_UNLIKELY(IBV_WC_SUCCESS != wc.status)) { - IBOFFLOAD_ERROR(("Device %s: " - "the completion with error on wait was gotten, status %d, opcode %d, " - "vendor_err 0x%x, qp %x, id 0x%x\n", ibv_get_device_name(device->dev.ib_dev), - wc.status, wc.opcode, wc.vendor_err, wc.qp_num, wc.wr_id)); - fatal_error("wc.status \n"); - return count; - } - - IBOFFLOAD_VERBOSE(10, ("The MQ completion was polled.\n")); - - ++count; - - /* get pointer to mca_bcol_iboffload_collfrag_t */ - coll_frag = (mca_bcol_iboffload_collfrag_t*) - (uint64_t) (uintptr_t) wc.wr_id; - - /* Only last MQ task of collective frag - sends completion signal, so if we got it => - all MQEs were done. 
*/ - coll_frag->complete = true; - - IBOFFLOAD_VERBOSE(10, ("MQ completion for algorithm %d coll_frag_addr %p ml buffer index %d", - coll_frag->alg, (void *)coll_frag, coll_frag->coll_full_req->ml_buffer_index)); - - /* full request descriptor */ - coll_request = coll_frag->coll_full_req; - - coll_request->n_frag_mpi_complete++; - - /* - * at this stage all receives have been completed, so - * unpack the data to user buffer, the resources will be released when we will done with all - * element in the task list - */ - - if (NULL != coll_request->completion_cb_fn) { - if (OMPI_SUCCESS != - coll_request->completion_cb_fn(coll_frag)) { - fatal_error("coll_request->completion_cb_fn\n"); - return count; - } - } - - if (coll_request->n_frag_mpi_complete == - coll_request->n_fragments) { - OPAL_ATOMIC_SWAP_PTR(&coll_request->super.reg_complete, REQUEST_COMPLETED); - IBOFFLOAD_VERBOSE(10, ("After request completion.\n")); - } - - rc = handle_collfrag_done(coll_frag, coll_request, device); - if (0 != rc) { - return count; - } - } while(0 != (ne = ibv_poll_cq(device->ib_mq_cq, 1, &wc))); - - return count; - } - - /* poll the send completion queue */ - do { - ne = ibv_poll_cq(device->ib_cq, 1, &wc); - if (0 < ne) { - if (OPAL_UNLIKELY(IBV_WC_SUCCESS != wc.status)) { - IBOFFLOAD_ERROR(("Device %s, " - "the completion with error on send was gotten, status %d, opcode %d, " - "vendor_err 0x%x, qp %x, id 0x%x\n", ibv_get_device_name(device->dev.ib_dev), - wc.status, wc.opcode, wc.vendor_err, wc.qp_num, wc.wr_id)); - -#if OPAL_ENABLE_DEBUG - { - mca_bcol_iboffload_module_t *iboffload; - int i, qp_index, num_qps = mca_bcol_iboffload_component.num_qps; - - coll_frag = (mca_bcol_iboffload_collfrag_t*) - (uint64_t) (uintptr_t) wc.wr_id; - - iboffload = coll_frag->coll_full_req->module; - - for (i = 0; i < iboffload->num_endpoints; ++i) { - mca_bcol_iboffload_endpoint_t *ep = iboffload->endpoints[i]; - - for (qp_index = 0; qp_index < num_qps; ++qp_index) { - if (NULL != ep->qps[qp_index].qp->lcl_qp && - wc.qp_num == ep->qps[qp_index].qp->lcl_qp->qp_num) { - IBOFFLOAD_ERROR(("Module - %p, coll_frag - %p, " - "destination %d, qp index - %d.", - iboffload, coll_frag, i, qp_index)); - } - } - } - } -#endif - fatal_error("Failed to ibv_poll_cq\n"); - return count; - } - - ++count; - - /* get pointer to mca_bcol_iboffload_collfrag_t */ - coll_frag = (mca_bcol_iboffload_collfrag_t*) - (uint64_t) (uintptr_t) wc.wr_id; - - /* update the number of completed sends */ - coll_frag->n_sends_completed++; - - IBOFFLOAD_VERBOSE(10, ("Send CQ completion for algorithm %d coll_frag_addr %p ml buffer index %d", - coll_frag->alg, (void *)coll_frag, coll_frag->coll_full_req->ml_buffer_index)); - - IBOFFLOAD_VERBOSE(10, ("Alg %d coll_frag_addr %p: n_sends_completed - %d, n_sends - %d.\n", - coll_frag->alg, (void *)coll_frag, - coll_frag->n_sends_completed, - coll_frag->n_sends)); - - assert(coll_frag->n_sends_completed <= coll_frag->n_sends); - - /* full message descriptor */ - coll_request = coll_frag->coll_full_req; - - /* check to see if all sends are complete from the network - * perspective */ - rc = handle_collfrag_done(coll_frag, coll_request, device); - if (0 != rc) { - return count; - } - } else if (OPAL_UNLIKELY(0 > ne)) { - IBOFFLOAD_ERROR(("Device %s: " - "failed to poll send completion queue\n", - ibv_get_device_name(device->dev.ib_dev))); - fatal_error("failed to poll send completion queue\n"); - return count; - } - } while (0 != ne); - - return count; -} - -int mca_bcol_iboffload_component_progress(void) -{ - int i, 
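progress_one_device() above drains each CQ one entry at a time and recovers its collfrag pointer from wc.wr_id, where it was stashed at post time. Here is a reduced drain loop against the standard verbs API; the frag bookkeeping is a hypothetical stub, and error handling is trimmed to the two cases the component checks.

    #include <stdint.h>
    #include <infiniband/verbs.h>

    struct frag { int id; };                      /* hypothetical bookkeeping */
    static void frag_done(struct frag *f) { (void) f; }

    /* Drain one CQ until it is empty; returns completions handled, -1 on
     * error.  wr_id carries a pointer round-tripped through uintptr_t,
     * exactly as the component stores its collfrag when posting. */
    static int drain_cq(struct ibv_cq *cq)
    {
        struct ibv_wc wc;
        int ne, count = 0;

        while (0 != (ne = ibv_poll_cq(cq, 1, &wc))) {
            if (ne < 0) {
                return -1;                        /* CQ read failed         */
            }
            if (IBV_WC_SUCCESS != wc.status) {
                return -1;                        /* completion with error  */
            }
            frag_done((struct frag *) (uintptr_t) wc.wr_id);
            ++count;
        }
        return count;                             /* CQ drained             */
    }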
count = 0; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - opal_pointer_array_t *devs = &cm->devices; - - int devices_count = cm->num_devs; - - for(i = 0; i < devices_count; ++i) { - mca_bcol_iboffload_device_t *device = - opal_pointer_array_get_item(devs, i); - - if (OPAL_LIKELY(device->activated)) { - count += progress_one_device(device); - } - } - - return count; -} - -#if OPAL_ENABLE_DEBUG /* debug code */ -int task_to_rank(mca_bcol_iboffload_module_t *iboffload, struct mqe_task *task) -{ - int i, j, num_qps = mca_bcol_iboffload_component.num_qps; - for (i = 0; i < iboffload->num_endpoints; i++) { - for (j = 0; j < num_qps; j++) { - if (task->post.qp == iboffload->endpoints[i]->qps[j].qp->lcl_qp) { - return i; - } - } - } - - return -1; /* not found ! */ -} - -int wait_to_rank(mca_bcol_iboffload_module_t *iboffload, struct mqe_task *task) -{ - int i, j; - for (i = 0; i < iboffload->num_endpoints; i++) { - for (j = 0; j < IBOFFLOAD_CQ_LAST; j++) { - if (task->wait.cq == iboffload->endpoints[i]->recv_cq[j]) { - return i; - } - } - } - - return -1; /* not found ! */ -} - -#endif /* debug code */ diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_device.h b/ompi/mca/bcol/iboffload/bcol_iboffload_device.h deleted file mode 100644 index a7503df4f0..0000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_device.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_IBOFFLOAD_DEVICE_H -#define MCA_BCOL_IBOFFLOAD_DEVICE_H - -#include "ompi_config.h" - -#include -#include - -#include - -#include "bcol_iboffload.h" -#include "bcol_iboffload_frag.h" - -#define BCOL_IBOFFLOAD_DUMMY_MEM_SIZE 1 - -BEGIN_C_DECLS - -/* Device OBJ */ -struct mca_bcol_iboffload_device_t { - opal_list_item_t super; - - bool activated; - - struct ompi_common_ofacm_base_dev_desc_t dev; - struct ibv_pd *ib_pd; - struct ibv_device_attr ib_dev_attr; - - int num_act_ports; - - struct mca_bcol_iboffload_port_t *ports; - struct ibv_cq *ib_cq; - - /* CQ for MQs of all iboffload modules on this device */ - struct ibv_cq *ib_mq_cq; - - /* The free list of registered buffers - * since the registration depends on PD, it is - * most resonable place to keep the frags */ - ompi_free_list_t *frags_free; - mca_mpool_base_module_t *mpool; - - /* netowrk context */ - bcol_base_network_context_t *net_context; - - /* We keep dummy frags for all QPs on each device, - possibly some of QPs don't need it but anyway we distribute dummy - for them. All dummies point to a same byte of memory. */ - mca_bcol_iboffload_frag_t dummy_frags[MCA_BCOL_IBOFFLOAD_QP_LAST]; - - /* Registred memory for the dummy frags */ - char dummy_mem[BCOL_IBOFFLOAD_DUMMY_MEM_SIZE]; - - /* Registration info of the dummy memory */ - mca_bcol_iboffload_reg_t dummy_reg; -}; - -typedef struct mca_bcol_iboffload_device_t mca_bcol_iboffload_device_t; -OBJ_CLASS_DECLARATION(mca_bcol_iboffload_device_t); - -END_C_DECLS - -#endif /* MCA_BCOL_IBOFFLOAD_DEVICE_H */ - diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_endpoint.c b/ompi/mca/bcol/iboffload/bcol_iboffload_endpoint.c deleted file mode 100644 index 50d0eeeb8c..0000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_endpoint.c +++ /dev/null @@ -1,373 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. 
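mca_bcol_iboffload_component_progress() above follows the runtime's progress-engine contract: the component registers one callback via opal_progress_register() and returns the amount of work it retired, and the core loop sums those counts across components. A minimal sketch of that contract, with hypothetical names:

    #include <stdio.h>

    #define MAX_CBS 8

    typedef int (*progress_cb_t)(void *ctx);   /* returns items handled */

    static progress_cb_t cbs[MAX_CBS];
    static void         *ctxs[MAX_CBS];
    static int           n_cbs;

    /* Analogue of opal_progress_register(): remember the callback. */
    static int progress_register(progress_cb_t cb, void *ctx)
    {
        if (n_cbs >= MAX_CBS) {
            return -1;
        }
        cbs[n_cbs] = cb;
        ctxs[n_cbs] = ctx;
        n_cbs++;
        return 0;
    }

    /* One engine pass: poll every source, report total work retired. */
    static int progress_all(void)
    {
        int count = 0;
        for (int i = 0; i < n_cbs; i++) {
            count += cbs[i](ctxs[i]);
        }
        return count;
    }

    static int demo_cb(void *ctx)
    {
        (void) ctx;
        return 1;                               /* pretend one completion */
    }

    int main(void)
    {
        progress_register(demo_cb, NULL);
        printf("handled %d\n", progress_all()); /* handled 1 */
        return 0;
    }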
- * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include - -#include "ompi/constants.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/bcol/base/base.h" -#include "ompi/mca/common/ofacm/connect.h" - -#include "opal/threads/mutex.h" -#include "opal/class/opal_object.h" - -#include "bcol_iboffload.h" -#include "bcol_iboffload_frag.h" -#include "bcol_iboffload_device.h" -#include "bcol_iboffload_endpoint.h" - -static void mca_bcol_iboffload_endpoint_construct(mca_bcol_iboffload_endpoint_t *ep) -{ - ep->iboffload_module = NULL; - ep->ibnet_proc = NULL; - - ep->qps = (mca_bcol_iboffload_endpoint_qp_t *) - calloc(mca_bcol_iboffload_component.num_qps, - sizeof(mca_bcol_iboffload_endpoint_qp_t)); - - ep->index = 0; - OBJ_CONSTRUCT(&ep->endpoint_lock, opal_mutex_t); - OBJ_CONSTRUCT(&ep->pending_frags, opal_list_t); - - memset(ep->recv_cq, 0, IBOFFLOAD_CQ_LAST * sizeof(ep->recv_cq[0])); - memset(&ep->qp_config, 0, sizeof(ompi_common_ofacm_base_qp_config_t)); - - ep->cpc_context = NULL; - - memset(&ep->remote_zero_rdma_addr, 0, sizeof(mca_bcol_iboffload_rdma_info_t)); - memset(&ep->remote_rdma_block, 0, sizeof(mca_bcol_iboffload_rem_rdma_block_t)); - - ep->need_toset_remote_rdma_info = false; -} - -static void mca_bcol_iboffload_endpoint_destruct(mca_bcol_iboffload_endpoint_t *ep) -{ - int qp_index, num_qps, i; - ompi_free_list_item_t *item; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - num_qps = cm->num_qps; - - IBOFFLOAD_VERBOSE(10, ("Destruct: ep - %p, ep->index - %d", ep, ep->index)); - - if (NULL != ep->qps) { - for (qp_index = 0; qp_index < num_qps; ++qp_index) { - do { - item = (ompi_free_list_item_t *) - opal_list_remove_first(&ep->qps[qp_index].preposted_frags); - if(OPAL_LIKELY(NULL != item)) { - OMPI_FREE_LIST_RETURN_MT(&ep->device->frags_free[qp_index], item); - } - } while (NULL != item); - - OBJ_DESTRUCT(&ep->qps[qp_index].preposted_frags); - } - - free(ep->qps); - } - - OBJ_DESTRUCT(&ep->endpoint_lock); - OBJ_DESTRUCT(&ep->pending_frags); - - /* If the CPC has an endpoint_finalize function, call it */ - if (NULL != ep->endpoint_cpc->cbm_endpoint_finalize) { - ep->endpoint_cpc->cbm_endpoint_finalize(ep->cpc_context); - } - - for (i = 0; i < IBOFFLOAD_CQ_LAST; i++) { - if (NULL != ep->recv_cq[i]) { - if (ibv_destroy_cq(ep->recv_cq[i])) { - IBOFFLOAD_ERROR(("Endpoint %x " - ", failed to destroy CQ, errno says %s", - ep, strerror(errno))); - } - } - } -} - -OBJ_CLASS_INSTANCE(mca_bcol_iboffload_endpoint_t, - opal_list_item_t, - mca_bcol_iboffload_endpoint_construct, - mca_bcol_iboffload_endpoint_destruct); - -/* Pasha: Add some error message here */ - -/* - * Called when the CPC has established a connection on an endpoint - */ -static void mca_bcol_iboffload_endpoint_invoke_error(void *context) -{ - mca_bcol_iboffload_endpoint_t *endpoint = (mca_bcol_iboffload_endpoint_t *) context; - IBOFFLOAD_ERROR(("Getting error on endpoint - %p!", endpoint)); -} - - -/* Pasha: Need to add more logic here */ -static void mca_bcol_iboffload_endpoint_cpc_complete(void *context) -{ - mca_bcol_iboffload_endpoint_t *endpoint = (mca_bcol_iboffload_endpoint_t *) context; - - IBOFFLOAD_VERBOSE(10, ("Endpoint - %p for comm rank %d: CPC complete.\n", - endpoint, endpoint->iboffload_module->ibnet->super.group_list[endpoint->index])); - - if (OMPI_SUCCESS != - mca_bcol_iboffload_exchange_rem_addr(endpoint)) { - IBOFFLOAD_ERROR(("endpoint - %p, " - 
"remote addr exchange error.\n", endpoint)); - } - /* The connection is correctly setup. Now we can decrease the - event trigger. */ - opal_progress_event_users_decrement(); -} - -/* Vasily: Need to add more logic here */ -int mca_bcol_iboffload_endpoint_post_recvs(void *context) -{ - int qp_index, rc, num_qps; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - mca_bcol_iboffload_endpoint_t *endpoint = - (mca_bcol_iboffload_endpoint_t *) context; - - IBOFFLOAD_VERBOSE(10, ("endpoint - %p, post of %d recvs !", - endpoint, cm->qp_infos[0].rd_num)); - /* TODO Pasha - fix later */ - num_qps = cm->num_qps; - for (qp_index = 0; qp_index < num_qps; ++qp_index) { - rc = mca_bcol_iboffload_prepost_recv(endpoint, qp_index, - cm->qp_infos[qp_index].rd_num); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - /* Pasha: Need to add more failure logic */ - IBOFFLOAD_ERROR(("Failed to prepost recv fragments " - "on qp index %d, return code - %d", - qp_index, rc)); - - return OMPI_ERROR; - } - } - - return OMPI_SUCCESS; -} - -/* The function go over each ibnet proc and creates endpoint for each one */ -int mca_bcol_iboffloads_create_endpoints(mca_sbgp_ibnet_connection_group_info_t *cgroup, - mca_bcol_iboffload_module_t *module) { - uint32_t i; - mca_bcol_iboffload_endpoint_t *ep; - - if (NULL == cgroup || NULL == module) { - IBOFFLOAD_ERROR(("Bad parameters for create endpoints function.")); - return OMPI_ERROR; - } - - module->num_endpoints = cgroup->num_procs; - module->endpoints = (mca_bcol_iboffload_endpoint_t **) - calloc(module->num_endpoints, - sizeof(mca_bcol_iboffload_endpoint_t *)); - if (NULL == module->endpoints) { - IBOFFLOAD_ERROR(("Error memory allocation for endpoints array" - ", errno says %s", strerror(errno))); - return OMPI_ERROR; - } - - IBOFFLOAD_VERBOSE(10, ("iboffload - %p, num of endpoints - %d.\n", - module, module->num_endpoints)); -/* Ishai: No need to open so many endpoints. 
-    /* Ishai: No need to open so many endpoints;
-       we are not talking with all procs */
-    for (i = 0; i < cgroup->num_procs; i++) {
-        ep = OBJ_NEW(mca_bcol_iboffload_endpoint_t);
-        /* check the qp memory allocation */
-        if (NULL == ep->qps) {
-            IBOFFLOAD_ERROR(("Failed to allocate memory for qps"));
-            return OMPI_ERROR;
-        }
-        /* init the new endpoint */
-        ep->index = i;
-        ep->iboffload_module = module;
-        /* saving the device for the destruction - the iboffload module
-           may not exist then */
-        ep->device = ep->iboffload_module->device;
-        ep->ibnet_proc = (mca_sbgp_ibnet_proc_t *)
-            opal_pointer_array_get_item(cgroup->ibnet_procs, i);
-        if (NULL == ep->ibnet_proc) {
-            IBOFFLOAD_ERROR(("Failed to get the proc pointer for index %d", i));
-            return OMPI_ERROR;
-        }
-
-        if (OMPI_SUCCESS !=
-                mca_bcol_iboffload_endpoint_init(ep)) {
-            IBOFFLOAD_ERROR(("Failed to init endpoint - %p", ep));
-            return OMPI_ERROR;
-        }
-
-        IBOFFLOAD_VERBOSE(10, ("Endpoint - %p, ep index - %d, iboffload - %p, "
-                               "cpc context - %p.\n", ep, ep->index,
-                               ep->iboffload_module, ep->cpc_context));
-
-        /* Add the new endpoint to the array of endpoints */
-        module->endpoints[i] = ep;
-    }
-
-    /* Pasha: Need to add better clean-up here */
-    return OMPI_SUCCESS;
-}
-
-static int config_qps(mca_bcol_iboffload_endpoint_t *ep)
-{
-    int qp_index;
-    int ret = OMPI_SUCCESS;
-
-    ompi_common_ofacm_base_qp_config_t *qp_config = &ep->qp_config;
-    mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;
-
-    qp_config->num_srqs = 0;
-    qp_config->srq_num = NULL;
-
-    qp_config->num_qps = cm->num_qps;
-
-    qp_config->init_attr = (struct ibv_qp_init_attr *)
-        calloc(qp_config->num_qps, sizeof(struct ibv_qp_init_attr));
-    if (NULL == qp_config->init_attr) {
-        IBOFFLOAD_ERROR(("Failed to allocate memory for the qp init attributes"));
-        ret = OMPI_ERR_OUT_OF_RESOURCE;
-
-        goto config_qps_exit;
-    }
-
-    qp_config->attr = (struct ibv_qp_attr *)
-        calloc(qp_config->num_qps, sizeof(struct ibv_qp_attr));
-    if (OPAL_UNLIKELY(NULL == qp_config->attr)) {
-        IBOFFLOAD_ERROR(("Failed to allocate memory for the qp attributes"));
-        ret = OMPI_ERR_OUT_OF_RESOURCE;
-
-        goto config_qps_exit;
-    }
-
-    /* we must specify that the qps are special */
-    qp_config->init_attr_mask = (uint32_t *)
-        calloc(qp_config->num_qps, sizeof(uint32_t));
-    if (OPAL_UNLIKELY(NULL == qp_config->init_attr_mask)) {
-        IBOFFLOAD_ERROR(("Failed to allocate memory for the qp mask."));
-        ret = OMPI_ERR_OUT_OF_RESOURCE;
-
-        goto config_qps_exit;
-    }
-
-    /* qp_config->rtr_attr_mask = qp_config->rts_attr_mask = NULL; */
-
-    qp_config->rtr_attr_mask = (uint32_t *)
-        calloc(qp_config->num_qps, sizeof(uint32_t));
-    if (OPAL_UNLIKELY(NULL == qp_config->rtr_attr_mask)) {
-        IBOFFLOAD_ERROR(("Failed to allocate memory for the qp rtr attributes mask."));
-        ret = OMPI_ERR_OUT_OF_RESOURCE;
-
-        goto config_qps_exit;
-    }
-
-    qp_config->rts_attr_mask = (uint32_t *)
-        calloc(qp_config->num_qps, sizeof(uint32_t));
-    if (OPAL_UNLIKELY(NULL == qp_config->rts_attr_mask)) {
-        IBOFFLOAD_ERROR(("Failed to allocate memory for the qp rts attributes mask."));
-        ret = OMPI_ERR_OUT_OF_RESOURCE;
-
-        goto config_qps_exit;
-    }
-
-    for (qp_index = 0; qp_index < qp_config->num_qps; ++qp_index) {
-        mca_bcol_iboffload_config_qps_fn_t config_qp =
-            cm->qp_infos[qp_index].config_qp;
-
-        if (NULL != config_qp) {
-            config_qp(qp_index, ep, qp_config);
-        }
-    }
-
-config_qps_exit:
-    return ret;
-}
-
-/* This function is called only for endpoints in the
- * MCA_COMMON_OFACM_USER_CUSTOM state; the caller must hold
- * OPAL_THREAD_LOCK before calling it */
-int mca_bcol_iboffload_endpoint_init(mca_bcol_iboffload_endpoint_t
*ep) -{ - int qp_index, cq_index, num_qps; - ompi_common_ofacm_base_module_t *cpc; - - mca_bcol_iboffload_device_t *device = ep->iboffload_module->device; - - mca_sbgp_ibnet_connection_group_info_t *cgroup = - &ep->iboffload_module->ibnet->cgroups[ep->iboffload_module->cgroup_index]; - - for (cq_index = 0; cq_index < IBOFFLOAD_CQ_LAST; cq_index++) { - if (OMPI_SUCCESS != - mca_bcol_iboffload_adjust_cq(device, &ep->recv_cq[cq_index])) { - IBOFFLOAD_ERROR(("Error creating CQ for %s errno says %s", - ibv_get_device_name(device->dev.ib_dev), strerror(errno))); - /* OBJ_RELEASE(ep); */ /* Vasily: What must we do in this case ??? */ - return OMPI_ERROR; - } - } - - if (OPAL_UNLIKELY(OMPI_SUCCESS != config_qps(ep))) { - IBOFFLOAD_ERROR(("Error configure QPs for endpoint %x errno says %s", - ep, strerror(errno))); - return OMPI_ERROR; - } - - /* Adding here one more redirection in critical path. Need to think - * what is the best way to prevent it */ - - IBOFFLOAD_VERBOSE(10, ("Endpoint - %p, rem port - %d", ep, - ep->ibnet_proc->remote_ports_info[BCOL_IBOFFLOAD_ENDPOINT_PORT_IDX(cgroup, ep)].id)); - - cpc = ep->ibnet_proc->remote_ports_info[BCOL_IBOFFLOAD_ENDPOINT_PORT_IDX(cgroup, ep)].local_cpc; - ep->endpoint_cpc = cpc; /* caching pointer to cpc */ - - if (NULL != cpc->cbm_endpoint_init) { - ep->cpc_context = cpc->cbm_endpoint_init( - ep->ibnet_proc->ompi_proc, - &ep->qp_config, - device->ib_pd, - ep->iboffload_module->subnet_id, - ep->iboffload_module->ibnet->group_id, - ep->iboffload_module->lid, - /* Remote lid of target module */ - ep->ibnet_proc->remote_ports_info[BCOL_IBOFFLOAD_ENDPOINT_PORT_IDX(cgroup, ep)].lid, - ep->index, /* user context index */ - (void *) ep, /* user context */ - cpc, - mca_bcol_iboffload_endpoint_cpc_complete, - mca_bcol_iboffload_endpoint_invoke_error, - mca_bcol_iboffload_endpoint_post_recvs); - - if (OPAL_UNLIKELY(NULL == ep->cpc_context)) { - IBOFFLOAD_ERROR(("Endpoint - %p, failed to init context", ep)); - /* OBJ_RELEASE(ep); */ /* Vasily: What must we do in this case ??? */ - return OMPI_ERROR; - } - - /* Updating remote port info */ - num_qps = mca_bcol_iboffload_component.num_qps; - - ep->remote_info = &ep->cpc_context->remote_info; - for (qp_index = 0; qp_index < num_qps; ++qp_index) { - ep->qps[qp_index].qp = &ep->cpc_context->qps[qp_index]; - } - } - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_endpoint.h b/ompi/mca/bcol/iboffload/bcol_iboffload_endpoint.h deleted file mode 100644 index 7a57b57a7b..0000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_endpoint.h +++ /dev/null @@ -1,328 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
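config_qps() above funnels every allocation failure through a single exit label. The sketch below keeps that shape and, at the label, additionally frees whatever was already built - free(NULL) is a no-op, so no flags are needed; the struct names are hypothetical.

    #include <stdlib.h>

    struct qp_init_attr_stub { int placeholder; };   /* hypothetical */

    struct qp_cfg {
        struct qp_init_attr_stub *init_attr;
        unsigned *init_attr_mask;
        unsigned *rtr_attr_mask;
        unsigned *rts_attr_mask;
    };

    /* Every allocation jumps to one label on failure, so there is a
     * single cleanup path to audit. */
    static int config_arrays(struct qp_cfg *c, size_t n)
    {
        c->init_attr = calloc(n, sizeof(*c->init_attr));
        if (NULL == c->init_attr)      goto error;

        c->init_attr_mask = calloc(n, sizeof(*c->init_attr_mask));
        if (NULL == c->init_attr_mask) goto error;

        c->rtr_attr_mask = calloc(n, sizeof(*c->rtr_attr_mask));
        if (NULL == c->rtr_attr_mask)  goto error;

        c->rts_attr_mask = calloc(n, sizeof(*c->rts_attr_mask));
        if (NULL == c->rts_attr_mask)  goto error;

        return 0;                      /* caller owns all four arrays */

    error:
        free(c->rtr_attr_mask);
        free(c->init_attr_mask);
        free(c->init_attr);
        return -1;
    }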
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_IBOFFLOAD_ENDPOINT_H -#define MCA_BCOL_IBOFFLOAD_ENDPOINT_H - -#include "ompi_config.h" -#include "bcol_iboffload.h" -#include "bcol_iboffload_frag.h" - -#include "ompi/mca/sbgp/ibnet/sbgp_ibnet.h" - -#define BCOL_IBOFFLOAD_ENDPOINT_PORT(cgroup, ep) (ep)->ibnet_proc->use_port[(cgroup)->index] -#define BCOL_IBOFFLOAD_ENDPOINT_PORT_IDX(cgroup, ep) (BCOL_IBOFFLOAD_ENDPOINT_PORT(cgroup, ep) - 1) - -BEGIN_C_DECLS - -struct mca_bcol_iboffload_endpoint_qp_t { - struct ompi_common_ofacm_base_qp_t *qp; - size_t ib_inline_max; - int32_t sd_wqe; /* Number of available send wqe entries */ - int32_t rd_wqe; /* Number of available recv wqe entries */ - opal_list_t preposted_frags; /* List of preposted frags */ - /* opal_mutex_t lock; */ /* Do I need lock here ? */ -}; - -typedef struct mca_bcol_iboffload_endpoint_qp_t mca_bcol_iboffload_endpoint_qp_t; - -enum { - IBOFFLOAD_CQ_SMALL_MESSAGES = 0, - IBOFFLOAD_CQ_SYNC, - IBOFFLOAD_CQ_LARGE_MESSAGES, - IBOFFLOAD_CQ_LAST -}; - -/* Endpoint object */ -struct mca_bcol_iboffload_endpoint_t { - opal_list_item_t super; - - /** BTL module that created this connection */ - mca_bcol_iboffload_module_t *iboffload_module; - - /** proc structure corresponding to endpoint */ - mca_sbgp_ibnet_proc_t *ibnet_proc; - - /** lock for concurrent access to endpoint state */ - opal_mutex_t endpoint_lock; - - /** Penging frag list */ - opal_list_t pending_frags; - - /** QPs information */ - mca_bcol_iboffload_endpoint_qp_t *qps; - - /** endpoint index on array */ - int32_t index; - - /** CQ for receive queues on this endpoint */ - struct ibv_cq *recv_cq[IBOFFLOAD_CQ_LAST]; - - /** QP configuration information */ - ompi_common_ofacm_base_qp_config_t qp_config; - - /** cpc context */ - ompi_common_ofacm_base_local_connection_context_t *cpc_context; - - /** caching pointer to remote info */ - ompi_common_ofacm_base_remote_connection_context_t *remote_info; - - /** caching pointer to cpc */ - ompi_common_ofacm_base_module_t *endpoint_cpc; - - /** The struct is used for zero RDMA with immediate - in some collectives, in barrier for example. */ - mca_bcol_iboffload_rdma_info_t remote_zero_rdma_addr; - mca_bcol_iboffload_rem_rdma_block_t remote_rdma_block; - - /** The pointer to device - In the destruction function - the iboffload module may not exist any more - caching the device */ - struct mca_bcol_iboffload_device_t *device; - - bool need_toset_remote_rdma_info; - - mca_bcol_iboffload_rdma_info_t remote_rdma_info[MAX_REMOTE_RDMA_INFO]; -}; -typedef struct mca_bcol_iboffload_endpoint_t mca_bcol_iboffload_endpoint_t; -OBJ_CLASS_DECLARATION(mca_bcol_iboffload_endpoint_t); - -/* Function declaration */ -int mca_bcol_iboffload_endpoint_init(mca_bcol_iboffload_endpoint_t *ep); - -static inline __opal_attribute_always_inline__ - int check_endpoint_state(mca_bcol_iboffload_endpoint_t *ep, - mca_bcol_base_descriptor_t *des, - opal_list_t *pending_list) -{ - int rc = OMPI_ERR_RESOURCE_BUSY; - - OPAL_THREAD_LOCK(&ep->cpc_context->context_lock); - /* Adding here one more redirection in critical path. Need to think - * what is the best way to prevent it */ - switch(ep->cpc_context->state) { - case MCA_COMMON_OFACM_CLOSED: - rc = ep->endpoint_cpc->cbm_start_connect(ep->cpc_context); - if (OMPI_SUCCESS == rc) { - rc = OMPI_ERR_RESOURCE_BUSY; - } - /* - * As long as we expect a message from the peer (in order - * to setup the connection) let the event engine pool the - * OOB events. 
Note: we increment it once peer active - * connection. - */ - opal_progress_event_users_increment(); - /* fall through */ - default: - /* opal_list_append(pending_list, (opal_list_item_t *)des); */ /* Vasily: will be uncomment later */ - break; - case MCA_COMMON_OFACM_FAILED: - rc = OMPI_ERR_UNREACH; - break; - case MCA_COMMON_OFACM_CONNECTED: - rc = OMPI_SUCCESS; - break; - } - - OPAL_THREAD_UNLOCK(&ep->cpc_context->context_lock); - return rc; -} - -int mca_bcol_iboffloads_create_endpoints(mca_sbgp_ibnet_connection_group_info_t *cgroup, - mca_bcol_iboffload_module_t *module); - -int mca_bcol_iboffload_endpoint_post_recvs(void *context); - -static inline __opal_attribute_always_inline__ int - mca_bcol_iboffload_prepost_recv( - mca_bcol_iboffload_endpoint_t *endpoint, - int qp_index, int num_to_prepost) -{ - mca_bcol_iboffload_prepost_qps_fn_t prepost_recv = - mca_bcol_iboffload_component.qp_infos[qp_index].prepost_recv; - if (NULL != prepost_recv) { - return prepost_recv(endpoint, qp_index, num_to_prepost); - } - - return OMPI_SUCCESS; -} - -static inline __opal_attribute_always_inline__ int - mca_bcol_iboffload_post_ml_scatter_recv_frag( - int qp_index, uint32_t dest_rank, - int nitems, struct iovec *buff_iovec, - uint32_t lkey, - struct ibv_sge *sg_entries, - mca_bcol_iboffload_frag_t *frag, - mca_bcol_iboffload_module_t *iboffload) -{ - int ret, start_wr_index; - struct ibv_recv_wr *recv_wr, *recv_bad; - int i; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - mca_bcol_iboffload_endpoint_t *endpoint = iboffload->endpoints[dest_rank]; - - mca_bcol_iboffload_recv_wr_manager *recv_wrs = &cm->recv_wrs; - mca_bcol_iboffload_device_t *device = endpoint->iboffload_module->device; - - IBOFFLOAD_VERBOSE(10, ("Recv prepost call: endpoint %p, qp_index %d", - (void *) endpoint, qp_index)); - - /* make sure that we do not overrun number of rd_wqe */ - if (0 >= endpoint->qps[qp_index].rd_wqe) { - IBOFFLOAD_VERBOSE(10, ("There are no rd_wqe - %d", - endpoint->qps[qp_index].rd_wqe)); - - return 0; - } - - OPAL_THREAD_LOCK(&recv_wrs->lock); - - /* Calculate start index in array - * of pre-allocated work requests */ - start_wr_index = cm->qp_infos[qp_index].rd_num - 1; - recv_wr = &recv_wrs->recv_work_requests[qp_index][start_wr_index]; - - IBOFFLOAD_VERBOSE(10, ("Endpoint %p, qp_index - %d, " - "start index of WRs - %d", (void *) endpoint, - qp_index, start_wr_index)); - - for (i = 0; i < nitems; i++) { - sg_entries[i].length = buff_iovec[i].iov_len; - sg_entries[i].addr = (uint64_t)buff_iovec[i].iov_base; - sg_entries[i].lkey = lkey; - - IBOFFLOAD_VERBOSE(10, ("Recv SGE List item %d , length %d , address %p", - i, sg_entries[i].length, sg_entries[i].addr)); - - IBOFFLOAD_VERBOSE(10, ("Recv SGE List item %d , iovec length %d", - i, buff_iovec[i].iov_len)); - } - - recv_wr->num_sge = nitems; - recv_wr->sg_list = sg_entries; - - /* Set the tail */ - recv_wr->next = NULL; - - /* post the list of recvs */ - ret = ibv_post_recv(endpoint->qps[qp_index].qp->lcl_qp, recv_wr, &recv_bad); - if (OPAL_UNLIKELY(0 != ret)) { - IBOFFLOAD_ERROR(("ibv_post_recv failed (%s), error: %s [%d], " - "qp_index - %d.\n", - ibv_get_device_name(device->dev.ib_dev), - strerror(errno), ret, qp_index)); - - return -1; - } - - /* decresing numbers of free recv wqe */ - --endpoint->qps[qp_index].rd_wqe; - - OPAL_THREAD_UNLOCK(&recv_wrs->lock); - - IBOFFLOAD_VERBOSE(10, ("Return success: " - "endpoint %p, qp_index %d, dest_rank %d", - endpoint, qp_index, dest_rank)); - - return 1; -} - -static inline 
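mca_bcol_iboffload_post_ml_scatter_recv_frag() above turns an iovec into an SGE list and posts it as a single receive work request, so one completion scatters data into several buffers. A reduced sketch against the standard verbs API, assuming the caller owns a connected QP and one memory registration (lkey) covering all the buffers:

    #include <stdint.h>
    #include <sys/uio.h>
    #include <infiniband/verbs.h>

    /* Post one receive whose completion scatters into nitems buffers.
     * Returns 0 on success, like ibv_post_recv() itself.  The sge array
     * only needs to stay valid for the duration of the post call. */
    static int post_scatter_recv(struct ibv_qp *qp, uint64_t wr_id,
                                 const struct iovec *iov, int nitems,
                                 uint32_t lkey, struct ibv_sge *sge)
    {
        struct ibv_recv_wr wr = { 0 }, *bad_wr = NULL;

        for (int i = 0; i < nitems; i++) {
            sge[i].addr   = (uint64_t) (uintptr_t) iov[i].iov_base;
            sge[i].length = (uint32_t) iov[i].iov_len;
            sge[i].lkey   = lkey;              /* one MR covers all buffers */
        }

        wr.wr_id   = wr_id;                    /* recovered from the CQE */
        wr.next    = NULL;                     /* single WR, no chain    */
        wr.sg_list = sge;
        wr.num_sge = nitems;

        return ibv_post_recv(qp, &wr, &bad_wr);
    }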
__opal_attribute_always_inline__ int - mca_bcol_iboffload_prepost_ml_recv_frag( - int qp_index, uint32_t dest_rank, - mca_bcol_iboffload_frag_t *frag, - mca_bcol_iboffload_module_t *iboffload) -{ - int ret, start_wr_index; - struct ibv_recv_wr *recv_wr, *recv_bad; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - mca_bcol_iboffload_endpoint_t *endpoint = iboffload->endpoints[dest_rank]; - - mca_bcol_iboffload_recv_wr_manager *recv_wrs = &cm->recv_wrs; - mca_bcol_iboffload_device_t *device = endpoint->iboffload_module->device; - - IBOFFLOAD_VERBOSE(10, ("Recv prepost call: endpoint %p, qp_index %d", - (void *) endpoint, qp_index)); - - /* make sure that we do not overrun number of rd_wqe */ - if (0 >= endpoint->qps[qp_index].rd_wqe) { - IBOFFLOAD_VERBOSE(10, ("There are no rd_wqe - %d", - endpoint->qps[qp_index].rd_wqe)); - - return 0; - } - - OPAL_THREAD_LOCK(&recv_wrs->lock); - - /* Calculate start index in array - * of pre-allocated work requests */ - start_wr_index = cm->qp_infos[qp_index].rd_num - 1; - recv_wr = &recv_wrs->recv_work_requests[qp_index][start_wr_index]; - - IBOFFLOAD_VERBOSE(10, ("Endpoint %p, qp_index - %d, " - "start index of WRs - %d", (void *) endpoint, - qp_index, start_wr_index)); - - recv_wr->sg_list = &frag->sg_entry; - - /* Set the tail */ - recv_wr->next = NULL; - - /* post the list of recvs */ - ret = ibv_post_recv(endpoint->qps[qp_index].qp->lcl_qp, recv_wr, &recv_bad); - if (OPAL_UNLIKELY(0 != ret)) { - IBOFFLOAD_ERROR(("ibv_post_recv failed (%s), error: %s [%d], " - "qp_index - %d.\n", - ibv_get_device_name(device->dev.ib_dev), - strerror(errno), ret, qp_index)); - - return -1; - } - - /* decresing numbers of free recv wqe */ - --endpoint->qps[qp_index].rd_wqe; - - OPAL_THREAD_UNLOCK(&recv_wrs->lock); - - IBOFFLOAD_VERBOSE(10, ("Return success: " - "endpoint %p, qp_index %d, dest_rank %d", - endpoint, qp_index, dest_rank)); - - return 1; -} - -static inline __opal_attribute_always_inline__ - mca_bcol_iboffload_frag_t* mca_bcol_iboffload_get_preposted_recv_frag( - mca_bcol_iboffload_module_t *iboffload, - int source, int qp_index) -{ - mca_bcol_iboffload_frag_t *frag; - mca_bcol_iboffload_endpoint_t *endpoint = iboffload->endpoints[source]; - - frag = mca_bcol_iboffload_component.qp_infos[qp_index].get_preposted_recv(endpoint, qp_index); - - /* do we want to run prepost */ - IBOFFLOAD_VERBOSE(10, ("source - %d, qp_index - %d; " - "allocating preposted addr %p.\n", - source, qp_index, (void *) frag->sg_entry.addr)); - - if (OPAL_LIKELY(NULL != frag)) { - frag->next = NULL; - } - - return frag; -} - -END_C_DECLS - -#endif /* MCA_BCOL_IBOFFLOAD_ENDPOINT_H */ diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_fanin.c b/ompi/mca/bcol/iboffload/bcol_iboffload_fanin.c deleted file mode 100644 index 49f771d46b..0000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_fanin.c +++ /dev/null @@ -1,350 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#include -#include -#include -#include - -#include "bcol_iboffload.h" -#include "bcol_iboffload_frag.h" -#include "bcol_iboffload_task.h" -#include "bcol_iboffload_collfrag.h" -#include "bcol_iboffload_endpoint.h" - -static int mca_bcol_iboffload_fanin_leader_progress( - mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request) -{ - int rc = OMPI_SUCCESS, leader_rank = 0, rank, - sbgp_size = iboffload->ibnet->super.group_size; - - struct mqe_task *last_wait = NULL; - - mca_bcol_iboffload_task_t *wait_task = NULL; - mca_bcol_iboffload_frag_t *preposted_recv_frag = NULL; - - struct mqe_task **mqe_ptr_to_set; - mca_bcol_iboffload_collfrag_t *coll_fragment; - - coll_fragment = (mca_bcol_iboffload_collfrag_t *) - opal_list_get_last(&coll_request->work_requests); - - mqe_ptr_to_set = &coll_fragment->to_post; - - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload, coll_fragment->mq_index, coll_fragment->mq_credits))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - goto out_of_resources; - } - - for (rank = leader_rank + 1; rank < sbgp_size; ++rank) { - /* post wait */ - preposted_recv_frag = mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, rank, coll_request->qp_index); - if(NULL == preposted_recv_frag) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting prepost recv frag.\n")); - goto out_of_resources; - } - - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, rank, 1, - preposted_recv_frag, coll_request->qp_index, NULL); - if(NULL == wait_task) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting wait task.\n")); - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - } - - /* end of list */ - *mqe_ptr_to_set = NULL; - - last_wait->flags |= MQE_WR_FLAG_SIGNAL; - - coll_fragment->signal_task_wr_id = last_wait->wr_id; - last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment; - - /* post the mwr */ - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload, coll_fragment->to_post); - if(OMPI_SUCCESS != rc) { - IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload->super, coll_request->order_info); - - return OMPI_SUCCESS; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Fan-in, adding collfrag to collfrag_pending")); - return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload); -} - -static int mca_bcol_iboffload_fanin_proxy_progress( - mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request) -{ - int rc = OMPI_SUCCESS, leader_rank = 0; - - struct mqe_task *last_send = NULL; - mca_bcol_iboffload_task_t *send_task = NULL; - mca_bcol_iboffload_frag_t *send_fragment = NULL; - - struct mqe_task **mqe_ptr_to_set; - mca_bcol_iboffload_collfrag_t *coll_fragment; - - coll_fragment = (mca_bcol_iboffload_collfrag_t *) - opal_list_get_last(&coll_request->work_requests); - - mqe_ptr_to_set = &coll_fragment->to_post; - - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload, coll_fragment->mq_index, coll_fragment->mq_credits))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - goto out_of_resources; - } - - /* post send */ - send_fragment = 
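The fan-in progress paths above first ask BCOL_IBOFFLOAD_MQ_HAVE_CREDITS whether the management queue can absorb every task of the fragment, and requeue the whole fragment when it cannot - posting is all-or-nothing. The reservation logic, reduced to plain C with a hypothetical mq type:

    #include <stdbool.h>

    struct mq { int credits; };                /* hypothetical MQ handle */

    /* All-or-nothing reservation: either the whole fragment's task chain
     * fits on the MQ now, or nothing is posted and the caller parks the
     * fragment on the pending list (appended the first time, prepended
     * on a re-queue so the retry order of older fragments is kept). */
    static bool mq_try_reserve(struct mq *q, int needed)
    {
        if (q->credits < needed) {
            return false;
        }
        q->credits -= needed;
        return true;
    }

    /* Completions hand credits back, mirroring MQ_RETURN_CREDITS. */
    static void mq_return_credits(struct mq *q, int n)
    {
        q->credits += n;
    }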
mca_bcol_iboffload_get_send_frag(coll_request, - leader_rank, coll_request->qp_index, 0, - 0, SBUF, MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY); - if(NULL == send_fragment) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting and packing send frag.\n")); - goto out_of_resources; - } - - send_task = mca_bcol_iboffload_get_send_task(iboffload, leader_rank, MCA_BCOL_IBOFFLOAD_QP_BARRIER, - send_fragment, coll_fragment, INLINE); - if(NULL == send_task) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting send task.\n")); - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, send_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task); - - /* end of list */ - *mqe_ptr_to_set = NULL; - assert(NULL != last_send); - - last_send->flags |= MQE_WR_FLAG_SIGNAL; - - coll_fragment->signal_task_wr_id = last_send->wr_id; - last_send->wr_id = (uint64_t) (uintptr_t) coll_fragment; - - /* post the mwr */ - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload, coll_fragment->to_post); - if(OMPI_SUCCESS != rc) { - IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload->super, coll_request->order_info); - - return OMPI_SUCCESS; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Fan-in, adding collfrag to collfrag_pending")); - return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload); -} - -static int mca_bcol_iboffload_fanin_init( - bcol_function_args_t *input_args, - mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t **coll_request) -{ - ompi_free_list_item_t *item = NULL; - mca_bcol_iboffload_collfrag_t *coll_fragment = NULL; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - IBOFFLOAD_VERBOSE(10, ("Calling for mca_bcol_iboffload_barrier_init")); - - OMPI_FREE_LIST_WAIT_MT(&cm->collreqs_free, item); - if(OPAL_UNLIKELY(NULL == item)) { - IBOFFLOAD_VERBOSE(10, ("Failing for coll request free list waiting.\n")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - (*coll_request) = (mca_bcol_iboffload_collreq_t *) item; - (*coll_request)->progress_fn = iboffload->fanin_algth; - - (*coll_request)->completion_cb_fn = NULL; - (*coll_request)->order_info = &input_args->order_info; - - (*coll_request)->module = iboffload; - (*coll_request)->ml_buffer_index = input_args->buffer_index; - (*coll_request)->buffer_info[SBUF].offset = 0; - (*coll_request)->buffer_info[RBUF].offset = 0; - (*coll_request)->qp_index = MCA_BCOL_IBOFFLOAD_QP_BARRIER; - - input_args->bcol_opaque_data = (void *) (*coll_request); - - /* finish initializing full message descriptor */ - (*coll_request)->n_fragments = 1; - (*coll_request)->n_frags_sent = 1; - - (*coll_request)->n_frag_mpi_complete = 0; - (*coll_request)->n_frag_net_complete = 0; - - (*coll_request)->user_handle_freed = false; - - /* - * setup collective work request - */ - - /* get collective frag */ - coll_fragment = &(*coll_request)->first_collfrag; - mca_bcol_iboffload_collfrag_init(coll_fragment); - - coll_fragment->alg = FANIN_ALG; - coll_fragment->mq_index = COLL_MQ; - - /* Set mq credits */ - coll_fragment->mq_credits = iboffload->alg_task_consump[FANIN_ALG]; - - /* set pointers for (coll frag) <-> (coll full request) */ - MCA_BCOL_IBOFFLOAD_SET_COLL_REQ_LINKS(*coll_request, coll_fragment); - - return OMPI_SUCCESS; -} - -/************************************************************************ - ************************ New style Fan-In 
****************************** - ***********************************************************************/ -static int mca_bcol_iboffload_new_style_fanin_progress( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_iboffload_collreq_t *coll_request = - (mca_bcol_iboffload_collreq_t *) - input_args->bcol_opaque_data; - - if (BCOL_IS_COMPLETED(coll_request)) { - coll_request->user_handle_freed = true; - if (COLLREQ_IS_DONE(coll_request)) { - IBOFFLOAD_VERBOSE(10, ("Coll request already done.\n")); - RELEASE_COLLREQ(coll_request); - } - - IBOFFLOAD_VERBOSE(10, ("Fan-In already done.\n")); - return BCOL_FN_COMPLETE; - } - - return BCOL_FN_STARTED; -} - -int mca_bcol_iboffload_new_style_fanin_first_call( - mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request) -{ - int i = 0, leader_rank = 0, /* We always suppose - the lowest index is a leader */ - my_rank = iboffload->ibnet->super.my_index, - sbgp_size = iboffload->ibnet->super.group_size; - - mca_bcol_iboffload_endpoint_t *ep = NULL; - mca_sbgp_ibnet_proc_t *my_ibnet_proc = iboffload->endpoints[my_rank]->ibnet_proc; - - assert(NULL != my_ibnet_proc); - - if (MCA_SBGP_IBNET_NODE_LEADER == my_ibnet_proc->duty) { - iboffload->fanin_algth = mca_bcol_iboffload_fanin_leader_progress; - iboffload->alg_task_consump[FANIN_ALG] += sbgp_size; - - for (i = leader_rank + 1; i < sbgp_size; ++i) { - ep = iboffload->endpoints[i]; - while (OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - } - } else { - iboffload->fanin_algth = mca_bcol_iboffload_fanin_proxy_progress; - iboffload->alg_task_consump[FANIN_ALG] += 1; - - ep = iboffload->endpoints[leader_rank]; - while(OMPI_SUCCESS != - check_endpoint_state(ep, NULL, NULL)) { - opal_progress(); - } - } - - return iboffload->fanin_algth(iboffload, coll_request); -} - -static int mca_bcol_iboffload_new_style_fanin_intra( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - int rc = OMPI_SUCCESS; - - struct mca_bcol_iboffload_collreq_t *coll_request = NULL; - mca_bcol_iboffload_module_t *iboffload = - (mca_bcol_iboffload_module_t *) const_args->bcol_module; - - assert(NULL != iboffload); - - MCA_BCOL_CHECK_ORDER(const_args->bcol_module, input_args); - - /* Init Fan-In collective reqeust */ - rc = mca_bcol_iboffload_fanin_init(input_args, iboffload, &coll_request); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("Error from mca_bcol_iboffload_fanin_init.\n")); - return BCOL_FN_NOT_STARTED; - } - - rc = iboffload->fanin_algth(iboffload, coll_request); - if (OPAL_UNLIKELY(OMPI_ERROR == rc)) { - return BCOL_FN_NOT_STARTED; - } - - return BCOL_FN_STARTED; -} - -int mca_bcol_iboffload_fanin_register(mca_bcol_base_module_t *super) -{ - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - IBOFFLOAD_VERBOSE(10, ("Register iboffload Fan-In.\n")); - - comm_attribs.bcoll_type = BCOL_FANIN; - - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - comm_attribs.data_src = DATA_SRC_KNOWN; - - mca_bcol_base_set_attributes(super, - &comm_attribs, &inv_attribs, - mca_bcol_iboffload_new_style_fanin_intra, - mca_bcol_iboffload_new_style_fanin_progress); - - 
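
The attribute pair handed to mca_bcol_base_set_attributes() above is what lets the upper ML layer match a collective invocation to this Fan-In implementation: comm_attribs bounds the communicator size, inv_attribs bounds the message size and the accepted datatype/op sets, and the two function pointers are the start and progress entry points. A minimal sketch of the selection test this implies, using hypothetical struct and field names (the real matching logic lives in the bcol/ml base, not in this file):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    struct comm_attr { int size_min, size_max; };
    struct inv_attr  { size_t msg_min, msg_max;
                       uint32_t dtype_bitmap, op_bitmap; };

    /* Would a function registered with these attributes accept the call?
     * Assumes datatype/op ids are < 32, matching the 0xffffffff bitmaps. */
    static bool fn_matches(const struct comm_attr *c, const struct inv_attr *v,
                           int comm_size, size_t msg_len, int dtype_id, int op_id)
    {
        return comm_size >= c->size_min && comm_size <= c->size_max &&
               msg_len   >= v->msg_min  && msg_len   <= v->msg_max  &&
               0 != (v->dtype_bitmap & (UINT32_C(1) << dtype_id))   &&
               0 != (v->op_bitmap    & (UINT32_C(1) << op_id));
    }
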
return OMPI_SUCCESS; -} diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_fanout.c b/ompi/mca/bcol/iboffload/bcol_iboffload_fanout.c deleted file mode 100644 index 9ac93d16e7..0000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_fanout.c +++ /dev/null @@ -1,349 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#include -#include -#include -#include - -#include "bcol_iboffload.h" -#include "bcol_iboffload_frag.h" -#include "bcol_iboffload_task.h" -#include "bcol_iboffload_collfrag.h" -#include "bcol_iboffload_endpoint.h" - -static int mca_bcol_iboffload_fanout_leader_progress( - mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request) -{ - int rc = OMPI_SUCCESS, leader_rank = 0, rank, - sbgp_size = iboffload->ibnet->super.group_size; - - struct mqe_task *last_send = NULL; - mca_bcol_iboffload_task_t *send_task = NULL; - mca_bcol_iboffload_frag_t *send_fragment = NULL; - - struct mqe_task **mqe_ptr_to_set; - mca_bcol_iboffload_collfrag_t *coll_fragment; - - coll_fragment = (mca_bcol_iboffload_collfrag_t *) - opal_list_get_last(&coll_request->work_requests); - - mqe_ptr_to_set = &coll_fragment->to_post; - - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload, coll_fragment->mq_index, coll_fragment->mq_credits))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - goto out_of_resources; - } - - for (rank = leader_rank + 1; rank < sbgp_size; ++rank) { - /* post send */ - send_fragment = mca_bcol_iboffload_get_send_frag(coll_request, - rank, coll_request->qp_index, 0, - 0, SBUF, MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY); - if(NULL == send_fragment) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting and packing send frag.\n")); - goto out_of_resources; - } - - send_task = mca_bcol_iboffload_get_send_task(iboffload, rank, MCA_BCOL_IBOFFLOAD_QP_BARRIER, - send_fragment, coll_fragment, INLINE); - if(NULL == send_task) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting send task.\n")); - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, send_task, last_send); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task); - } - - /* end of list */ - *mqe_ptr_to_set = NULL; - assert(NULL != last_send); - - last_send->flags |= MQE_WR_FLAG_SIGNAL; - - coll_fragment->signal_task_wr_id = last_send->wr_id; - last_send->wr_id = (uint64_t) (uintptr_t) coll_fragment; - - /* post the mwr */ - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload, coll_fragment->to_post); - if(OMPI_SUCCESS != rc) { - IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload->super, coll_request->order_info); - - return OMPI_SUCCESS; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Fan-in, adding collfrag to collfrag_pending")); - return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload); -} - -static int mca_bcol_iboffload_fanout_proxy_progress( - mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t *coll_request) -{ - int rc = OMPI_SUCCESS, leader_rank = 0; - - struct mqe_task *last_wait = NULL; - 
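
The leader path above, and the proxy path that follows, both build a NULL-terminated chain of MQE tasks, mark only the last task with MQE_WR_FLAG_SIGNAL so the whole chain raises a single completion, and stash the collective-fragment pointer in the 64-bit wr_id so the completion handler can recover it. A minimal sketch of that chaining idiom with stand-in types (APPEND_TO_TASKLIST and the CQE-side recovery are defined elsewhere in this component):

    #include <stdint.h>
    #include <stddef.h>

    struct task {                /* stand-in for struct mqe_task */
        struct task *next;
        uint64_t     wr_id;
        int          flags;
    };
    #define FLAG_SIGNAL 0x1      /* stand-in for MQE_WR_FLAG_SIGNAL */

    /* Append through a "pointer to the next-pointer", tracking the tail. */
    static void append_task(struct task ***link, struct task *t,
                            struct task **last)
    {
        **link = t;
        *link  = &t->next;
        *last  = t;
    }

    /* Terminate the chain and arm exactly one completion on its tail. */
    static void finalize_chain(struct task **link, struct task *last,
                               void *coll_frag, uint64_t *saved_wr_id)
    {
        *link = NULL;                                    /* end of list */
        last->flags |= FLAG_SIGNAL;                      /* one CQE per chain */
        *saved_wr_id = last->wr_id;                      /* remember original id */
        last->wr_id  = (uint64_t) (uintptr_t) coll_frag; /* round-trip pointer */
    }
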
mca_bcol_iboffload_task_t *wait_task = NULL; - mca_bcol_iboffload_frag_t *preposted_recv_frag = NULL; - - struct mqe_task **mqe_ptr_to_set; - mca_bcol_iboffload_collfrag_t *coll_fragment; - - coll_fragment = (mca_bcol_iboffload_collfrag_t *) - opal_list_get_last(&coll_request->work_requests); - - mqe_ptr_to_set = &coll_fragment->to_post; - - if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS( - iboffload, coll_fragment->mq_index, coll_fragment->mq_credits))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - goto out_of_resources; - } - - /* post wait */ - preposted_recv_frag = mca_bcol_iboffload_get_preposted_recv_frag( - iboffload, leader_rank, coll_request->qp_index); - if(NULL == preposted_recv_frag) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting prepost recv frag.\n")); - goto out_of_resources; - } - - wait_task = mca_bcol_iboffload_get_wait_task(iboffload, leader_rank, 1, - preposted_recv_frag, coll_request->qp_index, NULL); - if(NULL == wait_task) { - IBOFFLOAD_VERBOSE(10, ("Failing for getting wait task.\n")); - goto out_of_resources; - } - - APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait); - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - - /* end of list */ - *mqe_ptr_to_set = NULL; - - last_wait->flags |= MQE_WR_FLAG_SIGNAL; - - coll_fragment->signal_task_wr_id = last_wait->wr_id; - last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment; - - /* post the mwr */ - rc = mca_bcol_iboffload_post_mqe_tasks(iboffload, coll_fragment->to_post); - if(OMPI_SUCCESS != rc) { - IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload->super, coll_request->order_info); - - return OMPI_SUCCESS; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("Fan-in, adding collfrag to collfrag_pending")); - return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload); -} - -static int mca_bcol_iboffload_fanout_init( - bcol_function_args_t *input_args, - mca_bcol_iboffload_module_t *iboffload, - struct mca_bcol_iboffload_collreq_t **coll_request) -{ - ompi_free_list_item_t *item = NULL; - mca_bcol_iboffload_collfrag_t *coll_fragment = NULL; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - IBOFFLOAD_VERBOSE(10, ("Calling for mca_bcol_iboffload_barrier_init")); - - OMPI_FREE_LIST_WAIT_MT(&cm->collreqs_free, item); - if(NULL == item) { - IBOFFLOAD_VERBOSE(10, ("Failing for coll request free list waiting.\n")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - (*coll_request) = (mca_bcol_iboffload_collreq_t *) item; - (*coll_request)->progress_fn = iboffload->fanout_algth; - - (*coll_request)->completion_cb_fn = NULL; - (*coll_request)->order_info = &input_args->order_info; - - (*coll_request)->module = iboffload; - (*coll_request)->ml_buffer_index = input_args->buffer_index; - (*coll_request)->buffer_info[SBUF].offset = 0; - (*coll_request)->buffer_info[RBUF].offset = 0; - (*coll_request)->qp_index = MCA_BCOL_IBOFFLOAD_QP_BARRIER; - - /* finish initializing full message descriptor */ - (*coll_request)->n_fragments = 1; - (*coll_request)->n_frags_sent = 1; - - (*coll_request)->n_frag_mpi_complete = 0; - (*coll_request)->n_frag_net_complete = 0; - - (*coll_request)->user_handle_freed = false; - - input_args->bcol_opaque_data = (void *) (*coll_request); - - /* - * setup collective work request - */ - - /* get collective frag */ - coll_fragment = &(*coll_request)->first_collfrag; - 
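
For this single-fragment operation, the counters set a few lines above (n_fragments, n_frags_sent, n_frag_mpi_complete, n_frag_net_complete) are the entire completion state. One plausible reading of the BCOL_IS_COMPLETED and COLLREQ_IS_DONE tests used by the progress function below, written out as predicates (the real macros live in bcol_iboffload_collreq.h, which is not part of this excerpt, so treat the exact conditions as an assumption):

    #include <stdbool.h>

    struct collreq_counts {
        int n_fragments;         /* total fragments in the message       */
        int n_frag_mpi_complete; /* fragments completed at the MPI level */
        int n_frag_net_complete; /* fragments completed by the network   */
    };

    /* MPI semantics are satisfied once every fragment is MPI-complete. */
    static bool coll_is_completed(const struct collreq_counts *r)
    {
        return r->n_frag_mpi_complete == r->n_fragments;
    }

    /* The request may be recycled only when the network is done too. */
    static bool collreq_is_done(const struct collreq_counts *r)
    {
        return coll_is_completed(r) &&
               r->n_frag_net_complete == r->n_fragments;
    }
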
mca_bcol_iboffload_collfrag_init(coll_fragment);
-
-    coll_fragment->alg = FANOUT_ALG;
-    coll_fragment->mq_index = COLL_MQ;
-
-    /* Set mq credits */
-    coll_fragment->mq_credits = iboffload->alg_task_consump[FANOUT_ALG];
-
-    /* set pointers for (coll frag) <-> (coll full request) */
-    MCA_BCOL_IBOFFLOAD_SET_COLL_REQ_LINKS(*coll_request, coll_fragment);
-
-    return OMPI_SUCCESS;
-}
-
-/************************************************************************
- ************************ New style Fan-Out *****************************
- ***********************************************************************/
-static int mca_bcol_iboffload_new_style_fanout_progress(
-                bcol_function_args_t *input_args,
-                struct mca_bcol_base_function_t *const_args)
-{
-    mca_bcol_iboffload_collreq_t *coll_request =
-                (mca_bcol_iboffload_collreq_t *)
-                                input_args->bcol_opaque_data;
-
-    if (BCOL_IS_COMPLETED(coll_request)) {
-        coll_request->user_handle_freed = true;
-        if (COLLREQ_IS_DONE(coll_request)) {
-            IBOFFLOAD_VERBOSE(10, ("Coll request already done.\n"));
-            RELEASE_COLLREQ(coll_request);
-        }
-
-        IBOFFLOAD_VERBOSE(10, ("Fan-Out already done.\n"));
-        return BCOL_FN_COMPLETE;
-    }
-
-    return BCOL_FN_STARTED;
-}
-
-int mca_bcol_iboffload_new_style_fanout_first_call(
-                mca_bcol_iboffload_module_t *iboffload,
-                struct mca_bcol_iboffload_collreq_t *coll_request)
-{
-    int i = 0, leader_rank = 0, /* We always assume
-                                   the lowest index is the leader */
-        my_rank = iboffload->ibnet->super.my_index,
-        sbgp_size = iboffload->ibnet->super.group_size;
-
-    mca_bcol_iboffload_endpoint_t *ep = NULL;
-    mca_sbgp_ibnet_proc_t *my_ibnet_proc = iboffload->endpoints[my_rank]->ibnet_proc;
-
-    assert(NULL != my_ibnet_proc);
-
-    if (MCA_SBGP_IBNET_NODE_LEADER == my_ibnet_proc->duty) {
-        iboffload->fanout_algth = mca_bcol_iboffload_fanout_leader_progress;
-        iboffload->alg_task_consump[FANOUT_ALG] += sbgp_size;
-
-        for (i = leader_rank + 1; i < sbgp_size; ++i) {
-            ep = iboffload->endpoints[i];
-            while (OMPI_SUCCESS !=
-                          check_endpoint_state(ep, NULL, NULL)) {
-                opal_progress();
-            }
-        }
-    } else {
-        iboffload->fanout_algth = mca_bcol_iboffload_fanout_proxy_progress;
-        iboffload->alg_task_consump[FANOUT_ALG] += 1;
-
-        ep = iboffload->endpoints[leader_rank];
-        while (OMPI_SUCCESS !=
-                      check_endpoint_state(ep, NULL, NULL)) {
-            opal_progress();
-        }
-    }
-
-    return iboffload->fanout_algth(iboffload, coll_request);
-}
-
-static int mca_bcol_iboffload_new_style_fanout_intra(
-                bcol_function_args_t *input_args,
-                struct mca_bcol_base_function_t *const_args)
-{
-    int rc = OMPI_SUCCESS;
-
-    struct mca_bcol_iboffload_collreq_t *coll_request = NULL;
-    mca_bcol_iboffload_module_t *iboffload =
-                    (mca_bcol_iboffload_module_t *) const_args->bcol_module;
-
-    assert(NULL != iboffload);
-
-    MCA_BCOL_CHECK_ORDER(const_args->bcol_module, input_args);
-
-    /* Init the Fan-Out collective request */
-    rc = mca_bcol_iboffload_fanout_init(input_args, iboffload, &coll_request);
-    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
-        IBOFFLOAD_VERBOSE(10, ("Error from mca_bcol_iboffload_fanout_init.\n"));
-        return BCOL_FN_NOT_STARTED;
-    }
-
-    rc = iboffload->fanout_algth(iboffload, coll_request);
-    if (OPAL_UNLIKELY(OMPI_ERROR == rc)) {
-        return BCOL_FN_NOT_STARTED;
-    }
-
-    return BCOL_FN_STARTED;
-}
-
-int mca_bcol_iboffload_fanout_register(mca_bcol_base_module_t *super)
-{
-    mca_bcol_base_coll_fn_comm_attributes_t comm_attribs;
-    mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs;
-
-    IBOFFLOAD_VERBOSE(10, ("Register iboffload Fan-Out.\n"));
-
-    comm_attribs.bcoll_type = BCOL_FANOUT;
-
- comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - comm_attribs.data_src = DATA_SRC_KNOWN; - - mca_bcol_base_set_attributes(super, - &comm_attribs, &inv_attribs, - mca_bcol_iboffload_new_style_fanout_intra, - mca_bcol_iboffload_new_style_fanout_progress); - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_frag.c b/ompi/mca/bcol/iboffload/bcol_iboffload_frag.c deleted file mode 100644 index 0ecf1ef62e..0000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_frag.c +++ /dev/null @@ -1,272 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "opal/include/opal/types.h" -#include "opal/datatype/opal_convertor.h" - -#include "bcol_iboffload.h" -#include "bcol_iboffload_frag.h" -#include "bcol_iboffload_device.h" -#include "bcol_iboffload_collreq.h" -#include "bcol_iboffload_endpoint.h" - -static void frag_constructor(mca_bcol_iboffload_frag_t *frag) -{ - mca_bcol_iboffload_reg_t* reg = - (mca_bcol_iboffload_reg_t*) frag->super.registration; - - memset(&frag->sg_entry, 0, sizeof(struct ibv_sge)); - frag->sg_entry.addr = (uint64_t) (uintptr_t) frag->super.ptr; - - frag->registration = reg; - - if (NULL != reg) { - frag->sg_entry.lkey = reg->mr->lkey; - } - - frag->next = NULL; - frag->type = MCA_BCOL_IBOFFLOAD_NONE_OWNER; - frag->ref_counter = 0; - frag->qp_index = -1; -} - -OBJ_CLASS_INSTANCE( - mca_bcol_iboffload_frag_t, - ompi_free_list_item_t, - frag_constructor, - NULL); - - -static mca_bcol_iboffload_frag_t* - mca_bcol_iboffload_get_ml_frag_calc(mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collreq_t *coll_request, - size_t len, size_t src_offset) -{ - int rc; - - mca_bcol_iboffload_frag_t *fragment; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - uint64_t sbuff = (uint64_t) (uintptr_t) coll_request->buffer_info[SBUF].buf + - src_offset; - - /* The buffer was allocated on ML level, - no need to allocate local buffer */ - rc = pack_data_for_calc(iboffload->device->dev.ib_dev_context, - cm->map_ompi_to_ib_calcs[coll_request->op->op_type], - cm->map_ompi_to_ib_dt[coll_request->dtype->id], - false /* host order */, - (void *) sbuff, 0, - &coll_request->actual_ib_op, - &coll_request->actual_ib_dtype, - (void *) sbuff); - if (OPAL_UNLIKELY(0 != rc)) { - IBOFFLOAD_VERBOSE(10, ("pack_data_for_calc failed, op: %s, type: %s\n", - coll_request->op->o_name, coll_request->dtype->name)); - return NULL; - } - - fragment = mca_bcol_iboffload_get_ml_frag( - iboffload, coll_request->qp_index, len, - coll_request->buffer_info[SBUF].lkey, - sbuff); - - return fragment; -} - -static mca_bcol_iboffload_frag_t * -mca_bcol_iboffload_get_packed_frag(mca_bcol_iboffload_module_t *iboffload, - uint32_t destination, int qp_index, size_t len, - struct opal_convertor_t *convertor) -{ - /* local variables */ - int rc; - uint32_t out_size; - size_t max_size = 0; - - struct iovec payload_iovec; - - ompi_free_list_item_t *item; - 
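
The pack path below hands opal_convertor_pack() a single iovec pointing into the fragment's registered buffer, so a possibly non-contiguous user datatype is gathered into one contiguous, pinned region the HCA can send directly. The calling convention, reduced to a self-contained helper (pack_into_pinned is illustrative and not part of this file; error handling is minimal):

    #include <sys/types.h>
    #include <sys/uio.h>
    #include <stdint.h>

    #include "opal/datatype/opal_convertor.h"

    /* Pack up to "len" bytes described by "conv" into the pinned buffer
     * at "dst"; returns the number of bytes packed, or -1 on error. */
    static ssize_t pack_into_pinned(opal_convertor_t *conv,
                                    void *dst, size_t len)
    {
        struct iovec iov;
        uint32_t iov_count = 1;  /* one contiguous destination buffer */
        size_t   max_data  = 0;  /* set by the convertor to bytes packed */

        iov.iov_base = dst;
        iov.iov_len  = len;

        if (opal_convertor_pack(conv, &iov, &iov_count, &max_data) < 0) {
            return -1;
        }
        return (ssize_t) max_data;
    }
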
mca_bcol_iboffload_frag_t *frag; - - mca_bcol_iboffload_device_t *device = iboffload->device; - - /* Get frag from free list */ - OMPI_FREE_LIST_GET_MT(&device->frags_free[qp_index], item); - if (OPAL_UNLIKELY(NULL == item)) { - return NULL; - } - - frag = (mca_bcol_iboffload_frag_t *) item; - - /* Pack data into the buffer */ - out_size = 1; - payload_iovec.iov_len = len; - - payload_iovec.iov_base = (void *) (uintptr_t) frag->sg_entry.addr; - - rc = opal_convertor_pack(convertor, &(payload_iovec), - &out_size, &max_size); - if (OPAL_UNLIKELY(rc < 0)) { - /* Error: put the fragment back */ - OMPI_FREE_LIST_RETURN_MT(&device->frags_free[qp_index], item); - return NULL; - } - - return frag; -} - -static mca_bcol_iboffload_frag_t * -mca_bcol_iboffload_get_calc_frag(mca_bcol_iboffload_module_t *iboffload, int qp_index, - struct mca_bcol_iboffload_collreq_t *coll_request) -{ - int rc; - - ompi_free_list_item_t *item; - mca_bcol_iboffload_frag_t *frag; - - mca_bcol_iboffload_device_t *device = iboffload->device; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - IBOFFLOAD_VERBOSE(10, ("Start to pack frag.\n")); - - /* Get frag from free list */ - OMPI_FREE_LIST_GET_MT(&device->frags_free[qp_index], item); - if (OPAL_UNLIKELY(NULL == item)) { - return NULL; - } - - frag = (mca_bcol_iboffload_frag_t *) item; - - /* Pack data into the buffer */ - rc = pack_data_for_calc(device->dev.ib_dev_context, - cm->map_ompi_to_ib_calcs[coll_request->op->op_type], - cm->map_ompi_to_ib_dt[coll_request->dtype->id], false, - coll_request->buffer_info[SBUF].buf, 0, - &coll_request->actual_ib_op, - &coll_request->actual_ib_dtype, - (void *) (uintptr_t) frag->sg_entry.addr); - if (OPAL_UNLIKELY(0 != rc)) { - IBOFFLOAD_ERROR(("pack_data_for_calc failed, op: %s, type: %s\n", - coll_request->op->o_name, coll_request->dtype->name)); - return NULL; - } - - return frag; -} - -mca_bcol_iboffload_frag_t* -mca_bcol_iboffload_get_send_frag(mca_bcol_iboffload_collreq_t *coll_request, - uint32_t destination, int qp_index, size_t len, - size_t src_offset, int buf_index, int send_frag_type) -{ - /* local variables */ - mca_bcol_iboffload_frag_t *frag; - mca_bcol_iboffload_module_t *iboffload = coll_request->module; - - mca_bcol_iboffload_endpoint_t *endpoint = - iboffload->endpoints[destination]; - - IBOFFLOAD_VERBOSE(10, ("Calling mca_bcol_iboffload_get_send_frag qp_index %d", - qp_index)); - - if ((endpoint->qps[qp_index].sd_wqe) <= 0) { - IBOFFLOAD_VERBOSE(10, ("No send wqe %d", - endpoint->qps[qp_index].sd_wqe)); - return NULL; - } - - --endpoint->qps[qp_index].sd_wqe; - - IBOFFLOAD_VERBOSE(10, ("Endpoint %p: qp_index %d, destination %d, sd_wqe %d", - endpoint, qp_index, destination, endpoint->qps[qp_index].sd_wqe)); - - switch (send_frag_type) { - case MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY: - IBOFFLOAD_VERBOSE(10, ("Getting MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY")); - assert(NULL != &iboffload->device->dummy_frags[qp_index]); - return &iboffload->device->dummy_frags[qp_index]; - - case MCA_BCOL_IBOFFLOAD_SEND_FRAG: - { - ompi_free_list_item_t *item; - IBOFFLOAD_VERBOSE(10, ("Getting MCA_BCOL_IBOFFLOAD_SEND_FRAG")); - - /* Get frag from free list */ - OMPI_FREE_LIST_GET_MT(&iboffload->device->frags_free[qp_index], item); - - frag = (mca_bcol_iboffload_frag_t *) item; - } - - break; - case MCA_BCOL_IBOFFLOAD_SEND_FRAG_CONVERT: - IBOFFLOAD_VERBOSE(10, ("Getting MCA_BCOL_IBOFFLOAD_SEND_FRAG_CONVERT")); - frag = mca_bcol_iboffload_get_packed_frag(iboffload, destination, - qp_index, len, 
&coll_request->send_convertor); - - break; - case MCA_BCOL_IBOFFLOAD_SEND_FRAG_CALC: - IBOFFLOAD_VERBOSE(10, ("Getting MCA_BCOL_IBOFFLOAD_SEND_FRAG_CALC")); - frag = mca_bcol_iboffload_get_calc_frag(iboffload, qp_index, coll_request); - - break; - case MCA_BCOL_IBOFFLOAD_SEND_FRAG_ML: - IBOFFLOAD_VERBOSE(10, ("Getting MCA_BCOL_IBOFFLOAD_SEND_FRAG_ML")); - frag = mca_bcol_iboffload_get_ml_frag( - iboffload, qp_index, len, coll_request->buffer_info[buf_index].lkey, - (uint64_t)(uintptr_t) coll_request->buffer_info[buf_index].buf + src_offset); - - break; - case MCA_BCOL_IBOFFLOAD_SEND_FRAG_ML_CALC: - frag = mca_bcol_iboffload_get_ml_frag_calc(iboffload, coll_request, len, src_offset); - IBOFFLOAD_VERBOSE(10, ("Getting MCA_BCOL_IBOFFLOAD_SEND_FRAG_ML_CALC")); - - break; - default: - IBOFFLOAD_VERBOSE(10, ("Getting default")); - frag = NULL; - IBOFFLOAD_ERROR(("Unknown send frag type %d for QP index %d", - send_frag_type, qp_index)); - } - - if (OPAL_UNLIKELY(NULL == frag)) { - IBOFFLOAD_VERBOSE(10, ("Getting NULL")); - return NULL; - } - - frag->sg_entry.length = len; - frag->next = NULL; - - return frag; -} - -void -mca_bcol_iboffload_frag_init(ompi_free_list_item_t* item, void* ctx) -{ - int qp_index = *(int *) ctx; - mca_bcol_iboffload_frag_t *frag = (mca_bcol_iboffload_frag_t *) item; - - frag->qp_index = qp_index; - frag->type = MCA_BCOL_IBOFFLOAD_BCOL_OWNER; -} - -void -mca_bcol_iboffload_ml_frag_init(ompi_free_list_item_t* item, void* ctx) -{ - mca_bcol_iboffload_frag_t *frag = (mca_bcol_iboffload_frag_t *) item; - - frag->qp_index = -1; - frag->type = MCA_BCOL_IBOFFLOAD_ML_OWNER; -} diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_frag.h b/ompi/mca/bcol/iboffload/bcol_iboffload_frag.h deleted file mode 100644 index fffc33f293..0000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_frag.h +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_IBOFFLOAD_FRAG_H -#define MCA_BCOL_IBOFFLOAD_FRAG_H - -#include "ompi_config.h" - -#include - -#include "opal/datatype/opal_convertor.h" - -#include "opal/mca/mpool/mpool.h" -#include "opal/class/ompi_free_list.h" - -#include "bcol_iboffload.h" - -BEGIN_C_DECLS - -/* forward declarations */ -struct mca_bcol_iboffload_collreq_t; - -struct mca_bcol_iboffload_reg_t { - mca_mpool_base_registration_t base; - struct ibv_mr *mr; -}; -typedef struct mca_bcol_iboffload_reg_t mca_bcol_iboffload_reg_t; - -typedef enum { - MCA_BCOL_IBOFFLOAD_NONE_OWNER = -1, - MCA_BCOL_IBOFFLOAD_DUMMY_OWNER, - MCA_BCOL_IBOFFLOAD_BCOL_OWNER, - MCA_BCOL_IBOFFLOAD_ML_OWNER -} frag_type; - -typedef enum { - MCA_BCOL_IBOFFLOAD_SEND_FRAG, - MCA_BCOL_IBOFFLOAD_SEND_FRAG_ML, - MCA_BCOL_IBOFFLOAD_SEND_FRAG_ML_CALC, - MCA_BCOL_IBOFFLOAD_SEND_FRAG_CONVERT, - MCA_BCOL_IBOFFLOAD_SEND_FRAG_CALC, - MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY -} send_frag_type; - -struct mca_bcol_iboffload_frag_t { - ompi_free_list_item_t super; - - struct mca_bcol_iboffload_frag_t *next; - struct mca_bcol_iboffload_reg_t *registration; - - struct ibv_sge sg_entry; - - frag_type type; - - int ref_counter; - int qp_index; -}; -typedef struct mca_bcol_iboffload_frag_t mca_bcol_iboffload_frag_t; -OBJ_CLASS_DECLARATION(mca_bcol_iboffload_frag_t); - -/* The same fragment maybe shared by multiple task. - * In order to manage right release and allocation flow - * we use reference counter on each fragment and the follow - * wrapper allocation and release function that hides - * the counter */ - -#define IBOFFLOAD_SET_SINGLE_FRAG_ON_TASK(fragment, task) \ - do { \ - ++((fragment)->ref_counter); \ - (task)->frag = (fragment); \ - } while(0) - -#define IBOFFLOAD_SET_FRAGS_ON_TASK(fragment, task) \ - do { \ - struct mca_bcol_iboffload_frag_t *temp_frag = fragment; \ - while (NULL != temp_frag) { \ - ++(temp_frag->ref_counter); \ - temp_frag = temp_frag->next; \ - } \ - (task)->frag = fragment; \ - } while(0) - -/* function declarations */ -mca_bcol_iboffload_frag_t * -mca_bcol_iboffload_get_send_frag(struct mca_bcol_iboffload_collreq_t *coll_request, - uint32_t destination, int qp_index, size_t len, - size_t src_offset, int buff_index, int send_frag_type); - -void -mca_bcol_iboffload_frag_init(ompi_free_list_item_t* item, void* ctx); -void -mca_bcol_iboffload_ml_frag_init(ompi_free_list_item_t* item, void* ctx); - -static inline __opal_attribute_always_inline__ -mca_bcol_iboffload_frag_t* mca_bcol_iboffload_get_ml_empty_frag( - mca_bcol_iboffload_module_t *iboffload, - int qp_index) -{ - ompi_free_list_item_t *item; - mca_bcol_iboffload_frag_t *frag; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - /* Get frag from free list */ - OMPI_FREE_LIST_GET_MT(&cm->ml_frags_free, item); - if (OPAL_UNLIKELY(NULL == item)) { - return NULL; - } - - frag = (mca_bcol_iboffload_frag_t *) item; - - frag->qp_index = qp_index; - frag->next = NULL; - - return frag; -} - -static inline __opal_attribute_always_inline__ -mca_bcol_iboffload_frag_t* mca_bcol_iboffload_get_ml_frag( - mca_bcol_iboffload_module_t *iboffload, - int qp_index, size_t len, uint32_t lkey, uint64_t addr) -{ - /* local variables */ - mca_bcol_iboffload_frag_t *frag; - - IBOFFLOAD_VERBOSE(10, ("Call for get ML frag - addr 0x%x", addr)); - - frag = mca_bcol_iboffload_get_ml_empty_frag(iboffload, qp_index); - - frag->sg_entry.addr = addr; - frag->sg_entry.lkey = lkey; - frag->sg_entry.length = len; - 
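
The ref_counter manipulated by the IBOFFLOAD_SET_*_FRAG* macros above is what lets several tasks share one fragment: every task that attaches a fragment takes a reference, and the release side may only return the fragment to its free list once the count drops to zero (the release half is inferred from the comment above the macros; it lives in the task code, not in this header). A function-style restatement of the attach macros:

    /* Attach a chain of fragments to a task, taking one reference per
     * fragment; mirrors IBOFFLOAD_SET_FRAGS_ON_TASK above. */
    static inline void task_attach_frags(mca_bcol_iboffload_task_t *task,
                                         mca_bcol_iboffload_frag_t *frags)
    {
        mca_bcol_iboffload_frag_t *f;

        for (f = frags; NULL != f; f = f->next) {
            ++f->ref_counter;
        }
        task->frag = frags;
    }

    /* Drop one reference; a zero result means the fragment may be
     * returned to its free list. */
    static inline int frag_release_ref(mca_bcol_iboffload_frag_t *frag)
    {
        return --frag->ref_counter;
    }
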
- IBOFFLOAD_VERBOSE(10, ("Setting ml frag lkey %u, " - "addr %p, qp_index %d, send value - %lf", - frag->sg_entry.lkey, frag->sg_entry.addr, - qp_index, *(double *) frag->sg_entry.addr)); - - return frag; -} - -END_C_DECLS - -#endif diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_mca.c b/ompi/mca/bcol/iboffload/bcol_iboffload_mca.c deleted file mode 100644 index eb28525f36..0000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_mca.c +++ /dev/null @@ -1,451 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#include -#include -#include - -#include "bcol_iboffload.h" -#include "bcol_iboffload_mca.h" - -#include "ompi/constants.h" -#include "ompi/mca/common/ofacm/base.h" -#include "ompi/communicator/communicator.h" - -#include "opal/util/show_help.h" - -/* - * Local flags - */ -enum { - REGINT_NEG_ONE_OK = 0x01, - REGINT_GE_ZERO = 0x02, - REGINT_GE_ONE = 0x04, - REGINT_NONZERO = 0x08, - REGINT_MAX = 0x88 -}; - -enum { - REGSTR_EMPTY_OK = 0x01, - REGSTR_MAX = 0x88 -}; - -mca_base_var_enum_value_t mtu_values[] = { - {IBV_MTU_256, "256B"}, - {IBV_MTU_512, "512B"}, - {IBV_MTU_1024, "1k"}, - {IBV_MTU_4096, "4k"}, - {0, NULL} -}; - -/* - * utility routine for string parameter registration - */ -static int reg_string(const char* param_name, - const char* deprecated_param_name, - const char* param_desc, - const char* default_value, char **storage, - int flags) -{ - int index; - - /* the MCA variable system will not attempt to modify this value */ - *storage = (char *) default_value; - index = mca_base_component_var_register(&mca_bcol_iboffload_component.super.bcol_version, - param_name, param_desc, MCA_BASE_VAR_TYPE_STRING, - NULL, 0, 0, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); - if (NULL != deprecated_param_name) { - (void) mca_base_var_register_synonym(index, "ompi", "bcol", "iboffload", deprecated_param_name, - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - } - - if (0 != (flags & REGSTR_EMPTY_OK) && 0 == strlen(*storage)) { - opal_output(0, "Bad parameter value for parameter \"%s\"", - param_name); - return OMPI_ERR_BAD_PARAM; - } - - return OMPI_SUCCESS; -} - -/* - * utility routine for integer parameter registration - */ -static int reg_int(const char* param_name, - const char* deprecated_param_name, - const char* param_desc, - int default_value, int *storage, int flags) -{ - int index; - - *storage = default_value; - index = mca_base_component_var_register(&mca_bcol_iboffload_component.super.bcol_version, - param_name, param_desc, MCA_BASE_VAR_TYPE_INT, - NULL, 0, 0, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); - if (NULL != deprecated_param_name) { - (void) mca_base_var_register_synonym(index, "ompi", "bcol", "iboffload", deprecated_param_name, - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - } - - if (0 != (flags & REGINT_NEG_ONE_OK) && -1 == *storage) { - return OMPI_SUCCESS; - } - - if ((0 != (flags & REGINT_GE_ZERO) && *storage < 0) || - (0 != (flags & REGINT_GE_ONE) && *storage < 1) || - (0 != (flags & REGINT_NONZERO) && 0 == *storage)) { - opal_output(0, "Bad parameter value for parameter \"%s\"", - param_name); - return OMPI_ERR_BAD_PARAM; - } - - return OMPI_SUCCESS; -} - -/* - * utility routine for integer parameter 
registration - */ -static int reg_bool(const char* param_name, - const char* deprecated_param_name, - const char* param_desc, - bool default_value, bool *storage) -{ - int index; - - *storage = default_value; - index = mca_base_component_var_register(&mca_bcol_iboffload_component.super.bcol_version, - param_name, param_desc, MCA_BASE_VAR_TYPE_BOOL, - NULL, 0, 0, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); - if (NULL != deprecated_param_name) { - (void) mca_base_var_register_synonym(index, "ompi", "bcol", "iboffload", deprecated_param_name, - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - } - - return OMPI_SUCCESS; -} - -int mca_bcol_iboffload_verify_params(void) -{ - if (mca_bcol_iboffload_component.min_rnr_timer > 31) { - opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value", - true, "bcol_iboffload_ib_min_rnr_timer > 31", - "bcol_iboffload_ib_min_rnr_timer reset to 31"); - mca_bcol_iboffload_component.min_rnr_timer = 31; - } else if (mca_bcol_iboffload_component.min_rnr_timer < 0){ - opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value", - true, "bcol_iboffload_ib_min_rnr_timer < 0", - "bcol_iboffload_ib_min_rnr_timer reset to 0"); - mca_bcol_iboffload_component.min_rnr_timer = 0; - } - - if (mca_bcol_iboffload_component.timeout > 31) { - opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value", - true, "bcol_iboffload_ib_timeout > 31", - "bcol_iboffload_ib_timeout reset to 31"); - mca_bcol_iboffload_component.timeout = 31; - } else if (mca_bcol_iboffload_component.timeout < 0) { - opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value", - true, "bcol_iboffload_ib_timeout < 0", - "bcol_iboffload_ib_timeout reset to 0"); - mca_bcol_iboffload_component.timeout = 0; - } - - if (mca_bcol_iboffload_component.retry_count > 7) { - opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value", - true, "bcol_iboffload_ib_retry_count > 7", - "bcol_iboffload_ib_retry_count reset to 7"); - mca_bcol_iboffload_component.retry_count = 7; - } else if (mca_bcol_iboffload_component.retry_count < 0) { - opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value", - true, "bcol_iboffload_ib_retry_count < 0", - "bcol_iboffload_ib_retry_count reset to 0"); - mca_bcol_iboffload_component.retry_count = 0; - } - - if (mca_bcol_iboffload_component.max_rdma_dst_ops > 7) { - opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value", - true, "bcol_iboffload_ib_rnr_retry > 7", - "bcol_iboffload_ib_rnr_retry reset to 7"); - mca_bcol_iboffload_component.max_rdma_dst_ops = 7; - } else if (mca_bcol_iboffload_component.max_rdma_dst_ops < 0) { - opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value", - true, "bcol_iboffload_ib_rnr_retry < 0", - "bcol_iboffload_ib_rnr_retry reset to 0"); - mca_bcol_iboffload_component.max_rdma_dst_ops = 0; - } - - if (mca_bcol_iboffload_component.service_level > 15) { - opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value", - true, "bcol_iboffload_ib_service_level > 15", - "bcol_iboffload_ib_service_level reset to 15"); - mca_bcol_iboffload_component.service_level = 15; - } else if (mca_bcol_iboffload_component.service_level < 0) { - opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value", - true, "bcol_iboffload_ib_service_level < 0", - "bcol_iboffload_ib_service_level reset to 0"); - mca_bcol_iboffload_component.service_level = 0; - } - - if(mca_bcol_iboffload_component.buffer_alignment <= 1 || - (mca_bcol_iboffload_component.buffer_alignment & 
(mca_bcol_iboffload_component.buffer_alignment - 1))) {
-        opal_show_help("help-mpi-btl-openib.txt", "wrong buffer alignment",
-                       true, mca_bcol_iboffload_component.buffer_alignment, ompi_process_info.nodename, 64);
-        mca_bcol_iboffload_component.buffer_alignment = 64;
-    }
-
-    return OMPI_SUCCESS;
-}
-
-int mca_bcol_iboffload_register_params(void)
-{
-    mca_base_var_enum_t *new_enum;
-    char *msg;
-    int ret = OMPI_SUCCESS, tmp;
-
-#define CHECK(expr) do {                    \
-        tmp = (expr);                       \
-        if (OMPI_SUCCESS != tmp) ret = tmp; \
-    } while (0)
-
-    /* register openib component parameters */
-    CHECK(reg_int("k_nomial_radix", NULL,
-                  "The radix of the K-nomial tree for scatter-gather type algorithms "
-                  "(starts from 2)", 2, &mca_bcol_iboffload_component.k_nomial_radix,
-                  REGINT_GE_ONE));
-
-    CHECK(reg_int("priority", NULL,
-                  "IB offload component priority "
-                  "(from 0 (low) to 90 (high))", 90,
-                  &mca_bcol_iboffload_component.super.priority, 0));
-
-    CHECK(reg_int("verbose", NULL,
-                  "Output some verbose IB offload BTL information "
-                  "(0 = no output, nonzero = output)", 0,
-                  &mca_bcol_iboffload_component.verbose, 0));
-
-    CHECK(reg_bool("warn_default_gid_prefix", NULL,
-                   "Warn when there is more than one active port and at least one of them is connected to a network with only the default GID prefix configured (0 = do not warn; any other value = warn)",
-                   true, &mca_bcol_iboffload_component.warn_default_gid_prefix));
-
-    CHECK(reg_bool("warn_nonexistent_if", NULL,
-                   "Warn if non-existent devices and/or ports are specified in the bcol_iboffload_if_[in|ex]clude MCA parameters (0 = do not warn; any other value = warn)",
-                   true, &mca_bcol_iboffload_component.warn_nonexistent_if));
-
-    CHECK(reg_int("max_pipeline_depth", NULL,
-                  "The maximal number of fragments of the same collective request that can be transferred in parallel", 3,
-                  (int *) &mca_bcol_iboffload_component.max_pipeline_depth, 0));
-
-    CHECK(reg_int("max_mqe_tasks", NULL,
-                  "Maximum number of MQEs for each iboffload module",
-                  1024, &mca_bcol_iboffload_component.max_mqe_tasks, 0));
-    CHECK(reg_int("max_mq_size", NULL,
-                  "Maximum size of each MQ for each iboffload module",
-                  1024, &mca_bcol_iboffload_component.max_mq_size, 0));
-    CHECK(reg_int("free_list_num", NULL,
-                  "Initial size of free lists (must be >= 1)",
-                  256, &mca_bcol_iboffload_component.free_list_num,
-                  REGINT_GE_ONE));
-    CHECK(reg_int("free_list_max", NULL,
-                  "Maximum size of free lists "
-                  "(-1 = infinite, otherwise must be >= 0)",
-                  -1, &mca_bcol_iboffload_component.free_list_max,
-                  REGINT_NEG_ONE_OK | REGINT_GE_ONE));
-    CHECK(reg_int("free_list_inc", NULL,
-                  "Increment size of free lists (must be >= 1)",
-                  32, &mca_bcol_iboffload_component.free_list_inc,
-                  REGINT_GE_ONE));
-    /* the rdma mpool no longer exists - the grdma mpool component must be
-     * used instead; this should resolve errors in mtt testing
-     */
-    /*
-    CHECK(reg_string("mpool", NULL,
-                     "Name of the memory pool to be used (it is unlikely that you will ever want to change this)",
-                     "rdma", &mca_bcol_iboffload_component.mpool_name,
-                     0));
-    */
-    CHECK(reg_string("mpool", NULL,
-                     "Name of the memory pool to be used (it is unlikely that you will ever want to change this)",
-                     "grdma", &mca_bcol_iboffload_component.mpool_name,
-                     0));
-    CHECK(reg_int("cq_size", "cq_size",
-                  "Size of the OpenFabrics completion "
-                  "queue (will automatically be set to a minimum of "
-                  "(2 * number_of_peers * bcol_iboffload_rd_num))",
-                  1024, &mca_bcol_iboffload_component.cq_size, REGINT_GE_ONE));
-
-    CHECK(reg_int("exchange_tree_order", NULL,
-                  "The order of the exchange tree. 
" - "Must be power of two.", - 2, &mca_bcol_iboffload_component.exchange_tree_order, REGINT_GE_ONE)); - - CHECK(reg_int("knomial_tree_order", NULL, - "The order of the knomial exchange tree. ", - 3, &mca_bcol_iboffload_component.knomial_tree_order, REGINT_GE_ONE)); - - - CHECK(reg_int("max_inline_data", "max_inline_data", - "Maximum size of inline data segment " - "(-1 = run-time probe to discover max value, " - "otherwise must be >= 0). " - "If not explicitly set, use max_inline_data from " - "the INI file containing device-specific parameters", - 128, (int *) &mca_bcol_iboffload_component.max_inline_data, - REGINT_NEG_ONE_OK | REGINT_GE_ZERO)); - -#if 0 - CHECK(reg_string("pkey", "ib_pkey_val", - "OpenFabrics partition key (pkey) value. " - "Unsigned integer decimal or hex values are allowed (e.g., \"3\" or \"0x3f\") and will be masked against the maximum allowable IB paritition key value (0x7fff)", - "0", &pkey, 0)); - /* Pasha - mca_bcol_iboffload_component.pkey_val = - ompi_btl_openib_ini_intify(pkey) & MCA_BTL_IB_PKEY_MASK; - free(pkey); - */ -#endif - - CHECK(reg_string("receive_queues", NULL, - "Colon-delimited, comma delimited list of receive queues: P,4096,8,6,4:P,32768,8,6,4", - "P,512,256,192,128", &mca_bcol_iboffload_component.receive_queues, - 0)); - - CHECK(reg_int("qp_ous_rd_atom", NULL, - "InfiniBand outstanding atomic reads (must be >= 0)", 4, - (int *) &mca_bcol_iboffload_component.qp_ous_rd_atom, REGINT_GE_ZERO)); - - asprintf(&msg, "OpenFabrics MTU, in bytes (if not specified in INI files). Valid values are: %d=256 bytes, %d=512 bytes, %d=1024 bytes, %d=2048 bytes, %d=4096 bytes", - IBV_MTU_256, - IBV_MTU_512, - IBV_MTU_1024, - IBV_MTU_2048, - IBV_MTU_4096); - if (NULL == msg) { - /* Don't try to recover from this */ - return OMPI_ERR_OUT_OF_RESOURCE; - } - CHECK(mca_base_var_enum_create("infiniband mtu", mtu_values, &new_enum)); - mca_bcol_iboffload_component.mtu = IBV_MTU_1024; - tmp = mca_base_component_var_register(&mca_bcol_iboffload_component.super.bcol_version, - "mtu", msg, MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, - &mca_bcol_iboffload_component.mtu); - OBJ_RELEASE(new_enum); - free(msg); - - if (0 > tmp) ret = tmp; - - tmp = mca_base_var_register_synonym(tmp, "ompi", "bcol", "iboffload", "ib_mtu", - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - if (0 > tmp) ret = tmp; - - CHECK(reg_int("ib_min_rnr_timer", NULL, "InfiniBand minimum " - "\"receiver not ready\" timer, in seconds " - "(must be >= 0 and <= 31)", - 1 , &mca_bcol_iboffload_component.min_rnr_timer, 0)); - - CHECK(reg_int("ib_timeout", NULL, "InfiniBand transmit timeout, plugged into formula: 4.096 microseconds * " - "(2^bcol_iboffload_ib_timeout) (must be >= 0 and <= 31)", - 20, &mca_bcol_iboffload_component.timeout, 0)); - - CHECK(reg_int("ib_retry_count", NULL, "InfiniBand transmit retry count " - "(must be >= 0 and <= 7)", - 7, &mca_bcol_iboffload_component.retry_count, 0)); - - CHECK(reg_int("ib_rnr_retry", NULL, "InfiniBand \"receiver not ready\" " - "retry count; applies *only* to SRQ/XRC queues. 
PP queues "
-                  "use RNR retry values of 0 because Open MPI performs "
-                  "software flow control to guarantee that RNRs never occur "
-                  "(must be >= 0 and <= 7; 7 = \"infinite\")",
-                  7, &mca_bcol_iboffload_component.rnr_retry, 0));
-
-    CHECK(reg_int("ib_max_rdma_dst_ops", NULL, "InfiniBand maximum pending RDMA "
-                  "destination operations "
-                  "(must be >= 0)",
-                  4, &mca_bcol_iboffload_component.max_rdma_dst_ops, REGINT_GE_ZERO));
-
-    CHECK(reg_int("ib_service_level", NULL, "InfiniBand service level "
-                  "(must be >= 0 and <= 15)",
-                  0, &mca_bcol_iboffload_component.service_level, 0));
-
-    CHECK(reg_int("buffer_alignment", NULL,
-                  "Preferred communication buffer alignment, in bytes "
-                  "(must be > 0 and a power of two)",
-                  64, &mca_bcol_iboffload_component.buffer_alignment, REGINT_GE_ZERO));
-
-    /* register parameters controlling message fragmentation */
-    CHECK(reg_int("min_frag_size", NULL,
-                  "Minimum fragment size",
-                  getpagesize(), &mca_bcol_iboffload_component.super.min_frag_size,
-                  REGINT_GE_ONE));
-
-    CHECK(reg_int("max_frag_size", NULL,
-                  "Maximum fragment size",
-                  FRAG_SIZE_NO_LIMIT, &mca_bcol_iboffload_component.super.max_frag_size,
-                  REGINT_NONZERO));
-
-    CHECK(reg_bool("can_use_user_buffers", NULL,
-                   "User memory can be used by the collective algorithms",
-                   true, &mca_bcol_iboffload_component.super.can_use_user_buffers));
-
-    CHECK(reg_int("barrier_mode", NULL,
-                  "Barrier mode: 0 - Recursive doubling; 1 - Recursive K-ing",
-                  0, &mca_bcol_iboffload_component.barrier_mode, REGINT_GE_ZERO));
-
-    CHECK(reg_int("max_progress_pull", NULL,
-                  "Max number of progress pull checks",
-                  8, &mca_bcol_iboffload_component.max_progress_pull, REGINT_GE_ZERO));
-
-    CHECK(reg_int("use_brucks_smsg_alltoall_rdma", NULL,
-                  "Use the Bruck algorithm for small-message alltoall with RDMA semantics: "
-                  "1 = Alg with no Temp Buffer Recycling (faster), 2 = Alg with Temp Buffer Recycling (slower)",
-                  0, &mca_bcol_iboffload_component.use_brucks_smsg_alltoall_rdma, 0));
-
-    CHECK(reg_int("use_brucks_smsg_alltoall_sr", NULL,
-                  "Use the Bruck algorithm for small-message alltoall with Send/Recv semantics: "
-                  "1 = Alg with RTR (faster), 2 = Alg with RNR (slower)",
-                  0, &mca_bcol_iboffload_component.use_brucks_smsg_alltoall_sr, 0));
-
-    CHECK(reg_int("alltoall_bruck_radix", NULL,
-                  "Radix for the Bruck algorithm for small-message alltoall",
-                  3, &mca_bcol_iboffload_component.k_alltoall_bruck_radix, 0));
-
-    CHECK(reg_int("k_alltoall_bruck_radix", NULL,
-                  "Temp buffer alignment for the Bruck algorithm for small-message alltoall",
-                  64, &mca_bcol_iboffload_component.tmp_buf_alignment, 0));
-
-    /*
-    CHECK(reg_string("if_include", NULL,
-                     "Comma-delimited list of devices/ports to be used (e.g. \"mthca0,mthca1:2\"; empty value means to use all ports found). Mutually exclusive with bcol_iboffload_if_exclude.",
-                     NULL, &mca_bcol_iboffload_component.if_include,
-                     0));
-
-    CHECK(reg_string("if_exclude", NULL,
-                     "Comma-delimited list of device/ports to be excluded (empty value means to not exclude any ports). 
Mutually exclusive with bcol_iboffload_if_include.", - NULL, &mca_bcol_iboffload_component.if_exclude, - 0)); - */ - - CHECK(mca_bcol_iboffload_verify_params()); - - /* Register any MCA params for the connect pseudo-components */ - if (OMPI_SUCCESS == ret) { - ret = ompi_common_ofacm_base_register(&mca_bcol_iboffload_component.super.bcol_version); - } - - return ret; -} diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_mca.h b/ompi/mca/bcol/iboffload/bcol_iboffload_mca.h deleted file mode 100644 index 95e1ec7ee2..0000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_mca.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - /** @file */ - -#ifndef MCA_BCOL_IBOFFLOAD_MCA_H -#define MCA_BCOL_IBOFFLOAD_MCA_H - -#include "ompi_config.h" - -int mca_bcol_iboffload_register_params(void); -int mca_bcol_iboffload_verify_params(void); - -#endif diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_module.c b/ompi/mca/bcol/iboffload/bcol_iboffload_module.c deleted file mode 100644 index 0e90fac944..0000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_module.c +++ /dev/null @@ -1,1538 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#include "ompi_config.h" - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "opal/util/arch.h" -#include "opal/include/opal/types.h" -#include "opal/datatype/opal_datatype.h" - -#include "ompi/mca/bcol/base/base.h" -#include "opal/mca/mpool/base/base.h" -#include "ompi/communicator/communicator.h" -#include "opal/mca/mpool/grdma/mpool_grdma.h" -#include "ompi/mca/coll/ml/coll_ml_allocation.h" - -#include "bcol_iboffload.h" -#include "bcol_iboffload_frag.h" -#include "bcol_iboffload_task.h" -#include "bcol_iboffload_bcast.h" -#include "bcol_iboffload_device.h" -#include "bcol_iboffload_collreq.h" -#include "bcol_iboffload_collfrag.h" -#include "bcol_iboffload_endpoint.h" - -static int init_rdma_buf_desc(mca_bcol_iboffload_rdma_buffer_desc_t **desc, void *base_addr, uint32_t num_banks, - uint32_t num_buffers_per_bank, uint32_t size_buffer, uint32_t header_size); - -static int set_endpoint_remote_rdma_info(mca_bcol_iboffload_endpoint_t *ep, mca_bcol_iboffload_rdma_info_t *remote_rdma_info); - -static void -mca_bcol_iboffload_module_construct(mca_bcol_iboffload_module_t *module) -{ - int i; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - /* set all to zero */ - module->group_size = 0; - module->segment_size = 0; - module->collective_tag = 0; - module->ibnet = NULL; - module->cgroup_index = 0; - - module->num_endpoints = 0; - module->endpoints = NULL; - - /* initi the previous sequence number */ - module->prev_sequence_num = -1; - - switch (cm->barrier_mode) { - case (0): module->barrier_algth = - mca_bcol_iboffload_barrier_intra_recursive_doubling_start; - break; - case (1): module->barrier_algth = - mca_bcol_iboffload_barrier_intra_recursive_knomial_start; - break; - default: module->barrier_algth = NULL; - } - - module->allreduce_algth = 
NULL; - module->fanin_algth = mca_bcol_iboffload_new_style_fanin_first_call; - module->fanout_algth = mca_bcol_iboffload_new_style_fanout_first_call; - module->memsync_algth = mca_bcol_iboffload_nb_memory_service_barrier_start; - - memset(module->mq, 0, sizeof(module->mq[0]) * BCOL_IBOFFLOAD_MQ_NUM); - memset(module->alg_task_consump, 0, sizeof(uint32_t) * LAST_ALG); - memset(module->connection_status, 0, sizeof(bool) * LAST_ALG); - - for (i = 0; i < BCOL_IBOFFLOAD_MQ_NUM; i++) { - module->mq_credit[i] = mca_bcol_iboffload_component.max_mqe_tasks; - } - - module->super.bcol_component = - (mca_bcol_base_component_t *) &mca_bcol_iboffload_component; - - /* We need two MQ's tasks for exchange with remote addresses */ - module->alg_task_consump[REMOTE_EXCHANGE_ALG] += 2; - - module->power_of_2_ranks = 0; - /* it is safe to set all the remote block to zero */ - memset(&module->rdma_block, 0, sizeof(mca_bcol_iboffload_local_rdma_block_t)); - - module->super.list_n_connected = NULL; - - OBJ_CONSTRUCT(&module->collfrag_pending, opal_list_t); -} - -static void -mca_bcol_iboffload_module_destruct(mca_bcol_iboffload_module_t *module) -{ - int i = 0; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - IBOFFLOAD_VERBOSE(10, ("Module - %p: start to destroy; " - "pending queue size - %d.\n", - module, opal_list_get_size(&module->collfrag_pending))); - - /* Make sure that we done with all pending collective frags */ - while (opal_list_get_size(&module->collfrag_pending) > 0) { - opal_progress(); - } - - OBJ_DESTRUCT(&module->collfrag_pending); - - IBOFFLOAD_VERBOSE(10, ("module->mq_credit - %d, cm->max_mqe_tasks - %d.\n", - module->mq_credit[0], cm->max_mqe_tasks)); - /* Make sure that you got completion on all outstanding collectives */ - for (i = 0; i < BCOL_IBOFFLOAD_MQ_NUM; i++) { - while (module->mq_credit[i] != (int) cm->max_mqe_tasks) { - opal_progress(); - } - } - - IBOFFLOAD_VERBOSE(10, ("All credits were returned.\n")); - - if (NULL != module && NULL != module->mq) { - for (i = 0; i < BCOL_IBOFFLOAD_MQ_NUM; i++) { - if (0 != mqe_context_destroy(module->mq[i])) { - IBOFFLOAD_ERROR(("Error destroying MQ for device (%s), error: %s\n", - ibv_get_device_name(module->device->dev.ib_dev), strerror(errno))); - } - } - - IBOFFLOAD_VERBOSE(10, ("MQ %d was destroyed.\n", i)); - } - - if (NULL != module->endpoints) { - mca_bcol_iboffload_endpoint_t *ep; - int qp_index, num_qps = cm->num_qps; - - for (i = 0; i < module->num_endpoints; ++i) { - if (NULL != module->endpoints[i]) { - /* Make sure that we get completions on all outstanding send requests */ - ep = module->endpoints[i]; - for (qp_index = 0; qp_index < num_qps; ++qp_index) { - IBOFFLOAD_VERBOSE(10, ("qp_index - %d, ep->index - %d, " - "ep->qps[qp_index].sd_wqe - %d, " - "cm->qp_infos[qp_index].rd_num - %d.\n", - qp_index, ep->index, - ep->qps[qp_index].sd_wqe, - cm->qp_infos[qp_index].rd_num)); - - while (ep->qps[qp_index].sd_wqe != cm->qp_infos[qp_index].rd_num) { - opal_progress(); - } - - IBOFFLOAD_VERBOSE(10, ("qp_index - %d, ep->index - %d; " - "All sends were sent.\n", - qp_index, ep->index)); - } - - OBJ_RELEASE(ep); - } - } - - free(module->endpoints); - } - - netpatterns_free_recursive_doubling_tree_node(&module->n_exchange_tree); - netpatterns_free_recursive_doubling_tree_node(&module->recursive_doubling_tree); - - OBJ_RELEASE(module->device->net_context); - OBJ_RELEASE(module->device); - - if (NULL != module->super.list_n_connected) { - free(module->super.list_n_connected); - module->super.list_n_connected = NULL; - 
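
Note the drain idiom used throughout this destructor: before any MQ, QP, or endpoint is torn down, the code spins on opal_progress() until every MQ credit and send WQE has been returned, because destroying verbs objects that still have work requests in flight is an error. The pattern, reduced to its core (the predicate and progress hooks stand in for the module-specific checks):

    #include <stdbool.h>

    /* Spin the progress engine until all outstanding resources
     * (credits, WQEs) have been returned to their owner. */
    static void drain_until(bool (*all_returned)(void *), void *ctx,
                            void (*progress)(void))
    {
        while (!all_returned(ctx)) {
            progress();  /* e.g. opal_progress(): polls CQs, returns credits */
        }
    }
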
} - - OBJ_DESTRUCT(&module->iovec_tasks_free); - - IBOFFLOAD_VERBOSE(10, ("module - %p was successfully destructed.\n", module)); -} - -OBJ_CLASS_INSTANCE(mca_bcol_iboffload_module_t, - mca_bcol_base_module_t, - mca_bcol_iboffload_module_construct, - mca_bcol_iboffload_module_destruct); - -static int iboffload_init_port(struct mca_bcol_iboffload_device_t *device, - struct mca_bcol_iboffload_port_t *p) -{ - union ibv_gid gid; - struct ibv_port_attr ib_port_attr; - - if (ibv_query_port(device->dev.ib_dev_context, p->id, &ib_port_attr)){ - IBOFFLOAD_ERROR(("Error getting port attributes for device %s " - "port number %d errno says %s", - ibv_get_device_name(device->dev.ib_dev), p->id, strerror(errno))); - return OMPI_ERR_NOT_FOUND; - } - - /* Set port data */ - p->lmc = (1 << ib_port_attr.lmc); - p->lid = ib_port_attr.lid; - p->stat = ib_port_attr.state; - p->mtu = ib_port_attr.active_mtu; - - IBOFFLOAD_VERBOSE(10, (" Setting port data (%s:%d) lid=%d, lmc=%d, stat=%d, mtu=%d\n", - ibv_get_device_name(device->dev.ib_dev), p->id, p->lid, - p->lmc, p->stat, p->mtu)); - - if (0 != ibv_query_gid(device->dev.ib_dev_context, p->id, 0, &gid)) { - IBOFFLOAD_ERROR(("ibv_query_gid failed (%s:%d)\n", - ibv_get_device_name(device->dev.ib_dev), p->id)); - return OMPI_ERR_NOT_FOUND; - } - - /* set subnet data */ - p->subnet_id = ntoh64(gid.global.subnet_prefix); - IBOFFLOAD_VERBOSE(10, ("my IB-only subnet_id for HCA %s port %d is %lx", - ibv_get_device_name(device->dev.ib_dev), p->id, p->subnet_id)); - - return OMPI_SUCCESS; -} - -/* mpool allocation maybe changed in future, so lets keep it as separate function */ -static int prepare_mpool(mca_bcol_iboffload_device_t *device) -{ - int ret = OMPI_SUCCESS; - mca_mpool_base_resources_t resources; - - resources.reg_data = (void *) device; - resources.sizeof_reg = sizeof(mca_bcol_iboffload_reg_t); - - resources.register_mem = mca_bcol_iboffload_register_mr; - resources.deregister_mem = mca_bcol_iboffload_deregister_mr; - - device->mpool = - mca_mpool_base_module_create(mca_bcol_iboffload_component.mpool_name, - device, &resources); - if (NULL == device->mpool){ - opal_output(0, "error creating IB memory pool for %s errno says %s\n", - ibv_get_device_name(device->dev.ib_dev), strerror(errno)); - ret = OMPI_ERROR; - } - - return ret; -} - -/* Allocate device related resources: mpool, pd, cq, free_lists */ -static int allocate_device_resources(mca_bcol_iboffload_device_t *device) -{ - int qp_index, num_qps, rc; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - void* dummy_mem = (void *) &device->dummy_mem[0]; - - num_qps = cm->num_qps; - - /* We have some active ports, alloce pd */ - device->ib_pd = ibv_alloc_pd(device->dev.ib_dev_context); - if (NULL == device->ib_pd){ - IBOFFLOAD_ERROR(("Error allocating protection domain for %s errno says %s", - ibv_get_device_name(device->dev.ib_dev), strerror(errno))); - return OMPI_ERROR; - } - - /* Pasha: allocate mpool here */ - if (OMPI_SUCCESS != prepare_mpool(device)) { - return OMPI_ERROR; - } - - /* Allocating free list of memory registered fragments */ - device->frags_free = (ompi_free_list_t *) calloc( - num_qps, sizeof(ompi_free_list_t)); - - if (NULL == device->frags_free) { - IBOFFLOAD_ERROR(("Error allocating memory for " - "frags array, dev: %s errno says %s", - ibv_get_device_name(device->dev.ib_dev), - strerror(errno))); - - return OMPI_ERROR; - } - - for (qp_index = 0; qp_index < num_qps; ++qp_index) { - mca_bcol_iboffload_alloc_qps_resource_fn_t alloc_resource = - 
cm->qp_infos[qp_index].alloc_resource; - - if (NULL != alloc_resource) { - if (OMPI_SUCCESS != alloc_resource(qp_index, device)) { - return OMPI_ERROR; - } - } - - } - - if (OMPI_SUCCESS != - mca_bcol_iboffload_adjust_cq(device, &device->ib_cq)) { - IBOFFLOAD_ERROR(("Error creating CQ for %s errno says %s", - ibv_get_device_name(device->dev.ib_dev), strerror(errno))); - return OMPI_ERROR; - } - - if (OMPI_SUCCESS != - mca_bcol_iboffload_adjust_cq(device, &device->ib_mq_cq)) { - IBOFFLOAD_ERROR(("Error creating mq CQ for %s errno says %s", - ibv_get_device_name(device->dev.ib_dev), strerror(errno))); - return OMPI_ERROR; - } - - rc = mca_bcol_iboffload_register_mr((void *) device, dummy_mem, - sizeof(char) * BCOL_IBOFFLOAD_DUMMY_MEM_SIZE, - &device->dummy_reg.base); - - if (OMPI_SUCCESS != rc) { - IBOFFLOAD_ERROR(("Dummy memory registration failed for %s errno says %s", - ibv_get_device_name(device->dev.ib_dev), strerror(errno))); - return OMPI_ERROR; - } - - for (qp_index = 0; qp_index < num_qps; ++qp_index) { - mca_bcol_iboffload_frag_t *frag = &device->dummy_frags[qp_index]; - - memset(&frag->super.registration, 0, sizeof(mca_mpool_base_registration_t)); - OBJ_CONSTRUCT(frag, mca_bcol_iboffload_frag_t); - - frag->qp_index = qp_index; - frag->type = MCA_BCOL_IBOFFLOAD_DUMMY_OWNER; - - frag->registration = &device->dummy_reg; - - frag->super.ptr = dummy_mem; - frag->super.registration = &device->dummy_reg.base; - - frag->sg_entry.length = 0; - frag->sg_entry.lkey = device->dummy_reg.mr->lkey; - frag->sg_entry.addr = (uint64_t) (uintptr_t) dummy_mem; - } - - return OMPI_SUCCESS; -} - -/* Register memory */ -int mca_bcol_iboffload_register_mr(void *reg_data, void *base, size_t size, - mca_mpool_base_registration_t *reg) -{ - mca_bcol_iboffload_device_t *device = (mca_bcol_iboffload_device_t *) reg_data; - mca_bcol_iboffload_reg_t *iboffload_reg = (mca_bcol_iboffload_reg_t *) reg; - - iboffload_reg->mr = ibv_reg_mr(device->ib_pd, base, size, - IBV_ACCESS_LOCAL_WRITE | - IBV_ACCESS_REMOTE_WRITE | - IBV_ACCESS_REMOTE_READ); - - if (NULL == iboffload_reg->mr) { - IBOFFLOAD_ERROR(("Device %s: %p addr, %d bytes registration failed.", - ibv_get_device_name(device->dev.ib_dev), base, size)); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - IBOFFLOAD_VERBOSE(10, ("Device %s: memory register addr=%p, len=%d, mr - %p.", - ibv_get_device_name(device->dev.ib_dev), base, size, iboffload_reg->mr)); - - return OMPI_SUCCESS; -} - -/* Deregister memory */ -int mca_bcol_iboffload_deregister_mr(void *reg_data, mca_mpool_base_registration_t *reg) -{ - mca_bcol_iboffload_device_t *device = (mca_bcol_iboffload_device_t *) reg_data; - mca_bcol_iboffload_reg_t *iboffload_reg = (mca_bcol_iboffload_reg_t *) reg; - - IBOFFLOAD_VERBOSE(10, ("Device %s: mr - %p.", - ibv_get_device_name(device->dev.ib_dev), iboffload_reg->mr)); - - if (NULL != iboffload_reg->mr) { - if (ibv_dereg_mr(iboffload_reg->mr)) { - IBOFFLOAD_ERROR(("Device %s: error unpinning iboffload memory errno says %s", - ibv_get_device_name(device->dev.ib_dev), strerror(errno))); - return OMPI_ERROR; - } - } - - IBOFFLOAD_VERBOSE(10, ("Device %s: memory deregister succeeded.", - ibv_get_device_name(device->dev.ib_dev))); - - iboffload_reg->mr = NULL; - - return OMPI_SUCCESS; -} - -/* We need to keep separate registration function for - ML list memory managment */ -static int mca_bcol_iboffload_lmngr_register(void *context_data, - void *base, size_t size, - void **reg_desc) -{ - struct ibv_mr *mr; - mca_bcol_iboffload_device_t *device = - 
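[Editor's note: the registration and deregistration hooks in this file all reduce to one verbs pattern. A sketch with hypothetical helper names, using the same access mask that mca_bcol_iboffload_register_mr() requests.]

    #include <stddef.h>
    #include <infiniband/verbs.h>

    static struct ibv_mr *pin_region(struct ibv_pd *pd, void *buf, size_t len)
    {
        /* Returns NULL on failure; the MR carries the lkey/rkey used later. */
        return ibv_reg_mr(pd, buf, len,
                          IBV_ACCESS_LOCAL_WRITE |
                          IBV_ACCESS_REMOTE_WRITE |
                          IBV_ACCESS_REMOTE_READ);
    }

    static int unpin_region(struct ibv_mr *mr)
    {
        return (NULL != mr) ? ibv_dereg_mr(mr) : 0;  /* 0 on success */
    }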
(mca_bcol_iboffload_device_t *) context_data; - - mr = ibv_reg_mr(device->ib_pd, base, size, - IBV_ACCESS_LOCAL_WRITE | - IBV_ACCESS_REMOTE_WRITE | - IBV_ACCESS_REMOTE_READ); - - if (NULL == mr) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - IBOFFLOAD_VERBOSE(10, ("Device %s: memory register addr=%p, len=%d", - ibv_get_device_name(device->dev.ib_dev), base, size)); - - *reg_desc = (void *) mr; - - /* Make sure that the addr stays the same */ - assert(mr->addr == base); - - return OMPI_SUCCESS; -} - -static int mca_bcol_iboffload_lmngr_deregister(void *context_data, void *reg_desc) -{ - struct ibv_mr *mr = (struct ibv_mr *) reg_desc; - mca_bcol_iboffload_device_t *device = - (mca_bcol_iboffload_device_t *) context_data; - - if (mr != NULL) { - if (ibv_dereg_mr(mr)) { - IBOFFLOAD_ERROR(("Device %s: error unpinning iboffload memory errno says %s", - ibv_get_device_name(device->dev.ib_dev), strerror(errno))); - return OMPI_ERROR; - } - } - - return OMPI_SUCCESS; -} - -static int iboffload_start_device(mca_bcol_iboffload_device_t *device) -{ - int port_cnt, port, ret; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - -#if HAVE_STRUCT_IBV_DEVICE_TRANSPORT_TYPE - if (IBV_TRANSPORT_IB != device->dev.ib_dev->transport_type) { - IBOFFLOAD_VERBOSE(10, ("Skipping non IB device %s", - ibv_get_device_name(device->dev.ib_dev))); - goto error; - } -#endif - - /* Open device context */ - IBOFFLOAD_VERBOSE(10, ("Open IB device - %p", device->dev.ib_dev)); - - device->dev.ib_dev_context = ibv_open_device(device->dev.ib_dev); - if (NULL == device->dev.ib_dev_context) { - IBOFFLOAD_ERROR(("Error obtaining device context for %s errno says %s", - ibv_get_device_name(device->dev.ib_dev), strerror(errno))); - goto error; - } - - if (ibv_query_device(device->dev.ib_dev_context, &device->ib_dev_attr)) { - IBOFFLOAD_ERROR(("error obtaining device attributes for %s errno says %s", - ibv_get_device_name(device->dev.ib_dev), strerror(errno))); - goto error; - } - - port_cnt = device->ib_dev_attr.phys_port_cnt; - if (0 == port_cnt) { - goto error; - } - - device->ports = (mca_bcol_iboffload_port_t *) - calloc(port_cnt, sizeof(mca_bcol_iboffload_port_t)); - if (NULL == device->ports) { - goto error; - } - - /* Note ports are 1 based (i >= 1) */ - for (port = 1; port <= port_cnt; port++) { - int pi = port - 1; /* port array index starts from zero */ - - struct ibv_port_attr ib_port_attr; - memset(&ib_port_attr, 0, sizeof(ib_port_attr)); - - if (ibv_query_port(device->dev.ib_dev_context, (uint8_t) port, &ib_port_attr)) { - IBOFFLOAD_ERROR(("Error getting port attributes for device %s " - "port number %d errno says %s", - ibv_get_device_name(device->dev.ib_dev), port, strerror(errno))); - continue; - } - - if (IBV_PORT_ACTIVE == ib_port_attr.state) { - /* Pasha: Need to think how we want to handle MTUs - if (ib_port_attr.active_mtu < mca_bcol_iboffload_component.mtu){ - device->mtu = ib_port_attr.active_mtu; - } - */ - /* start to put port info */ - ++device->num_act_ports; - device->ports[pi].id = port; - device->ports[pi].stat = ib_port_attr.state; - device->ports[pi].mtu = ib_port_attr.active_mtu; - - if (0 == cm->pkey_val) { - ret = iboffload_init_port(device, &device->ports[pi]); - if (OMPI_SUCCESS != ret) { - IBOFFLOAD_ERROR(("Device %s " - "port number %d , failed to init port, errno says %s", - ibv_get_device_name(device->dev.ib_dev), - port, strerror(errno))); - continue; - } - } else { - uint16_t pkey, j; - for (j = 0; j < device->ib_dev_attr.max_pkeys; j++) { - if 
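[Editor's note: iboffload_start_device() (continuing below) operates on a struct ibv_device that was chosen earlier in component setup. For context, the standard enumerate/open/query sequence looks roughly like this -- a hedged sketch, error paths trimmed.]

    #include <infiniband/verbs.h>

    static struct ibv_context *open_first_hca(int *phys_ports)
    {
        int n = 0;
        struct ibv_context *ctx = NULL;
        struct ibv_device **list = ibv_get_device_list(&n);
        struct ibv_device_attr attr;

        if (NULL != list && n > 0) {
            ctx = ibv_open_device(list[0]);        /* open before freeing the list */
            if (NULL != ctx && 0 == ibv_query_device(ctx, &attr)) {
                *phys_ports = attr.phys_port_cnt;  /* ports are numbered from 1 */
            }
        }
        if (NULL != list) {
            ibv_free_device_list(list);
        }
        return ctx;
    }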
(ibv_query_pkey(device->dev.ib_dev_context, (uint8_t) port, j, &pkey)) { - IBOFFLOAD_ERROR(("error getting pkey for index %d, device %s " - "port number %d errno says %s", - j, ibv_get_device_name(device->dev.ib_dev), port, strerror(errno))); - continue; - } - - pkey = ntohs(pkey) & MCA_BCOL_IBOFFLOAD_PKEY_MASK; - if (pkey == cm->pkey_val) { - ret = iboffload_init_port(device, &device->ports[pi]); - if (OMPI_SUCCESS != ret) { - IBOFFLOAD_ERROR(("Device %s " - "port number %d , failed to init port, errno says %s", - ibv_get_device_name(device->dev.ib_dev), - port, strerror(errno))); - continue; - } - } - } - } - } - } - - if (0 == device->num_act_ports) { - goto error; - } - - if (OMPI_SUCCESS != allocate_device_resources(device)) { - goto error; - } - - /* setup network context on device */ - device->net_context = OBJ_NEW(bcol_base_network_context_t); - - device->net_context->context_data = (void *) device; - - device->net_context->register_memory_fn = mca_bcol_iboffload_lmngr_register; - device->net_context->deregister_memory_fn = mca_bcol_iboffload_lmngr_deregister; - - /* the device is ready now */ - device->activated = true; - return OMPI_SUCCESS; - -error: - /* Pasha: need to add nice resource clean up */ - return OMPI_ERROR; -} -static void mca_bcol_iboffload_set_small_msg_thresholds(struct mca_bcol_base_module_t *super) -{ - mca_bcol_iboffload_module_t *iboffload_module = - (mca_bcol_iboffload_module_t *) super; - - /* Set the Bcast threshold, for IB it equals to ML buffer size */ - super->small_message_thresholds[BCOL_BCAST] = - iboffload_module->rdma_block.ml_mem_desc->size_buffer; - - if ((mca_bcol_iboffload_component.use_brucks_smsg_alltoall_rdma) - || (mca_bcol_iboffload_component.use_brucks_smsg_alltoall_sr)) { - /* Set the Alltoall threshold, for Bruck's algth we use 1.5 of the buff size */ - super->small_message_thresholds[BCOL_ALLTOALL] = - (iboffload_module->rdma_block.ml_mem_desc->size_buffer / 3) * 2; - } else { - /* Set the Alltoall threshold, for this case it equals to a half of the ML buffer size */ - super->small_message_thresholds[BCOL_ALLTOALL] = - iboffload_module->rdma_block.ml_mem_desc->size_buffer / 2; - } - - /* Set the Allreduce threshold, for IB it equals to ML buffer size */ - super->small_message_thresholds[BCOL_ALLREDUCE] = - iboffload_module->rdma_block.ml_mem_desc->size_buffer; - - /* Set the Allgather threshold, for IB it equals to ML buffer size */ - super->small_message_thresholds[BCOL_ALLGATHER] = - iboffload_module->rdma_block.ml_mem_desc->size_buffer / - ompi_comm_size(iboffload_module->super.sbgp_partner_module->group_comm); -} - -static int mca_bcol_iboffload_init_buffer_memory(struct mca_coll_ml_module_t *ml_module, - struct mca_bcol_base_module_t *bcol, - void *reg_data) -{ - mca_bcol_iboffload_module_t *iboffload_module = (mca_bcol_iboffload_module_t *) bcol; - mca_bcol_iboffload_local_rdma_block_t *rdma_block = &iboffload_module->rdma_block; - - struct mca_bcol_base_memory_block_desc_t *desc = ml_module->payload_block; - struct ibv_mr *mr = (struct ibv_mr *) desc->block->lmngr->reg_desc[bcol->context_index]; - int i; - - IBOFFLOAD_VERBOSE(10, ("mca_bcol_iboffload_init_buffer_memory was called")); - - /* Set rdma block data */ - rdma_block->ib_info.rkey = mr->rkey; - rdma_block->ib_info.lkey = mr->lkey; - - rdma_block->ib_info.addr = (uint64_t) (uintptr_t) desc->block->base_addr; - IBOFFLOAD_VERBOSE(10, ("Caching rkey %u lkey %u addr %p", - rdma_block->ib_info.rkey, - rdma_block->ib_info.lkey, - rdma_block->ib_info.addr)); - - /* cache ml mem 
desc tunings locally */
- rdma_block->bdesc.num_banks = desc->num_banks;
- rdma_block->bdesc.num_buffers_per_bank = desc->num_buffers_per_bank;
- rdma_block->bdesc.size_buffer = desc->size_buffer;
- rdma_block->bdesc.data_offset = ml_module->data_offset;
-
- IBOFFLOAD_VERBOSE(10, ("RDMA buffer configuration num banks %d num_per_bank %d size %d base addr %p",
- desc->num_banks, desc->num_buffers_per_bank, desc->size_buffer, mr->addr));
-
- /* pointer to ml level descriptor */
- rdma_block->ml_mem_desc = desc;
-
- rdma_block->sync_counter = 0; /* reset the counter */
- /* Allocate and set bank block counters */
- for (i = 0; i < MCA_BCOL_IBOFFLOAD_BK_LAST; i++) {
- rdma_block->bank_buffer_counter[i] = (int *) calloc(rdma_block->bdesc.num_banks,
- sizeof(int));
- if (NULL == rdma_block->bank_buffer_counter[i]) {
- IBOFFLOAD_VERBOSE(10, ("Failed to allocate bank_buffer_counter\n"));
- return OMPI_ERROR;
- }
- }
-
- if (OMPI_SUCCESS != init_rdma_buf_desc(&rdma_block->bdesc.rdma_desc,
- desc->block->base_addr,
- rdma_block->bdesc.num_banks,
- rdma_block->bdesc.num_buffers_per_bank,
- rdma_block->bdesc.size_buffer,
- ml_module->data_offset)) {
- IBOFFLOAD_VERBOSE(10, ("Failed to allocate rdma memory descriptor\n"));
- return OMPI_ERROR;
- }
-
- /* All the data is now cached at the module level. The
- real data exchange will happen during QP creation and
- data exchange */
-
- IBOFFLOAD_VERBOSE(10, ("ml_module = %p, iboffload_module = %p, ml_mem_desc = %p.\n",
- ml_module, iboffload_module, rdma_block->ml_mem_desc));
-
- for (i = 0; i < iboffload_module->num_endpoints; ++i) {
- mca_bcol_iboffload_endpoint_t *ep = iboffload_module->endpoints[i];
-
- if (true == ep->need_toset_remote_rdma_info) {
- IBOFFLOAD_VERBOSE(10, ("ep %p index %d: postponed remote rdma block init.", ep, ep->index));
- if (OPAL_UNLIKELY(OMPI_SUCCESS !=
- set_endpoint_remote_rdma_info(ep, ep->remote_rdma_info))) {
- return OMPI_ERROR;
- }
- }
- }
-
- /* Hack:
- !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- Workaround for a deadlock caused by the connection setup
- for the async service barrier. The async service barrier uses its own set of
- MQs and QPs, _BUT_ the exchange operation uses the MQ that is used for the
- primary set of collective operations (Allgather, Barrier, etc.).
- As a result, the exchange wait operation could be pushed to the primary MQ and
- cause a deadlock.
- !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
- Create connection for service barrier and memory address exchange - for ml buffers and asyc service barrier - */ - /* This nasty hack was moved to ml discovery - rc = mca_bcol_iboffload_rec_doubling_start_connections(iboffload_module); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return rc; - } - */ - - return OMPI_SUCCESS; -} - -static void load_func(mca_bcol_base_module_t *super) -{ - int fnc; - - /* Loading Memory managment functions */ - /* NULL means that mpool may decide about prefered memory allocate functions */ - /* super->memory_management_functions.malloc_fn = NULL;*/ - /* NULL means that mpool may decide about prefered memory release functions */ - /* super->memory_management_functions.free_fn = NULL; */ - - /* JSL: setting the bcol_memory_init function to NULL, not sure what ib needs to do with - * the ml_memory_block - */ - super->bcol_memory_init = NULL; - - - /* Loading collective functions */ - for (fnc = 0; fnc < BCOL_NUM_OF_FUNCTIONS; ++fnc) { - super->bcol_function_table[fnc] = NULL; - } - - super->bcol_function_init_table[BCOL_FANIN] = mca_bcol_iboffload_fanin_register; - super->bcol_function_init_table[BCOL_FANOUT] = mca_bcol_iboffload_fanout_register; - - super->bcol_function_init_table[BCOL_BARRIER] = mca_bcol_iboffload_barrier_register; - super->bcol_function_init_table[BCOL_BCAST] = mca_bcol_iboffload_bcast_register; - /*super->bcol_function_init_table[BCOL_ALLTOALL] = mca_bcol_iboffload_alltoall_register;*/ - super->bcol_function_init_table[BCOL_ALLGATHER] = mca_bcol_iboffload_allgather_register; - super->bcol_function_init_table[BCOL_SYNC] = mca_bcol_iboffload_memsync_register; - super->bcol_function_init_table[BCOL_ALLREDUCE] = mca_bcol_iboffload_allreduce_register; - - super->bcol_memory_init = mca_bcol_iboffload_init_buffer_memory; - - /* Set thresholds */ - super->set_small_msg_thresholds = mca_bcol_iboffload_set_small_msg_thresholds; - - super->k_nomial_tree = mca_bcol_iboffload_setup_knomial_tree; -} - -int mca_bcol_iboffload_setup_knomial_tree(mca_bcol_base_module_t *super) -{ - int rc; - mca_bcol_iboffload_module_t *ib_module = (mca_bcol_iboffload_module_t *) super; - rc = netpatterns_setup_recursive_knomial_allgather_tree_node( - ib_module->super.sbgp_partner_module->group_size, - ib_module->super.sbgp_partner_module->my_index, - mca_bcol_iboffload_component.k_nomial_radix, - super->list_n_connected, - &ib_module->knomial_allgather_tree); - - return rc; -} - -static inline struct ibv_cq *ibv_create_cq_compat(struct ibv_context *context, - int cqe, void *cq_context, struct ibv_comp_channel *channel, - int comp_vector) -{ -#if OPAL_IBV_CREATE_CQ_ARGS == 3 - return ibv_create_cq(context, cqe, channel); -#else - return ibv_create_cq(context, cqe, cq_context, channel, comp_vector); -#endif -} - -int mca_bcol_iboffload_adjust_cq(mca_bcol_iboffload_device_t *device, - struct ibv_cq **ib_cq) -{ - uint32_t cq_size = (uint32_t) mca_bcol_iboffload_component.cq_size; - - if (NULL == *ib_cq) { - *ib_cq = ibv_create_cq_compat(device->dev.ib_dev_context, cq_size, -#if OPAL_ENABLE_PROGRESS_THREADS == 1 - device, device->ib_channel, -#else - NULL, NULL, -#endif - 0); - - if (NULL == *ib_cq) { - IBOFFLOAD_ERROR(("Device %s " - ", failed to create CQ, errno says %s", - ibv_get_device_name(device->dev.ib_dev), strerror(errno))); - - return OMPI_ERROR; - } - } - - return OMPI_SUCCESS; -} - -static int init_recv_wr_manager(mca_bcol_iboffload_recv_wr_manager *recv_wr_manager) -{ - - struct ibv_recv_wr *recv_wr = NULL; - int ret = OMPI_SUCCESS, qp, wr, num_qps; - - 
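[Editor's note: init_recv_wr_manager() (continuing below) pre-links one array of work requests per QP so that any tail of the array can be posted with a single ibv_post_recv() call. The chaining itself, isolated as a sketch:]

    #include <stddef.h>
    #include <infiniband/verbs.h>

    /* Link wr[0..n-1] into a singly linked chain; each entry carries one
     * SGE whose address is filled in at prepost time. */
    static void chain_recv_wrs(struct ibv_recv_wr *wr, int n)
    {
        int i;
        for (i = 0; i < n; i++) {
            wr[i].next    = (i + 1 < n) ? &wr[i + 1] : NULL;
            wr[i].num_sge = 1;
            wr[i].sg_list = NULL;   /* set to the real SGE before posting */
        }
    }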
mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - num_qps = cm->num_qps; - OPAL_THREAD_LOCK(&recv_wr_manager->lock); - - recv_wr_manager->recv_work_requests = - (struct ibv_recv_wr **) calloc(num_qps, sizeof(struct ibv_recv_wr *)); - if (NULL == recv_wr_manager->recv_work_requests) { - IBOFFLOAD_ERROR(("Failed to allocate memory for recv_wr_manager->recv_work_requests")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto error; - } - - for (qp = 0; qp < num_qps; ++qp) { - int recv_queue_size = cm->qp_infos[qp].rd_num; - - recv_wr_manager->recv_work_requests[qp] = - (struct ibv_recv_wr *) calloc(recv_queue_size, sizeof(struct ibv_recv_wr)); - if (NULL == recv_wr_manager->recv_work_requests[qp]) { - IBOFFLOAD_ERROR(("Failed to allocate memory for recv_wr_manager->recv_work_requests")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto error; - } - - for (wr = 0; wr < recv_queue_size - 1; ++wr) { - recv_wr = &recv_wr_manager->recv_work_requests[qp][wr]; - recv_wr->next = &recv_wr_manager->recv_work_requests[qp][wr + 1]; - /* init receive work request. - * Real sg_list value we fill during receive prepost flow. - * recv_wr->wr_id and recv_wr->sg_list is zero by default */ - recv_wr->wr_id = 0; - recv_wr->sg_list = NULL; - recv_wr->num_sge = 1; /* single sge will be filled later */ - } - - recv_wr->next->num_sge = 1; /* for the last entry everything is null except the num_sge */ - } - -error: - OPAL_THREAD_UNLOCK(&recv_wr_manager->lock); - return ret; -} - -/* On first access to the component - allocate all memory resources */ -static int component_first_usage(void) -{ - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - int ret = OMPI_SUCCESS; - - /* creating collfrag free list */ - OBJ_CONSTRUCT(&cm->collfrags_free, ompi_free_list_t); - ret = ompi_free_list_init_new(&cm->collfrags_free, - sizeof(mca_bcol_iboffload_collfrag_t), - MCA_IBOFFLOAD_CACHE_LINE_SIZE, - OBJ_CLASS(mca_bcol_iboffload_collfrag_t), - 0, MCA_IBOFFLOAD_CACHE_LINE_SIZE, - cm->free_list_num, - cm->free_list_max, - cm->free_list_inc, - NULL); - if (OMPI_SUCCESS != ret) { - IBOFFLOAD_ERROR(("Failed to allocate mwr_free %s:%d\n", __FILE__, __LINE__)); - return ret; - } - - /* allocate free list of collective message requests */ - OBJ_CONSTRUCT(&cm->collreqs_free, ompi_free_list_t); - ret = ompi_free_list_init_new(&cm->collreqs_free, - sizeof(mca_bcol_iboffload_collreq_t), - MCA_IBOFFLOAD_CACHE_LINE_SIZE, - OBJ_CLASS(mca_bcol_iboffload_collreq_t), - 0, MCA_IBOFFLOAD_CACHE_LINE_SIZE, - cm->free_list_num * 2, - cm->free_list_max * 2, - cm->free_list_inc * 2, - NULL); - if (OMPI_SUCCESS != ret) { - IBOFFLOAD_ERROR(("Error creating free list, error: %s\n", strerror(errno))); - goto release_collfrag; - } - - OBJ_CONSTRUCT(&cm->tasks_free, ompi_free_list_t); - ret = ompi_free_list_init_new(&cm->tasks_free, - sizeof(mca_bcol_iboffload_task_t), - MCA_IBOFFLOAD_CACHE_LINE_SIZE, - OBJ_CLASS(mca_bcol_iboffload_task_t), - 0, MCA_IBOFFLOAD_CACHE_LINE_SIZE, - cm->free_list_num * 2, - cm->free_list_max * 2, - cm->free_list_inc * 2, - NULL); - if (OMPI_SUCCESS != ret) { - IBOFFLOAD_ERROR(("Error creating free list, error: %s\n", strerror(errno))); - goto release_collreq; - } - - OBJ_CONSTRUCT(&cm->calc_tasks_free, ompi_free_list_t); - ret = ompi_free_list_init_ex_new(&cm->calc_tasks_free, - sizeof(mca_bcol_iboffload_task_t), - MCA_IBOFFLOAD_CACHE_LINE_SIZE, - OBJ_CLASS(mca_bcol_iboffload_task_t), - 0, MCA_IBOFFLOAD_CACHE_LINE_SIZE, - cm->free_list_num * 2, - cm->free_list_max * 2, - cm->free_list_inc * 2, - NULL, - 
mca_bcol_iboffload_calc_task_init, - &cm->calc_tasks_free); - if (OMPI_SUCCESS != ret) { - IBOFFLOAD_ERROR(("Error creating free list, error: %s\n", strerror(errno))); - goto release_collreq; - } - - /* Initialization for frags that handle ML allocated memory, - it is NO registration is required ! - */ - - OBJ_CONSTRUCT(&cm->ml_frags_free, ompi_free_list_t); - ret = ompi_free_list_init_ex_new(&cm->ml_frags_free, - sizeof(mca_bcol_iboffload_frag_t), - MCA_IBOFFLOAD_CACHE_LINE_SIZE, - OBJ_CLASS(mca_bcol_iboffload_frag_t), - 0, MCA_IBOFFLOAD_CACHE_LINE_SIZE, - cm->free_list_num * 2, - cm->free_list_max * 2, - cm->free_list_inc * 2, - NULL, - mca_bcol_iboffload_ml_frag_init, - NULL); - if (OMPI_SUCCESS != ret) { - IBOFFLOAD_ERROR(("Error creating free list, error: %s\n", strerror(errno))); - goto release_collreq; - } - - ret = init_recv_wr_manager(&cm->recv_wrs); - if (OMPI_SUCCESS != ret){ - IBOFFLOAD_ERROR(("Failed to prepare recv wrs")); - goto release_tasks; - } - - cm->init_done = true; - - return OMPI_SUCCESS; - -release_tasks: - OBJ_DESTRUCT(&cm->tasks_free); -release_collreq: - OBJ_DESTRUCT(&cm->collreqs_free); -release_collfrag: - OBJ_DESTRUCT(&cm->collfrags_free); - return ret; -} - - -/* query to see if some modules are available for use on the given - * communicator, and if so, what it's priority is. - */ -mca_bcol_base_module_t ** -mca_bcol_iboffload_comm_query(mca_sbgp_base_module_t *sbgp, int *num_modules) -{ - /* local variables */ - int i, mq_index, rc, my_rank = 0; - struct mqe_context_attr mqe_attr; - - mca_sbgp_ibnet_module_t *ibnet = NULL; - mca_bcol_base_module_t **iboffload_modules = NULL; - mca_bcol_iboffload_module_t *iboffload_module = NULL; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - /* Bruck's alltoall iovec */ - size_t iovec_size; - - if (OPAL_UNLIKELY(false == cm->init_done)) { - if (OMPI_SUCCESS != component_first_usage()) { - return NULL; - } - } - - /* No group - no modules*/ - if (OPAL_UNLIKELY(NULL == sbgp)) { - return NULL; - } - /* - * This is activated only for intra-communicators - */ - if (OPAL_UNLIKELY(OMPI_COMM_IS_INTER(sbgp->group_comm))) { - return NULL; - } - - ibnet = (mca_sbgp_ibnet_module_t *) sbgp; - if (OPAL_UNLIKELY(0 == ibnet->num_cgroups)) { - /* we have no connection group */ - return NULL; - } - - my_rank = sbgp->my_index; - - iboffload_modules = (mca_bcol_base_module_t **) calloc - (ibnet->num_cgroups, sizeof(mca_bcol_base_module_t *)); - if (OPAL_UNLIKELY(NULL == iboffload_modules)) { - return NULL; - } - - /* Go through list of connection groups that we have on ibnet - * and create bcol module for each one */ - *num_modules = 0; - for (i = 0; i < ibnet->num_cgroups; i++) { - mca_sbgp_ibnet_connection_group_info_t *cgroup = - &ibnet->cgroups[i]; - - iboffload_module = OBJ_NEW(mca_bcol_iboffload_module_t); - - iboffload_modules[i] = &(iboffload_module->super); - - /* - * In fact the value == ibnet->num_cgroups in the end - * of the loop, but we need always to know how many modules - * release in the error case (under CLEANUP label) - */ - - (*num_modules)++; - - iboffload_module->cgroup_index = i; - iboffload_module->group_size = ibnet->super.group_size; - iboffload_module->log_group_size = lognum(iboffload_module->group_size); - /* Put pointer to sbgp module */ - iboffload_module->super.sbgp_partner_module = sbgp; - /* Put cgroup information on module */ - iboffload_module->ibnet = ibnet; - - iboffload_module->device = opal_pointer_array_get_item(&cm->devices, cgroup->device_index); - - IBOFFLOAD_VERBOSE(10, 
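[Editor's note: component_first_usage() above tears down partially built state with the classic C unwind ladder -- one label per constructed object, with failures jumping to the deepest label reached. The shape of that idiom, reduced to a generic sketch:]

    #include <stdlib.h>

    static int build_three(void **a, void **b, void **c)
    {
        if (NULL == (*a = malloc(16))) goto fail_a;
        if (NULL == (*b = malloc(16))) goto fail_b;
        if (NULL == (*c = malloc(16))) goto fail_c;
        return 0;          /* everything constructed */

    fail_c:
        free(*b);
    fail_b:
        free(*a);
    fail_a:
        return -1;         /* nothing leaked on any path */
    }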
("Iboffload module - %p uses " - "device - %p with index - %d.\n", - iboffload_module, - iboffload_module->device->dev.ib_dev, - cgroup->device_index)); - - OBJ_RETAIN(iboffload_module->device); - /* Pasha: Need to print NICE error in future */ - assert(NULL != iboffload_module->device); - iboffload_module->port = cgroup->port; - - IBOFFLOAD_VERBOSE(10, ("Iboffload module - %p on local port %d.\n", - iboffload_module, iboffload_module->port)); - - if (OPAL_UNLIKELY(!iboffload_module->device->activated)) { - /* this device was never used before, need to activate it */ - if (OMPI_SUCCESS != iboffload_start_device(iboffload_module->device)) { - OBJ_RELEASE(iboffload_module->device); - goto CLEANUP; - } - } - /* Set pointer to network contest on bcol base, we need it for ml - memory managment */ - OBJ_RETAIN(iboffload_module->device->net_context); - iboffload_module->super.network_context = iboffload_module->device->net_context; - - iboffload_module->subnet_id = iboffload_module->device->ports[iboffload_module->port - 1].subnet_id; - iboffload_module->lid = iboffload_module->device->ports[iboffload_module->port - 1].lid; - - load_func(&iboffload_module->super); - - IBOFFLOAD_VERBOSE(10, ("Call for create endpoints for iboffload module %p," - " cgroup num (index) %d.\n", iboffload_module, i)); - - /* create endpoints and store its in the endpoints pointer of iboffload_module structer */ - if (OMPI_SUCCESS != - mca_bcol_iboffloads_create_endpoints(cgroup, iboffload_module)) { - goto CLEANUP; - } - - memset(&mqe_attr, 0, sizeof(mqe_attr)); - mqe_attr.max_mqe_tasks = (uint32_t)mca_bcol_iboffload_component.max_mqe_tasks; - mqe_attr.max_mq_size = (uint32_t)mca_bcol_iboffload_component.max_mq_size; - mqe_attr.cq = iboffload_module->device->ib_mq_cq; - - /* ALL MQs have the same configuration */ - for (mq_index = 0; mq_index < BCOL_IBOFFLOAD_MQ_NUM; mq_index++) { - iboffload_module->mq[mq_index] = - mqe_context_create(iboffload_module->device->dev.ib_dev_context, - iboffload_module->device->ib_pd, &mqe_attr); - if (OPAL_UNLIKELY(NULL == iboffload_module->mq[mq_index])) { - IBOFFLOAD_ERROR(("Error creating MQ for device (%s), error: %s\n", - ibv_get_device_name(iboffload_module->device->dev.ib_dev), strerror(errno))); - goto CLEANUP; - } - } - - /* Barrier initialization - recuresive doubling */ -#if 1 - if (OMPI_SUCCESS != - netpatterns_setup_recursive_doubling_tree_node( - iboffload_module->group_size, my_rank, - &iboffload_module->recursive_doubling_tree)) { - IBOFFLOAD_ERROR(("Failed to setup recursive doubling tree," - " error: %s\n", strerror(errno))); - goto CLEANUP; - } -#endif - - /* Barrier initialization - N exchange tree */ - if (OMPI_SUCCESS != - netpatterns_setup_recursive_doubling_n_tree_node( - iboffload_module->group_size, my_rank, - cm->exchange_tree_order, - &iboffload_module->n_exchange_tree)) { - IBOFFLOAD_ERROR(("Failed to setup recursive doubling tree," - " error: %s\n", strerror(errno))); - goto CLEANUP; - } - - - /* Recursive K-ing initialization - Knomial exchange tree */ - if (OMPI_SUCCESS != - netpatterns_setup_recursive_knomial_tree_node( - iboffload_module->group_size, my_rank, - cm->knomial_tree_order, - &iboffload_module->knomial_exchange_tree)) { - IBOFFLOAD_ERROR(("Failed to setup recursive Knomial tree," - " error: %s\n", strerror(errno))); - goto CLEANUP; - } - - /* Manju Brucks alltoall temp iovec list */ - iovec_size = iboffload_module->group_size / 2 + iboffload_module->group_size % 2; - iboffload_module->alltoall_iovec = (struct iovec *) malloc(sizeof(struct 
iovec) - * iovec_size); - iboffload_module->alltoall_recv_iovec = (struct iovec *) malloc(sizeof(struct iovec) - * iovec_size); - - - iboffload_module->k_alltoall_bruck_radix=cm->k_alltoall_bruck_radix; - iboffload_module->tmp_buf_alignment=cm->tmp_buf_alignment; - -#if 1 /* Disabling this code since it brakes all iboffload functionality */ - /* Sorry Pasha, gotta do this. Recursive K-ing allgather initialization - Knomial exchange tree */ - /*Pretty sure I need to pass in the communicator rank */ - /* I need to reindex this mess */ - /* this looks silly, I know but it allows for minimal changes to existing code */ - iboffload_module->comm_to_ibnet_map = sbgp->group_list; - - -#endif -#if 0 - if ( NULL == iboffload_module->comm_to_ibnet_map ) { - IBOFFLOAD_ERROR(("Out of resources\n")); - goto CLEANUP; - } - for( i = 0; i < iboffload_module->group_size; i++) { - int j = 0; - while( sbgp->group_list[j] != i){ - j++; - } - iboffload_module->comm_to_ibnet_map[i] = j; - } - /* that should take care of that */ - if (OMPI_SUCCESS != - netpatterns_setup_recursive_knomial_allgather_tree_node( - iboffload_module->group_size, sbgp->group_list[my_rank], - cm->k_nomial_radix, iboffload_module->super.list_n_connected, - &iboffload_module->knomial_allgather_tree)) { - IBOFFLOAD_ERROR(("Failed to setup recursive Knomial tree," - " error: %s\n", strerror(errno))); - goto CLEANUP; - } -#endif - - iboffload_module->power_of_2 = - mca_bcol_iboffload_fls(iboffload_module->num_endpoints); - iboffload_module->power_of_2_ranks = - (1 << iboffload_module->power_of_2); - - /* header into ml buffer, we don't support header for anyone other than shared memory - * at the moment - */ - iboffload_module->super.header_size = 0; - - iboffload_module->super.supported_mode = MCA_BCOL_BASE_ZERO_COPY | - MCA_BCOL_BASE_NO_ML_BUFFER_FOR_LARGE_MSG | - MCA_BCOL_BASE_NO_ML_BUFFER_FOR_BARRIER; - - rc = mca_bcol_base_bcol_fns_table_init(&(iboffload_module->super)); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - goto CLEANUP; - } - - OBJ_CONSTRUCT(&iboffload_module->iovec_tasks_free, ompi_free_list_t); - rc = ompi_free_list_init_ex_new(&iboffload_module->iovec_tasks_free, - sizeof(mca_bcol_iboffload_task_t), - MCA_IBOFFLOAD_CACHE_LINE_SIZE, - OBJ_CLASS(mca_bcol_iboffload_task_t), - 0, MCA_IBOFFLOAD_CACHE_LINE_SIZE, - cm->free_list_num * 2, - cm->free_list_max * 2, - cm->free_list_inc * 2, - NULL, - mca_bcol_iboffload_iovec_task_init, - iboffload_module); - if (OMPI_SUCCESS != rc) { - IBOFFLOAD_ERROR(("Error creating free list, error: %s\n", strerror(errno))); - goto CLEANUP; - } - } - - IBOFFLOAD_VERBOSE(10, ("Finished with success, num of cgroups is %d, num of modules is %d.\n", - ibnet->num_cgroups, *num_modules)); - - return iboffload_modules; - -CLEANUP: - for (i = 0; i < *num_modules; i++) { - if (NULL != iboffload_modules[i]) { - OBJ_RELEASE(iboffload_modules[i]); - } - } - free(iboffload_modules); - return NULL; -} - -static int init_rdma_buf_desc(mca_bcol_iboffload_rdma_buffer_desc_t **desc, void *base_addr, uint32_t num_banks, - uint32_t num_buffers_per_bank, uint32_t size_buffer, uint32_t header_size) -{ - uint32_t i, j, ci; - mca_bcol_iboffload_rdma_buffer_desc_t *tmp_desc; - - IBOFFLOAD_VERBOSE(10, ("init_rdma_buf_desc base addr %p, num_n %d , " - "num_per_bank %d, size %d, header size %d", - base_addr, num_banks, num_buffers_per_bank, - size_buffer, header_size)); - *desc = (mca_bcol_iboffload_rdma_buffer_desc_t *) - calloc(num_banks * num_buffers_per_bank, - sizeof(mca_bcol_iboffload_rdma_buffer_desc_t)); - if 
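[Editor's note: the power_of_2 fields above come from mca_bcol_iboffload_fls(), whose definition lives elsewhere in this tree; below is an illustrative floor-log2 equivalent. The original's exact rounding may differ -- this version is an assumption.]

    /* floor(log2(n)) for n > 0, e.g. 12 endpoints -> 3, so 1 << 3 = 8 ranks. */
    static int floor_log2(unsigned int n)
    {
        int p = 0;
        while (n >>= 1) {
            ++p;
        }
        return p;
    }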
(OPAL_UNLIKELY(NULL == *desc)) {
- IBOFFLOAD_ERROR(("Failed to allocate memory"));
- return OMPI_ERROR;
- }
-
- tmp_desc = *desc;
-
- for (i = 0; i < num_banks; i++) {
- for (j = 0; j < num_buffers_per_bank; j++) {
- ci = i * num_buffers_per_bank + j;
- tmp_desc[ci].generation_number = 0;
- tmp_desc[ci].bank_index = i;
- tmp_desc[ci].buffer_index = j;
- /*
- * iboffload doesn't have any header, but other bcols may. So
- * we need to take it into account.
- */
- tmp_desc[ci].data_addr = (void *)
- ((unsigned char *) base_addr + ci * size_buffer + header_size);
- IBOFFLOAD_VERBOSE(10, ("RDMA setup %d %d - %p", i, j, tmp_desc[ci].data_addr));
- }
- }
-
- return OMPI_SUCCESS;
-}
-
-static int set_endpoint_remote_rdma_info(mca_bcol_iboffload_endpoint_t *ep, mca_bcol_iboffload_rdma_info_t *remote_rdma_info)
-{
- mca_bcol_iboffload_rem_rdma_block_t *rem_block = &ep->remote_rdma_block;
-
- /* We'll continue only if:
-
- 1. The module rdma_block has already been initialized at this stage.
- 2. All peers have the same rdma block configuration, which is actually
- defined at the ML level.
-
- Otherwise, set a flag so it is initialized later.
- */
- if (NULL == ep->iboffload_module->rdma_block.ml_mem_desc) {
- IBOFFLOAD_VERBOSE(10, ("RDMA block information hasn't been initialized yet."));
- ep->need_toset_remote_rdma_info = true;
- return OMPI_SUCCESS;
- }
-
- /* set the rdma addr for barrier */
- ep->remote_zero_rdma_addr = remote_rdma_info[0];
-
- IBOFFLOAD_VERBOSE(10, ("RDMA block information %p %d",
- remote_rdma_info[0].addr, remote_rdma_info[0].rkey));
-
- /* set the rdma block memory structs */
- rem_block->ib_info = remote_rdma_info[1];
-
-
- /* if we got some real data, let's init the memory address structures */
- if (0 != rem_block->ib_info.addr) {
- if (OMPI_SUCCESS != init_rdma_buf_desc(&rem_block->rdma_desc, (void *)rem_block->ib_info.addr,
- ep->iboffload_module->rdma_block.bdesc.num_banks,
- ep->iboffload_module->rdma_block.bdesc.num_buffers_per_bank,
- ep->iboffload_module->rdma_block.bdesc.size_buffer,
- /* remember, we use lkey to pass the data offset value */
- rem_block->ib_info.lkey)) {
- IBOFFLOAD_VERBOSE(10, ("Failed to allocate RDMA buffer descriptor"));
- return OMPI_ERROR;
- }
- }
-
- IBOFFLOAD_VERBOSE(10, ("endpoint - %p, recv barrier rdma: rem addr - %p, rem rkey - %d.\n",
- ep, ep->remote_zero_rdma_addr.addr, ep->remote_zero_rdma_addr.rkey));
- IBOFFLOAD_VERBOSE(10, ("endpoint - %p, recv ml rdma: rem addr - %p, rem rkey - %d.\n",
- ep, ep->remote_rdma_block.ib_info.addr, ep->remote_rdma_block.ib_info.rkey));
-
- return OMPI_SUCCESS;
-}
-
-static int unpack_endpoint_rdma_addr(void *callback_data)
-{
- int rc;
- struct iovec payload_iovec;
-
- size_t max_size = 0;
- uint32_t out_size = 1;
-
- mca_bcol_iboffload_collfrag_t *coll_frag = (mca_bcol_iboffload_collfrag_t *) callback_data;
- mca_bcol_iboffload_collreq_t* collreq = coll_frag->coll_full_req;
-
- mca_bcol_iboffload_task_t *wait_task = (mca_bcol_iboffload_task_t *) coll_frag->signal_task_wr_id;
-
- mca_bcol_iboffload_frag_t *recv_frag = wait_task->frag;
- mca_bcol_iboffload_endpoint_t *ep = wait_task->endpoint;
-
- rc = opal_convertor_copy_and_prepare_for_recv(
- ompi_mpi_local_convertor,
- &opal_datatype_uint1,
- sizeof(mca_bcol_iboffload_rdma_info_t) * MAX_REMOTE_RDMA_INFO,
- ep->remote_rdma_info, 0,
- &collreq->recv_convertor);
- if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
- return OMPI_ERROR;
- }
-
- payload_iovec.iov_base = (void*) (uintptr_t)
- recv_frag->sg_entry.addr;
-
- payload_iovec.iov_len = sizeof(mca_bcol_iboffload_rdma_info_t) * 
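[Editor's note: the descriptor table built by init_rdma_buf_desc() above is plain row-major addressing over (bank, buffer) pairs; the equivalent lookup as a standalone sketch, field names borrowed from the struct:]

    #include <stdint.h>
    #include <stddef.h>

    /* Buffer (bank i, slot j) sits at a fixed offset from the block base;
     * header_size skips space that other bcols may reserve before the data. */
    static inline void *rdma_buf_addr(void *base, uint32_t i, uint32_t j,
                                      uint32_t bufs_per_bank,
                                      uint32_t size_buffer, uint32_t header_size)
    {
        uint32_t ci = i * bufs_per_bank + j;   /* flat index */
        return (unsigned char *) base + (size_t) ci * size_buffer + header_size;
    }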
MAX_REMOTE_RDMA_INFO; - - if (0 > opal_convertor_unpack(&collreq->recv_convertor, - &payload_iovec, &out_size, &max_size)) { - return OMPI_ERROR; - } - - if (OMPI_SUCCESS != set_endpoint_remote_rdma_info(ep, ep->remote_rdma_info)) { - return OMPI_ERROR; - } - - opal_convertor_cleanup(&collreq->send_convertor); - opal_convertor_cleanup(&collreq->recv_convertor); - - return OMPI_SUCCESS; -} - -/* RDMA addr exchange with rem proc */ -int mca_bcol_iboffload_exchange_rem_addr(mca_bcol_iboffload_endpoint_t *ep) -{ - int rc; - /* the [0] used for constant barrier rdma operations - the [1] used for rdma block inforation exchange. The rdma - block is used for RDMA operation over ML allocated memory */ - mca_bcol_iboffload_rdma_info_t remote_rdma_addr[MAX_REMOTE_RDMA_INFO]; - - mca_bcol_iboffload_task_t *send_task, - *wait_task; - - mca_bcol_iboffload_frag_t *send_fragment, - *preposted_recv_frag; - - ompi_free_list_item_t *item; - - mca_bcol_iboffload_collreq_t *coll_request; - mca_bcol_iboffload_collfrag_t *coll_fragment; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - OMPI_FREE_LIST_WAIT_MT(&cm->collreqs_free, item); - if (NULL == item) { - IBOFFLOAD_ERROR(("Failing for coll request free list waiting.\n")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - coll_request = (mca_bcol_iboffload_collreq_t *) item; - - coll_request->completion_cb_fn = unpack_endpoint_rdma_addr; - /* For the exchange the progress_fn should be never used */ - coll_request->progress_fn = NULL; - coll_request->module = ep->iboffload_module; - coll_request->ml_buffer_index = MCA_COLL_ML_NO_BUFFER; - coll_request->buffer_info[SBUF].offset = 0; - coll_request->buffer_info[RBUF].offset = 0; - coll_request->qp_index = MCA_BCOL_IBOFFLOAD_QP_REGULAR; - /* - * setup collective work request - */ - - /* get collective frag */ - coll_fragment = &coll_request->first_collfrag; - mca_bcol_iboffload_collfrag_init(coll_fragment); - - coll_fragment->mq_credits = 2; - coll_fragment->mq_index = COLL_MQ; - coll_fragment->tail_next = &coll_fragment->to_post; - /* overwrite mq index to run over service setup */ - - /* Update the algorithm type in order to support credit mechanism */ - coll_fragment->alg = REMOTE_EXCHANGE_ALG; - if (OPAL_UNLIKELY(false == - BCOL_IBOFFLOAD_MQ_HAVE_CREDITS(ep->iboffload_module, - coll_fragment->mq_index, 2))) { - IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n")); - - goto out_of_resources; - } - - /* set pointers for (coll frag) <-> (coll full request) */ - MCA_BCOL_IBOFFLOAD_SET_COLL_REQ_LINKS(coll_request, coll_fragment); - - remote_rdma_addr[0].addr = - ep->iboffload_module->device->dummy_frags[MCA_BCOL_IBOFFLOAD_QP_BARRIER].sg_entry.addr; - remote_rdma_addr[0].rkey = - ep->iboffload_module->device->dummy_frags[MCA_BCOL_IBOFFLOAD_QP_BARRIER].registration->mr->rkey; - - if (NULL != ep->iboffload_module->rdma_block.ml_mem_desc) { - remote_rdma_addr[1].addr = ep->iboffload_module->rdma_block.ib_info.addr; - remote_rdma_addr[1].rkey = ep->iboffload_module->rdma_block.ib_info.rkey; - /* Little bit ugly, but easy solution. 
The data_offset */ - remote_rdma_addr[1].lkey = ep->iboffload_module->rdma_block.bdesc.data_offset; - } else { - /* since it is no data lets send 0, so remote side will knox that no real - data was send */ - remote_rdma_addr[1].addr = 0; - remote_rdma_addr[1].rkey = 0; - remote_rdma_addr[1].lkey = 0; - } - - IBOFFLOAD_VERBOSE(10, ("endpoint - %p, sending barrier rdma: addr - %p, rkey - %d.\n", - ep, remote_rdma_addr[0].addr, remote_rdma_addr[0].rkey)); - IBOFFLOAD_VERBOSE(10, ("endpoint - %p, sending ml rdma: addr - %p, rkey - %d.\n", - ep, remote_rdma_addr[1].addr, remote_rdma_addr[1].rkey)); - - rc = opal_convertor_copy_and_prepare_for_send( - ompi_mpi_local_convertor, - &opal_datatype_uint1, - sizeof(mca_bcol_iboffload_rdma_info_t) * MAX_REMOTE_RDMA_INFO, - &remote_rdma_addr, 0, - &coll_request->send_convertor); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - goto out_of_resources; - } - - send_fragment = mca_bcol_iboffload_get_send_frag( - coll_request, ep->index, coll_request->qp_index, - sizeof(mca_bcol_iboffload_rdma_info_t) * MAX_REMOTE_RDMA_INFO, - 0, SBUF, MCA_BCOL_IBOFFLOAD_SEND_FRAG_CONVERT); - if (OPAL_UNLIKELY(NULL == send_fragment)) { - IBOFFLOAD_ERROR(("Failing for getting and packing send frag.\n")); - goto out_of_resources; - } - - send_task = mca_bcol_iboffload_get_send_task(ep->iboffload_module, - ep->index, coll_request->qp_index, send_fragment, - coll_fragment, INLINE); - if (OPAL_UNLIKELY(NULL == send_task)) { - IBOFFLOAD_ERROR(("Failing for getting send task.\n")); - goto out_of_resources; - } - - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task); - MCA_BCOL_IBOFFLOAD_APPEND_MQ_TASK_TO_LIST(coll_fragment->tail_next, send_task); - - /* post wait */ - preposted_recv_frag = mca_bcol_iboffload_get_preposted_recv_frag( - ep->iboffload_module, ep->index, coll_request->qp_index); - if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) { - IBOFFLOAD_ERROR(("Exchaging: " - "Failing for getting prepost recv frag.\n")); - goto out_of_resources; - } - - wait_task = mca_bcol_iboffload_get_wait_task(ep->iboffload_module, - ep->index, 1, preposted_recv_frag, coll_request->qp_index, NULL); - - if (OPAL_UNLIKELY(NULL == wait_task)) { - IBOFFLOAD_VERBOSE(10, ("Exchanging: " - "Failing for getting wait task.\n")); - goto out_of_resources; - } - - MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task); - MCA_BCOL_IBOFFLOAD_APPEND_MQ_TASK_TO_LIST(coll_fragment->tail_next, wait_task); - - /* The last element must end with ZERO */ - wait_task->element.next = NULL; - - /* number of sends that need to be completed asynchronously */ - coll_fragment->n_sends = 1; - SENDWR(send_task)->send_flags |= IBV_SEND_SIGNALED; - - /* finish initializing full message descriptor */ - coll_request->n_fragments = 1; - coll_request->n_frags_sent = 1; - - coll_request->n_frag_mpi_complete = 0; - coll_request->n_frag_net_complete = 0; - coll_request->user_handle_freed = false; - - wait_task->element.flags |= MQE_WR_FLAG_SIGNAL; - coll_fragment->signal_task_wr_id = - (uint64_t) (uintptr_t) wait_task->element.wr_id; - - wait_task->element.wr_id = (uint64_t) (uintptr_t) coll_fragment; - - /* post the mwr */ - rc = mca_bcol_iboffload_post_mqe_tasks(coll_request->module, coll_fragment->to_post); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n")); - /* Note: need to clean up */ - return rc; - } - - coll_request->user_handle_freed = true; - /* complete the exchange - progress releases full request descriptors */ - while 
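[Editor's note: the exchange above ships two fixed {addr, rkey, lkey} records, reusing the second record's lkey field to carry data_offset, with all-zero fields meaning "no ML block yet". A sketch of that convention; the struct name is hypothetical, mirroring mca_bcol_iboffload_rdma_info_t.]

    #include <stdint.h>
    #include <string.h>

    typedef struct xchg_rdma_rec {
        uint64_t addr;   /* 0 => no real data behind this record */
        uint32_t rkey;
        uint32_t lkey;   /* record [1] only: smuggles the data offset */
    } xchg_rdma_rec_t;

    static void fill_ml_record(xchg_rdma_rec_t *rec, uint64_t addr,
                               uint32_t rkey, uint32_t data_offset)
    {
        if (0 != addr) {
            rec->addr = addr;
            rec->rkey = rkey;
            rec->lkey = data_offset;       /* deliberately overloaded, as above */
        } else {
            memset(rec, 0, sizeof(*rec));  /* tells the peer nothing was sent */
        }
    }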
(!BCOL_IS_COMPLETED(coll_request)) { - mca_bcol_iboffload_component_progress(); - } - - IBOFFLOAD_VERBOSE(10, ("RDMA addr exchange with comm rank: %d was finished.\n", - ep->iboffload_module->ibnet->super.group_list[ep->index])); - - return OMPI_SUCCESS; - -out_of_resources: - /* Release all resources */ - IBOFFLOAD_VERBOSE(10, ("RDMA addr exchange, adding collfrag to collfrag_pending.\n")); - return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, ep->iboffload_module); -} diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_qp_info.c b/ompi/mca/bcol/iboffload/bcol_iboffload_qp_info.c deleted file mode 100644 index bfc5e4fbbf..0000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_qp_info.c +++ /dev/null @@ -1,452 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#include -#include - -#include "bcol_iboffload_frag.h" -#include "bcol_iboffload_device.h" -#include "bcol_iboffload_qp_info.h" -#include "bcol_iboffload_collreq.h" -#include "bcol_iboffload_endpoint.h" - -static int mca_bcol_iboffload_dummy_frag_qp_prepost( - mca_bcol_iboffload_endpoint_t *endpoint, - int qp_index, int num_to_prepost) -{ - struct ibv_recv_wr *recv_wr, *recv_bad; - int ret, num_preposted = 0, start_wr_index; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - mca_bcol_iboffload_recv_wr_manager *recv_wrs = &cm->recv_wrs; - - IBOFFLOAD_VERBOSE(10, ("Recv prepost call: endpoint %p, to prepost %d", - (void *) endpoint, num_to_prepost)); - - if (OPAL_UNLIKELY(0 == num_to_prepost)) { - IBOFFLOAD_VERBOSE(10, ("num_to_prepost = 0, return immediate")); - return OMPI_SUCCESS; - } - - /* make sure that we do not overrun number of rd_wqe */ - if (num_to_prepost > endpoint->qps[qp_index].rd_wqe) { - IBOFFLOAD_VERBOSE(10, ("Reset num_to_prepost = %d, to rd_wqe = %d", - num_to_prepost, endpoint->qps[qp_index].rd_wqe)); - - num_to_prepost = endpoint->qps[qp_index].rd_wqe; - } - - OPAL_THREAD_LOCK(&recv_wrs->lock); - - /* calculate start index in array - * of pre-allocated work requests */ - start_wr_index = cm->qp_infos[qp_index].rd_num - num_to_prepost; - recv_wr = &recv_wrs->recv_work_requests[qp_index][start_wr_index]; - - IBOFFLOAD_VERBOSE(10, ("Endpoint %p, qp_index - %d, to_porepost %d, " - "start index of WRs - %d, rd_wqe - %d", - (void *) endpoint, qp_index, num_to_prepost, - start_wr_index, endpoint->qps[qp_index].rd_wqe)); - - while (num_preposted < num_to_prepost) { - /* prepost the special barrier frag to recv queue */ - struct ibv_sge *dummy_sg_entry = - &endpoint->iboffload_module->device->dummy_frags[qp_index].sg_entry; - - recv_wr[num_preposted].sg_list = dummy_sg_entry; - ++num_preposted; - } - - if (OPAL_LIKELY(num_preposted > 0)) { - /* Set the tail */ - recv_wr[num_preposted - 1].next = NULL; - - /* post the list of recvs */ - ret = ibv_post_recv(endpoint->qps[qp_index].qp->lcl_qp, recv_wr, &recv_bad); - if (OPAL_UNLIKELY(0 != ret)) { - IBOFFLOAD_ERROR(("ibv_post_recv failed, error: %s [%d], " - "qp_index - %d.\n", strerror(errno), ret, qp_index)); - - return OMPI_ERROR; - } - - /* recover last recv_wr if needed */ - if (OPAL_UNLIKELY(num_to_prepost != num_preposted)) { - recv_wr[num_preposted - 1].next = 
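[Editor's note: both prepost paths in this file share the same window arithmetic -- clamp the request to the currently free receive WQEs, then enter the pre-linked WR array at an index chosen so its existing ->next chain runs out exactly at the array's tail. A sketch with made-up numbers: rd_num = 256 and rd_wqe = 40 turn a request for 64 into 40 posts starting at index 216.]

    static int clamp_and_start(int rd_num, int rd_wqe, int *num_to_prepost)
    {
        if (*num_to_prepost > rd_wqe) {
            *num_to_prepost = rd_wqe;    /* never overrun the free WQEs */
        }
        return rd_num - *num_to_prepost; /* start_wr_index into the chain */
    }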
&recv_wr[num_preposted]; - } - - /* decresing numbers of free recv wqe */ - endpoint->qps[qp_index].rd_wqe -= num_preposted; - } - - OPAL_THREAD_UNLOCK(&recv_wrs->lock); - - IBOFFLOAD_VERBOSE(10, ("Endpoint %p, to_porepost %d, num preposted - %d, qp_index - %d", - (void *) endpoint, num_to_prepost, num_preposted, qp_index)); - - return OMPI_SUCCESS; -} - -/* - * Receive prepost: - * return values: - * 0 - no prepost was done - * -1 - fatal error during prepost - * other value - number preposted elements - */ -static int mca_bcol_iboffload_frag_reg_qp_prepost( - mca_bcol_iboffload_endpoint_t *endpoint, - int qp_index, int num_to_prepost) -{ - ompi_free_list_item_t *item; - mca_bcol_iboffload_frag_t *frag; - - struct ibv_recv_wr *recv_wr, *recv_bad; - int i, ret, num_preposted = 0, start_wr_index; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - mca_bcol_iboffload_device_t *device = endpoint->iboffload_module->device; - - opal_list_t *preposted = &(endpoint->qps[qp_index].preposted_frags); - mca_bcol_iboffload_recv_wr_manager *recv_wrs = &cm->recv_wrs; - - IBOFFLOAD_VERBOSE(10, ("Recv prepost call: endpoint %p, to prepost %d", - (void *) endpoint, num_to_prepost)); - - if (OPAL_UNLIKELY(0 == num_to_prepost)) { - IBOFFLOAD_VERBOSE(10, ("num_to_prepost = 0, return immediate")); - return OMPI_SUCCESS; - } - - /* make sure that we do not overrun number of rd_wqe */ - if (num_to_prepost > endpoint->qps[qp_index].rd_wqe) { - IBOFFLOAD_VERBOSE(10, ("Reset num_to_prepost = %d, to rd_wqe = %d", - num_to_prepost, endpoint->qps[qp_index].rd_wqe)); - - num_to_prepost = endpoint->qps[qp_index].rd_wqe; - } - - OPAL_THREAD_LOCK(&recv_wrs->lock); - - /* calculate start index in array - * of pre-allocated work requests */ - start_wr_index = cm->qp_infos[qp_index].rd_num - num_to_prepost; - recv_wr = &recv_wrs->recv_work_requests[qp_index][start_wr_index]; - - IBOFFLOAD_VERBOSE(10, ("Endpoint %p, qp_index - %d, to_porepost %d, " - "start index of WRs - %d, rd_wqe - %d", - (void *) endpoint, qp_index, num_to_prepost, - start_wr_index, endpoint->qps[qp_index].rd_wqe)); - - while (num_preposted < num_to_prepost) { - /* put the item on list of preposted */ - OMPI_FREE_LIST_GET_MT(&device->frags_free[qp_index], item); - if (OPAL_UNLIKELY(NULL == item)) { - break; - } - - frag = (mca_bcol_iboffload_frag_t *) item; - opal_list_append(preposted, (opal_list_item_t *) item); - - recv_wr[num_preposted].sg_list = &frag->sg_entry; - /* TODO Pasha - fix it later */ /* Vasily: Is it right place to take a size value ???? 
*/ - frag->sg_entry.length = cm->qp_infos[qp_index].size; - ++num_preposted; - } - - if (OPAL_LIKELY(num_preposted > 0)) { - /* Set the tail */ - recv_wr[num_preposted - 1].next = NULL; - - /* post the list of recvs */ - ret = ibv_post_recv(endpoint->qps[qp_index].qp->lcl_qp, recv_wr, &recv_bad); - if (OPAL_UNLIKELY(0 != ret)) { - IBOFFLOAD_ERROR(("ibv_post_recv failed (%s), error: %s [%d], " - "qp_index - %d.\n", - ibv_get_device_name(device->dev.ib_dev), - strerror(errno), ret, qp_index)); - - /* Return allocated frags */ - for (i = 0; i < num_preposted; i++) { - OMPI_FREE_LIST_RETURN_MT(&device->frags_free[qp_index], - (ompi_free_list_item_t *) - opal_list_remove_last(preposted)); - } - - return OMPI_ERROR; - } - - /* recover last recv_wr if needed */ - if (OPAL_UNLIKELY(num_to_prepost != num_preposted)) { - recv_wr[num_preposted - 1].next = &recv_wr[num_preposted]; - } - - /* decresing numbers of free recv wqe */ - endpoint->qps[qp_index].rd_wqe -= num_preposted; - } - - OPAL_THREAD_UNLOCK(&recv_wrs->lock); - - IBOFFLOAD_VERBOSE(10, ("Endpoint %p, to_porepost %d, num preposted - %d", - (void *) endpoint, num_to_prepost, num_preposted)); - - return OMPI_SUCCESS; -} - - -static void mca_bcol_iboffload_fillin_qp_attr(int qp_index, - mca_bcol_iboffload_endpoint_t *ep, - ompi_common_ofacm_base_qp_config_t *qp_config) -{ - uint32_t max_sge, *init_attr_mask = - &qp_config->init_attr_mask[qp_index]; - - struct ibv_qp_attr *attr = &qp_config->attr[qp_index]; - struct ibv_qp_init_attr *init_attr = &qp_config->init_attr[qp_index]; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - /* Set special init attributes mask */ - *init_attr_mask = IBV_M_QP_EXT_CLASS_1 | - IBV_M_QP_EXT_CLASS_2 | - IBV_M_QP_EXT_IGNORE_RQ_OVERFLOW; - - /* Set init attributes */ - init_attr->qp_type = IBV_QPT_RC; - -/* Vasily: ?????? - init_attr->cap.max_inline_data = - max_inline_size(qp, iboffload_module->device); -*/ - /* Pasha: we can not leave max_inline empty ! - Todo: copy max_inline_size() from ofacm to - common area. - */ - init_attr->cap.max_inline_data = (int32_t) cm->max_inline_data; - - /* We allocate SG list for some algorithms (Bruck's alltoall) */ - max_sge = ep->iboffload_module->group_size / 2 + - ep->iboffload_module->group_size % 2; - - /* max send sge should be less than device maximums */ - if (max_sge > (uint32_t) - ep->iboffload_module->device->ib_dev_attr.max_sge) { - max_sge = (uint32_t) ep->iboffload_module->device->ib_dev_attr.max_sge; - } - - init_attr->cap.max_send_sge = max_sge; - init_attr->cap.max_recv_sge = max_sge; -/* Vasily: the value will be changed later */ -/* TODO Pasha: this is real crap */ - init_attr->cap.max_recv_wr = (uint32_t) cm->cq_size; - init_attr->cap.max_send_wr = (uint32_t) cm->cq_size; - - /* Set attributes */ - - /* attr->pkey_index = 0; */ /* Vasily: ????? */ - - attr->port_num = ep->iboffload_module->port; -/* Vasily: the value will be changed later */ - attr->path_mtu = (uint32_t)cm->mtu; - - attr->max_dest_rd_atomic = cm->max_rdma_dst_ops; - attr->min_rnr_timer = (uint32_t)cm->min_rnr_timer; - - attr->ah_attr.is_global = 0; - attr->ah_attr.sl = (uint32_t)cm->service_level; -/* Vasily: from struct mca_bcol_iboffload_port_t ????? 
*/ -/* - attr->ah_attr.src_path_bits = iboffload_module->src_path_bits; -*/ - attr->ah_attr.port_num = ep->iboffload_module->port; - /* JMS to be filled in later dynamically */ - attr->ah_attr.static_rate = 0; - /* RTS params */ - attr->timeout = (uint32_t)cm->timeout; - attr->retry_cnt = (uint32_t)cm->retry_count; - attr->rnr_retry = (uint32_t)cm->rnr_retry; - attr->max_rd_atomic = (uint32_t)cm->max_rdma_dst_ops; - - /* Init for local mca_bcol_iboffload_endpoint_qp_t qps structure - * that caches the qp information on endpoint */ - OBJ_CONSTRUCT(&ep->qps[qp_index].preposted_frags, opal_list_t); - - /* Pasha: Need to add function that will */ - ep->qps[qp_index].ib_inline_max = cm->max_inline_data; - /* TODO Pasha - this is crap too... we do not have info for sevice qps. Fix it later */ - - ep->qps[qp_index].sd_wqe = cm->qp_infos[qp_index].rd_num; - ep->qps[qp_index].rd_wqe = cm->qp_infos[qp_index].rd_num; - - IBOFFLOAD_VERBOSE(10, ("ep - %p, qp index - %d, num of rd_wqe - %d.", - ep, qp_index, ep->qps[qp_index].rd_wqe)); -} - -static int mca_bcol_iboffload_alloc_reg_qp_resource(int qp_index, mca_bcol_iboffload_device_t *device) -{ - int length; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - ompi_free_list_t *frags_free = &device->frags_free[qp_index]; - - OBJ_CONSTRUCT(frags_free, ompi_free_list_t); - length = cm->qp_infos[qp_index].size; - - IBOFFLOAD_VERBOSE(10, ("free list len %d\n", length)); - if (OMPI_SUCCESS != ompi_free_list_init_ex_new(frags_free, - sizeof(mca_bcol_iboffload_frag_t), MCA_IBOFFLOAD_CACHE_LINE_SIZE, - OBJ_CLASS(mca_bcol_iboffload_frag_t), - length, cm->buffer_alignment, - cm->free_list_num, - cm->free_list_max, - cm->free_list_inc, - device->mpool, - mca_bcol_iboffload_frag_init, - (void *) &cm->qp_infos[qp_index].qp_index)) { - IBOFFLOAD_ERROR(("Failed to allocate frags_free")); - return OMPI_ERROR; - } - - return OMPI_SUCCESS; -} -static int mca_bcol_iboffload_dealloc_reg_qp_resource(int qp_index, mca_bcol_iboffload_device_t *device) -{ - OBJ_DESTRUCT(&device->frags_free[qp_index]); - - return OMPI_SUCCESS; -} - -static mca_bcol_iboffload_frag_t *mca_bcol_iboffload_get_dummy_frag( - mca_bcol_iboffload_endpoint_t *ep, int qp_index) -{ - return &ep->iboffload_module->device->dummy_frags[qp_index]; -} - -static mca_bcol_iboffload_frag_t *mca_bcol_iboffload_endpoint_get_preposted_frag( - mca_bcol_iboffload_endpoint_t *ep, int qp_index) -{ - return (mca_bcol_iboffload_frag_t *) - opal_list_remove_first(&ep->qps[qp_index].preposted_frags); -} - -static void mca_bcol_iboffload_regular_qp_attr(int qp_index, - mca_bcol_iboffload_endpoint_t *ep, - ompi_common_ofacm_base_qp_config_t *qp_config) -{ - struct ibv_qp_init_attr *init_attr = &qp_config->init_attr[qp_index]; - - mca_bcol_iboffload_fillin_qp_attr(qp_index, ep, qp_config); - - init_attr->send_cq = ep->iboffload_module->device->ib_cq; - init_attr->recv_cq = ep->recv_cq[IBOFFLOAD_CQ_SMALL_MESSAGES]; -} - -static void mca_bcol_iboffload_large_buff_qp_attr(int qp_index, - mca_bcol_iboffload_endpoint_t *ep, - ompi_common_ofacm_base_qp_config_t *qp_config) -{ - struct ibv_qp_init_attr *init_attr = &qp_config->init_attr[qp_index]; - - mca_bcol_iboffload_fillin_qp_attr(qp_index, ep, qp_config); - - init_attr->send_cq = ep->iboffload_module->device->ib_cq; - init_attr->recv_cq = ep->recv_cq[IBOFFLOAD_CQ_LARGE_MESSAGES]; -} - -static void mca_bcol_iboffload_sync_qp_attr(int qp_index, - mca_bcol_iboffload_endpoint_t *ep, - ompi_common_ofacm_base_qp_config_t *qp_config) -{ - struct ibv_qp_init_attr 
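[Editor's note: mca_bcol_iboffload_fillin_qp_attr() above sizes the scatter/gather list for Bruck's alltoall as ceil(group_size / 2) and then clamps it to the HCA's max_sge. The same logic, isolated:]

    #include <stdint.h>

    static uint32_t alltoall_sge_count(int group_size, int device_max_sge)
    {
        uint32_t want = (uint32_t) (group_size / 2 + group_size % 2); /* ceil(n/2) */
        return (want > (uint32_t) device_max_sge)
                   ? (uint32_t) device_max_sge : want;
    }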
*init_attr = &qp_config->init_attr[qp_index]; - - mca_bcol_iboffload_fillin_qp_attr(qp_index, ep, qp_config); - - init_attr->send_cq = ep->iboffload_module->device->ib_cq; - init_attr->recv_cq = ep->recv_cq[IBOFFLOAD_CQ_SYNC]; -} - -static int mca_bcol_iboffload_setup_barrier_qp(mca_bcol_iboffload_qp_info_t* qp_info) -{ - qp_info->config_qp = mca_bcol_iboffload_regular_qp_attr; - qp_info->prepost_recv = mca_bcol_iboffload_dummy_frag_qp_prepost; - - qp_info->alloc_resource = NULL; - qp_info->dealloc_resource = NULL; - - qp_info->get_preposted_recv = mca_bcol_iboffload_get_dummy_frag; - - return OMPI_SUCCESS; -} - -static int mca_bcol_iboffload_setup_regular_qp(mca_bcol_iboffload_qp_info_t* qp_info) -{ - qp_info->config_qp = mca_bcol_iboffload_regular_qp_attr; - qp_info->prepost_recv = mca_bcol_iboffload_frag_reg_qp_prepost; - - qp_info->alloc_resource = mca_bcol_iboffload_alloc_reg_qp_resource; - qp_info->dealloc_resource = mca_bcol_iboffload_dealloc_reg_qp_resource; - - qp_info->get_preposted_recv = mca_bcol_iboffload_endpoint_get_preposted_frag; - - return OMPI_SUCCESS; -} - -static int mca_bcol_iboffload_setup_large_buff_qp(mca_bcol_iboffload_qp_info_t* qp_info) -{ - qp_info->config_qp = mca_bcol_iboffload_large_buff_qp_attr; - - qp_info->prepost_recv = NULL; /* We use "manual" ML frag preposting for this QP */ - qp_info->alloc_resource = NULL; - qp_info->dealloc_resource = NULL; - qp_info->get_preposted_recv = NULL; - - return OMPI_SUCCESS; -} - -static int mca_bcol_iboffload_setup_credit_qp(mca_bcol_iboffload_qp_info_t* qp_info) -{ - qp_info->config_qp = mca_bcol_iboffload_large_buff_qp_attr; - qp_info->prepost_recv = mca_bcol_iboffload_dummy_frag_qp_prepost; - - qp_info->alloc_resource = NULL; - qp_info->dealloc_resource = NULL; - - qp_info->get_preposted_recv = mca_bcol_iboffload_get_dummy_frag; - - return OMPI_SUCCESS; -} - -static int mca_bcol_iboffload_setup_sync_qp(mca_bcol_iboffload_qp_info_t* qp_info) -{ - qp_info->config_qp = mca_bcol_iboffload_sync_qp_attr; - qp_info->prepost_recv = mca_bcol_iboffload_dummy_frag_qp_prepost; - - qp_info->alloc_resource = NULL; - qp_info->dealloc_resource = NULL; - - qp_info->get_preposted_recv = mca_bcol_iboffload_get_dummy_frag; - - return OMPI_SUCCESS; -} - -mca_bcol_iboffload_setup_qps_fn_t setup_qps_fn[MCA_BCOL_IBOFFLOAD_QP_LAST] = { - mca_bcol_iboffload_setup_barrier_qp, /* MCA_BCOL_IBOFFLOAD_QP_BARRIER */ - mca_bcol_iboffload_setup_regular_qp, /* MCA_BCOL_IBOFFLOAD_QP_REGULAR */ - mca_bcol_iboffload_setup_sync_qp, /* MCA_BCOL_IBOFFLOAD_QP_SYNC */ - mca_bcol_iboffload_setup_credit_qp, /* MCA_BCOL_IBOFFLOAD_QP_CREDIT */ - mca_bcol_iboffload_setup_large_buff_qp, /* MCA_BCOL_IBOFFLOAD_QP_LARGE_BUFF */ - /* MCA_BCOL_IBOFFLOAD_QP_LAST */ -}; diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_qp_info.h b/ompi/mca/bcol/iboffload/bcol_iboffload_qp_info.h deleted file mode 100644 index e904e10888..0000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_qp_info.h +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/* - * In order to add a new QP you need to do next steps: - * - * 1) Add new index to enum with list of the all QPs, - * MCA_BCOL_IBOFFLOAD_QP_NEW_QP e.g. - * - * 2) In the setup_qps_fn array init MCA_BCOL_IBOFFLOAD_QP_NEW_QP - * index with your init func for this QP. 
- * - * 3) In the init func you added, initialize the following func pointers: - * a) config_qp - in this func you need to fill in the ibv_qp_init_attr - * structure that will be used for this QP's creation. - * - * b) prepost_recv - you have to specify this pointer if you want - * preposting to your new QP to be executed automatically. - * - * c) alloc_resource - will be called during device activation; - * if you need any device resource (a list of frags, for example) - * for your new QP, here is the right place to allocate it. - * - * d) dealloc_resource - if any resource was allocated dynamically - * by the alloc_resource func, destruct it in this func. - * - * e) get_preposted_recv - the function returns a preposted receive for a 'wait task'. - * - * f) If you don't need any of these funcs, initialize the appropriate pointer with NULL. - */ - -#ifndef MCA_BCOL_IBOFFLOAD_QP_INFO_H -#define MCA_BCOL_IBOFFLOAD_QP_INFO_H - -#include "ompi_config.h" - -BEGIN_C_DECLS - -/* forward declarations */ -struct mca_bcol_iboffload_device_t; -struct mca_bcol_iboffload_collreq_t; -struct mca_bcol_iboffload_qp_info_t; -struct mca_bcol_iboffload_endpoint_t; - -/* The list of all required QPs */ -enum { - MCA_BCOL_IBOFFLOAD_QP_BARRIER, - MCA_BCOL_IBOFFLOAD_QP_REGULAR, - MCA_BCOL_IBOFFLOAD_QP_SYNC, - MCA_BCOL_IBOFFLOAD_QP_CREDIT, - MCA_BCOL_IBOFFLOAD_QP_LARGE_BUFF, - MCA_BCOL_IBOFFLOAD_QP_LAST -}; - -typedef enum { - MCA_BCOL_IBOFFLOAD_PP_QP, - MCA_BCOL_IBOFFLOAD_SRQ_QP, - MCA_BCOL_IBOFFLOAD_XRC_QP -} mca_bcol_iboffload_qp_type_t; - -struct mca_bcol_iboffload_pp_qp_info_t { - int32_t rd_win; - int32_t rd_rsv; -}; typedef struct mca_bcol_iboffload_pp_qp_info_t mca_bcol_iboffload_pp_qp_info_t; - -struct mca_bcol_iboffload_srq_qp_info_t { - int32_t sd_max; -}; typedef struct mca_bcol_iboffload_srq_qp_info_t mca_bcol_iboffload_srq_qp_info_t; - -typedef int (*mca_bcol_iboffload_setup_qps_fn_t) (struct mca_bcol_iboffload_qp_info_t*); -typedef int (*mca_bcol_iboffload_prepost_qps_fn_t) - (struct mca_bcol_iboffload_endpoint_t *endpoint, - int qp_index, int num_to_prepost); - -typedef void (*mca_bcol_iboffload_config_qps_fn_t) - (int qp_index, - struct mca_bcol_iboffload_endpoint_t *ep, - ompi_common_ofacm_base_qp_config_t *qp_config); - -typedef int (*mca_bcol_iboffload_alloc_qps_resource_fn_t) - (int qp_index, - struct mca_bcol_iboffload_device_t *device); - -typedef int (*mca_bcol_iboffload_dealloc_qps_resource_fn_t) - (int qp_index, - struct mca_bcol_iboffload_device_t *device); - -typedef struct mca_bcol_iboffload_frag_t* (*mca_bcol_iboffload_get_preposted_recv_fn_t) - (struct mca_bcol_iboffload_endpoint_t *ep, int qp_index); - -struct mca_bcol_iboffload_qp_info_t { - size_t size; - - int32_t rd_num; - int32_t rd_low; - int32_t rd_pp_win; /* prepost window = rd_num - rd_low */ - int qp_index; - - mca_bcol_iboffload_qp_type_t type; - - mca_bcol_iboffload_config_qps_fn_t config_qp; - mca_bcol_iboffload_prepost_qps_fn_t prepost_recv; - - mca_bcol_iboffload_alloc_qps_resource_fn_t alloc_resource; - mca_bcol_iboffload_dealloc_qps_resource_fn_t dealloc_resource; - - mca_bcol_iboffload_get_preposted_recv_fn_t get_preposted_recv; - - union { - mca_bcol_iboffload_pp_qp_info_t pp_qp; - mca_bcol_iboffload_srq_qp_info_t srq_qp; - } u; -}; typedef struct mca_bcol_iboffload_qp_info_t mca_bcol_iboffload_qp_info_t; - -extern mca_bcol_iboffload_setup_qps_fn_t setup_qps_fn[MCA_BCOL_IBOFFLOAD_QP_LAST]; - -END_C_DECLS - -#endif /* MCA_BCOL_IBOFFLOAD_QP_INFO_H */ - diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_task.c
b/ompi/mca/bcol/iboffload/bcol_iboffload_task.c deleted file mode 100644 index 6fcb62391a..0000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_task.c +++ /dev/null @@ -1,81 +0,0 @@ - /* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "bcol_iboffload.h" -#include "bcol_iboffload_frag.h" -#include "bcol_iboffload_task.h" - -static void task_constructor(mca_bcol_iboffload_task_t *task) -{ - task->frag = NULL; - task->collfrag = NULL; - task->endpoint = NULL; - task->next_task = NULL; - - task->sg_entries = NULL; - task->sg_entries_num = 0; - - task->task_list = NULL; - - memset(&task->wr, 0, sizeof(task->wr)); - - memset(&task->element, 0, sizeof(struct mqe_task)); - memset(&task->task_mqe_qp_entry, 0, sizeof(struct mqe_qp_entry)); -} - -static void task_destructor(mca_bcol_iboffload_task_t *task) -{ - if (NULL != task->sg_entries) { - free(task->sg_entries); - } -} - -OBJ_CLASS_INSTANCE( - mca_bcol_iboffload_task_t, - ompi_free_list_item_t, - task_constructor, - task_destructor); - -void -mca_bcol_iboffload_calc_task_init(ompi_free_list_item_t* item, void* ctx) -{ - mca_bcol_iboffload_task_t *calc_task = - (mca_bcol_iboffload_task_t *) item; - - calc_task->task_list = (ompi_free_list_t *) ctx; - - calc_task->sg_entries_num = 2; - calc_task->sg_entries = (struct ibv_sge *) malloc (2 * sizeof(struct ibv_sge)); -} - -void -mca_bcol_iboffload_iovec_task_init(ompi_free_list_item_t* item, void* ctx) -{ - mca_bcol_iboffload_task_t *iovec_task = - (mca_bcol_iboffload_task_t *) item; - - mca_bcol_iboffload_module_t *iboffload_module = - (mca_bcol_iboffload_module_t *) ctx; - - int nitems, group_size = iboffload_module->group_size; - - nitems = group_size / 2 + group_size % 2; - if (nitems > iboffload_module->device->ib_dev_attr.max_sge) { - nitems = iboffload_module->device->ib_dev_attr.max_sge; - } - - iovec_task->sg_entries_num = nitems; - iovec_task->task_list = &iboffload_module->iovec_tasks_free; - - iovec_task->sg_entries = (struct ibv_sge *) - malloc(nitems * sizeof(struct ibv_sge)); -} diff --git a/ompi/mca/bcol/iboffload/bcol_iboffload_task.h b/ompi/mca/bcol/iboffload/bcol_iboffload_task.h deleted file mode 100644 index 99bbe8eb1a..0000000000 --- a/ompi/mca/bcol/iboffload/bcol_iboffload_task.h +++ /dev/null @@ -1,613 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_IBOFFLOAD_TASK_H -#define MCA_BCOL_IBOFFLOAD_TASK_H - -#include "ompi_config.h" - -#include -#include -#include - -#include "bcol_iboffload.h" -#include "bcol_iboffload_frag.h" -#include "bcol_iboffload_collreq.h" -#include "bcol_iboffload_endpoint.h" -#include "bcol_iboffload_collfrag.h" - -#define SENDWR(task) ((task)->element.post.send_wr) - -BEGIN_C_DECLS - -/* the mca_bcol_ibv_mwr_task_t name was replaced with mca_bcol_iboffload_task_t */ -struct mca_bcol_iboffload_task_t { - ompi_free_list_item_t super; - - /* pointer to the memory descriptor associated with the task */ - mca_bcol_iboffload_frag_t *frag; - - /* pointer to the bcol descriptor; - * we need it for send tasks only because we complete them in an asynchronous manner - */ - mca_bcol_iboffload_collfrag_t *collfrag; - - /* task to be posted */ - struct mqe_task element; - - /* array of ibv_sge structs - in the CALC case, - * for example, it will have two entries. - */ - struct ibv_sge *sg_entries; - - /* sg_entries array length */ - int sg_entries_num; - - /* Each task is a member of some free list; - if the pointer is NULL => we assume the task - is a member of the common task list (tasks_free) */ - ompi_free_list_t *task_list; - - /* Pointer to the next task */ - struct mca_bcol_iboffload_task_t *next_task; - - /* pasha - this is a crappy workaround for the driver interface; - * the send_wr and recv_wr should be part of mqe_task, not pointers! - */ - union { - struct ibv_m_send_wr send_wr; - struct ibv_recv_wr recv_wr; - } wr; - - /* If we decide to post a task to a different qp */ - struct mqe_qp_entry task_mqe_qp_entry; - - /* Pointer to the endpoint for this task */ - mca_bcol_iboffload_endpoint_t *endpoint; -}; -typedef struct mca_bcol_iboffload_task_t mca_bcol_iboffload_task_t; -OBJ_CLASS_DECLARATION(mca_bcol_iboffload_task_t); - - -/* calc_tasks_free free list init function */ -void -mca_bcol_iboffload_calc_task_init(ompi_free_list_item_t* item, void* ctx); - -/* iovec_tasks_free free list init function */ -void -mca_bcol_iboffload_iovec_task_init(ompi_free_list_item_t* item, void* ctx); - -static inline __opal_attribute_always_inline__ void - mca_bcol_iboffload_return_frag_tolist( - mca_bcol_iboffload_frag_t *frag, - ompi_free_list_t *list) -{ - if (NULL != frag) { - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - assert(MCA_BCOL_IBOFFLOAD_NONE_OWNER != frag->type); - - if (MCA_BCOL_IBOFFLOAD_DUMMY_OWNER != frag->type && - 0 == frag->ref_counter) { - if (MCA_BCOL_IBOFFLOAD_BCOL_OWNER == frag->type) { - OMPI_FREE_LIST_RETURN_MT((&(list[frag->qp_index])), - (ompi_free_list_item_t*) frag); - } else if (MCA_BCOL_IBOFFLOAD_ML_OWNER == frag->type) { - OMPI_FREE_LIST_RETURN_MT((&(cm->ml_frags_free)), - (ompi_free_list_item_t*) frag); - } - } - } -} - -static inline __opal_attribute_always_inline__ void - mca_bcol_iboffload_return_recv_frags_toendpoint( - mca_bcol_iboffload_frag_t *frags, - mca_bcol_iboffload_endpoint_t *ep, - int qp_index) -{ - mca_bcol_iboffload_frag_t *recv_frag = frags; - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - - while (NULL != recv_frag) { - assert(MCA_BCOL_IBOFFLOAD_NONE_OWNER != recv_frag->type); - if (MCA_BCOL_IBOFFLOAD_ML_OWNER != recv_frag->type) { - opal_list_prepend(&ep->qps[qp_index].preposted_frags, - (opal_list_item_t *) recv_frag); - } else { - OMPI_FREE_LIST_RETURN_MT((&(cm->ml_frags_free)), - (ompi_free_list_item_t*) recv_frag); - } - - recv_frag =
recv_frag->next; - } -} - -/* Wait task allocation and initialization */ -static inline __opal_attribute_always_inline__ mca_bcol_iboffload_task_t* - mca_bcol_iboffload_get_wait_task(mca_bcol_iboffload_module_t *iboffload, - uint32_t source, int num_waits, - mca_bcol_iboffload_frag_t *frags, - int qp_index, struct ibv_qp *qp) -{ - ompi_free_list_item_t *item; - mca_bcol_iboffload_task_t *task; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - mca_bcol_iboffload_endpoint_t *endpoint = iboffload->endpoints[source]; - - /* get a task item from the free list */ - OMPI_FREE_LIST_GET_MT(&cm->tasks_free, item); - if (OPAL_UNLIKELY(NULL == item)) { - mca_bcol_iboffload_return_recv_frags_toendpoint(frags, endpoint, qp_index); - return NULL; - } - - task = (mca_bcol_iboffload_task_t *) item; - /* set pointer to corresponding recv fragment */ - IBOFFLOAD_SET_FRAGS_ON_TASK(frags, task); - - task->next_task = NULL; - task->endpoint = endpoint; - - /* set opcode */ - task->element.opcode = MQE_WR_CQE_WAIT; - task->element.flags = 0; /* Any flag may go here; the driver ignores it anyway */ - /* set task id */ - task->element.wr_id = (uint64_t) (uintptr_t) task; - /* set CQ */ - task->element.wait.cq = endpoint->qp_config.init_attr[qp_index].recv_cq; - - /* set the number of completions to wait for */ - task->element.wait.count = num_waits; - /* set pointer to QP */ - - if (NULL == qp) { /* NULL means use MQ's QP */ - task->element.wait.mqe_qp = NULL; - } else { /* Post wait to the SQ of this QP */ - task->task_mqe_qp_entry.next = NULL; - task->task_mqe_qp_entry.qp = qp; - - task->element.wait.mqe_qp = &task->task_mqe_qp_entry; - } - - IBOFFLOAD_VERBOSE(10, ("Allocating task %p, cq: %p, num waits: %d, qp_index - %d, " - "destination %d for comm rank: %d.\n", - (void *) task, (void *) task->element.wait.cq, - task->element.wait.count, qp_index, source, - endpoint->iboffload_module->ibnet->super.group_list[endpoint->index])); - return task; -} - -static inline __opal_attribute_always_inline__ mca_bcol_iboffload_task_t* -mca_bcol_iboffload_prepare_send_task( - mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_endpoint_t *endpoint, - int qp_index, ompi_free_list_t *task_list, - mca_bcol_iboffload_collfrag_t *collfrag) -{ - ompi_free_list_item_t *item; - mca_bcol_iboffload_task_t *task; - - IBOFFLOAD_VERBOSE(10, ("Destination rank - %d, QP index - %d, " - "for comm rank - %d\n", endpoint->index, qp_index, - endpoint->iboffload_module->ibnet->super.group_list[endpoint->index])); - - /* get item from free list */ - OMPI_FREE_LIST_GET_MT(task_list, item); - if (OPAL_UNLIKELY(NULL == item)) { - return NULL; - } - - task = (mca_bcol_iboffload_task_t*) item; - task->endpoint = endpoint; - - ++(collfrag->n_sends); - task->collfrag = collfrag; - - task->next_task = NULL; - task->element.wr_id = (uint64_t) (uintptr_t) task; - - task->element.post.qp = endpoint->qps[qp_index].qp->lcl_qp; - - task->element.opcode = MQE_WR_SEND; - - /* define send work request */ - SENDWR(task) = &(task->wr.send_wr); - - SENDWR(task)->next = NULL; - - SENDWR(task)->wr_id = (uint64_t) (uintptr_t) collfrag; - IBOFFLOAD_VERBOSE(10, ("coll_frag - %p.\n", collfrag)); - - /* Always send IMM on sends!
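 - *
 - * [Editor's aside] A hedged sketch of how these helpers are meant to
 - * compose into a "wait, then forward" chain (variable names and the
 - * one-completion wait are illustrative; error handling omitted):
 - *
 - *   wait = mca_bcol_iboffload_get_wait_task(iboffload, src, 1, frags,
 - *                                           qp_index, NULL);
 - *   send = mca_bcol_iboffload_get_send_task(iboffload, dst, qp_index,
 - *                                           frag, collfrag, true);
 - *   wait->next_task = send;  (the receive completion gates the send)
 - *
 - * The resulting list of mqe_task elements is then presumably posted to the
 - * device's management queue (MQ), which is what lets the HCA progress the
 - * collective without per-step host involvement.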
*/ - task->element.flags = MQE_WR_FLAG_IMM_EXE; - - /* Always signal completion */ - SENDWR(task)->send_flags = IBV_SEND_SIGNALED; - - return task; -} - -static inline __opal_attribute_always_inline__ mca_bcol_iboffload_task_t* -mca_bcol_iboffload_get_send_task( - mca_bcol_iboffload_module_t *iboffload, - uint32_t destination, int qp_index, - mca_bcol_iboffload_frag_t *frag, - mca_bcol_iboffload_collfrag_t *collfrag, - bool enable_inline) -{ - mca_bcol_iboffload_task_t *task; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - mca_bcol_iboffload_endpoint_t *endpoint = iboffload->endpoints[destination]; - - IBOFFLOAD_VERBOSE(10, ("mca_bcol_iboffload_get_send_task qp_index %d\n", - qp_index)); - - task = mca_bcol_iboffload_prepare_send_task(iboffload, endpoint, qp_index, - &cm->tasks_free, - collfrag); - - if (OPAL_UNLIKELY(NULL == task)) { - mca_bcol_iboffload_return_frag_tolist(frag, iboffload->device->frags_free); - return NULL; - } - - /* no support for multiple frags */ - IBOFFLOAD_SET_SINGLE_FRAG_ON_TASK(frag, task); - - /* We cannot post a 0-byte send, but we can do a zero-byte RDMA write with immediate */ - if (0 == frag->sg_entry.length) { - SENDWR(task)->imm_data = 0; - SENDWR(task)->opcode = IBV_WR_RDMA_WRITE_WITH_IMM; - - SENDWR(task)->wr.rdma.rkey = endpoint->remote_zero_rdma_addr.rkey; - SENDWR(task)->wr.rdma.remote_addr = endpoint->remote_zero_rdma_addr.addr; - } else { - SENDWR(task)->opcode = IBV_WR_SEND; - } - - /* single sge */ - SENDWR(task)->num_sge = 1; - SENDWR(task)->sg_list = &(frag->sg_entry); - - /* Use an inline send when possible */ - if (enable_inline && - frag->sg_entry.length < cm->max_inline_data) { - IBOFFLOAD_VERBOSE(10, ("Setting inline for len %d\n", frag->sg_entry.length)); - SENDWR(task)->send_flags |= IBV_SEND_INLINE; - } - - return task; -} - -static inline __opal_attribute_always_inline__ mca_bcol_iboffload_task_t* -mca_bcol_iboffload_get_send_vec_task( - mca_bcol_iboffload_module_t *iboffload, - uint32_t destination, int qp_index, - size_t nitems, - struct iovec *buff_iovec, - uint32_t lkey, - mca_bcol_iboffload_frag_t *frag, - mca_bcol_iboffload_collfrag_t *collfrag, - bool enable_inline) -{ - mca_bcol_iboffload_task_t *task; - int i; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - mca_bcol_iboffload_endpoint_t *endpoint = iboffload->endpoints[destination]; - - IBOFFLOAD_VERBOSE(10, ("mca_bcol_iboffload_get_send_task qp_index %d\n", - qp_index)); - - task = mca_bcol_iboffload_prepare_send_task(iboffload, endpoint, qp_index, - &iboffload->iovec_tasks_free, - collfrag); - - if (OPAL_UNLIKELY(NULL == task)) { - mca_bcol_iboffload_return_frag_tolist(frag, iboffload->device->frags_free); - return NULL; - } - - /* no support for multiple frags */ - IBOFFLOAD_SET_SINGLE_FRAG_ON_TASK(frag, task); - - /* We cannot post a 0-byte send, but we can do a zero-byte RDMA write with immediate */ - SENDWR(task)->opcode = IBV_WR_SEND; - - assert (task->sg_entries != NULL); - - for (i = 0; (size_t) i < nitems; ++i){ - task->sg_entries[i].length = buff_iovec[i].iov_len; - task->sg_entries[i].addr = (uint64_t) buff_iovec[i].iov_base; - task->sg_entries[i].lkey = lkey; - } - - /* multiple sge */ - SENDWR(task)->num_sge = nitems; - SENDWR(task)->sg_list = (task->sg_entries); - - /* Use an inline send when possible */ - if (enable_inline && - frag->sg_entry.length < cm->max_inline_data) { - IBOFFLOAD_VERBOSE(10, ("Setting inline for len %d\n", frag->sg_entry.length)); - SENDWR(task)->send_flags |= IBV_SEND_INLINE; - } - - return
task; -} -static inline __opal_attribute_always_inline__ mca_bcol_iboffload_task_t* - mca_bcol_iboffload_get_rdma_vec_task( - uint32_t destination, size_t offset, size_t nitems, - mca_bcol_iboffload_frag_t *frag, - mca_bcol_iboffload_module_t *iboffload, - struct iovec *buff_iovec, uint32_t lkey, - mca_bcol_iboffload_collfrag_t *collfrag) -{ - int i; - mca_bcol_iboffload_collreq_t *coll_request = collfrag->coll_full_req; - - mca_bcol_iboffload_task_t *task; - mca_bcol_iboffload_endpoint_t *endpoint = - iboffload->endpoints[destination]; - - task = mca_bcol_iboffload_prepare_send_task(iboffload, endpoint, - coll_request->qp_index, - &iboffload->iovec_tasks_free, - collfrag); - if (OPAL_UNLIKELY(NULL == task)) { - mca_bcol_iboffload_return_frag_tolist(frag, iboffload->device->frags_free); - return NULL; - } - - /* no support for multiple frags */ - IBOFFLOAD_SET_SINGLE_FRAG_ON_TASK(frag, task); - - SENDWR(task)->imm_data = 0; - SENDWR(task)->opcode = IBV_WR_RDMA_WRITE_WITH_IMM; - SENDWR(task)->wr.rdma.rkey = endpoint->remote_rdma_block.ib_info.rkey; - - SENDWR(task)->wr.rdma.remote_addr = (uint64_t) (uintptr_t) - ((unsigned char *) endpoint->remote_rdma_block.rdma_desc[coll_request->ml_buffer_index].data_addr + offset); - - for (i = 0; (size_t) i < nitems; ++i){ - task->sg_entries[i].length = buff_iovec[i].iov_len; - task->sg_entries[i].addr = (uint64_t) buff_iovec[i].iov_base; - task->sg_entries[i].lkey = lkey; - } - - /* single sge */ - SENDWR(task)->num_sge = nitems; - SENDWR(task)->sg_list = (task->sg_entries); - - IBOFFLOAD_VERBOSE(10, ("The remote offset %ld \n", offset)); - return task; -} - -static inline __opal_attribute_always_inline__ mca_bcol_iboffload_task_t* - mca_bcol_iboffload_get_rdma_task( - uint32_t destination, size_t offset, - mca_bcol_iboffload_frag_t *frag, - mca_bcol_iboffload_module_t *iboffload, - mca_bcol_iboffload_collfrag_t *collfrag) -{ - mca_bcol_iboffload_collreq_t *coll_request = collfrag->coll_full_req; - - mca_bcol_iboffload_task_t *task; - mca_bcol_iboffload_endpoint_t *endpoint = - iboffload->endpoints[destination]; - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - task = mca_bcol_iboffload_prepare_send_task(iboffload, endpoint, - coll_request->qp_index, - &cm->tasks_free, collfrag); - if (OPAL_UNLIKELY(NULL == task)) { - mca_bcol_iboffload_return_frag_tolist(frag, iboffload->device->frags_free); - return NULL; - } - - /* no support for multiple frags */ - IBOFFLOAD_SET_SINGLE_FRAG_ON_TASK(frag, task); - - SENDWR(task)->imm_data = 0; - SENDWR(task)->opcode = IBV_WR_RDMA_WRITE_WITH_IMM; - SENDWR(task)->wr.rdma.rkey = endpoint->remote_rdma_block.ib_info.rkey; - /* Pasha: I really not happy with the way we calculate remote addresses. 
- why don't we use rbuf + offset? */ - SENDWR(task)->wr.rdma.remote_addr = (uint64_t) (uintptr_t) - ((unsigned char *) endpoint->remote_rdma_block.rdma_desc[coll_request->ml_buffer_index].data_addr + offset); - /* single sge */ - SENDWR(task)->num_sge = 1; - SENDWR(task)->sg_list = &(frag->sg_entry); - - IBOFFLOAD_VERBOSE(10, ("The remote offset %ld \n", offset)); - return task; -} - -/* Pasha: hacked-up version of the calc operation */ - static inline __opal_attribute_always_inline__ mca_bcol_iboffload_task_t* -mca_bcol_iboffload_get_calc_task(mca_bcol_iboffload_module_t *iboffload, - uint32_t destination, int qp_index, mca_bcol_iboffload_frag_t *frag, - struct ibv_sge *l_operand, struct ibv_sge *r_operand, - mca_bcol_iboffload_collreq_t *coll_request, - bool enable_inline) -/* Some specifications for this function: - * 1) We assume that the lengths of the two operands (ibv_sge structs) are the same. - * 2) We may use the results (ibv_sge structs) of previous - * calc operations => the frag pointer may be NULL. - */ -{ - mca_bcol_iboffload_task_t *task; - mca_bcol_iboffload_endpoint_t *endpoint = - iboffload->endpoints[destination]; - - mca_bcol_iboffload_collfrag_t *collfrag = - (mca_bcol_iboffload_collfrag_t *) - opal_list_get_last(&coll_request->work_requests); - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - task = mca_bcol_iboffload_prepare_send_task(iboffload, endpoint, qp_index, - &cm->calc_tasks_free, collfrag); - if (OPAL_UNLIKELY(NULL == task)) { - mca_bcol_iboffload_return_frag_tolist(frag, iboffload->device->frags_free); - return NULL; - } - - if (NULL != frag) { - IBOFFLOAD_SET_SINGLE_FRAG_ON_TASK(frag, task); - } else { - task->frag = NULL; - } - - task->sg_entries[0] = *l_operand; - task->sg_entries[1] = *r_operand; - - SENDWR(task)->num_sge = 2; - SENDWR(task)->sg_list = task->sg_entries; - - SENDWR(task)->opcode = MCA_BCOL_IBOFFLOAD_SEND_CALC; -#if OPAL_HAVE_IBOFFLOAD_CALC_RDMA - SENDWR(task)->wr.calc_send.data_type = coll_request->actual_ib_dtype; - SENDWR(task)->wr.calc_send.calc_op = coll_request->actual_ib_op; -#else - SENDWR(task)->wr.calc.data_type = coll_request->actual_ib_dtype; - SENDWR(task)->wr.calc.calc_op = coll_request->actual_ib_op; -#endif - - return task; -} - -static inline __opal_attribute_always_inline__ mca_bcol_iboffload_task_t* - mca_bcol_iboffload_get_rdma_calc_task(mca_bcol_iboffload_module_t *iboffload, - uint32_t destination, int qp_index, mca_bcol_iboffload_frag_t *frag, - struct ibv_sge *l_operand, struct ibv_sge *r_operand, - mca_bcol_iboffload_collreq_t *coll_request, - size_t offset) -/* Some specifications for this function: - * 1) We assume that the lengths of the two operands (ibv_sge structs) are the same. - * 2) We may use the results (ibv_sge structs) of previous - * calc operations => the frag pointer may be NULL.
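 - *
 - * [Editor's aside] For illustration, a hedged sketch of the two-operand
 - * setup these calc helpers expect (buffer names, length, and keys are
 - * hypothetical; both SGEs must describe the same length, per point 1):
 - *
 - *   struct ibv_sge l_operand = { .addr   = (uint64_t) (uintptr_t) local_buf,
 - *                                .length = len, .lkey = lkey };
 - *   struct ibv_sge r_operand = { .addr   = (uint64_t) (uintptr_t) partner_buf,
 - *                                .length = len, .lkey = lkey };
 - *   task = mca_bcol_iboffload_get_rdma_calc_task(iboffload, dst, qp_index,
 - *                                                frag, &l_operand, &r_operand,
 - *                                                coll_request, offset);
 - *
 - * Note that, unlike mca_bcol_iboffload_get_calc_task above, the RDMA variant
 - * below ends up posting only l_operand as a single SGE; r_operand appears to
 - * be accepted only for interface symmetry.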
- */ -{ - mca_bcol_iboffload_task_t *task; - mca_bcol_iboffload_endpoint_t *endpoint = - iboffload->endpoints[destination]; - - mca_bcol_iboffload_collfrag_t *collfrag = - (mca_bcol_iboffload_collfrag_t *) - opal_list_get_last(&coll_request->work_requests); - - mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; - task = mca_bcol_iboffload_prepare_send_task(iboffload, endpoint, qp_index, - &cm->calc_tasks_free, collfrag); - if (OPAL_UNLIKELY(NULL == task)) { - mca_bcol_iboffload_return_frag_tolist(frag, iboffload->device->frags_free); - return NULL; - } - - if (NULL != frag) { - IBOFFLOAD_SET_SINGLE_FRAG_ON_TASK(frag, task); - } else { - task->frag = NULL; - } - - task->sg_entries[0] = *l_operand; - - /* Hack - we don't really use it. - task->sg_entries[1] = *r_operand; - */ - /* We use only a single entry - SENDWR(task)->num_sge = 2; - */ - SENDWR(task)->num_sge = 1; - SENDWR(task)->sg_list = task->sg_entries; - -#if OPAL_HAVE_IBOFFLOAD_CALC_RDMA - SENDWR(task)->opcode = IBV_M_WR_CALC_RDMA_WRITE_WITH_IMM; - SENDWR(task)->wr.calc_rdma.data_type = coll_request->actual_ib_dtype; - SENDWR(task)->wr.calc_rdma.calc_op = coll_request->actual_ib_op; - SENDWR(task)->wr.calc_rdma.rkey = endpoint->remote_rdma_block.ib_info.rkey; - SENDWR(task)->wr.calc_rdma.remote_addr = (uint64_t) (uintptr_t) - ((unsigned char *) endpoint->remote_rdma_block.rdma_desc[coll_request->ml_buffer_index].data_addr + offset); -#else - IBOFFLOAD_ERROR(("Fatal error: RDMA CALC was called, but the driver does not support this operation")); - return NULL; -#endif - - return task; -} - -static inline __opal_attribute_always_inline__ - int release_frags_on_task(mca_bcol_iboffload_task_t *task, - ompi_free_list_t *list) -{ - int rc, qp_index; - - mca_bcol_iboffload_frag_t *temp_frag = task->frag; - mca_bcol_iboffload_endpoint_t *endpoint = task->endpoint; - - mca_bcol_iboffload_component_t *cm = - &mca_bcol_iboffload_component; - - IBOFFLOAD_VERBOSE(10, ("\nCalling release_frags_on_task")); - - while (NULL != temp_frag) { - qp_index = temp_frag->qp_index; - - --(temp_frag->ref_counter); - - /* Return credits */ - if (MQE_WR_CQE_WAIT == task->element.opcode) { - ++(endpoint->qps[qp_index].rd_wqe); - - IBOFFLOAD_VERBOSE(10, ("Return rd_wqe %d pp_win %d", - endpoint->qps[qp_index].rd_wqe, - cm->qp_infos[qp_index].rd_pp_win)); - - /* Call for recv prepost */ - if (endpoint->qps[qp_index].rd_wqe >= - cm->qp_infos[qp_index].rd_pp_win) { - IBOFFLOAD_VERBOSE(10, ("Prepost to endpoint->index - %d, qp_index - %d", endpoint->index, qp_index)); - rc = mca_bcol_iboffload_prepost_recv(endpoint, qp_index, - endpoint->qps[qp_index].rd_wqe); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - IBOFFLOAD_ERROR(("QP %d: failed to prepost.\n", qp_index)); - return OMPI_ERROR; - } - /* What happens if we cannot prepost? */ - } - } else if (MQE_WR_SEND == task->element.opcode) { - ++(endpoint->qps[qp_index].sd_wqe); - - assert(endpoint->qps[qp_index].sd_wqe <= cm->qp_infos[qp_index].rd_num); - - IBOFFLOAD_VERBOSE(10, ("Return sd_wqe %d, qp_index - %d, endpoint - %p", - endpoint->qps[qp_index].sd_wqe, qp_index, endpoint)); - } else { - /* We should never reach this case */ - IBOFFLOAD_ERROR(("Unsupported operation")); - - return OMPI_ERROR; - } - - mca_bcol_iboffload_return_frag_tolist(temp_frag, list); - temp_frag = temp_frag->next; - } - - return OMPI_SUCCESS; -} - -END_C_DECLS - -#endif diff --git a/ompi/mca/bcol/iboffload/configure.m4 b/ompi/mca/bcol/iboffload/configure.m4 deleted file mode 100644 index 510e0117e0..0000000000 ---
a/ompi/mca/bcol/iboffload/configure.m4 +++ /dev/null @@ -1,40 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. -# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. -# Copyright (c) 2015 Research Organization for Information Science -# and Technology (RIST). All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_ompi_bcol_iboffload_CONFIG([should_build]) -# ------------------------------------------ -# AC_DEFUN([MCA_ompi_bcol_iboffload_POST_CONFIG], [ -# ]) - - -# MCA_ompi_bcol_iboffload_CONFIG([action-if-can-compile], -# [action-if-cant-compile]) -# ------------------------------------------------ -AC_DEFUN([MCA_ompi_bcol_iboffload_CONFIG],[ - AC_CONFIG_FILES([ompi/mca/bcol/iboffload/Makefile]) - bcol_ofa_happy="no" - bcol_mlnx_ofed_happy="no" - - OPAL_CHECK_OPENFABRICS([bcol_iboffload], [bcol_ofa_happy="yes"]) - OPAL_CHECK_MLNX_OPENFABRICS([bcol_iboffload], [bcol_mlnx_ofed_happy="yes"]) - - AS_IF([test "$bcol_ofa_happy" = "yes" && test "$bcol_mlnx_ofed_happy" = "yes"], - [$1], - [$2]) - - # substitute in the things needed to build iboffload - AC_SUBST([bcol_iboffload_CFLAGS]) - AC_SUBST([bcol_iboffload_CPPFLAGS]) - AC_SUBST([bcol_iboffload_LDFLAGS]) - AC_SUBST([bcol_iboffload_LIBS]) -])dnl diff --git a/ompi/mca/bcol/iboffload/owner.txt b/ompi/mca/bcol/iboffload/owner.txt deleted file mode 100644 index 1c86df367b..0000000000 --- a/ompi/mca/bcol/iboffload/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: ORNL -status: unmaintained diff --git a/ompi/mca/bcol/ptpcoll/Makefile.am b/ompi/mca/bcol/ptpcoll/Makefile.am deleted file mode 100644 index a0bd0cb83e..0000000000 --- a/ompi/mca/bcol/ptpcoll/Makefile.am +++ /dev/null @@ -1,57 +0,0 @@ -# -# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. -# Copyright (c) 2009-2013 Mellanox Technologies. All rights reserved. -# Copyright (c) 2013 Los Alamos National Security, LLC. All rights -# reserved. -# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - bcol_ptpcoll.h \ - bcol_ptpcoll_utils.h \ - bcol_ptpcoll_utils.c \ - bcol_ptpcoll_mca.h \ - bcol_ptpcoll_mca.c \ - bcol_ptpcoll_barrier.c \ - bcol_ptpcoll_bcast.c \ - bcol_ptpcoll_bcast.h \ - bcol_ptpcoll_component.c \ - bcol_ptpcoll_fanin.c \ - bcol_ptpcoll_fanout.c \ - bcol_ptpcoll_module.c \ - bcol_ptpcoll_allreduce.h \ - bcol_ptpcoll_allreduce.c \ - bcol_ptpcoll_reduce.h \ - bcol_ptpcoll_reduce.c \ - bcol_ptpcoll_allgather.c - - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -component_noinst = -component_install = -if MCA_BUILD_ompi_bcol_ptpcoll_DSO -component_install += mca_bcol_ptpcoll.la -else -component_noinst += libmca_bcol_ptpcoll.la -endif - -# See ompi/mca/btl/sm/Makefile.am for an explanation of -# libmca_common_sm.la. 
- -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_bcol_ptpcoll_la_SOURCES = $(sources) -mca_bcol_ptpcoll_la_LDFLAGS = -module -avoid-version -mca_bcol_ptpcoll_la_LIBADD = - -noinst_LTLIBRARIES = $(component_noinst) -libmca_bcol_ptpcoll_la_SOURCES =$(sources) -libmca_bcol_ptpcoll_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll.h b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll.h deleted file mode 100644 index a72197c78a..0000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll.h +++ /dev/null @@ -1,474 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_PTPCOLL_EXPORT_H -#define MCA_BCOL_PTPCOLL_EXPORT_H - -#include "ompi_config.h" - -#include "mpi.h" -#include "ompi/mca/mca.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/bcol/base/base.h" -#include "opal/mca/mpool/mpool.h" -#include "ompi/request/request.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/patterns/net/netpatterns.h" - -BEGIN_C_DECLS - -#ifdef HAVE_SCHED_YIELD -# include -# define SPIN sched_yield() -#else /* no switch available */ -# define SPIN -#endif - -/** - * Structure to hold the basic ptpcoll component. First it holds the - * base coll component, and then holds a bunch of - * ptpcoll-component-specific stuff (e.g., current MCA param - * values). - */ -struct mca_bcol_ptpcoll_component_t { - /** Base coll component */ - mca_bcol_base_component_2_0_0_t super; - /** Verbosity level, used only in debug enabled builds */ - int verbose; - /** The radix of the K-nomial tree, initialized by an MCA parameter */ - int k_nomial_radix; - /** The radix of the narray tree, initialized by an MCA parameter */ - int narray_radix; - /** The radix used for narray scatter and knomial gather for - large message bcast **/ - int narray_knomial_radix; - /** Number of times to poll for specific tag/src */ - int num_to_probe; - /* - * bcast small messages algorithm - * 1 - Knomial bcast - * 2 - Narray bcast - */ - int bcast_small_messages_known_root_alg; - /* - * bcast large messages algorithm - * 1 - binomial scatter-gather - * 2 - Narray scatter, knomial gather - */ - int bcast_large_messages_known_root_alg; - /* - * barrier algorithm - * 1 - recursive doubling - * 2 - recursive K-ing - */ - int barrier_alg; - - int use_brucks_smsg_alltoall_rdma; -}; - -struct mca_bcol_ptpcoll_collreq_t { - opal_free_list_item_t super; - - int tag; - int num_reqs; - int exchange; - - int need_toserv_extra; - int extra_partner_rank; - - ompi_request_t **requests; -}; -typedef struct mca_bcol_ptpcoll_collreq_t mca_bcol_ptpcoll_collreq_t; -OBJ_CLASS_DECLARATION(mca_bcol_ptpcoll_collreq_t); - -/** - * Convenience typedef - */ -typedef struct mca_bcol_ptpcoll_component_t mca_bcol_ptpcoll_component_t; - -/* Bcast small messages, - known root algorithm */ -enum { - PTPCOLL_KNOMIAL = 1, - PTPCOLL_NARRAY -}; - -/* Bcast large messages, - known root algorithm */ -enum { - PTPCOLL_BINOMIAL_SG = 1, /* Binomial scatter-gather */ - PTPCOLL_NARRAY_KNOMIAL_SG /* Narray-Knomial scatter-gather */ -}; - -/* - * Implemented function index list - */ - -/* barrier */ -enum{ - FANIN_FAN_OUT_BARRIER_FN, - RECURSIVE_DOUBLING_BARRIER_FN, - N_BARRIER_FNS -}; - -/*
reduce */ -enum{ - FANIN_REDUCE_FN, - REDUCE_SCATTER_GATHER_FN, - N_REDUCE_FNS -}; -enum{ - SHORT_DATA_FN_REDUCE, - LONG_DATA_FN_REDUCE, - N_REDUCE_FNS_USED -}; - -/* all-reduce */ -enum{ - FANIN_FANOUT_ALLREDUCE_FN, - REDUCE_SCATTER_ALLGATHER_FN, - N_ALLREDUCE_FNS -}; -enum{ - SHORT_DATA_FN_ALLREDUCE, - LONG_DATA_FN_ALLREDUCE, - N_ALLREDUCE_FNS_USED -}; - - -/* - * N-order tree node description - */ -struct tree_node_t { - /* my rank within the group */ - int my_rank; - /* my node type - root, leaf, or interior */ - int my_node_type; - /* number of nodes in the tree */ - int tree_size; - /* number of parents (0/1) */ - int n_parents; - /* number of children */ - int n_children; - /* children ranks within the group */ - int *children_ranks; -}; -typedef struct tree_node_t tree_node_t; - -struct pair_exchange_node_t { - - /* number of nodes this node will exchange data with */ - int n_exchanges; - - /* ranks of nodes involved in data exchange */ - int *rank_exchanges; - - /* number of extra sources of data - outside largest power of 2 in - * this group */ - int n_extra_sources; - - /* rank of the extra source */ - int rank_extra_source; - - /* number of tags needed per stripe */ - int n_tags; - - /* log 2 of largest full power of 2 for this node set */ - int log_2; - - /* largest power of 2 that fits in this group */ - int n_largest_pow_2; - - /* node type */ - int node_type; - -}; -typedef struct pair_exchange_node_t pair_exchange_node_t; - -/* - * Barrier request objects - */ - -/* enum for the phase the nb barrier is in */ -enum{ - NB_BARRIER_INACTIVE, - NB_BARRIER_FAN_IN, - NB_BARRIER_FAN_OUT, - /* done and not started are the same for all practical - * purposes, as the init function always sets this flag - */ - NB_BARRIER_DONE -}; - -typedef enum { - PTPCOLL_NOT_STARTED = 1, - PTPCOLL_WAITING_FOR_DATA = 1 << 1, - PTPCOLL_SCATTER_STARTED = 1 << 2, - PTPCOLL_GATHER_STARTED = 1 << 3, - PTPCOLL_EXTRA_SEND_STARTED = 1 << 4, - PTPCOLL_ROOT_SEND_STARTED = 1 << 5 -} ptpcoll_op_status; - -struct mca_bcol_ptpcoll_ml_buffer_desc_t { - void *data_addr; /* buffer address */ - uint64_t bank_index; /* my bank */ - uint64_t buffer_index; /* my buff index */ - int active_requests; /* keep number of active requests */ - ompi_request_t **requests; /* caching pointers to requests */ - int data_src; /* used for bcast to cache internal data */ - int radix_mask; /* used for bcast to cache internal data */ - int radix_mask_pow; /* used for bcast to cache internal data */ - int iteration; /* buffer iteration in knomial, binomial, etc.
algorithms */ - int tag; /* tag number that is attached to this operation */ - int status; /* operation status */ - /* Fixme: Probably we can get rid of these fields by redesigning - * the reduce implementation - */ - int reduction_status; /* used for reduction to cache internal - reduction status */ - bool reduce_init_called; -}; -typedef struct mca_bcol_ptpcoll_ml_buffer_desc_t mca_bcol_ptpcoll_ml_buffer_desc_t; - -/* - * Information that we need to keep in order to access and - * track local ML memory that is used as source and destination - * for collective operations - */ -struct mca_bcol_ptpcoll_local_mlmem_desc_t { - /* Bank index to release */ - uint32_t bank_index_for_release; - /* number of memory banks */ - uint32_t num_banks; - /* number of buffers per bank */ - uint32_t num_buffers_per_bank; - /* size of a payload buffer */ - uint32_t size_buffer; - /* pointer to buffer descriptors initialized */ - mca_bcol_ptpcoll_ml_buffer_desc_t *ml_buf_desc; -}; -typedef struct mca_bcol_ptpcoll_local_mlmem_desc_t mca_bcol_ptpcoll_local_mlmem_desc_t; - -typedef enum { - PTPCOLL_PROXY = 1, - PTPCOLL_IN_GROUP = 1 << 1, - PTPCOLL_EXTRA = 1 << 2, - PTPCOLL_KN_PROXY = 1 << 3, - PTPCOLL_KN_IN_GROUP = 1 << 4, - PTPCOLL_KN_EXTRA = 1 << 5 -} node_type_pow2; - -struct mca_bcol_ptpcoll_module_t { - /* base structure */ - mca_bcol_base_module_t super; - - /* size */ - int group_size; - - /* size of each memory segment */ - size_t segment_size; - - /* k_nomial radix */ - int k_nomial_radix; - /* caching power of K, for K-nomial operations */ - int pow_k; - /* caching power of K number that is smaller than or equal to the size of the group */ - int pow_knum; - /* caching power of 2, it is a special case for some algorithms */ - int pow_2; - /* caching power of 2 number that is closest to the size of the group */ - int pow_2num; - /* type of this node in the power-of-2 group */ - int pow_2type; - /* type of this node in the K-nomial tree group */ - int pow_ktype; - /* type of this node in the narray tree group */ - int narray_type; - /* size of full narray tree */ - int full_narray_tree_size; - /* number of leaves on the last level */ - int full_narray_tree_num_leafs; - - /* Nary tree info */ - netpatterns_tree_node_t *narray_node; - - /* if the rank is in the group, it keeps the extra peer; - if the rank is extra, it keeps the proxy peer. - */ - int proxy_extra_index; /* pow2 algorithm */ - int *kn_proxy_extra_index; /* K-nomial algorithm */ - int kn_proxy_extra_num; /* number of extra peers, maximum k - 1 */ - - /* collective tag */ - long long collective_tag; - - /* tag mask - the pml has a limit on tag size, so need - * to wrap around - */ - uint64_t tag_mask; - - /* Caching information about local ML memory.
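 - *
 - * [Editor's aside] As the allgather routines below illustrate, a given
 - * collective pulls its cached state straight off this descriptor, e.g.
 - * (buffer_index arrives via bcol_function_args_t):
 - *
 - *   mca_bcol_ptpcoll_ml_buffer_desc_t *buf =
 - *       &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index];
 - *   ompi_request_t **requests = buf->requests;
 - *   int *active_requests      = &buf->active_requests;
 - *   int *iteration            = &buf->iteration;  (resume point for progress calls)
 - *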
- * Since ptpcoll does not support RDMA operations over pml, - * we don't need to keep any information about remote buffers - */ - mca_bcol_ptpcoll_local_mlmem_desc_t ml_mem; - - - /* Narray-Knomial scatther gather */ - - /* list of extra indexes */ - int *narray_knomial_proxy_extra_index; - /* number of extra peers , maximum k - 1*/ - int narray_knomial_proxy_num; - /* Narray-Knomial node information array */ - netpatterns_narray_knomial_tree_node_t *narray_knomial_node; - /* Knomial exchange tree */ - netpatterns_k_exchange_node_t knomial_exchange_tree; - /* knomial allgather tree --- Do not disable, we need both - different algorithms define recursive k - ing differently - */ - netpatterns_k_exchange_node_t knomial_allgather_tree; - - /* Knomial allgather offsets */ - int **allgather_offsets; - - /* Free lists of outstanding collective operations */ - opal_free_list_t collreqs_free; - - int log_group_size; - struct iovec *alltoall_iovec; -}; - -typedef struct mca_bcol_ptpcoll_module_t mca_bcol_ptpcoll_module_t; -OBJ_CLASS_DECLARATION(mca_bcol_ptpcoll_module_t); - - -/** - * Global component instance - */ -OMPI_MODULE_DECLSPEC extern mca_bcol_ptpcoll_component_t -mca_bcol_ptpcoll_component; - - -/* - * coll module functions - */ - -/* query to see if the component is available for use, and can - * satisfy the thread and progress requirements - */ -int mca_bcol_ptpcoll_init_query(bool enable_progress_threads, - bool enable_mpi_threads); - -/* query to see if the module is available for use on the given - * communicator, and if so, what it's priority is. - */ -mca_bcol_base_module_t ** -mca_bcol_ptpcoll_comm_query(mca_sbgp_base_module_t *sbgp, int *num_modules); - -/* interface function to setup recursive k-ing tree */ -int mca_bcol_ptpcoll_setup_knomial_tree(mca_bcol_base_module_t *super); - -/* barrier routines */ -int bcol_ptpcoll_barrier_recurs_dbl(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); -int bcol_ptpcoll_barrier_recurs_knomial(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); -int bcol_ptpcoll_barrier_init(mca_bcol_base_module_t *super); -int mca_bcol_ptpcoll_memsync_init(mca_bcol_base_module_t *super); -void * bcol_ptpcoll_allocate_memory(size_t length, size_t alignment, - struct mca_bcol_base_module_t *bcol_module); -int bcol_ptpcoll_register_memory(void * in_ptr, size_t length, size_t alignment, - struct mca_bcol_base_module_t *bcol_module); -int bcol_ptpcoll_deregister_memory( void * in_ptr, - struct mca_bcol_base_module_t *bcol_module); -int bcol_ptpcoll_free_memory(void *ptr, - struct mca_bcol_base_module_t *bcol_module); -int bcol_ptpcoll_fanin( bcol_function_args_t *input_args, - struct mca_bcol_base_module_t *module); -int bcol_ptpcoll_fanout( bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - - -/* allgather routine */ -int bcol_ptpcoll_k_nomial_allgather_init(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - -/* allgather progress */ -int bcol_ptpcoll_k_nomial_allgather_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); -/* allgather register */ -int bcol_ptpcoll_allgather_init(mca_bcol_base_module_t *super); - -static inline __opal_attribute_always_inline__ - int mca_bcol_ptpcoll_test_for_match(ompi_request_t **request , int *rc) -{ - int matched = 0; - int i; - mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component; - *rc = OMPI_SUCCESS; - - for (i = 0; i < cm->num_to_probe && - 0 == 
matched && OMPI_SUCCESS == *rc ; i++) { - *rc = ompi_request_test(request, &matched, MPI_STATUS_IGNORE); - } - - return matched; -} - -static inline __opal_attribute_always_inline__ - int mca_bcol_ptpcoll_test_all_for_match(int *n_requests, ompi_request_t **requests , int *rc) -{ - int matched = 0; - int i; - mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component; - *rc = OMPI_SUCCESS; - - assert(*n_requests >= 0); - - if (0 == *n_requests) { - return 1; - } - - for (i = 0; i < cm->num_to_probe && - 0 == matched && OMPI_SUCCESS == *rc; i++) { - *rc = ompi_request_test_all - (*n_requests, requests, &matched, MPI_STATUS_IGNORE); - } - - if (matched) { - *n_requests = 0; - } - - return matched; -} - -/* Some negative tags already used by OMPI, making sure that we take safe offset */ -#define PTPCOLL_TAG_OFFSET 100 -#define PTPCOLL_TAG_FACTOR 2 - -static inline int lognum(int n){ - int count = 1, lognum = 0; - - while (count < n) { - count = count << 1; - lognum++; - } - return lognum; -} - -END_C_DECLS - -#endif /* MCA_BCOL_PTPCOLL_EXPORT_H */ diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_allgather.c b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_allgather.c deleted file mode 100644 index eeed28e9fe..0000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_allgather.c +++ /dev/null @@ -1,605 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/include/ompi/constants.h" -#include "ompi/mca/bcol/bcol.h" -#include "bcol_ptpcoll_allreduce.h" -/* - * Recursive K-ing allgather - */ - -/* - * - * Recurssive k-ing algorithm - * Example k=3 n=9 - * - * - * Number of Exchange steps = log (basek) n - * Number of steps in exchange step = k (radix) - * - */ - -int bcol_ptpcoll_k_nomial_allgather_init(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - /* local variables */ - - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *) const_args->bcol_module; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - netpatterns_k_exchange_node_t *exchange_node = &ptpcoll_module->knomial_allgather_tree; - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - int group_size = ptpcoll_module->group_size; - int *list_connected = ptpcoll_module->super.list_n_connected; /* critical for hierarchical colls */ - - int tag; - int i, j; - int knt; - int comm_src, comm_dst, src, dst; - int recv_offset, recv_len; - int send_offset, send_len; - - uint32_t buffer_index = input_args->buffer_index; - int pow_k, tree_order; - int rc = OMPI_SUCCESS; - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int completed = 0; /* initialized */ - void *data_buffer = (void*)( - (unsigned char *) input_args->sbuf + - (size_t) input_args->sbuf_offset); - int pack_len = input_args->count * input_args->dtype->super.size; - -#if 0 - fprintf(stderr,"entering p2p allgather pack_len %d. 
exchange node: %p\n",pack_len, exchange_node); -#endif - /* initialize the iteration counter */ - int *iteration = &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration; - *iteration = 0; - - /* reset active request counter */ - *active_requests = 0; - - /* keep tag within the limit supported by the pml */ - tag = (PTPCOLL_TAG_OFFSET + input_args->sequence_num * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - /* mark this as a collective tag, to avoid conflict with user-level flags */ - tag = -tag; - - /* k-nomial parameters */ - tree_order = exchange_node->tree_order; - pow_k = exchange_node->log_tree_order; - - - /* let's begin the collective, starting with extra ranks and their - * respective proxies - */ - if( EXTRA_NODE == exchange_node->node_type ) { - - /* then I will send to my proxy rank*/ - dst = exchange_node->rank_extra_sources_array[0]; - /* find rank in the communicator */ - comm_dst = group_list[dst]; - /* now I need to calculate my own offset */ - knt = 0; - for (i = 0 ; i < my_group_index; i++){ - knt += list_connected[i]; - } - - /* send the data to my proxy */ - rc = MCA_PML_CALL(isend((void *) ( (unsigned char *) data_buffer + - knt*pack_len), - pack_len * list_connected[my_group_index], - MPI_BYTE, - comm_dst, tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10,("Failed to isend data")); - return OMPI_ERROR; - } - ++(*active_requests); - - /* now I go ahead and post the receive from my proxy */ - comm_src = comm_dst; - knt = 0; - for( i =0; i < group_size; i++){ - knt += list_connected[i]; - } - rc = MCA_PML_CALL(irecv(data_buffer, - knt * pack_len, - MPI_BYTE, - comm_src, - tag , comm, &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to post ireceive ")); - return OMPI_ERROR; - } - - ++(*active_requests); - /* poll for completion */ - /* this polls internally */ - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if(completed){ - /* go to buffer release */ - goto FINISHED; - }else{ - /* save state and hop out - * nothing to save here - */ - return ((OMPI_SUCCESS != rc) ? OMPI_ERROR : BCOL_FN_STARTED); - } - }else if ( 0 < exchange_node->n_extra_sources ) { - - /* I am a proxy for someone */ - src = exchange_node->rank_extra_sources_array[0]; - /* find the rank in the communicator */ - comm_src = group_list[src]; - knt = 0; - for(i = 0; i < src; i++){ - knt += list_connected[i]; - } - /* post the receive */ - rc = MCA_PML_CALL(irecv((void *) ( (unsigned char *) data_buffer - + knt*pack_len), - pack_len * list_connected[src], - MPI_BYTE, - comm_src, - tag , comm, &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to post ireceive ")); - return OMPI_ERROR; - } - - ++(*active_requests); - /* poll for completion */ - /* this routine polls internally */ - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if(!completed){ - /* save state and hop out - * We really do need to block here so set - * the iteration to -1 indicating we need to - * finish this part first - */ - *iteration = -1; - return ((OMPI_SUCCESS != rc )? 
OMPI_ERROR : BCOL_FN_STARTED); - } - - } - - /* we start the recursive k - ing phase */ - /* fprintf(stderr,"tree order %d pow_k %d \n",tree_order,pow_k);*/ - for( i = 0; i < pow_k; i++) { - for(j = 0; j < (tree_order - 1); j++) { - - /* send phase */ - dst = exchange_node->rank_exchanges[i][j]; - if( dst < 0 ){ - continue; - } - comm_dst = group_list[dst]; - send_offset = exchange_node->payload_info[i][j].s_offset * pack_len; - send_len = exchange_node->payload_info[i][j].s_len * pack_len; - /* debug print */ - /* fprintf(stderr,"sending %d bytes to rank %d at offset %d\n",send_len, */ - /* comm_dst,send_offset); */ - rc = MCA_PML_CALL(isend((void*)((unsigned char *) data_buffer + - send_offset), - send_len, - MPI_BYTE, - comm_dst, tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10,("Failed to isend data")); - return OMPI_ERROR; - } - ++(*active_requests); - - /* sends are posted */ - } - - /* Now post the recv's */ - for( j = 0; j < (tree_order - 1); j++ ) { - - /* recv phase */ - src = exchange_node->rank_exchanges[i][j]; - if( src < 0 ) { - continue; - } - comm_src = group_list[src]; - recv_offset = exchange_node->payload_info[i][j].r_offset * pack_len; - recv_len = exchange_node->payload_info[i][j].r_len * pack_len; - /* debug print */ - /* fprintf(stderr,"recving %d bytes to rank %d at offset %d\n",recv_len, */ - /* comm_src,recv_offset); */ - /* post the receive */ - rc = MCA_PML_CALL(irecv((void *) ((unsigned char *) data_buffer + - recv_offset), - recv_len, - MPI_BYTE, - comm_src, - tag, comm, &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to post ireceive ")); - return OMPI_ERROR; - } - - ++(*active_requests); - } - /* finished all send/recv's now poll for completion before - * continuing to next iteration - */ - completed = 0; - /* polling internally on 2*(k - 1) requests */ - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - - if(!completed){ - /* save state and hop out - * only the iteration needs to be tracked - */ - *iteration = i; /* need to pick up here */ - - return ((OMPI_SUCCESS != rc) ? OMPI_ERROR : BCOL_FN_STARTED); - } - } - - /* finish off the last piece, send the data back to the extra */ - if( 0 < exchange_node->n_extra_sources ) { - dst = exchange_node->rank_extra_sources_array[0]; - comm_dst = group_list[dst]; - knt = 0; - for( i = 0; i < group_size; i++){ - knt += list_connected[i]; - } - /* debug print */ - /* - fprintf(stderr,"sending %d bytes to extra %d \n",pack_len*knt,comm_dst); - */ - rc = MCA_PML_CALL(isend(data_buffer, - pack_len * knt, - MPI_BYTE, - comm_dst, tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10,("Failed to isend data")); - return OMPI_ERROR; - } - ++(*active_requests); - - /* probe for send completion */ - completed = 0; - /* polling internally */ - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if(!completed){ - /* save state and hop out - * We really do need to block here so set - * the iteration to pow_k +1 indicating we need to - * finish progressing the last part - */ - *iteration = pow_k + 1; - - return (OMPI_SUCCESS != rc ? 
OMPI_ERROR : BCOL_FN_STARTED); - } - } - -FINISHED: - /* recycle buffer if need be */ - return BCOL_FN_COMPLETE; -} - -/* allgather progress function */ - -int bcol_ptpcoll_k_nomial_allgather_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - - - /* local variables */ - - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *) const_args->bcol_module; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - netpatterns_k_exchange_node_t *exchange_node = &ptpcoll_module->knomial_allgather_tree; - int group_size = ptpcoll_module->group_size; - int *list_connected = ptpcoll_module->super.list_n_connected; /* critical for hierarchical colls */ - - - int tag; - int i, j; - int knt; - int comm_src, comm_dst, src, dst; - int recv_offset, recv_len; - int send_offset, send_len; - uint32_t buffer_index = input_args->buffer_index; - - int pow_k, tree_order; - int rc = OMPI_SUCCESS; - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int completed = 0; /* initialized */ - void *data_buffer = (void*)( - (unsigned char *) input_args->sbuf + - (size_t) input_args->sbuf_offset); - int pack_len = input_args->count * input_args->dtype->super.size; - /* initialize the counter */ - int *iteration = &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration; - - -#if 0 - fprintf(stderr,"%d: entering p2p allgather progress AR: %d iter: %d\n",my_group_index,*active_requests, - *iteration); -#endif - /* keep tag within the limit supported by the pml */ - tag = (PTPCOLL_TAG_OFFSET + input_args->sequence_num * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - /* mark this as a collective tag, to avoid conflict with user-level flags */ - tag = -tag; - - /* k-nomial tree parameters */ - tree_order = exchange_node->tree_order; - pow_k = exchange_node->log_tree_order; - - /* let's begin the collective, starting with extra ranks and their - * respective proxies - */ - if( EXTRA_NODE == exchange_node->node_type ) { - - /* debug print */ - /*fprintf(stderr,"666 \n");*/ - /* simply poll for completion */ - completed = 0; - /* polling internally */ - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if(completed){ - /* go to buffer release */ - goto FINISHED; - }else{ - /* save state and hop out - * nothing to save here - */ - return ((OMPI_SUCCESS != rc) ? OMPI_ERROR : BCOL_FN_STARTED); - } - }else if ( 0 < exchange_node->n_extra_sources && (-1 == *iteration)) { - - /* I am a proxy for someone */ - /* Simply poll for completion */ - completed = 0; - /* polling internally */ - assert( 1 == *active_requests); - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if(!completed){ - /* save state and hop out - * We really do need to block here so set - * the iteration to -1 indicating we need to - * finish this part first - */ - (*iteration) = -1; - return ((OMPI_SUCCESS != rc) ? 
OMPI_ERROR : BCOL_FN_STARTED); - } - /* I may now proceed to the recursive k - ing phase */ - *iteration = 0; - } - - - /* the ordering here between the extra rank and progress active requests - * is critical - */ - /* extra rank */ - if( (pow_k + 1) == *iteration ){ - /* finish off the last one */ - goto PROGRESS_EXTRA; - } - - /* active requests must be completed before continuing on to - * recursive k -ing step - * CAREFUL HERE, IT THIS REALLY WHAT YOU WANT?? - */ - if( 0 < (*active_requests) ) { - /* then we have something to progress from last step */ - /* debug print */ - /* - fprintf(stderr,"%d: entering progress AR: %d iter: %d\n",my_group_index,*active_requests, - *iteration); - */ - completed = 0; - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if(!completed){ - /* save state and hop out - * state hasn't changed - */ - - return ((MPI_SUCCESS != rc) ? OMPI_ERROR : BCOL_FN_STARTED); - } - ++(*iteration); - } - - - - /* we start the recursive k - ing phase */ - for( i = *iteration; i < pow_k; i++) { - /* nothing changes here */ - for(j = 0; j < (tree_order - 1); j++) { - - /* send phase */ - dst = exchange_node->rank_exchanges[i][j]; - if( dst < 0 ){ - continue; - } - comm_dst = group_list[dst]; - send_offset = exchange_node->payload_info[i][j].s_offset * pack_len; - send_len = exchange_node->payload_info[i][j].s_len * pack_len; - rc = MCA_PML_CALL(isend((void*)((unsigned char *) data_buffer + - send_offset), - send_len, - MPI_BYTE, - comm_dst, tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10,("Failed to isend data")); - return OMPI_ERROR; - } - ++(*active_requests); - - /* sends are posted */ - } - - /* Now post the recv's */ - for( j = 0; j < (tree_order - 1); j++ ) { - - /* recv phase */ - src = exchange_node->rank_exchanges[i][j]; - if( src < 0 ) { - continue; - } - comm_src = group_list[src]; - recv_offset = exchange_node->payload_info[i][j].r_offset * pack_len; - recv_len = exchange_node->payload_info[i][j].r_len * pack_len; - /* post the receive */ - rc = MCA_PML_CALL(irecv((void *) ((unsigned char *) data_buffer + - recv_offset), - recv_len, - MPI_BYTE, - comm_src, - tag, comm, &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to post ireceive ")); - return OMPI_ERROR; - } - - ++(*active_requests); - } - /* finished all send/recv's now poll for completion before - * continuing to next iteration - */ - completed = 0; - /* make this non-blocking */ - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if(!completed){ - /* save state and hop out - * We really do need to block here so set - * the iteration to -1 indicating we need to - * finish this part first - */ - *iteration = i; /* need to pick up here */ - - return ((OMPI_SUCCESS != rc) ? 
OMPI_ERROR : BCOL_FN_STARTED); - } - } - - /* finish off the last piece, send the data back to the extra */ - if( 0 < exchange_node->n_extra_sources ) { - dst = exchange_node->rank_extra_sources_array[0]; - comm_dst = group_list[dst]; - knt = 0; - for( i = 0; i < group_size; i++){ - knt += list_connected[i]; - } - rc = MCA_PML_CALL(isend(data_buffer, - pack_len * knt, - MPI_BYTE, - comm_dst, tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10,("Failed to isend data")); - return OMPI_ERROR; - } - ++(*active_requests); - - /* probe for send completion */ - completed = 0; - /* make this non-blocking */ - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if(!completed){ - /* save state and hop out - * We really do need to block here so set - * the iteration to pow_k +1 indicating we need to - * finish progressing the last part - */ - *iteration = pow_k + 1; - - return ((OMPI_SUCCESS != rc) ? OMPI_ERROR : BCOL_FN_STARTED); - } - } - /* folks need to skip this unless they really are the proxy - * reentering with the intent of progressing the final send - */ - goto FINISHED; - -PROGRESS_EXTRA: - - /* probe for send completion */ - completed = 0; - /* make this non-blocking */ - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if(!completed){ - /* save state and hop out - * We really do need to block here so set - * the iteration to pow_k +1 indicating we need to - * finish progressing the last part - */ - - return ((OMPI_SUCCESS != rc) ? OMPI_ERROR : BCOL_FN_STARTED); - } - -FINISHED: - /* recycle buffer if need be */ - return BCOL_FN_COMPLETE; -} - -/* - * Register allreduce functions to the BCOL function table, - * so they can be selected - */ -int bcol_ptpcoll_allgather_init(mca_bcol_base_module_t *super) -{ - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - comm_attribs.bcoll_type = BCOL_ALLGATHER; - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - comm_attribs.data_src = DATA_SRC_KNOWN; - - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_k_nomial_allgather_init, - bcol_ptpcoll_k_nomial_allgather_progress); - - - comm_attribs.data_src = DATA_SRC_KNOWN; - inv_attribs.bcol_msg_min = 10000000; - inv_attribs.bcol_msg_max = 10485760; /* range 4 */ - - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_k_nomial_allgather_init, - bcol_ptpcoll_k_nomial_allgather_progress); - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_allreduce.c b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_allreduce.c deleted file mode 100644 index 14a4f76958..0000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_allreduce.c +++ /dev/null @@ -1,1032 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/include/ompi/constants.h" -#include "ompi/mca/bcol/bcol.h" -#include "bcol_ptpcoll_allreduce.h" - -/* - * Recursive K-ing allreduce - */ -static inline int bcol_ptpcoll_allreduce_narray_schedule_extra_node_exchange (mca_bcol_ptpcoll_module_t *ptpcoll_module, netpatterns_k_exchange_node_t *k_node, - void *data_buffer, size_t data_size, ompi_request_t **requests, int *active_requests, - int tag) -{ - ompi_communicator_t *comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - int peer_comm_rank, k, offset, rc; - - if (EXCHANGE_NODE == k_node->node_type) { - /* the send data resides in the first part of the buffer */ - for (k = 0, offset = data_size ; k < k_node->n_extra_sources ; ++k, offset += data_size) { - peer_comm_rank = ptpcoll_module->super.sbgp_partner_module->group_list[k_node->rank_extra_sources_array[k]]; - - PTPCOLL_VERBOSE(10, ("Recv data from %d, addr %p len %d tag %d", - peer_comm_rank, data_buffer, data_size, tag)); - rc = MCA_PML_CALL(irecv((void *)((unsigned char *)data_buffer + offset), - data_size, MPI_BYTE, peer_comm_rank, tag, comm, - &requests[*active_requests])); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive data")); - return OMPI_ERROR; - } - - ++(*active_requests); - } - } else { - peer_comm_rank = ptpcoll_module->super.sbgp_partner_module->group_list[k_node->rank_extra_sources_array[0]]; - - PTPCOLL_VERBOSE(10, ("Send data to %d, addr %p len %d tag %d", - peer_comm_rank, data_buffer, data_size, tag)); - - rc = MCA_PML_CALL(isend(data_buffer, data_size, MPI_BYTE, peer_comm_rank, - tag, MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - - ++(*active_requests); - } - - return OMPI_SUCCESS; -} - -static inline void bcol_ptpcoll_allreduce_narray_reduce (void *data_buffer, struct ompi_datatype_t *data_type, int count, struct ompi_op_t *op, int sources) -{ - size_t data_size = mca_bcol_base_get_buff_length(data_type, count); - - for (int k = 0, offset = data_size ; k < sources ; ++k, offset += data_size) { - ompi_op_reduce(op, (char *) data_buffer + offset, data_buffer, count, data_type); - } -} - -static int bcol_ptpcoll_allreduce_narraying_progress (bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *) const_args->bcol_module; - void *data_buffer = (void *) ( (unsigned char *) input_args->sbuf + - (size_t) input_args->sbuf_offset); - struct ompi_datatype_t *data_type = input_args->dtype; - uint32_t buffer_index = input_args->buffer_index; - struct ompi_op_t *op = input_args->op; - int count = input_args->count; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int tag = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag - 1; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - int k, rc, peer, group_peer; - int offset = 0; - ompi_communicator_t *comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - - netpatterns_k_exchange_node_t *k_node = &ptpcoll_module->knomial_exchange_tree; - int k_radix = k_node->tree_order; - - size_t data_size = mca_bcol_base_get_buff_length(data_type, count); - int *iteration = - 
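The bcol_ptpcoll_allreduce_narray_reduce helper above folds the received contributions into the first segment of the buffer: the local data sits at offset 0 and each of the k-1 peer blocks at the following data_size strides. A plain-C analogue, assuming int data and a sum operation purely for illustration (the real call dispatches through ompi_op_reduce on arbitrary op/dtype):

    #include <stddef.h>

    static void narray_reduce_int_sum(int *buffer, int count, int sources)
    {
        for (int k = 0; k < sources; ++k) {
            const int *contrib = buffer + (size_t)(k + 1) * count;
            for (int i = 0; i < count; ++i) {
                buffer[i] += contrib[i];  /* fold segment k+1 into segment 0 */
            }
        }
    }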
&(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration); - - /* if we are just staring the collective and there are extra sources then schedule the - * extra node exchange. otherwise check if the exchange is complete. */ - if (-1 == *iteration) { - if (0 < k_node->n_extra_sources) { - if (!(*active_requests)) { - rc = bcol_ptpcoll_allreduce_narray_schedule_extra_node_exchange (ptpcoll_module, k_node, data_buffer, data_size, - requests, active_requests, tag); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - return rc; - } - } - - /* check for extra node exchange completion */ - if (!mca_bcol_ptpcoll_test_all_for_match (active_requests, requests, &rc)) { - return (OMPI_SUCCESS == rc) ? BCOL_FN_STARTED : rc; - } - - if (EXCHANGE_NODE == k_node->node_type) { - bcol_ptpcoll_allreduce_narray_reduce (data_buffer, data_type, count, op, k_node->n_extra_sources); - } - } - - /* start recursive k-ing */ - *iteration = 0; - } - - if (*iteration < k_node->n_exchanges) { - if (*active_requests) { - if (!mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc)) { - return (OMPI_SUCCESS == rc) ? BCOL_FN_STARTED : rc; - } - - ++(*iteration); - bcol_ptpcoll_allreduce_narray_reduce (data_buffer, data_type, count, op, k_radix - 1); - } - } - - for ( ; *iteration < k_node->n_exchanges ; ++(*iteration)) { - for (k = 0; k < k_radix - 1; k++) { - group_peer = k_node->rank_exchanges[*iteration][k]; - - peer = group_list[group_peer]; - - PTPCOLL_VERBOSE(10, ("Send data to %d, addr %p len %d tag %d", - peer, data_buffer, data_size, tag)); - rc = MCA_PML_CALL(isend(data_buffer, data_size, MPI_BYTE, peer, tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - - ++(*active_requests); - } - - for (k = 0, offset = data_size ; k < k_radix - 1 ; ++k, offset += data_size) { - group_peer = k_node->rank_exchanges[*iteration][k]; - peer = group_list[group_peer]; - - PTPCOLL_VERBOSE(10, ("Recv data from %d, addr %p len %d tag %d", - peer, data_buffer, data_size, tag)); - rc = MCA_PML_CALL(irecv((void *)((unsigned char *)data_buffer + offset ), - data_size, MPI_BYTE, peer, tag, comm, - &requests[*active_requests])); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive data")); - return OMPI_ERROR; - } - - ++(*active_requests); - } - - if (!mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc)) { - return (OMPI_SUCCESS == rc) ? 
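The control flow here is the re-entrant pattern all of these progress functions share: the per-buffer descriptor persists iteration and active_requests across calls, so the function can return BCOL_FN_STARTED whenever requests are still pending and resume at exactly the same round on the next progress call. A stripped-down sketch with hypothetical names:

    typedef struct {
        int iteration;        /* -1 = extra-node phase, 0..n-1 = k-ing rounds */
        int active_requests;  /* outstanding isend/irecv count for this buffer */
    } progress_state_t;

    enum { FN_STARTED, FN_COMPLETE };

    /* one progress call: finish the pending round if any, then post the next */
    static int progress_once(progress_state_t *s, int n_exchanges,
                             int (*test_all)(int *, void *), void *requests)
    {
        if (s->active_requests > 0 && !test_all(&s->active_requests, requests)) {
            return FN_STARTED;           /* save state and hop out */
        }
        if (s->iteration >= n_exchanges) {
            return FN_COMPLETE;
        }
        /* ... post the isend/irecv pairs for round s->iteration here ... */
        ++s->iteration;
        return FN_STARTED;
    }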
BCOL_FN_STARTED : rc; - } - - bcol_ptpcoll_allreduce_narray_reduce (data_buffer, data_type, count, op, k_radix - 1); - } - - /* ensure extra nodes get the result */ - if (0 < k_node->n_extra_sources) { - if (!(*active_requests)) { - int peer_comm_rank; - - if (EXTRA_NODE == k_node->node_type) { - peer_comm_rank = ptpcoll_module->super.sbgp_partner_module->group_list[k_node->rank_extra_sources_array[0]]; - - PTPCOLL_VERBOSE(10, ("EXTRA_NODE: Recv data from %d, addr %p len %d tag %d", - peer_comm_rank, data_buffer, data_size, tag)); - rc = MCA_PML_CALL(irecv(data_buffer, data_size, MPI_BYTE, peer_comm_rank, - tag, comm, &requests[*active_requests])); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive data")); - return OMPI_ERROR; - } - - ++(*active_requests); - } else { - for (k = 0; k < k_node->n_extra_sources; k++) { - peer_comm_rank = ptpcoll_module->super.sbgp_partner_module->group_list[k_node->rank_extra_sources_array[k]]; - - PTPCOLL_VERBOSE(10, ("EXCHANGE_NODE: Send data to %d, addr %p len %d tag %d", - peer_comm_rank, data_buffer, data_size, tag)); - rc = MCA_PML_CALL(isend(data_buffer, data_size, MPI_BYTE, peer_comm_rank, - tag, MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - - ++(*active_requests); - } - } - } - - if (!mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc)) { - return (OMPI_SUCCESS == rc) ? BCOL_FN_STARTED : rc; - } - } - - return BCOL_FN_COMPLETE; -} - -int bcol_ptpcoll_allreduce_narraying_init(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args){ - - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - uint64_t sequence_number = input_args->sequence_num; - uint32_t buffer_index = input_args->buffer_index; - int count = input_args->count; - struct ompi_datatype_t *dtype = input_args->dtype; - size_t buffer_size; - int tag; - - tag = (PTPCOLL_TAG_OFFSET + sequence_number * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag = tag = -tag; - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask = 1; - - /* start with extra node exchange if needed */ - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration = -1; - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests = 0; - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].status = PTPCOLL_NOT_STARTED; - - /* - * ML bufer is segmented into k segments and each of the k segment is used - * for reductions - */ - /* This has to be based on ml buffer size. Need to take into account the space used - * by the headers of other bcol modules. */ - buffer_size = ptpcoll_module->ml_mem.size_buffer - BCOL_HEADER_MAX; - assert(buffer_size >= count * dtype->super.size * - ptpcoll_module->k_nomial_radix); - (void)buffer_size; // silence compiler warning - (void)dtype; - (void)count; - - return bcol_ptpcoll_allreduce_narraying_progress (input_args, const_args); -} - -static inline int compute_seg_index(int peer, int kpow_num, int tree_order) { - - int peer_base, peer_position, peer_base_rank, peer_index; - - peer_base = peer / (kpow_num * tree_order); - peer_base_rank = peer_base * kpow_num * tree_order ; - peer_position = peer_base_rank == 0 ? 
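The assertion in bcol_ptpcoll_allreduce_narraying_init above encodes the algorithm's memory requirement: the ML buffer, minus the BCOL_HEADER_MAX reservation, must hold the local segment plus the k-1 peer segments that land beside it. As a standalone check (hypothetical helper restating the assert):

    #include <stddef.h>

    static int narray_buffer_fits(size_t ml_buffer_size, size_t header_max,
                                  size_t dtype_size, int count, int k_radix)
    {
        if (ml_buffer_size < header_max) {
            return 0;
        }
        return (ml_buffer_size - header_max) >=
               (size_t)count * dtype_size * (size_t)k_radix;
    }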
peer : peer % (peer_base_rank); - peer_index = peer_position / kpow_num ; - - return peer_index; -} - -int compute_knomial_allgather_offsets(int group_index, int count, struct - ompi_datatype_t *dtype,int k_radix,int n_exchanges, - int **offsets){ - - int modulo_group_size; - size_t seg_count, seg_size, seg_index, seg_offset; - size_t block_offset, block_count; - int exchange_step; - ptrdiff_t lb, extent; - - if (0 >= n_exchanges) { - PTPCOLL_VERBOSE(10,("Nothing to initialize ")); - return 0; - } - modulo_group_size = 1; - seg_count = count / k_radix; - ompi_datatype_get_extent(dtype, &lb, &extent); - seg_size = seg_count * extent; - - seg_index = group_index % k_radix; - seg_offset = seg_index * seg_size; - - offsets[0][BLOCK_OFFSET] = block_offset = 0; - offsets[0][BLOCK_COUNT] = block_count = count; - offsets[0][LOCAL_REDUCE_SEG_OFFSET] = seg_offset; - offsets[0][SEG_SIZE] = seg_size; - - - for(exchange_step = 1; exchange_step < n_exchanges; exchange_step++) { - - /* Previous step's segment is this exchange step's block */ - block_count = seg_count; - block_offset = seg_offset; - - /* Divide the segment into k parts */ - seg_count = seg_count / k_radix; - seg_size = seg_count * extent; - - /* Among different segments in block, which segment should I reduce ? */ - /* For allgather phase, I will not send out this segment to peers */ - modulo_group_size *= k_radix; - seg_index = compute_seg_index(group_index, modulo_group_size, k_radix); - seg_offset = seg_index * seg_size; - - - offsets[exchange_step][BLOCK_OFFSET] = block_offset; - offsets[exchange_step][LOCAL_REDUCE_SEG_OFFSET] = seg_offset; - offsets[exchange_step][BLOCK_COUNT] = block_count; - offsets[exchange_step][SEG_SIZE] = seg_size; - - /* Change to absolute offset */ - seg_offset = block_offset + seg_offset; - - } - - return 0; -} - -static inline int compute_send_segment_size(int block_offset, - int send_offset, - int segment_size, - int padded_offset) { - int send_size = -1; - /* segment to be sent starts here */ - int segment_offset = block_offset + send_offset ; - send_size = (segment_offset + segment_size) >= padded_offset ? - segment_size - (segment_offset + segment_size - padded_offset) : segment_size; - return send_size; -} - -static inline int compute_recv_segment_size(int block_offset, - int recv_offset, - int segment_size, - int padded_offset) { - int recv_size = -1; - /* segment to be sent starts here */ - int segment_offset = block_offset + recv_offset ; - recv_size = (segment_offset + segment_size) >= padded_offset ? 
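compute_knomial_allgather_offsets above picks, at every exchange step, which sub-segment of the current block this rank keeps reducing: step 0 uses group_index % k_radix directly, and later steps use compute_seg_index. For example, with group_index = 5, k_radix = 3 and n_exchanges = 2, the rank owns segment 5 % 3 = 2 in step 0 and segment compute_seg_index(5, 3, 3) = 1 in step 1. A standalone restatement of that index rule for checking:

    static int seg_index(int peer, int kpow_num, int tree_order)
    {
        /* strip the (kpow_num * tree_order)-sized base block this peer
         * belongs to, then see which of the k sub-segments the remainder
         * falls into */
        int base_rank = (peer / (kpow_num * tree_order)) * kpow_num * tree_order;
        int position  = base_rank ? peer % base_rank : peer;
        return position / kpow_num;
    }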
- segment_size - (segment_offset + segment_size - padded_offset) : segment_size; - - return recv_size; -} - -/* - * - * K-nomial Reduce Scatter - * Example k=3 n=9 - * - * | ABCDEFGH |0| - * - * Number of Exchange steps = log (basek) n - * Number of steps in exchange step = k (radix) - * - * block_size = Size of data that is reduce in exchange step - * segment_size = Size of data that is send or received by rank in radix step - * - * block_size = segment_size * k - * - * my_block_start_addr = Address of the segment in the block where I reference my - * offsets - * - * This is version 1 : Experimenting with decoupling offset calcuations - */ -int bcol_ptpcoll_allreduce_recursivek_scatter_reduce(mca_bcol_ptpcoll_module_t *ptpcoll_module, - const int buffer_index, void *sbuf, - void *rbuf, - struct ompi_op_t *op, - const int count, struct ompi_datatype_t *dtype, - const int relative_group_index, - const int padded_start_byte){ - int blocks_in_step = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask; - int tag = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag - 1; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - netpatterns_k_exchange_node_t *k_node = &ptpcoll_module->knomial_exchange_tree; - mca_bcol_ptpcoll_component_t *cm = - &mca_bcol_ptpcoll_component; - void *my_block_start_addr = NULL, *my_block_addr = NULL; - int i, k, group_peer, peer ; - int k_radix = k_node->tree_order; - int rc = OMPI_SUCCESS; - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int completed; - void *my_recv_start_addr, *my_recv_addr; - size_t block_offset, reduce_seg_offset, send_offset, recv_offset; - int seg_size, block_size; - int block_count, seg_count; - ptrdiff_t lb, extent; - ompi_datatype_get_extent(dtype, &lb, &extent); - - my_recv_start_addr = rbuf; - my_block_start_addr = sbuf; - block_count = count; - block_size = count * extent; - - - for (i = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration; - i < k_node->n_exchanges; i++, blocks_in_step *= cm->narray_knomial_radix) { - - block_offset = ptpcoll_module->allgather_offsets[i][BLOCK_OFFSET]; - reduce_seg_offset = ptpcoll_module->allgather_offsets[i][LOCAL_REDUCE_SEG_OFFSET]; - block_count = ptpcoll_module->allgather_offsets[i][BLOCK_COUNT]; - seg_size = ptpcoll_module->allgather_offsets[i][SEG_SIZE]; - block_size = block_count * extent; - - PTPCOLL_VERBOSE(10,("Block offset %d, reduce_seg_offset %d, block_count %d seg_size %d", - block_offset, reduce_seg_offset, block_count, seg_size)); - - seg_count = block_count / k_radix; - my_block_addr = (void*)((char*)my_block_start_addr + block_offset); - my_recv_addr = (void*)((char*)my_recv_start_addr + block_offset); - - for (k = 0; k < k_radix - 1; k++) { - size_t soffset; - int snd_size = 0; - - group_peer = k_node->rank_exchanges[i][k]; - peer = group_list[group_peer]; - - send_offset = reduce_seg_offset + (seg_size * (k + 1)); - - if ((int)send_offset + seg_size > block_size) { - send_offset = send_offset % block_size; - } - - PTPCOLL_VERBOSE(10, ("Send data to %d,send offset %d len %d", - peer, send_offset, seg_size)); - - soffset = send_offset; - snd_size = - compute_send_segment_size((int)block_offset,(int)soffset,(int)seg_size,padded_start_byte); - - if (snd_size > 0) { - rc = MCA_PML_CALL(isend((void *)((unsigned char 
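The compute_send_segment_size / compute_recv_segment_size helpers above exist because the element count is padded up to a multiple of k_radix^n_exchanges: segments that fall partly or wholly past padded_start_byte are trimmed, and the callers skip any segment whose clipped size is not positive. A worked illustration of the same rule: with 96 real bytes (padded_offset = 96), a 32-byte segment starting at byte 80 is clipped to 16, and one starting at or beyond byte 96 is skipped entirely.

    static int clip_segment(int segment_offset, int segment_size, int padded_offset)
    {
        if (segment_offset + segment_size <= padded_offset) {
            return segment_size;               /* fully inside the real data */
        }
        return padded_offset - segment_offset; /* <= 0 means: skip this segment */
    }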
*)my_block_addr - + soffset), - snd_size, MPI_BYTE, - peer, tag, MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send the segment to %d", peer)); - return OMPI_ERROR; - } - ++(*active_requests); - } - - } - - /* - * Receive the segments to tmp addr and then do a reduction - */ - for (k = 0; k < k_radix - 1; k++) { - int recv_size=0; - - group_peer = k_node->rank_exchanges[i][k]; - peer = group_list[group_peer]; - - recv_offset = reduce_seg_offset + (seg_size * (k+1)); - - if ((int)recv_offset + seg_size > block_size) { - recv_offset = recv_offset % block_size; - } - - PTPCOLL_VERBOSE(10, ("Receive data to receive buffer at offset %d\n", - recv_offset)); - recv_size = compute_recv_segment_size((int)block_offset, - (int)reduce_seg_offset, (int)seg_size, - padded_start_byte); - - if (recv_size > 0 ) { - rc = MCA_PML_CALL(irecv((void *)((unsigned char *) - my_recv_addr + recv_offset), - recv_size, MPI_BYTE, - peer, tag, comm, &requests[*active_requests])); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive the segment from %d", peer)); - return OMPI_ERROR; - } - ++(*active_requests); - } - - } - - completed = 0; - while(!completed){ - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - } - - /* Do a reduction on received buffers */ - { - void *src_data_buffer = NULL, *dst_data_buffer = NULL; - int reduce_data_count = 0; - - src_data_buffer = my_block_addr; - dst_data_buffer = my_recv_addr; - - for (k = 0; k < k_radix - 1; k++) { - recv_offset = reduce_seg_offset + (seg_size * (k+1)); - - if ((int)recv_offset + seg_size > block_size) { - recv_offset = recv_offset % block_size; - } - - reduce_data_count = (int)(block_offset + reduce_seg_offset) + seg_size >= padded_start_byte ? 
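The reduction block just below uses ompi_3buff_op_reduce rather than the in-place ompi_op_reduce: the three-buffer form reads two source buffers and writes a third, which lets the loop chain partial results (src_data_buffer becomes the previous iteration's destination) without clobbering the freshly received segment. A plain-C analogue, assuming int data and a sum operation for illustration:

    /* out[i] = a[i] + b[i]; the real call dispatches on op and dtype */
    static void reduce3_int_sum(const int *a, const int *b, int *out, int count)
    {
        for (int i = 0; i < count; ++i) {
            out[i] = a[i] + b[i];
        }
    }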
- (seg_size - (((int)(block_offset + reduce_seg_offset) + seg_size) - padded_start_byte))/(int)dtype->super.size - : (int)seg_count; - - if (reduce_data_count > 0) { - ompi_3buff_op_reduce(op, - (void*)((unsigned char*)my_recv_addr + recv_offset), - (void*)((unsigned char*)src_data_buffer + - reduce_seg_offset), - (void*)((unsigned char*)dst_data_buffer + - reduce_seg_offset), - reduce_data_count,dtype); - } - - src_data_buffer = dst_data_buffer; - - } - } - - /* After first iteration we have data (to work with) in recv buffer */ - my_block_start_addr = rbuf; - - } - - return rc; -} - - -int bcol_ptpcoll_allreduce_knomial_allgather(mca_bcol_ptpcoll_module_t *ptpcoll_module, - const int buffer_index, - void *sbuf,void *rbuf, int count, struct - ompi_datatype_t *dtype, - const int relative_group_index, - const int padded_start_byte){ - - size_t block_offset = 0, send_offset = 0, recv_offset = 0; - int seg_size=0, block_size=0; - int i,k,completed; - void *my_block_start_addr = rbuf, *my_block_addr; - size_t block_count = count; - netpatterns_k_exchange_node_t *k_node = &ptpcoll_module->knomial_exchange_tree; - int k_radix = k_node->tree_order; - int peer, group_peer; - int rc = OMPI_SUCCESS; - int tag = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag - 1; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - int exchange_step; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - ptrdiff_t lb, extent; - ompi_datatype_get_extent(dtype, &lb, &extent); - - - for (i = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration; - i < k_node->n_exchanges; i++) { - - exchange_step = k_node->n_exchanges - 1 - i; - - block_offset = ptpcoll_module->allgather_offsets[exchange_step][BLOCK_OFFSET]; - send_offset = ptpcoll_module->allgather_offsets[exchange_step][LOCAL_REDUCE_SEG_OFFSET]; - block_count = ptpcoll_module->allgather_offsets[exchange_step][BLOCK_COUNT]; - seg_size = ptpcoll_module->allgather_offsets[exchange_step][SEG_SIZE]; - block_size = block_count * extent; - - - PTPCOLL_VERBOSE(10, ("Send offset %d block_offset %d seg_size %\n", - send_offset, block_offset, seg_size)); - - my_block_addr = (void*)((unsigned char*)my_block_start_addr + block_offset); - - for (k = 0; k < k_radix - 1; k++) { - size_t soffset=0; int snd_size = 0; - group_peer = k_node->rank_exchanges[exchange_step][k]; - peer = group_list[group_peer]; - - soffset = send_offset; - snd_size = compute_send_segment_size((int)block_offset, - (int)soffset, - (int)seg_size, - padded_start_byte); - if (snd_size > 0) { - rc = MCA_PML_CALL(isend((void *)((unsigned char *)my_block_addr - + soffset), - snd_size, MPI_BYTE, - peer, tag, MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send the segment to %d", peer)); - return OMPI_ERROR; - } - - ++(*active_requests); - } - - PTPCOLL_VERBOSE(10, ("Send data to receive buffer at offset %d to %d\n", - send_offset, peer)); - } - - for (k = 0; k < k_radix - 1; k++) { - int recv_size=0; - - group_peer = k_node->rank_exchanges[exchange_step][k]; - peer = group_list[group_peer]; - - recv_offset = send_offset + (k + 1) * seg_size; - - if ((int)recv_offset + seg_size > block_size){ - recv_offset = recv_offset % block_size; - } - - PTPCOLL_VERBOSE(10, ("Receive data to 
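bcol_ptpcoll_allreduce_knomial_allgather above replays the reduce-scatter schedule in reverse: allgather round i reads the offsets saved for exchange_step = n_exchanges - 1 - i, and the block a rank owns grows by a factor of k_radix each round until it covers the whole padded vector. A small sketch of that growth, under the assumption that the padded count divides evenly (which compute_padding_count guarantees):

    /* what a rank contributes at a given allgather round:
     * seg -> k*seg -> k^2*seg -> ... -> final_count */
    static int allgather_block_count(int final_count, int k_radix,
                                     int n_exchanges, int round)
    {
        int block = final_count;
        /* shrink down to what the rank still owns entering this round */
        for (int i = 0; i < n_exchanges - round; ++i) {
            block /= k_radix;
        }
        return block;
    }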
receive buffer at offset %d from %d\n", - recv_offset, peer)); - - - recv_size = compute_recv_segment_size((int)block_offset, - (int)recv_offset, - (int)seg_size, - padded_start_byte); - if (recv_size > 0) { - rc = MCA_PML_CALL(irecv((void *)((unsigned char *) - my_block_addr + recv_offset), - recv_size, MPI_BYTE, - peer, tag, comm, &requests[*active_requests])); - - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive the segment from %d", peer)); - return OMPI_ERROR; - } - ++(*active_requests); - } - - } - - completed = 0; - while(!completed){ - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - } - - block_count = block_count * k_radix; - block_size = block_count * extent; - - } - - return rc; - -} - -static inline int compute_padding_count(int count, int k_radix, int n_exchanges){ - bool fpadding = false; - size_t dsize; - int i, pad_count=0, kpow; - - /* is padding required */ - dsize = count; - kpow = 1; - for ( i=0; i < n_exchanges; i++) { - if (dsize % k_radix) { - fpadding = true; - } - dsize /= k_radix; - kpow *= k_radix; - } - - if (fpadding) { - pad_count = count % kpow; - pad_count = kpow - pad_count; - } - - return pad_count; -} - - -int bcol_ptpcoll_allreduce_recursivek_scatter_reduce_allgather_init(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args){ - - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - struct ompi_op_t *op = input_args->op; - int tag; - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - uint64_t sequence_number = input_args->sequence_num; - uint32_t buffer_index = input_args->buffer_index; - void *src_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - - void *recv_buffer = (void *) ( - (unsigned char *)input_args->rbuf + - (size_t)input_args->rbuf_offset); - - int count = input_args->count; - struct ompi_datatype_t *dtype = input_args->dtype; - int *iteration = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration); - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int *status = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].status); - ptrdiff_t lb, extent; - - /* Get the knomial tree */ - netpatterns_k_exchange_node_t *k_node = &ptpcoll_module->knomial_exchange_tree; - int k_radix = k_node->tree_order; - int n_exchanges = k_node->n_exchanges; - int padded_start_byte; - int padding_count = compute_padding_count(count, k_radix, n_exchanges); - - ompi_datatype_get_extent(dtype, &lb, &extent); - padded_start_byte = count * extent; - - - /* Init for making the functions Re-entrant */ - tag = (PTPCOLL_TAG_OFFSET + sequence_number * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag = tag = -tag; - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask = 1; - *active_requests = 0; - *iteration = -1; - *status = PTPCOLL_NOT_STARTED; - *iteration = 0; - - compute_knomial_allgather_offsets(my_group_index,count + padding_count, dtype,k_radix,n_exchanges, - ptpcoll_module->allgather_offsets); - - /* Perform a recursive k'ing reduce scatter */ - bcol_ptpcoll_allreduce_recursivek_scatter_reduce(ptpcoll_module, buffer_index, - src_buffer, recv_buffer, op, count + padding_count, dtype, - my_group_index,padded_start_byte); - - - /* Perform a recursive k'ing allgather */ - bcol_ptpcoll_allreduce_knomial_allgather(ptpcoll_module, - buffer_index, - src_buffer, 
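compute_padding_count above pads the element count up to a multiple of k_radix^n_exchanges so that every exchange step can split its block into k equal segments. For example, count = 10 with k_radix = 3 and n_exchanges = 2 gives kpow = 9 and a pad of 8, i.e. 18 elements in total. A condensed equivalent (the divisibility check per round collapses to a single modulo against kpow):

    static int padding_count(int count, int k_radix, int n_exchanges)
    {
        int kpow = 1;
        for (int i = 0; i < n_exchanges; ++i) {
            kpow *= k_radix;              /* k_radix^n_exchanges */
        }
        return (count % kpow) ? kpow - count % kpow : 0;
    }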
recv_buffer, count + padding_count, dtype, - my_group_index, padded_start_byte); - - return BCOL_FN_COMPLETE; -} - -int bcol_ptpcoll_allreduce_recursivek_scatter_reduce_extra(mca_bcol_ptpcoll_module_t *ptpcoll_module, - int buffer_index, - void *sbuf, - void *rbuf, - struct ompi_op_t *op, - const int count, struct ompi_datatype_t *dtype){ - int tag = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag - 1; - netpatterns_k_exchange_node_t *k_node = &ptpcoll_module->knomial_exchange_tree; - int k, peer ; - int rc = OMPI_SUCCESS; - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - int block_count, block_size; - char *tmprecv_buffer = NULL, *data_src_buffer, *data_dst_buffer; - ptrdiff_t lb, extent; - ompi_datatype_get_extent(dtype, &lb, &extent); - - block_count = count; - block_size = count * extent; - - - if (0 < block_size) { - tmprecv_buffer = (void*)malloc(block_size); - } - - data_src_buffer = sbuf; - data_dst_buffer = rbuf; - - if (EXCHANGE_NODE == k_node->node_type) { - for (k = 0; k < k_node->n_extra_sources; k++){ - - peer = ptpcoll_module->super.sbgp_partner_module->group_list[ - k_node->rank_extra_sources_array[k]]; - - rc = MCA_PML_CALL(recv((void *)((unsigned char *)tmprecv_buffer), - block_size, MPI_BYTE, - peer, tag, comm, MPI_STATUS_IGNORE)); - - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive the segment from %d", peer)); - rc = OMPI_ERROR; - goto clean; - } - - ompi_3buff_op_reduce(op, (void*)((unsigned char*)data_src_buffer), - (void*)((unsigned char*)tmprecv_buffer), - (void*)((unsigned char*)data_dst_buffer), - block_count,dtype); - data_src_buffer = data_dst_buffer; - } - } else { - peer = ptpcoll_module->super.sbgp_partner_module->group_list[ - k_node->rank_extra_sources_array[0]]; - - rc = MCA_PML_CALL(send((void *)((unsigned char *)sbuf), - block_size, MPI_BYTE, - peer, tag, MCA_PML_BASE_SEND_STANDARD, comm)); - - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - rc = OMPI_ERROR; - goto clean; - } - } - -clean: - if (tmprecv_buffer) { - free(tmprecv_buffer); - } - return rc; -} - -int bcol_ptpcoll_allreduce_knomial_allgather_extra(mca_bcol_ptpcoll_module_t *ptpcoll_module, - int buffer_index, - void *sbuf, - void *rbuf, - const int count, struct ompi_datatype_t *dtype){ - int tag = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag - 1; - netpatterns_k_exchange_node_t *k_node = &ptpcoll_module->knomial_exchange_tree; - int k, peer ; - int rc = OMPI_SUCCESS; - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - int block_size, completed; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - ptrdiff_t lb, extent; - ompi_datatype_get_extent(dtype, &lb, &extent); - - - block_size = count * extent; - - if (EXTRA_NODE == k_node->node_type) { - peer = ptpcoll_module->super.sbgp_partner_module->group_list[ - k_node->rank_extra_sources_array[0]]; - - rc = MCA_PML_CALL(irecv((void *)((unsigned char *)rbuf), - block_size, MPI_BYTE, - peer, tag, comm, &requests[*active_requests])); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive data")); - return OMPI_ERROR; - } - - ++(*active_requests); - } else { - for (k = 0; k < k_node->n_extra_sources; k++) { - peer = ptpcoll_module->super.sbgp_partner_module->group_list[ - k_node->rank_extra_sources_array[k]]; - - rc = MCA_PML_CALL(isend((void *)((unsigned char 
*)rbuf), - block_size, MPI_BYTE, - peer, tag, MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - - ++(*active_requests); - } - - } - - completed = 0; - - while(!completed){ - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - } - - return rc; -} - -int bcol_ptpcoll_allreduce_recursivek_scatter_reduce_allgather_extra_init(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args){ - - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - struct ompi_op_t *op = input_args->op; - int tag; - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - uint64_t sequence_number = input_args->sequence_num; - uint32_t buffer_index = input_args->buffer_index; - void *src_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - - void *recv_buffer = (void *) ( - (unsigned char *)input_args->rbuf + - (size_t)input_args->rbuf_offset); - - int count = input_args->count; - struct ompi_datatype_t *dtype = input_args->dtype; - int *iteration = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration); - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int *status = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].status); - ptrdiff_t lb, extent; - /* Get the knomial tree */ - netpatterns_k_exchange_node_t *k_node = &ptpcoll_module->knomial_exchange_tree; - int k_radix = k_node->tree_order; - int n_exchanges = k_node->n_exchanges; - int padded_start_byte; - int padding_count = compute_padding_count(count, k_radix, n_exchanges); - void *tmpsrc_buffer = NULL; - - ompi_datatype_get_extent(dtype, &lb, &extent); - padded_start_byte = count * extent; - - /* Init for making the functions Re-entrant */ - tag = (PTPCOLL_TAG_OFFSET + sequence_number * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag = tag = -tag; - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask = 1; - *active_requests = 0; - *iteration = -1; - *status = PTPCOLL_NOT_STARTED; - *iteration = 0; - - compute_knomial_allgather_offsets(my_group_index,count + padding_count, dtype,k_radix,n_exchanges, - ptpcoll_module->allgather_offsets); - - if (EXCHANGE_NODE == k_node->node_type) { - bcol_ptpcoll_allreduce_recursivek_scatter_reduce_extra(ptpcoll_module, - buffer_index, - src_buffer, recv_buffer, op, count, dtype); - tmpsrc_buffer = src_buffer; - if ( k_node->n_extra_sources > 0){ - tmpsrc_buffer = recv_buffer; - } - bcol_ptpcoll_allreduce_recursivek_scatter_reduce(ptpcoll_module, buffer_index, - tmpsrc_buffer, recv_buffer, op, count + padding_count, dtype, - my_group_index,padded_start_byte); - bcol_ptpcoll_allreduce_knomial_allgather(ptpcoll_module, - buffer_index, - src_buffer, recv_buffer, count + padding_count, dtype, - my_group_index, padded_start_byte); - bcol_ptpcoll_allreduce_knomial_allgather_extra(ptpcoll_module, - buffer_index, - src_buffer, recv_buffer, count, dtype); - - } - else if (EXTRA_NODE == k_node->node_type) { - bcol_ptpcoll_allreduce_recursivek_scatter_reduce_extra(ptpcoll_module, - buffer_index, - src_buffer, recv_buffer, op, count, dtype); - bcol_ptpcoll_allreduce_knomial_allgather_extra(ptpcoll_module, - buffer_index, - src_buffer, recv_buffer, count, dtype); - } - - return BCOL_FN_COMPLETE; -} - - - -/* - * Register allreduce 
functions to the BCOL function table, - * so they can be selected - */ -int bcol_ptpcoll_allreduce_init(mca_bcol_base_module_t *super) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = - (mca_bcol_ptpcoll_module_t *) super; - - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - comm_attribs.bcoll_type = BCOL_ALLREDUCE; - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - - /* not an accurate attribute, none of these algorithms - * are non-blocking - */ - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - comm_attribs.data_src = DATA_SRC_KNOWN; - - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_allreduce_narraying_init, - bcol_ptpcoll_allreduce_narraying_progress); - - inv_attribs.bcol_msg_min = 10000000; - inv_attribs.bcol_msg_max = 10485760; /* range 4 */ - - if (ptpcoll_module->pow_knum == ptpcoll_module->group_size) { - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_allreduce_recursivek_scatter_reduce_allgather_init, - NULL); - - } else { - - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_allreduce_recursivek_scatter_reduce_allgather_extra_init, - NULL); - - } - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_allreduce.h b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_allreduce.h deleted file mode 100644 index 144e256761..0000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_allreduce.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
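Selection note on the registration above: bcol_ptpcoll_allreduce_init installs the plain scatter-reduce-allgather path only when pow_knum equals the group size, i.e. when the group is an exact power of the radix; otherwise it installs the "extra" variant, which first folds the leftover ranks into proxies. The predicate amounts to:

    static int is_power_of_radix(int group_size, int k_radix)
    {
        int p = 1;
        while (p < group_size) {
            p *= k_radix;
        }
        return p == group_size;  /* pow_knum == group_size in module terms */
    }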
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_PTPCOLL_ALLREDUCE_H -#define MCA_BCOL_PTPCOLL_ALLREDUCE_H - -#include "ompi_config.h" -#include "ompi/op/op.h" -#include "ompi/datatype/ompi_datatype.h" -#include "bcol_ptpcoll.h" -#include "bcol_ptpcoll_utils.h" - -enum { - BLOCK_OFFSET = 0, - LOCAL_REDUCE_SEG_OFFSET, - BLOCK_COUNT, - SEG_SIZE, - NOFFSETS -}; - -BEGIN_C_DECLS -int bcol_ptpcoll_allreduce_narraying(mca_bcol_ptpcoll_module_t *ptpcoll_module, - const int buffer_index, void *data_buffer, - struct ompi_op_t *op, - const int count, struct ompi_datatype_t *dtype, const int - buffer_size, const int relative_group_index); - - -int bcol_ptpcoll_allreduce_narraying_init(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - -int bcol_ptpcoll_allreduce_recursivek_scatter_reduce(mca_bcol_ptpcoll_module_t *ptpcoll_module, - const int buffer_index, void *sbuf, - void *rbuf, - struct ompi_op_t *op, - const int count, struct ompi_datatype_t *dtype, - const int relative_group_index, - const int padded_start_byte); - -int bcol_ptpcoll_allreduce_knomial_allgather(mca_bcol_ptpcoll_module_t *ptpcoll_module, - const int buffer_index, - void *sbuf,void *rbuf, int count, struct - ompi_datatype_t *dtype, - const int relative_group_index, - const int padded_start_byte); - -int bcol_ptpcoll_allreduce_recursivek_scatter_reduce_allgather_init(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - - -int compute_knomial_allgather_offsets(int group_index, int count, struct - ompi_datatype_t *dtype,int k_radix,int n_exchanges, - int **offsets); - - -int bcol_ptpcoll_allreduce_recursivek_scatter_reduce_extra(mca_bcol_ptpcoll_module_t *ptpcoll_module, - int buffer_index, - void *sbuf, - void *rbuf, - struct ompi_op_t *op, - const int count, struct ompi_datatype_t *dtype); - -int bcol_ptpcoll_allreduce_knomial_allgather_extra(mca_bcol_ptpcoll_module_t *ptpcoll_module, - int buffer_index, - void *sbuf, - void *rbuf, - const int count, struct ompi_datatype_t *dtype); - -int bcol_ptpcoll_allreduce_recursivek_scatter_reduce_allgather_extra_init(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - -int bcol_ptpcoll_allreduce_init(mca_bcol_base_module_t *super); - -#if 0 -int knomial_reduce_scatter_offsets(int group_index,int count, struct ompi_datatype_t *dtype, int k_radix, - int n_exchanges, int nth_exchange, size_t *recv_offset, size_t - *block_offset, size_t *block_count, size_t *block_size, size_t - *seg_size); - -int allgather_offsets(int group_index,int count, struct ompi_datatype_t *dtype, int k_radix, - int n_exchanges, int nth_exchange, size_t *send_offset, size_t - *block_offset, size_t *block_count, size_t *block_size, size_t - *seg_size); -#endif - -END_C_DECLS - -#endif diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_barrier.c b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_barrier.c deleted file mode 100644 index 6ad04db6c6..0000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_barrier.c +++ /dev/null @@ -1,933 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
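The BLOCK_OFFSET .. SEG_SIZE enum above indexes the per-exchange rows of the module's allgather_offsets table that compute_knomial_allgather_offsets fills in; the table is consumed as an n_exchanges-by-NOFFSETS array of ints (int **offsets). A minimal allocation sketch, with a hypothetical helper name:

    #include <stdlib.h>

    #define NOFFSETS 4 /* BLOCK_OFFSET, LOCAL_REDUCE_SEG_OFFSET, BLOCK_COUNT, SEG_SIZE */

    static int **alloc_allgather_offsets(int n_exchanges)
    {
        int **offsets = calloc((size_t)n_exchanges, sizeof(*offsets));
        if (NULL == offsets) {
            return NULL;
        }
        for (int i = 0; i < n_exchanges; ++i) {
            offsets[i] = calloc(NOFFSETS, sizeof(**offsets));
            if (NULL == offsets[i]) {
                while (i-- > 0) free(offsets[i]);
                free(offsets);
                return NULL;
            }
        }
        return offsets;
    }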
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/include/ompi/constants.h" -#include "bcol_ptpcoll.h" -#include "bcol_ptpcoll_utils.h" - -/* - * Fanin routines - no user data - */ - -/********************************************* New Barrier *********************************************/ -/*******************************************************************************************************/ -/*******************************************************************************************************/ - -/*************************************** K-nominal ***************************************/ -/*****************************************************************************************/ -static int bcol_ptpcoll_barrier_recurs_knomial_new( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - /* local variable */ - uint64_t sequence_number; - mca_bcol_ptpcoll_module_t *ptpcoll_module = - (mca_bcol_ptpcoll_module_t *) const_args->bcol_module; - - netpatterns_k_exchange_node_t *my_exchange_node = - &ptpcoll_module->knomial_exchange_tree; - - int rc, k, pair_comm_rank, exchange, completed, - tree_order = my_exchange_node->tree_order, tag, - n_extra_sources = my_exchange_node->n_extra_sources, - n_exchange = my_exchange_node->n_exchanges, num_reqs; - - ompi_communicator_t *comm = - ptpcoll_module->super.sbgp_partner_module->group_comm; - - int *extra_sources_array = NULL, - **rank_exchanges = my_exchange_node->rank_exchanges; - - ompi_request_t **requests; - opal_free_list_item_t *item; - - mca_bcol_ptpcoll_collreq_t *collreq; - - item = opal_free_list_wait (&ptpcoll_module->collreqs_free); - if (OPAL_UNLIKELY(NULL == item)) { - PTPCOLL_ERROR(("Free list waiting failed.")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - collreq = (mca_bcol_ptpcoll_collreq_t *) item; - input_args->bcol_opaque_data = (void *) collreq; - - requests = collreq->requests; - - /* TAG Calculation */ - sequence_number = input_args->sequence_num; - - /* Keep tag within the limit supportd by the pml */ - tag = (PTPCOLL_TAG_OFFSET + sequence_number * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - - /* Mark this as a collective tag, to avoid conflict with user-level flags */ - tag = -tag; - - if (0 < n_extra_sources) { /* EXCHANGE_NODE case */ - collreq->need_toserv_extra = 1; - extra_sources_array = my_exchange_node->rank_extra_sources_array; - - /* I will participate in the exchange (of the algorithm) - - * wait for signal from extra process */ - for (k = 0; k < n_extra_sources; ++k) { - pair_comm_rank = - ptpcoll_module->super.sbgp_partner_module->group_list[extra_sources_array[k]]; - - rc = MCA_PML_CALL(irecv( - NULL, 0, MPI_INT, - pair_comm_rank, tag, - comm, &(requests[k]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("IRecv failed.")); - return rc; - } - } - - num_reqs = n_extra_sources; - - /* Test for completion */ - completed = - mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("Test for all failed.")); - return rc; - } - - if (!completed) { - collreq->tag = tag; - collreq->num_reqs = num_reqs; - collreq->exchange = 0; - - return BCOL_FN_STARTED; - } - } else { - collreq->need_toserv_extra = 0; - } - - /* loop over exchange send/recv pairs */ - for (exchange = 0; exchange < n_exchange; ++exchange) { - for (k = 0; k < tree_order - 1; ++k) { - /* rank of exchange partner within the group */ - pair_comm_rank = - 
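Every message in these barrier algorithms is a zero-count MPI_INT send or receive: no payload moves, and the rendezvous on (source, tag) alone provides the synchronization. Expressed against plain MPI for illustration (the component itself posts these through the PML, which also accepts its negative internal tags, whereas user-level MPI requires non-negative tags):

    #include <mpi.h>

    /* zero-byte synchronization message: the matching is the whole point */
    static void barrier_signal(MPI_Comm comm, int peer, int tag)
    {
        MPI_Send(NULL, 0, MPI_INT, peer, tag, comm);
    }

    static void barrier_wait_signal(MPI_Comm comm, int peer, int tag)
    {
        MPI_Recv(NULL, 0, MPI_INT, peer, tag, comm, MPI_STATUS_IGNORE);
    }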
ptpcoll_module->super.sbgp_partner_module->group_list[rank_exchanges[exchange][k]]; - - assert(2 * ptpcoll_module->k_nomial_radix > (k * 2 + 1)); - - /* send to partner - we will wait for completion, as send - * completion is at the MPI level, and will not - * incur network level completion costs - */ - rc = MCA_PML_CALL(isend( - NULL, 0, MPI_INT, - pair_comm_rank, tag, - MCA_PML_BASE_SEND_STANDARD, - comm, &(requests[k * 2 + 1]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("ISend failed.")); - return rc; - } - - PTPCOLL_VERBOSE(10, ("Ex %d, K %d send to %d[%d]", exchange, k, - pair_comm_rank, rank_exchanges[exchange][k])); - - /* recive from partner */ - rc = MCA_PML_CALL(irecv( - NULL, 0, MPI_INT, - pair_comm_rank, tag, - comm, &(requests[k * 2]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("IRecv failed.")); - return rc; - } - - PTPCOLL_VERBOSE(10, ("Ex %d, K %d irecv from %d[%d]", exchange, k, - pair_comm_rank, rank_exchanges[exchange][k])); - } - - num_reqs = 2 * (tree_order - 1); - - /* Test for completion */ - completed = - mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("Test for all failed.")); - return rc; - } - - if (!completed) { - collreq->tag = tag; - collreq->num_reqs = num_reqs; - collreq->exchange = exchange + 1; - - return BCOL_FN_STARTED; - } - } - - /* If non power of 2, may need to send message to "extra" proc */ - if (0 < n_extra_sources) { /* EXCHANGE_NODE case */ - for (k = 0; k < n_extra_sources; ++k) { - pair_comm_rank = - ptpcoll_module->super.sbgp_partner_module->group_list[extra_sources_array[k]]; - - rc = MCA_PML_CALL(isend( - NULL, 0, MPI_INT, - pair_comm_rank, tag, - MCA_PML_BASE_SEND_STANDARD, - comm, &(requests[k]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("ISend failed.")); - return rc; - } - } - - num_reqs = n_extra_sources; - - /* Test for completion */ - completed = - mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("Test for all failed.")); - return rc; - } - - if (!completed) { - collreq->tag = tag; - collreq->num_reqs = num_reqs; - - collreq->exchange = n_exchange; - collreq->need_toserv_extra = 0; - - return BCOL_FN_STARTED; - } - } - - opal_free_list_return (&ptpcoll_module->collreqs_free, (opal_free_list_item_t *) collreq); - return BCOL_FN_COMPLETE; -} - -static int bcol_ptpcoll_barrier_recurs_knomial_new_progress( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - /* local variable */ - mca_bcol_ptpcoll_module_t *ptpcoll_module = - (mca_bcol_ptpcoll_module_t *) const_args->bcol_module; - - netpatterns_k_exchange_node_t *my_exchange_node = - &ptpcoll_module->knomial_exchange_tree; - - int rc, k, tag, pair_comm_rank, exchange, - tree_order = my_exchange_node->tree_order, num_reqs, - n_exchange = my_exchange_node->n_exchanges, completed, - n_extra_sources = my_exchange_node->n_extra_sources; - - ompi_communicator_t *comm = - ptpcoll_module->super.sbgp_partner_module->group_comm; - - int *extra_sources_array, - **rank_exchanges = my_exchange_node->rank_exchanges; - - mca_bcol_ptpcoll_collreq_t *collreq = - (mca_bcol_ptpcoll_collreq_t *) input_args->bcol_opaque_data; - - ompi_request_t **requests = collreq->requests; - - num_reqs = collreq->num_reqs; - - /* Test for completion */ - completed = - mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - 
PTPCOLL_ERROR(("Test for all failed.")); - return rc; - } - - if (!completed) { - return BCOL_FN_STARTED; - } - - /* Continue loop over exchange send/recv pairs */ - tag = collreq->tag; - - for (exchange = collreq->exchange; exchange < n_exchange; ++exchange) { - for (k = 0; k < tree_order - 1; ++k) { - /* rank of exchange partner within the group */ - pair_comm_rank = - ptpcoll_module->super.sbgp_partner_module->group_list[rank_exchanges[exchange][k]]; - - assert(2 * ptpcoll_module->k_nomial_radix > (k * 2 + 1)); - - /* send to partner - we will wait for completion, as send - * completion is at the MPI level, and will not - * incur network level completion costs - */ - rc = MCA_PML_CALL(isend( - NULL, 0, MPI_INT, - pair_comm_rank, tag, - MCA_PML_BASE_SEND_STANDARD, - comm, &(requests[k * 2 + 1]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("ISend failed.")); - return rc; - } - - PTPCOLL_VERBOSE(10, ("Ex %d, K %d send to %d[%d]", exchange, k, - pair_comm_rank, rank_exchanges[exchange][k])); - - /* recive from partner */ - rc = MCA_PML_CALL(irecv( - NULL, 0, MPI_INT, - pair_comm_rank, tag, - comm, &(requests[k * 2]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("IRecv failed.")); - return rc; - } - - PTPCOLL_VERBOSE(10, ("Ex %d, K %d irecv from %d[%d]", exchange, k, - pair_comm_rank, rank_exchanges[exchange][k])); - } - - num_reqs = 2 * (tree_order - 1); - - /* Test for completion */ - completed = - mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("Test for all failed.")); - return rc; - } - - if (!completed) { - collreq->num_reqs = num_reqs; - collreq->exchange = exchange + 1; - - return BCOL_FN_STARTED; - } - } - - /* If non power of 2, may need to send message to "extra" proc */ - if (collreq->need_toserv_extra) { /* EXCHANGE_NODE case */ - extra_sources_array = my_exchange_node->rank_extra_sources_array; - - for (k = 0; k < n_extra_sources; ++k) { - pair_comm_rank = - ptpcoll_module->super.sbgp_partner_module->group_list[extra_sources_array[k]]; - - rc = MCA_PML_CALL(isend( - NULL, 0, MPI_INT, - pair_comm_rank, tag, - MCA_PML_BASE_SEND_STANDARD, - comm, &(requests[k]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("ISend failed.")); - return rc; - } - } - - num_reqs = n_extra_sources; - - /* Test for completion */ - completed = - mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("Test for all failed.")); - return rc; - } - - if (!completed) { - collreq->num_reqs = num_reqs; - collreq->exchange = n_exchange; - collreq->need_toserv_extra = 0; - - return BCOL_FN_STARTED; - } - } - - return BCOL_FN_COMPLETE; -} - -/****************************************** Extra node Barrier ******************************************/ - -static int bcol_ptpcoll_barrier_recurs_knomial_extra_new( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - /* local variable */ - uint64_t sequence_number; - int rc, tag, pair_comm_rank, - completed, num_reqs = 2; - - mca_bcol_ptpcoll_module_t *ptpcoll_module = - (mca_bcol_ptpcoll_module_t *) const_args->bcol_module; - - netpatterns_k_exchange_node_t *my_exchange_node = - &ptpcoll_module->knomial_exchange_tree; - - ompi_communicator_t *comm = - ptpcoll_module->super.sbgp_partner_module->group_comm; - - int *extra_sources_array = my_exchange_node->rank_extra_sources_array; - - ompi_request_t **requests; - opal_free_list_item_t *item; - - 
mca_bcol_ptpcoll_collreq_t *collreq; - - item = opal_free_list_wait (&ptpcoll_module->collreqs_free); - if (OPAL_UNLIKELY(NULL == item)) { - PTPCOLL_ERROR(("Free list waiting failed.")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - collreq = (mca_bcol_ptpcoll_collreq_t *) item; - input_args->bcol_opaque_data = (void *) collreq; - - requests = collreq->requests; - - /* TAG Calculation */ - sequence_number = input_args->sequence_num; - - /* Keep tag within the limit supportd by the pml */ - tag = (PTPCOLL_TAG_OFFSET + sequence_number * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - - /* Mark this as a collective tag, to avoid conflict with user-level flags */ - tag = -tag; - - pair_comm_rank = - ptpcoll_module->super.sbgp_partner_module->group_list[extra_sources_array[0]]; - - rc = MCA_PML_CALL(isend( - NULL, 0, MPI_INT, - pair_comm_rank, tag, - MCA_PML_BASE_SEND_STANDARD, - comm, &(requests[0]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("ISend failed.")); - return rc; - } - - rc = MCA_PML_CALL(irecv( - NULL, 0, MPI_INT, - pair_comm_rank, tag, - comm, &(requests[1]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("IRecv failed.")); - return rc; - } - - /* Test for completion */ - completed = - mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("Test for all failed.")); - return rc; - } - - if (!completed) { - return BCOL_FN_STARTED; - } - - opal_free_list_return (&ptpcoll_module->collreqs_free, (opal_free_list_item_t *) collreq); - return BCOL_FN_COMPLETE; -} - -/*************************************** Recursive-Doubling ***************************************/ -/**************************************************************************************************/ - -static int bcol_ptpcoll_barrier_recurs_dbl_new( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - /* local variable */ - uint64_t sequence_number; - mca_bcol_ptpcoll_module_t *ptp_module = - (mca_bcol_ptpcoll_module_t *) const_args->bcol_module; - - ompi_communicator_t *comm = ptp_module->super.sbgp_partner_module->group_comm; - - int rc, my_extra_partner_comm_rank = 0, exchange, completed, - pair_comm_rank, pair_rank, delta, tag, num_reqs = 0, - my_rank = ptp_module->super.sbgp_partner_module->my_index, - n_exchange = ptp_module->super.sbgp_partner_module->n_levels_pow2; - - ompi_request_t **requests; - opal_free_list_item_t *item; - - mca_bcol_ptpcoll_collreq_t *collreq; - - item = opal_free_list_wait (&ptp_module->collreqs_free); - if (OPAL_UNLIKELY(NULL == item)) { - PTPCOLL_ERROR(("Free list waiting failed.")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - collreq = (mca_bcol_ptpcoll_collreq_t *) item; - input_args->bcol_opaque_data = (void *) collreq; - - assert(PTPCOLL_EXTRA != ptp_module->pow_2type); - - requests = collreq->requests; - - /* TAG Calculation */ - sequence_number = input_args->sequence_num; - - /* keep tag within the limit supportd by the pml */ - tag = (PTPCOLL_TAG_OFFSET + sequence_number * PTPCOLL_TAG_FACTOR) & (ptp_module->tag_mask); - - /* mark this as a collective tag, to avoid conflict with user-level flags */ - tag = -tag; - - if (PTPCOLL_PROXY == ptp_module->pow_2type) { - /* I will participate in the exchange - wait for signal from extra - ** process */ - /* - * recv from extra rank - my_extra_partner_comm_rank - * can use blocking recv, as no other communications - * need to take place. 
- */ - my_extra_partner_comm_rank = - ptp_module->super.sbgp_partner_module->group_list[ptp_module->proxy_extra_index]; - - collreq->need_toserv_extra = 1; - collreq->extra_partner_rank = my_extra_partner_comm_rank; - - rc = MCA_PML_CALL(irecv(NULL, 0, MPI_INT, - my_extra_partner_comm_rank, tag, comm, - &(requests[0]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("IRecv failed.")); - return rc; - } - - completed = mca_bcol_ptpcoll_test_for_match(&requests[0], &rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("Test for irecv failed.")); - return rc; - } - - if (!completed) { - collreq->tag = tag; - collreq->num_reqs = 1; - collreq->exchange = 0; - - return BCOL_FN_STARTED; - } - } else { - collreq->need_toserv_extra = 0; - } - - /* Loop over exchange send/recv pairs */ - delta = 1; - for (exchange = 0; exchange < n_exchange; ++exchange) { - - /* rank of exchange partner within the group */ - pair_rank = my_rank ^ delta; - - /* rank within the communicator */ - pair_comm_rank = - ptp_module->super.sbgp_partner_module->group_list[pair_rank]; - - /* send to partner - we will wait for completion, as send - * completion is at the MPI level, and will not - * incur network level completion costs - */ - rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT, - pair_comm_rank, tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[0]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("ISend failed.")); - return rc; - } - - ++num_reqs; - - /* recive from partner */ - rc = MCA_PML_CALL(irecv(NULL, 0, MPI_INT, - pair_comm_rank, tag, comm, - &(requests[1]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("IRecv failed.")); - return rc; - } - - ++num_reqs; - - PTPCOLL_VERBOSE(5, ("exchange - %d, pair_rank - %d, pair_comm_rank - %d", - exchange, pair_rank, pair_comm_rank)); - - /* test for completion */ - completed = - mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("Test for all failed.")); - return rc; - } - - if (!completed) { - collreq->tag = tag; - collreq->num_reqs = num_reqs; - - collreq->exchange = exchange + 1; - assert(collreq->exchange >= 0); - - return BCOL_FN_STARTED; - } - - delta <<= 1; /* delta *= 2 */ - } - - if (PTPCOLL_PROXY == ptp_module->pow_2type) { - /* send - let the extra rank know that we are done */ - rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT, - my_extra_partner_comm_rank, tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[0]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("ISend failed.")); - return rc; - } - - completed = mca_bcol_ptpcoll_test_for_match(&requests[0], &rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("Test for isend failed.")); - return rc; - } - - if (!completed) { - collreq->tag = tag; - collreq->num_reqs = 1; - - collreq->need_toserv_extra = 0; - collreq->exchange = n_exchange; - - return BCOL_FN_STARTED; - } - } - - opal_free_list_return (&ptp_module->collreqs_free, (opal_free_list_item_t *) collreq); - return BCOL_FN_COMPLETE; -} - -static int bcol_ptpcoll_barrier_recurs_dbl_new_progress( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - /* local variable */ - mca_bcol_ptpcoll_module_t *ptp_module = - (mca_bcol_ptpcoll_module_t *) const_args->bcol_module; - - ompi_communicator_t *comm = ptp_module->super.sbgp_partner_module->group_comm; - - int rc, exchange, pair_comm_rank, tag, - pair_rank, delta, num_reqs, completed, - my_rank = 
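The pairing rule in the exchange loop here is classic recursive doubling: at round e the partner is my_rank XOR 2^e, so after n_levels_pow2 rounds every rank has transitively synchronized with every other. For example, rank 5 (binary 101) meets ranks 4, 7 and 1 in rounds 0, 1 and 2.

    static int rd_partner(int my_rank, int round)
    {
        return my_rank ^ (1 << round);  /* flip bit `round` of the rank id */
    }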
ptp_module->super.sbgp_partner_module->my_index, - n_exchange = ptp_module->super.sbgp_partner_module->n_levels_pow2; - - ompi_request_t **requests; - mca_bcol_ptpcoll_collreq_t *collreq = - (mca_bcol_ptpcoll_collreq_t *) input_args->bcol_opaque_data; - - num_reqs = collreq->num_reqs; - requests = collreq->requests; - - /* test for completion */ - completed = - mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("Test for all failed.")); - return rc; - } - - if (!completed) { - return BCOL_FN_STARTED; - } - - assert(PTPCOLL_EXTRA != ptp_module->pow_2type); - - /* Continue loop over exchange send/recv pairs */ - num_reqs = 0; - tag = collreq->tag; - - exchange = collreq->exchange; - assert(exchange >= 0); - - delta = 1 << exchange; - for (; exchange < n_exchange; ++exchange) { - - /* rank of exchange partner within the group */ - pair_rank = my_rank ^ delta; - - /* rank within the communicator */ - pair_comm_rank = - ptp_module->super.sbgp_partner_module->group_list[pair_rank]; - - /* send to partner - we will wait for completion, as send - * completion is at the MPI level, and will not - * incur network level completion costs - */ - rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT, - pair_comm_rank, tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[0]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("ISend failed.")); - return rc; - } - - ++num_reqs; - - /* recive from partner */ - rc = MCA_PML_CALL(irecv(NULL, 0, MPI_INT, - pair_comm_rank, tag, comm, - &(requests[1]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("IRecv failed.")); - return rc; - } - - ++num_reqs; - - PTPCOLL_VERBOSE(5, ("exchange - %d, pair_rank - %d, pair_comm_rank - %d", - exchange, pair_rank, pair_comm_rank)); - - /* test for completion */ - completed = - mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("Test for all failed.")); - return rc; - } - - if (!completed) { - collreq->num_reqs = num_reqs; - collreq->exchange = exchange + 1; - assert(collreq->exchange >= 0); - - return BCOL_FN_STARTED; - } - - delta <<= 1; /* delta *= 2 */ - } - - /* if non power of 2, may need to send message to "extra" proc */ - if (collreq->need_toserv_extra) { - /* send - let the extra rank know that we are done */ - rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT, - collreq->extra_partner_rank, tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[0]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("ISend failed.")); - return rc; - } - - completed = mca_bcol_ptpcoll_test_for_match(&requests[0], &rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("Test for isend failed.")); - return rc; - } - - if (!completed) { - collreq->num_reqs = 1; - collreq->need_toserv_extra = 0; - collreq->exchange = n_exchange; - - return BCOL_FN_STARTED; - } - } - - return BCOL_FN_COMPLETE; -} - -/****************************************** Extra node Barrier ******************************************/ - -static int bcol_ptpcoll_barrier_recurs_dbl_extra_new( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - /* local variable */ - uint64_t sequence_number; - int rc, completed, num_reqs = 2, - tag, my_extra_partner_comm_rank; - - ompi_request_t **requests; - opal_free_list_item_t *item; - - mca_bcol_ptpcoll_collreq_t *collreq; - - mca_bcol_ptpcoll_module_t *ptp_module = - (mca_bcol_ptpcoll_module_t *) const_args->bcol_module; - 
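The exchange loop above is textbook recursive doubling: `n_exchange` rounds over a power-of-two group, with the round-`s` partner at `my_rank ^ (1 << s)`. The deleted code runs it non-blocking so it can park between rounds (saving the round index in `collreq->exchange` for the progress function); stripped of that machinery it reduces to this blocking sketch, where `group_to_comm[]` stands in for the `group_list[]` rank translation and the function name is mine:

```c
#include <mpi.h>

/* Blocking sketch of the recursive-doubling barrier exchange. */
static void recursive_doubling_barrier(int my_rank, int n_exchange,
                                       const int *group_to_comm,
                                       int tag, MPI_Comm comm)
{
    int delta = 1;

    for (int exchange = 0; exchange < n_exchange; exchange++, delta <<= 1) {
        int partner = group_to_comm[my_rank ^ delta];

        /* zero-byte exchange with this round's partner */
        MPI_Sendrecv(NULL, 0, MPI_INT, partner, tag,
                     NULL, 0, MPI_INT, partner, tag,
                     comm, MPI_STATUS_IGNORE);
    }
}
```

Resuming is cheap because the round state is just two integers: the loop restarts at `collreq->exchange` with `delta = 1 << exchange`, which is exactly what the progress function below does.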
ompi_communicator_t *comm = ptp_module->super.sbgp_partner_module->group_comm; - - item = opal_free_list_wait (&ptp_module->collreqs_free); - if (OPAL_UNLIKELY(NULL == item)) { - PTPCOLL_ERROR(("Free list waiting failed.")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - collreq = (mca_bcol_ptpcoll_collreq_t *) item; - input_args->bcol_opaque_data = (void *) collreq; - - requests = collreq->requests; - - /* TAG Calculation */ - sequence_number = input_args->sequence_num; - - /* Keep tag within the limit supportd by the pml */ - tag = (PTPCOLL_TAG_OFFSET + sequence_number * PTPCOLL_TAG_FACTOR) & (ptp_module->tag_mask); - - /* mark this as a collective tag, to avoid conflict with user-level flags */ - tag = -tag; - - /* I will not participate in the exchange - so just "register" as here, - * signal the extra rank that I am here */ - - my_extra_partner_comm_rank = - ptp_module->super.sbgp_partner_module->group_list[ptp_module->proxy_extra_index]; - - rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT, - my_extra_partner_comm_rank, tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[0]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("Send failed.")); - return rc; - } - - /* Recv signal that the rest are done - my_extra_partner_comm_rank */ - rc = MCA_PML_CALL(irecv(NULL, 0, MPI_INT, - my_extra_partner_comm_rank, tag, comm, - &(requests[1]))); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("IRecv failed.")); - return rc; - } - - /* Test for completion */ - completed = - mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("Test for all failed.")); - return rc; - } - - if (!completed) { - return BCOL_FN_STARTED; - } - - opal_free_list_return (&ptp_module->collreqs_free, (opal_free_list_item_t *) collreq); - return BCOL_FN_COMPLETE; -} - -/* We have the same progress func for both cases (R-D and K-Nominal) */ -static int bcol_ptpcoll_barrier_extra_node_progress( - bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - /* local variable */ - ompi_request_t **requests; - int rc, completed, num_reqs = 2; - - mca_bcol_ptpcoll_collreq_t *collreq = - (mca_bcol_ptpcoll_collreq_t *) input_args->bcol_opaque_data; - - requests = collreq->requests; - - /* test for completion */ - completed = - mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - PTPCOLL_ERROR(("Test for all failed.")); - return rc; - } - - if (!completed) { - return BCOL_FN_STARTED; - } - - return BCOL_FN_COMPLETE; -} - -static int mca_bcol_ptpcoll_barrier_setup(mca_bcol_base_module_t *super, int bcoll_type) -{ - netpatterns_k_exchange_node_t *my_exchange_node; - mca_bcol_ptpcoll_module_t * ptpcoll_module = - (mca_bcol_ptpcoll_module_t *) super; - - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - comm_attribs.bcoll_type = bcoll_type; - - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - comm_attribs.data_src = DATA_SRC_KNOWN; - - switch(mca_bcol_ptpcoll_component.barrier_alg) { - case 1: - if (PTPCOLL_EXTRA == ptpcoll_module->pow_2type) { - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_barrier_recurs_dbl_extra_new, - 
bcol_ptpcoll_barrier_extra_node_progress); - break; - } - - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_barrier_recurs_dbl_new, - bcol_ptpcoll_barrier_recurs_dbl_new_progress); - break; - case 2: - my_exchange_node = &ptpcoll_module->knomial_exchange_tree; - if (my_exchange_node->n_extra_sources > 0 && - EXTRA_NODE == my_exchange_node->node_type) { - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_barrier_recurs_knomial_extra_new, - bcol_ptpcoll_barrier_extra_node_progress); - break; - } - - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_barrier_recurs_knomial_new, - bcol_ptpcoll_barrier_recurs_knomial_new_progress); - break; - default: - PTPCOLL_ERROR(("Invalid barrier_alg value.")); - } - - return OMPI_SUCCESS; -} - -int mca_bcol_ptpcoll_memsync_init(mca_bcol_base_module_t *super) -{ - return mca_bcol_ptpcoll_barrier_setup(super, BCOL_SYNC); -} - -int bcol_ptpcoll_barrier_init(mca_bcol_base_module_t *super) -{ - return mca_bcol_ptpcoll_barrier_setup(super, BCOL_BARRIER); -} diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_bcast.c b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_bcast.c deleted file mode 100644 index f2b039e3ac..0000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_bcast.c +++ /dev/null @@ -1,2321 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2016 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/include/ompi/constants.h" -#include "ompi/mca/bcol/bcol.h" -#include "bcol_ptpcoll_bcast.h" -#include "bcol_ptpcoll_utils.h" - -#define K_NOMIAL_ROOT_BCAST_NB_NOTEST(step_info, radix, \ - my_group_index, group_list, \ - data_buffer, count, tag, comm, send_requests, num_pending_sends) \ -do { \ - int rc = OMPI_SUCCESS; \ - int dst; \ - int comm_dst; \ - *num_pending_sends = 0; \ - \ - while(MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_PEER_CHECK_LEVEL(step_info)) { \ - /* For each level of tree, do sends */ \ - MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_PEER(my_group_index, \ - radix, step_info, dst); \ - comm_dst = group_list[dst]; \ - \ - /* Non-blocking send .... */ \ - PTPCOLL_VERBOSE(9, ("Bcast, Isend data to %d[%d], count %d, tag %d, addr %p", \ - dst, comm_dst, count, tag, \ - data_buffer)); \ - rc = MCA_PML_CALL(isend(data_buffer, count, MPI_BYTE, \ - comm_dst, tag, \ - MCA_PML_BASE_SEND_STANDARD, comm, \ - &(send_requests[*num_pending_sends]))); \ - PTPCOLL_VERBOSE(10, ("send request addr is %p", send_requests[*num_pending_sends])); \ - if( OMPI_SUCCESS != rc ) { \ - PTPCOLL_VERBOSE(10, ("Failed to isend data")); \ - return OMPI_ERROR; \ - } \ - ++(*num_pending_sends); \ - } \ -} while(0) - -#define NARRAY_BCAST_NB(narray_node, process_shift, group_size, \ - data_buffer, count, tag, comm, send_requests, \ - num_pending_sends) \ -do { \ - int n, rc = OMPI_SUCCESS; \ - int dst; \ - int comm_dst; \ - \ - /* Send out data to all relevant children */ \ - for (n = 0; n < narray_node->n_children; n++) { \ - \ - dst = narray_node->children_ranks[n] + process_shift; \ - if (dst >= group_size) { \ - dst -= group_size; \ - } \ - comm_dst = group_list[dst]; \ - \ - /* Non-blocking send ....
*/ \ - PTPCOLL_VERBOSE(9 , ("Bcast, Isend data to %d[%d], count %d, tag %d, addr %p", \ - dst, comm_dst, count, tag, \ - data_buffer)); \ - rc = MCA_PML_CALL(isend(data_buffer, count, MPI_BYTE, \ - comm_dst, tag, \ - MCA_PML_BASE_SEND_STANDARD, comm, \ - &(send_requests[*num_pending_sends]))); \ - if( OMPI_SUCCESS != rc ) { \ - PTPCOLL_VERBOSE(10, ("Failed to isend data")); \ - return OMPI_ERROR; \ - } \ - ++(*num_pending_sends); \ - } \ -} while(0) - - -int bcol_ptpcoll_bcast_k_nomial_anyroot_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - int completed = 0; - int rc; - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - uint32_t buffer_index = input_args->buffer_index; - - ompi_request_t **send_requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, send_requests, &rc); - if (OMPI_SUCCESS != rc) { - return OMPI_ERROR; - } - - /* DONE */ - if(completed) { - PTPCOLL_VERBOSE(10, ("bcast root is done")); - return BCOL_FN_COMPLETE; - } else { - PTPCOLL_VERBOSE(10, ("bcast root is started")); - return BCOL_FN_STARTED; - } -} - -/* K-nomial tree ( with any root ) algorithm */ -int bcol_ptpcoll_bcast_k_nomial_anyroot(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component; - - int tag; - int rc; - int matched = 0; /* not matched */ - int comm_root = 0; /* no root */ - int i; - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - int radix = ptpcoll_module->k_nomial_radix; - int root_radix_mask = ptpcoll_module->pow_knum; - int peer = -1; - uint64_t sequence_number = input_args->sequence_num; - uint32_t buffer_index = input_args->buffer_index; - int extra_root = -1; - - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_status_public_t status; - ompi_request_t **send_requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - netpatterns_knomial_step_info_t step_info = {0, 0, 0}; - - PTPCOLL_VERBOSE(3, ("BCAST Anyroot, index_this_type %d, num_of_this_type %d", - const_args->index_of_this_type_in_collective + 1, - const_args->n_of_this_type_in_collective)); - - /* keep tag within the limit support by the pml */ - tag = (PTPCOLL_TAG_OFFSET + sequence_number * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - /* mark this as a collective tag, to avoid conflict with user-level flags */ - tag = -tag; - /* reset requests */ - *active_requests = 0; - - PTPCOLL_VERBOSE(8, ("bcol_ptpcoll_bcast_k_nomial_anyroot, buffer index: %d \n" - "tag: %d " - "tag_mask: %d " - "sn: %d " - "root: %d " - "pow_k: %d %d " - "buff: %p " - "radix: %d", - buffer_index, tag, - ptpcoll_module->tag_mask, sequence_number, - input_args->root_flag, - ptpcoll_module->pow_k, ptpcoll_module->pow_knum, - data_buffer, - radix)); - - if (input_args->root_flag) { - 
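The `K_NOMIAL_ROOT_BCAST_NB_NOTEST` and `NARRAY_BCAST_NB` macros above hide the tree arithmetic inside the `MCA_COMMON_NETPATTERNS_*` helpers. The fan-out rule itself is compact: a rank climbs to the level at which it receives, then sends to `radix - 1` peers at every level below it. A self-contained sketch (my naming, and assuming the group size is an exact power of the radix, which is what `pow_knum` guarantees here):

```c
#include <stdio.h>

/* Enumerate the peers `relative_rank` sends to in a k-nomial broadcast
 * over `size` ranks rooted at relative rank 0 (size a power of radix). */
static void knomial_children(int relative_rank, int radix, int size)
{
    int radix_mask = 1;

    /* climb to this rank's receive level: the first level at which
     * relative_rank is not a multiple of radix * radix_mask */
    while (radix_mask < size && 0 == relative_rank % (radix * radix_mask)) {
        radix_mask *= radix;
    }

    /* fan out: radix - 1 children at each level below the receive level */
    while ((radix_mask /= radix) >= 1) {
        for (int j = 1; j < radix; j++) {
            int dst = relative_rank + j * radix_mask;
            if (dst < size) {
                printf("%d -> %d\n", relative_rank, dst);
            }
        }
    }
}
```

For `size = 9` and `radix = 3`, the root emits to 3, 6, 1, 2 and rank 3 forwards to 4 and 5: the `(k - 1) * log base k N` sends that the root comment below describes.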
PTPCOLL_VERBOSE(10, ("I'm root of the data")); - /* - * I'm root of the operation - * send data to (k - 1) * log base k N neighbors - */ - MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_INIT(step_info, - ptpcoll_module->pow_knum, my_group_index); - K_NOMIAL_ROOT_BCAST_NB_NOTEST(step_info, radix, - my_group_index, group_list, - data_buffer, count, tag, comm, send_requests, - active_requests); - - goto ANY_ROOT_KNOMIAL_EXTRA; - } - - /* - * I'm not root, and I don't know to calculate root, so just - * wait for data from ANY_SOURCE, once you get it, proceed like a root - */ - - for (i = 0; i < cm->num_to_probe; i++) { - MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_INIT(step_info, ptpcoll_module->pow_knum, my_group_index); - while(MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_PEER_CHECK_LEVEL(step_info)) { - MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_PEER(my_group_index, radix, step_info, peer); - PTPCOLL_VERBOSE(10, ("Bcast, iprobe tag %d rank %d", - tag, group_list[peer])); - MCA_PML_CALL(iprobe(group_list[peer], tag, - comm, &matched, &status)); - if (matched) { - MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_UPDATE_LEVEL_FOR_BCAST(step_info, radix); - break; - } - } - - /* Check of the */ - if (PTPCOLL_KN_PROXY & ptpcoll_module->pow_ktype) { - for (i = 0 ; i < ptpcoll_module->kn_proxy_extra_num; i++) { - PTPCOLL_VERBOSE(10, ("Bcast, iprobe tag %d rank %d", - tag, group_list[peer])); - MCA_PML_CALL(iprobe(group_list[ptpcoll_module->kn_proxy_extra_index[i]], tag, - comm, &matched, &status)); - if (matched) { - step_info.k_level = root_radix_mask; - extra_root = group_list[ptpcoll_module->kn_proxy_extra_index[i]]; - goto ANY_ROOT_KNOMIAL_BCAST; - } - } - } - } - - /* the function always returns OMPI_SUCCESS, so we don't check return code */ - if (0 == matched) { - PTPCOLL_VERBOSE(10, ("IPROBE was not matched")); - /* No data was received, return no match error */ - return BCOL_FN_NOT_STARTED; - } - - /* set the source of data */ - comm_root = status.MPI_SOURCE; - - PTPCOLL_VERBOSE(10, ("A. step info %d %d %d", step_info.k_level, step_info.k_step, step_info.k_tmp_peer)); - - /* Bcast the data */ - PTPCOLL_VERBOSE(10, ("Starting data bcast")); - -ANY_ROOT_KNOMIAL_BCAST: - /* Post receive that will fetch the data */ - PTPCOLL_VERBOSE(10, ("Bcast, receive data from %d[%d], count %d, tag %d, addr %p", - comm_root, count, tag, data_buffer)); - - rc = MCA_PML_CALL(recv(data_buffer, count, MPI_BYTE, comm_root, tag, comm, MPI_STATUS_IGNORE)); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive data")); - return OMPI_ERROR; - } - PTPCOLL_VERBOSE(10, ("Bcast, Data was received")); - - /* Sending forward the data over K-nomial tree */ - MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_INIT(step_info, step_info.k_level, my_group_index); - - PTPCOLL_VERBOSE(10, ("B. 
step info %d %d %d", step_info.k_level, step_info.k_step, step_info.k_tmp_peer)); - K_NOMIAL_ROOT_BCAST_NB_NOTEST(step_info, radix, - my_group_index, group_list, - data_buffer, count, tag, comm, send_requests, - active_requests); - -ANY_ROOT_KNOMIAL_EXTRA: - /* Proxy node but NOT virtual root */ - if (PTPCOLL_KN_PROXY & ptpcoll_module->pow_ktype) { - for (i = 0 ; i < ptpcoll_module->kn_proxy_extra_num; i++) { - if (ptpcoll_module->kn_proxy_extra_index[i] == extra_root) - continue; - - PTPCOLL_VERBOSE(10, ("Extra_Isend to %d", ptpcoll_module->kn_proxy_extra_index[i])); - rc = MCA_PML_CALL(isend(data_buffer, count, MPI_BYTE, - group_list[ptpcoll_module->kn_proxy_extra_index[i]], tag - 1, - MCA_PML_BASE_SEND_STANDARD, comm, - &(send_requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - ++(*active_requests); - } - } - - if (*active_requests > 0) { - matched = - mca_bcol_ptpcoll_test_all_for_match - (active_requests, send_requests, &rc); - } - - /* If it is last call, we have to recycle memory */ - if(matched) { - PTPCOLL_VERBOSE(10, ("bcast root is done")); - return BCOL_FN_COMPLETE; - } else { - PTPCOLL_VERBOSE(10, ("bcast root is started")); - return BCOL_FN_STARTED; - } -} - -static int bcol_ptpcoll_bcast_k_nomial_extra_known_and_anyroot(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - - int tag; - int rc; - int i; - int completed = 0; /* not completed */ - uint32_t buffer_index = input_args->buffer_index; - - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - int *iteration = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration); - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component; - ompi_status_public_t status; - - PTPCOLL_VERBOSE(3, ("Knomial Anyroot, index_this_type %d, num_of_this_type %d", - const_args->index_of_this_type_in_collective + 1, - const_args->n_of_this_type_in_collective)); - - /* keep tag within the limit support by the pml */ - tag = (PTPCOLL_TAG_OFFSET + input_args->sequence_num * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - /* mark this as a collective tag, to avoid conflict with user-level flags */ - tag = -tag; - /* reset active requests */ - *active_requests = 0; - /* reset iteration counter */ - *iteration = -1; - - PTPCOLL_VERBOSE(8, ("bcol_ptpcoll_bcast_k_nomial_anyroot extra, buffer index: %d \n" - "tag: %d " - "tag_mask: %d " - "sn: %d " - "root: %d " - "pow_k: %d %d " - "buff: %p " - ,buffer_index, tag, - ptpcoll_module->tag_mask, input_args->sequence_num, - input_args->root_flag, - ptpcoll_module->pow_k, ptpcoll_module->pow_knum, - data_buffer - )); - - /* we have a power 2 group */ - if (input_args->root_flag) { - - PTPCOLL_VERBOSE(10, ("I'm EXTRA root of the data, v root %d", ptpcoll_module->kn_proxy_extra_index[0])); - /* send the all data to your proxy peer */ - rc = MCA_PML_CALL(isend(data_buffer, count, MPI_BYTE, - 
group_list[ptpcoll_module->kn_proxy_extra_index[0]], tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - ++(*active_requests); - - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - /* we have to store the iteration number somewhere */ - PTPCOLL_VERBOSE(10, ("Extra was started")); - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - } else { - for (i = 0; i < cm->num_to_probe && - 0 == completed; i++) { - MCA_PML_CALL(iprobe(group_list[ptpcoll_module->kn_proxy_extra_index[0]], tag - 1, - comm, &completed, &status)); - } - if (0 == completed) { - /* No data was received */ - return BCOL_FN_NOT_STARTED; - } - - /* the data is ready */ - rc = MCA_PML_CALL(recv(data_buffer, count, MPI_BYTE, - group_list[ptpcoll_module->kn_proxy_extra_index[0]], tag - 1, - comm, MPI_STATUS_IGNORE)); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - } - - PTPCOLL_VERBOSE(10, ("Extra was done")); - return BCOL_FN_COMPLETE; -} - -static int bcol_ptpcoll_bcast_k_nomial_extra_known_and_anyroot_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - int rc; - int completed = 0; /* not completed */ - int i; - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[input_args->buffer_index].requests; - uint32_t buffer_index = input_args->buffer_index; - mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - ompi_status_public_t status; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - /* keep tag within the limit support by the pml */ - int tag = -((PTPCOLL_TAG_OFFSET + input_args->sequence_num * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask)); - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - - PTPCOLL_VERBOSE(10, ("bcol_ptpcoll_bcast_k_nomial_extra_known_and_anyroot_progress extra, was called, tag %d\n", tag)); - if (input_args->root_flag) { - PTPCOLL_VERBOSE(10, ("I'm EXTRA root of the data")); - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - return (OMPI_SUCCESS != rc) ? 
rc : BCOL_FN_STARTED; - } - } else { - for (i = 0; i < cm->num_to_probe && - 0 == completed; i++) { - MCA_PML_CALL(iprobe(group_list[ptpcoll_module->kn_proxy_extra_index[0]], tag - 1, - comm, &completed, &status)); - } - if (0 == completed) { - return BCOL_FN_STARTED; - } - /* the data is ready */ - - rc = MCA_PML_CALL(recv(data_buffer, count, MPI_BYTE, - group_list[ptpcoll_module->kn_proxy_extra_index[0]], tag - 1, - comm, MPI_STATUS_IGNORE)); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - } - - /* Done */ - return BCOL_FN_COMPLETE; \ -} - -/* Know root means that we know exactly the source of data and we do not have to check multiple - * sources - */ - -#define K_NOMIAL_DATA_SRC(radix, my_group_index, group_size, group_root, data_src, radix_mask) \ - do { \ - int relative_rank = (my_group_index >= group_root) ? my_group_index - group_root : \ - my_group_index - group_root + group_size; \ - \ - radix_mask = 1; \ - while (radix_mask < group_size) { \ - if (relative_rank % (radix * radix_mask)) { \ - data_src = relative_rank/(radix * radix_mask) * (radix * radix_mask) + group_root; \ - if (data_src >= group_size) data_src -= group_size; \ - break; \ - } \ - radix_mask *= radix; \ - } \ - } while (0) - - -int bcol_ptpcoll_bcast_k_nomial_known_root_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - - int tag; - int rc = OMPI_SUCCESS; - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - int radix = ptpcoll_module->k_nomial_radix; - int radix_mask; - uint64_t sequence_number = input_args->sequence_num; - uint32_t buffer_index = input_args->buffer_index; - int group_root_index = 0; - - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **send_requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - ompi_request_t **recv_request = - &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests[0]; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - int completed = 0; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - - tag = (PTPCOLL_TAG_OFFSET + sequence_number * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - /* mark this as a collective tag, to avoid conflict with user-level flags */ - tag = -tag; - - PTPCOLL_VERBOSE(3, ("BCAST Know root, index_this_type %d, num_of_this_type %d", - const_args->index_of_this_type_in_collective + 1, - const_args->n_of_this_type_in_collective)); - - PTPCOLL_VERBOSE(10, ("bcol_ptpcoll_bcast_k_nomial_known_root_progress, buffer index: %d \n" - "tag: %d " - "tag_mask: %d " - "sn: %d " - "root: %d " - "pow_k: %d %d " - "buff: %p " - "radix: %d", - buffer_index, tag, - ptpcoll_module->tag_mask, sequence_number, - input_args->root_flag, - ptpcoll_module->pow_k, ptpcoll_module->pow_knum, - data_buffer, - radix)); - - if (input_args->root_flag) { - /* Check for completion */ - assert(*active_requests > 0); - PTPCOLL_VERBOSE(10, ("Requests %d", *active_requests)); - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, send_requests, &rc); - if (OMPI_SUCCESS != rc) { - return OMPI_ERROR; - } - } else { - /* No data was received. 
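`K_NOMIAL_DATA_SRC` above is the exact inverse of the fan-out sketched earlier: rotate the group so the root sits at relative rank 0, climb until the relative rank stops being a multiple of `radix * radix_mask`, and truncate to that level's multiple to find the parent. The same logic as a plain function (my naming):

```c
/* Parent (data source) of my_index in a k-nomial broadcast over `size`
 * ranks rooted at `root`; mirrors the K_NOMIAL_DATA_SRC macro above. */
static int knomial_data_src(int my_index, int root, int radix, int size)
{
    int relative = (my_index >= root) ? my_index - root
                                      : my_index - root + size;
    int radix_mask = 1;

    while (radix_mask < size) {
        if (relative % (radix * radix_mask)) {
            int src = relative / (radix * radix_mask) * (radix * radix_mask)
                      + root;
            return (src >= size) ? src - size : src;
        }
        radix_mask *= radix;
    }
    return root;   /* my_index is the root itself */
}
```

For example, with `size = 9`, `radix = 3`, `root = 4`, rank 8 (relative 4) resolves to source 7 (relative 3), which in turn receives from the root.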
Waiting for data */ - if (0 == (*active_requests)) { - int extra_root = -1; - netpatterns_knomial_step_info_t step_info; - /* We can not block. So run couple of test for data arrival */ - if (0 == mca_bcol_ptpcoll_test_for_match(recv_request, &rc)) { - PTPCOLL_VERBOSE(10, ("Test was not matched (active request %d)", - *active_requests)); - /* No data was received, return no match error */ - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - - radix_mask = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask; - group_root_index = input_args->root_route->rank; - - PTPCOLL_VERBOSE(10, ("Test was matched - radix %d", radix_mask)); - /* Bcast the data */ - MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_INIT(step_info, - radix_mask, my_group_index); - K_NOMIAL_ROOT_BCAST_NB_NOTEST(step_info, radix, - my_group_index, group_list, - data_buffer, count, tag, comm, send_requests, - active_requests); - - if (PTPCOLL_KN_PROXY & ptpcoll_module->pow_ktype) { - int i; - if (radix_mask == ptpcoll_module->pow_knum) { - extra_root = group_root_index; - } - for (i = 0 ; i < ptpcoll_module->kn_proxy_extra_num; i++) { - if (ptpcoll_module->kn_proxy_extra_index[i] == extra_root) - continue; - PTPCOLL_VERBOSE(10, ("Extra_Isend to %d", ptpcoll_module->kn_proxy_extra_index[i])); - rc = MCA_PML_CALL(isend(data_buffer, count, MPI_BYTE, - group_list[ptpcoll_module->kn_proxy_extra_index[i]], tag - 1, - MCA_PML_BASE_SEND_STANDARD, comm, - &(send_requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - ++(*active_requests); - } - } - if (*active_requests > 0) { - completed = mca_bcol_ptpcoll_test_all_for_match - (active_requests, send_requests, &rc); - } else { - completed = 1; - } - } else { - /* Data was received and sent out, check for completion */ - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, send_requests, &rc); - if (OMPI_SUCCESS != rc) { - PTPCOLL_VERBOSE(10, ("Test was not matched (active request %d)", - *active_requests)); - return OMPI_ERROR; - } - } - } - /* DONE */ - if(completed) { - return BCOL_FN_COMPLETE; - } else { - PTPCOLL_VERBOSE(10, ("bcast root is started")); - return BCOL_FN_STARTED; - } -} - -int bcol_ptpcoll_bcast_k_nomial_known_root(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - - int tag; - int rc; - int comm_root; - int data_src = -1; - int group_root_index; - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - int radix = ptpcoll_module->k_nomial_radix; - uint32_t buffer_index = input_args->buffer_index; - - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **send_requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - ompi_request_t **recv_request = - &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests[0]; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int matched = 0; - int k_level, logk_level; - int extra_root = -1; - netpatterns_knomial_step_info_t step_info; - - PTPCOLL_VERBOSE(3, ("BCAST Know root, index_this_type %d, num_of_this_type %d", - 
const_args->index_of_this_type_in_collective + 1, - const_args->n_of_this_type_in_collective)); - - /* reset active request counter */ - (*active_requests) = 0; - /* keep tag within the limit support by the pml */ - tag = (PTPCOLL_TAG_OFFSET + input_args->sequence_num * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - /* mark this as a collective tag, to avoid conflict with user-level flags */ - tag = -tag; - - PTPCOLL_VERBOSE(8, ("bcol_ptpcoll_bcast_k_nomial_known_root, buffer index: %d \n" - "tag: %d " - "tag_mask: %d " - "sn: %d " - "root: %d " - "pow_k: %d %d " - "buff: %p " - "radix: %d", - buffer_index, tag, - ptpcoll_module->tag_mask, input_args->sequence_num, - input_args->root_flag, - ptpcoll_module->pow_k, ptpcoll_module->pow_knum, - data_buffer, - radix)); - - if (input_args->root_flag) { - PTPCOLL_VERBOSE(10, ("I'm root of the data")); - /* - * I'm root of the operation - * send data to (k - 1) * log base k N neighbors - */ - MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_INIT(step_info, - ptpcoll_module->pow_knum, my_group_index); - K_NOMIAL_ROOT_BCAST_NB_NOTEST(step_info, radix, - my_group_index, group_list, - data_buffer, count, tag, comm, send_requests, - active_requests); - goto KNOWN_ROOT_KNOMIAL_BCAST_EXTRA; - } - - /* I'm not root */ - group_root_index = input_args->root_route->rank; - - /* If Proxy node, check if extra node is root */ - PTPCOLL_VERBOSE(10, ("Check if I virtual root, groop root %d group_size_pow %d type %d\n", - group_root_index, ptpcoll_module->pow_knum , ptpcoll_module->pow_ktype)); - if (group_root_index >= ptpcoll_module->pow_knum) { - /* Chech if the rank is virtual root */ - int virtual_root = (group_root_index - - ptpcoll_module->pow_knum) / (radix - 1); - - if (my_group_index == virtual_root) { - MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_INIT(step_info, - ptpcoll_module->pow_knum, my_group_index); - k_level = ptpcoll_module->pow_knum; - comm_root = group_list[group_root_index]; - extra_root = group_root_index; - PTPCOLL_VERBOSE(10, ("Im virtual root klevel %d, comm_root %d vroot %d\n", - k_level, comm_root, virtual_root)); - goto KNOWN_ROOT_KNOMIAL_BCAST; - } else { - /* set virtual root as real root of the group */ - group_root_index = virtual_root; - PTPCOLL_VERBOSE(10, ("My virtual root vroot %d\n", group_root_index)); - } - } - - data_src = netpatterns_get_knomial_data_source( - my_group_index, group_root_index, radix, ptpcoll_module->pow_knum, - &k_level, &logk_level); - - comm_root = group_list[data_src]; - -KNOWN_ROOT_KNOMIAL_BCAST: - PTPCOLL_VERBOSE(10, ("Bcast, receive data from %d[%d], count %d, tag %d, addr %p", - comm_root, data_src, count, tag, data_buffer)); - - rc = MCA_PML_CALL(irecv(data_buffer, count, MPI_BYTE, comm_root, tag, comm, recv_request)); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive data")); - return OMPI_ERROR; - } - - /* We can not block. So run couple of test for data arrival */ - if (0 == mca_bcol_ptpcoll_test_for_match(recv_request, &rc)) { - PTPCOLL_VERBOSE(10, ("Test was not matched - %d", rc)); - /* cache the radix mask for future progress */ - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask = k_level; - /* No data was received, return no match error */ - return (OMPI_SUCCESS != rc) ? 
rc : BCOL_FN_STARTED; - } - - /* Bcast the data */ - MCA_COMMON_NETPATTERNS_GET_NEXT_KNOMIAL_INIT(step_info, - k_level, my_group_index); - - K_NOMIAL_ROOT_BCAST_NB_NOTEST(step_info, radix, - my_group_index, group_list, - data_buffer, count, tag, comm, send_requests, - active_requests); - -KNOWN_ROOT_KNOMIAL_BCAST_EXTRA: - /* Proxy node but NOT virtual root */ - if (PTPCOLL_KN_PROXY & ptpcoll_module->pow_ktype) { - int i; - for (i = 0 ; i < ptpcoll_module->kn_proxy_extra_num; i++) { - if (ptpcoll_module->kn_proxy_extra_index[i] == extra_root) - continue; - - PTPCOLL_VERBOSE(10, ("Extra_Isend to %d", ptpcoll_module->kn_proxy_extra_index[i])); - rc = MCA_PML_CALL(isend(data_buffer, count, MPI_BYTE, - group_list[ptpcoll_module->kn_proxy_extra_index[i]], tag - 1, - MCA_PML_BASE_SEND_STANDARD, comm, - &(send_requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - ++(*active_requests); - } - } - - if (*active_requests > 0) { - matched = - mca_bcol_ptpcoll_test_all_for_match - (active_requests, send_requests, &rc); - } else { - matched = 1; - } - - /* If it is last call, we have to recycle memory */ - if(matched) { - return BCOL_FN_COMPLETE; - } else { - PTPCOLL_VERBOSE(10, ("bcast root is started")); - return BCOL_FN_STARTED; - } -} - -int bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot_extra(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - - int tag; - int rc; - int i; - int completed = 0; /* not completed */ - uint32_t buffer_index = input_args->buffer_index; - - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - int *iteration = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration); - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component; - ompi_status_public_t status; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - - PTPCOLL_VERBOSE(3, ("BCAST Anyroot, index_this_type %d, num_of_this_type %d", - const_args->index_of_this_type_in_collective + 1, - const_args->n_of_this_type_in_collective)); - - /* keep tag within the limit support by the pml */ - tag = (PTPCOLL_TAG_OFFSET + input_args->sequence_num * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - /* mark this as a collective tag, to avoid conflict with user-level flags */ - tag = -tag; - /* reset active requests */ - *active_requests = 0; - /* reset iteration counter */ - *iteration = -1; - - PTPCOLL_VERBOSE(8, ("bcol_ptpcoll_bcast_k_nomial_anyroot extra, buffer index: %d \n" - "tag: %d " - "tag_mask: %d " - "sn: %d " - "root: %d " - "pow_k: %d %d " - "buff: %p " - "radix: %d" , - buffer_index, tag, - ptpcoll_module->tag_mask, input_args->sequence_num, - input_args->root_flag, - ptpcoll_module->pow_k, ptpcoll_module->pow_knum, - data_buffer, - 2 - )); - - /* we have a power 2 group */ - if (input_args->root_flag) { - - PTPCOLL_VERBOSE(10, ("I'm EXTRA root of the data")); - /* send the all data to your proxy peer */ - rc = MCA_PML_CALL(isend(data_buffer, count, MPI_BYTE, - 
group_list[ptpcoll_module->proxy_extra_index], tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - ++(*active_requests); - - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - /* we have to store the iteration number somewhere */ - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - } else { - for (i = 0; i < cm->num_to_probe && - 0 == completed; i++) { - MCA_PML_CALL(iprobe(group_list[ptpcoll_module->proxy_extra_index], tag - 1, - comm, &completed, &status)); - } - if (0 == completed) { - /* No data was received */ - return BCOL_FN_NOT_STARTED; - } - - /* the data is ready */ - rc = MCA_PML_CALL(recv(data_buffer, count, MPI_BYTE, - group_list[ptpcoll_module->proxy_extra_index], tag - 1, - comm, MPI_STATUS_IGNORE)); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - } - - return BCOL_FN_COMPLETE; -} - -int bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot_extra_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - int rc; - int completed = 0; /* not completed */ - int i; - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[input_args->buffer_index].requests; - uint32_t buffer_index = input_args->buffer_index; - mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - ompi_status_public_t status; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - /* keep tag within the limit support by the pml */ - int tag = -((PTPCOLL_TAG_OFFSET + input_args->sequence_num * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask)); - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - - PTPCOLL_VERBOSE(10, ("bcol_ptpcoll_bcast_k_nomial_extra_known_and_anyroot_progress extra, was called, tag %d\n", tag)); - if (input_args->root_flag) { - PTPCOLL_VERBOSE(10, ("I'm EXTRA root of the data")); - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - return (OMPI_SUCCESS != rc) ? 
rc : BCOL_FN_STARTED; - } - } else { - for (i = 0; i < cm->num_to_probe && - 0 == completed; i++) { - MCA_PML_CALL(iprobe(group_list[ptpcoll_module->proxy_extra_index], tag - 1, - comm, &completed, &status)); - } - if (0 == completed) { - return BCOL_FN_STARTED; - } - /* the data is ready */ - - rc = MCA_PML_CALL(recv(data_buffer, count, MPI_BYTE, - group_list[ptpcoll_module->proxy_extra_index], tag - 1, - comm, MPI_STATUS_IGNORE)); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - } - - /* Done */ - return BCOL_FN_COMPLETE; -} - -int bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - - int rc; - int completed = 0; /* not completed */ - uint32_t buffer_index = input_args->buffer_index; - - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - int *iteration = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration); - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - size_t base_block_size = (count + ptpcoll_module->pow_2num - 1) / - ptpcoll_module->pow_2num; - int tag = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag; - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - int *status = - &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].status; - - PTPCOLL_VERBOSE(10, ("bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot_progress, buffer index: %d \n" - "tag: %d " - "tag_mask: %d " - "sn: %d " - "root: %d " - "pow_2: %d %d " - "buff: %p " - "radix: %d" - "block_size: %d", - buffer_index, tag, - ptpcoll_module->tag_mask, 0, - input_args->root_flag, - ptpcoll_module->pow_2, ptpcoll_module->pow_2num, - data_buffer, - 2, - base_block_size)); - - switch(*status) { - case PTPCOLL_GATHER_STARTED: - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - PTPCOLL_VERBOSE(10, ("Not done, have to complete %d, Return %d", *active_requests, rc)); - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - ++(*iteration); /* start from next iteration */ - PTPCOLL_VERBOSE(10, ("Outstanding operation was comleted, starting next one ! %d", *iteration)); - break; - case PTPCOLL_EXTRA_SEND_STARTED: - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - PTPCOLL_VERBOSE(10, ("Not done, have to complete %d, Return %d", *active_requests, rc)); - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - return BCOL_FN_COMPLETE; - default: - PTPCOLL_VERBOSE(10, ("Unknown status %d", *status)); - return OMPI_ERROR; - } - - PTPCOLL_VERBOSE(10, ("Stating PR_GATHER")); - /* Gather, continue the recoursive doubling iterations */ - rc = bcol_ptpcoll_bcast_binomial_gather_anyroot(ptpcoll_module, buffer_index, data_buffer, - count, base_block_size); - if (BCOL_FN_COMPLETE != rc) { - assert(0 != *active_requests); - PTPCOLL_VERBOSE(10, ("Not done. Return %d", rc)); - return rc; - } - PTPCOLL_VERBOSE(10, ("PR_GATHER done")); - - /* it the process is proxy , it has to send full - message to remote peer */ - if ((PTPCOLL_PROXY & ptpcoll_module->pow_2type) && - ! 
CHECK_IF_ROOT_OR_VROOT(ptpcoll_module, buffer_index)) { - *status = PTPCOLL_EXTRA_SEND_STARTED; - rc = bcol_ptpcoll_bcast_binomial_scatter_gatther_send_extra( - ptpcoll_module, - data_buffer, count, tag - 1, - ptpcoll_module->proxy_extra_index, comm, - active_requests, requests); - if (BCOL_FN_COMPLETE != rc) { - return rc; - } - } - /* return */ - return BCOL_FN_COMPLETE; -} - -int bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - - int tag; - int rc; - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - uint64_t sequence_number = input_args->sequence_num; - uint32_t buffer_index = input_args->buffer_index; - - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - int *iteration = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration); - int *radix_mask_pow = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask_pow); - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - size_t base_block_size = (count + ptpcoll_module->pow_2num - 1) / - ptpcoll_module->pow_2num; - int root_pow2 = ptpcoll_module->pow_2 - 1; - int *status = - &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].status; - - PTPCOLL_VERBOSE(3, ("BCAST Anyroot, index_this_type %d, num_of_this_type %d", - const_args->index_of_this_type_in_collective + 1, - const_args->n_of_this_type_in_collective)); - - /* keep tag within the limit support by the pml */ - tag = (PTPCOLL_TAG_OFFSET + sequence_number * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - /* mark this as a collective tag, to avoid conflict with user-level flags */ - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag = tag = -tag; - /* reset active requests */ - *active_requests = 0; - /* reset iteration counter */ - *iteration = -1; - /* set initial status */ - *status = PTPCOLL_NOT_STARTED; - - PTPCOLL_VERBOSE(8, ("bcol_ptpcoll_bcast_k_nomial_anyroot, buffer index: %d \n" - "tag: %d " - "tag_mask: %d " - "sn: %d " - "root: %d " - "pow_2: %d %d " - "buff: %p " - "radix: %d" - "block_size: %d", - buffer_index, tag, - ptpcoll_module->tag_mask, sequence_number, - input_args->root_flag, - ptpcoll_module->pow_2, ptpcoll_module->pow_2num, - data_buffer, - 2, - base_block_size)); - - /* we have a power 2 group */ - if (input_args->root_flag) { - - PTPCOLL_VERBOSE(10, ("I'm root of the data")); - /* for proxy we have little bit more work to do */ - if (PTPCOLL_PROXY & ptpcoll_module->pow_2type) { - /* send the all data to your extra peer */ - rc = MCA_PML_CALL(isend(data_buffer, count, MPI_BYTE, - group_list[ptpcoll_module->proxy_extra_index], - tag - 1, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - ++(*active_requests); - } - /* - * I'm root of the operation - * send data to (k - 1) * log base k N neighbors - */ - *radix_mask_pow = ptpcoll_module->pow_2; - - K_NOMIAL_ROOT_BCAST_NB_BINOMIAL_SCATTER(root_pow2, - 
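The `GATHER` label below marks the second half of the scatter/allgather scheme: the binomial scatter leaves each of the `2^pow_2` ranks holding one `base_block_size` block, and `pow_2` recursive-doubling rounds then recombine them, doubling the exchanged run each round. A blocking sketch under simplifying assumptions (exact power-of-two group, `count` an exact multiple of the base block; the deleted code is non-blocking, resumes via `*iteration`, and clips the final block to `count`; names are mine):

```c
#include <mpi.h>
#include <stddef.h>

/* Allgather half of the scatter/allgather broadcast: at round s each
 * rank swaps its accumulated run of 2^s blocks with partner rank ^ 2^s,
 * so everyone holds all 2^pow_2 blocks after pow_2 rounds. */
static void binomial_allgather(unsigned char *buf, size_t base,
                               int my_rank, int pow_2,
                               const int *group_to_comm,
                               int tag, MPI_Comm comm)
{
    for (int s = 0; s < pow_2; s++) {
        int partner  = my_rank ^ (1 << s);
        int my_blk   = my_rank & ~((1 << s) - 1);      /* my run start   */
        int peer_blk = partner & ~((1 << s) - 1);      /* partner's run  */
        int nbytes   = (int)(base * ((size_t) 1 << s)); /* doubles/round */

        MPI_Sendrecv(buf + base * (size_t) my_blk,  nbytes, MPI_BYTE,
                     group_to_comm[partner], tag,
                     buf + base * (size_t) peer_blk, nbytes, MPI_BYTE,
                     group_to_comm[partner], tag,
                     comm, MPI_STATUS_IGNORE);
    }
}
```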
my_group_index, group_size, group_list, - data_buffer, base_block_size, count, tag, comm, requests, - active_requests); - - goto GATHER; - } - - /* <-- non root flow --> */ - rc = bcol_ptpcoll_bcast_binomial_probe_and_scatter_anyroot(ptpcoll_module, buffer_index, - data_buffer, count, base_block_size); - if (BCOL_FN_COMPLETE != rc) { - PTPCOLL_VERBOSE(10, ("Not done. Return %d", rc)); - return rc; - } - -GATHER: - *iteration = 0; - *status = PTPCOLL_GATHER_STARTED; - rc = bcol_ptpcoll_bcast_binomial_gather_anyroot(ptpcoll_module, buffer_index, - data_buffer, count, base_block_size); - - if (BCOL_FN_COMPLETE != rc) { - assert(0 != *active_requests); - PTPCOLL_VERBOSE(10, ("Not done. Return %d", rc)); - return rc; - } - - ++(*iteration); /* I need it for progress */ - - /* proxy case */ - if ((PTPCOLL_PROXY & ptpcoll_module->pow_2type) && - ! CHECK_IF_ROOT_OR_VROOT(ptpcoll_module, buffer_index)) { - *status = PTPCOLL_EXTRA_SEND_STARTED; - rc = bcol_ptpcoll_bcast_binomial_scatter_gatther_send_extra(ptpcoll_module, - data_buffer, count, tag - 1, - ptpcoll_module->proxy_extra_index, comm, - active_requests, requests); - if (BCOL_FN_COMPLETE != rc) { - return rc; - } - } - - return BCOL_FN_COMPLETE; -} - -int bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - - int rc; - int completed = 0; /* not completed */ - uint32_t buffer_index = input_args->buffer_index; - - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - int *iteration = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration); - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - size_t base_block_size = (count + ptpcoll_module->pow_2num - 1) / - ptpcoll_module->pow_2num; - int tag = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag; - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - int *status = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].status); - - PTPCOLL_VERBOSE(10, ("bcol_ptpcoll_bcast_binomial_scatter_gatther_known_progress, buffer index: %d \n" - "tag: %d " - "tag_mask: %d " - "sn: %d " - "root: %d " - "pow_2: %d %d " - "buff: %p " - "radix: %d" - "block_size: %d", - buffer_index, tag, - ptpcoll_module->tag_mask, 0, - input_args->root_flag, - ptpcoll_module->pow_2, ptpcoll_module->pow_2num, - data_buffer, - 2, - base_block_size)); - - switch(*status) { - case PTPCOLL_WAITING_FOR_DATA: - PTPCOLL_VERBOSE(10, ("Probe for the data")); - rc = bcol_ptpcoll_bcast_binomial_test_and_scatter_known_root(ptpcoll_module, buffer_index, - data_buffer, count, base_block_size); - if (BCOL_FN_COMPLETE != rc) { - assert(0 != *active_requests); - PTPCOLL_VERBOSE(10, ("Not done. Return %d", rc)); - return rc; - } - *iteration = 0; - *status = PTPCOLL_GATHER_STARTED; - break; - case PTPCOLL_GATHER_STARTED: - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - PTPCOLL_VERBOSE(10, ("Not done, have to complete %d, Return %d", *active_requests, rc)); - return (OMPI_SUCCESS != rc) ? 
rc : BCOL_FN_STARTED; - } - ++(*iteration); /* start from next iteration */ - PTPCOLL_VERBOSE(10, ("Outstanding operation was comleted, starting next one ! %d", *iteration)); - break; - case PTPCOLL_EXTRA_SEND_STARTED: - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - PTPCOLL_VERBOSE(10, ("Not done, have to complete %d, Return %d", *active_requests, rc)); - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - return BCOL_FN_COMPLETE; - default: - PTPCOLL_VERBOSE(10, ("Unknown status %d", *status)); - return OMPI_ERROR; - } - - PTPCOLL_VERBOSE(10, ("Stating PR_GATHER")); - /* Gather, continue the recoursive doubling iterations */ - rc = bcol_ptpcoll_bcast_binomial_gather_anyroot(ptpcoll_module, buffer_index, data_buffer, - count, base_block_size); - if (BCOL_FN_COMPLETE != rc) { - assert(0 != *active_requests); - PTPCOLL_VERBOSE(10, ("Not done. Return %d", rc)); - return rc; - } - PTPCOLL_VERBOSE(10, ("PR_GATHER done")); - - /* it the process is proxy , it has to send full - message to remote peer */ - if ((PTPCOLL_PROXY & ptpcoll_module->pow_2type) && - ! CHECK_IF_ROOT_OR_VROOT(ptpcoll_module, buffer_index)) { - *status = PTPCOLL_EXTRA_SEND_STARTED; - rc = bcol_ptpcoll_bcast_binomial_scatter_gatther_send_extra( - ptpcoll_module, - data_buffer, count, tag - 1, - ptpcoll_module->proxy_extra_index, comm, - active_requests, requests); - if (BCOL_FN_COMPLETE != rc) { - return rc; - } - } - - /* return */ - return BCOL_FN_COMPLETE; -} - -int bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - - int tag; - int rc; - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - int group_src, comm_root; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - int pow2_distance; - void *curr_data_buffer; - int recv_count; - uint64_t sequence_number = input_args->sequence_num; - uint32_t buffer_index = input_args->buffer_index; - - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - int *iteration = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration); - int *radix_mask_pow = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask_pow); - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - size_t base_block_size = (count + ptpcoll_module->pow_2num - 1) / - ptpcoll_module->pow_2num; - int root_pow2 = ptpcoll_module->pow_2 - 1; - int *status = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].status); - - PTPCOLL_VERBOSE(3, ("BCAST Anyroot, index_this_type %d, num_of_this_type %d", - const_args->index_of_this_type_in_collective + 1, - const_args->n_of_this_type_in_collective)); - - /* keep tag within the limit support by the pml */ - tag = (PTPCOLL_TAG_OFFSET + sequence_number * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - /* mark this as a collective tag, to avoid conflict with user-level flags */ - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag = tag = -tag; - /* reset active requests */ - *active_requests = 0; - /* reset iteration counter */ - 
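The non-root receive a few lines below sizes itself from binomial-tree geometry: `base_block_size` is the ceiling of `count / pow_2num`, and a rank at distance `pow2_distance` from the (virtual) root receives the `2^pow2_distance` consecutive base blocks that cover its own subtree, placed at the subtree's left boundary rank. The same arithmetic as a small helper (hypothetical name):

```c
#include <stddef.h>

/* Receive window for a rank at distance d below the root in the
 * binomial scatter: 2^d base blocks, starting at the subtree's left
 * boundary (my_index with the low d bits cleared). */
static void known_root_recv_window(int my_index, int pow2_distance,
                                   size_t base_block_size,
                                   size_t *offset, size_t *len)
{
    /* same mask as the original's (~(int)0) << pow2_distance */
    int left_boundary = my_index & -(1 << pow2_distance);

    *offset = base_block_size * (size_t) left_boundary;
    *len    = base_block_size * ((size_t) 1 << pow2_distance);
}
```

The negative `pow2_distance` branch is the virtual-root case: that rank receives the full `count` into the start of the buffer and then proceeds exactly as the root would.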
*iteration = -1; - /* set initial status */ - *status = PTPCOLL_NOT_STARTED; - - PTPCOLL_VERBOSE(8, ("bcol_ptpcoll_bcast_binomial_scatter_gatther_known, buffer index: %d \n" - "tag: %d " - "tag_mask: %d " - "sn: %d " - "root: %d " - "pow_2: %d %d " - "buff: %p " - "radix: %d" - "block_size: %d", - buffer_index, tag, - ptpcoll_module->tag_mask, sequence_number, - input_args->root_flag, - ptpcoll_module->pow_2, ptpcoll_module->pow_2num, - data_buffer, - 2, - base_block_size)); - - /* we have a power 2 group */ - if (input_args->root_flag) { - - PTPCOLL_VERBOSE(10, ("I'm root of the data")); - /* for proxy we have little bit more work to do */ - if (PTPCOLL_PROXY & ptpcoll_module->pow_2type) { - /* send the all data to your extra peer */ - rc = MCA_PML_CALL(isend(data_buffer, count, MPI_BYTE, - group_list[ptpcoll_module->proxy_extra_index], tag - 1, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - *active_requests = 1; - } - /* - * I'm root of the operation - * send data to (k - 1) * log base k N neighbors - */ - K_NOMIAL_ROOT_BCAST_NB_BINOMIAL_SCATTER(root_pow2, - my_group_index, group_size, group_list, - data_buffer, base_block_size, count, tag, comm, requests, - active_requests); - - /* EXIT OR GO TO Gather */ - *iteration = 0; - *radix_mask_pow = ptpcoll_module->pow_2; - goto GATHER; - } - - /* <-- non root flow --> */ - /* prapare and post recv operation */ - group_src = bcol_ptpcoll_binomial_root_to_src(input_args->root_route->rank, - my_group_index, ptpcoll_module->pow_2num, - ptpcoll_module->group_size, &pow2_distance); - - assert(group_src >= 0); - - if (0 > pow2_distance) { - /* the rank is virtual root for this group, receive the data - and scatter gather as root */ - PTPCOLL_VERBOSE(10, ("Virtual root %d , set mask to %d", my_group_index, ptpcoll_module->pow_2)); - *radix_mask_pow = ptpcoll_module->pow_2; - curr_data_buffer = data_buffer; - recv_count = count; - } else { - int my_left_boundary_rank; - recv_count = base_block_size * (1 << pow2_distance); /* we may receive larger data */ - my_left_boundary_rank = my_group_index & ((~(int)0) << pow2_distance ); - curr_data_buffer = (void *)((unsigned char *)data_buffer + - (size_t) base_block_size * my_left_boundary_rank); - *radix_mask_pow = pow2_distance; - } - - comm_root = group_list[group_src]; - - PTPCOLL_VERBOSE(10, ("Bcast, receive data from %d[%d], count %d, tag %d, addr %p", - comm_root, group_src, count, tag, data_buffer)); - - rc = MCA_PML_CALL(irecv(curr_data_buffer, recv_count, MPI_BYTE, comm_root, - tag, comm, &requests[*active_requests])); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive data")); - return OMPI_ERROR; - } - - ++(*active_requests); - - *status = PTPCOLL_WAITING_FOR_DATA; - rc = bcol_ptpcoll_bcast_binomial_test_and_scatter_known_root(ptpcoll_module, - buffer_index, data_buffer, count, base_block_size); - - if (BCOL_FN_COMPLETE != rc) { - PTPCOLL_VERBOSE(10, ("Not done. Return %d", rc)); - return rc; - } - - /* recv operation is done */ - - *iteration = 0; - -GATHER: - - *status = PTPCOLL_GATHER_STARTED; - rc = bcol_ptpcoll_bcast_binomial_gather_anyroot(ptpcoll_module, buffer_index, - data_buffer, count, base_block_size); - - if (BCOL_FN_COMPLETE != rc) { - assert(0 != *active_requests); - PTPCOLL_VERBOSE(10, ("Not done. 
Return %d", rc)); - return rc; - } - - ++(*iteration); /* I need it for progress */ - - /* proxy case */ - if ((PTPCOLL_PROXY & ptpcoll_module->pow_2type) && - ! CHECK_IF_ROOT_OR_VROOT(ptpcoll_module, buffer_index)) { - *status = PTPCOLL_EXTRA_SEND_STARTED; - rc = bcol_ptpcoll_bcast_binomial_scatter_gatther_send_extra( - ptpcoll_module, - data_buffer, count, tag - 1, - ptpcoll_module->proxy_extra_index, comm, - active_requests, requests); - if (BCOL_FN_COMPLETE != rc) { - return rc; - } - } - - return BCOL_FN_COMPLETE; -} - -int bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root_extra(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - - int tag; - int rc; - int completed = 0; /* not completed */ - uint32_t buffer_index = input_args->buffer_index; - - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - int *iteration = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration); - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - - PTPCOLL_VERBOSE(3, ("BCAST known root, index_this_type %d, num_of_this_type %d", - const_args->index_of_this_type_in_collective + 1, - const_args->n_of_this_type_in_collective)); - - /* keep tag within the limit support by the pml */ - tag = (PTPCOLL_TAG_OFFSET + input_args->sequence_num * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - /* mark this as a collective tag, to avoid conflict with user-level flags */ - tag = -tag; - /* reset active requests */ - *active_requests = 0; - /* reset iteration counter */ - *iteration = -1; - - PTPCOLL_VERBOSE(8, ("bcol_ptpcoll_bcast_k_nomial_anyroot extra, buffer index: %d \n" - "tag: %d " - "tag_mask: %d " - "sn: %d " - "root: %d " - "pow_k: %d %d " - "buff: %p " - "radix: %d" , - buffer_index, tag, - ptpcoll_module->tag_mask, input_args->sequence_num, - input_args->root_flag, - ptpcoll_module->pow_k, ptpcoll_module->pow_knum, - data_buffer, - 2 - )); - - /* we have a power 2 group */ - if (input_args->root_flag) { - PTPCOLL_VERBOSE(10, ("I'm EXTRA root of the data")); - /* send the all data to your proxy peer */ - rc = MCA_PML_CALL(isend(data_buffer, count, MPI_BYTE, - group_list[ptpcoll_module->proxy_extra_index], tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - ++(*active_requests); - - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - /* we have to store the iteration number somewhere */ - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - } else { - rc = MCA_PML_CALL(irecv(data_buffer, count, MPI_BYTE, - group_list[ptpcoll_module->proxy_extra_index], - tag - 1, comm, &requests[*active_requests])); - ++(*active_requests); - - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - PTPCOLL_VERBOSE(10, ("Test was not matched - %d", rc)); - return (OMPI_SUCCESS != rc) ? 
rc : BCOL_FN_STARTED; - } - } - - return BCOL_FN_COMPLETE; -} - -int bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root_extra_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - int rc; - int completed = 0; /* not completed */ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[input_args->buffer_index].requests; - uint32_t buffer_index = input_args->buffer_index; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - - PTPCOLL_VERBOSE(10, ("bcol_ptpcoll_bcast_binomial_known_root_extra_progress extra, was called\n")); - - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - - return BCOL_FN_COMPLETE; -} - -static int bcol_ptpcoll_bcast_narray_knomial_scatter_gatther_known_root_progress( - bcol_function_args_t *input_args, struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - - int rc; - int completed = 0; /* not completed */ - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - uint32_t buffer_index = input_args->buffer_index; - - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - int *iteration = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration); - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int tag = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag; - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - int *status = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].status); - int relative_group_index, - group_root_index = 0; - int group_size = ptpcoll_module->full_narray_tree_size; - - PTPCOLL_VERBOSE(8, ("bcol_ptpcoll_bcast_narray_knomial_scatter_gatther_known_root_progress, buffer index: %d " - "tag: %d " - "tag_mask: %d " - "root: %d " - "buff: %p " - "radix: %d" - , buffer_index, tag, - ptpcoll_module->tag_mask, - input_args->root_flag, - data_buffer, - ptpcoll_module->narray_knomial_proxy_num - )); - - if (input_args->root_flag || - /* virtual root case */ - (input_args->root_route->rank >= group_size && - my_group_index == (input_args->root_route->rank - group_size) / - mca_bcol_ptpcoll_component.narray_knomial_radix)) { - relative_group_index = 0; - group_root_index = my_group_index; - } else { - if (input_args->root_route->rank >= group_size) { - group_root_index = (input_args->root_route->rank - group_size) / - mca_bcol_ptpcoll_component.narray_knomial_radix; - } else { - group_root_index = input_args->root_route->rank; - } - relative_group_index = my_group_index - group_root_index; - if (relative_group_index < 0) { - relative_group_index += group_size; - } - } - - switch(*status) { - case PTPCOLL_WAITING_FOR_DATA: - PTPCOLL_VERBOSE(10, ("Probe for the data")); - rc = bcol_ptpcoll_bcast_narray_test_and_scatter_known_root(ptpcoll_module, - buffer_index, data_buffer, count, group_root_index, - relative_group_index); - - if (BCOL_FN_COMPLETE != rc) { - assert(0 != *active_requests); - PTPCOLL_VERBOSE(10, ("Not done. 
Return %d", rc)); - return rc; - } - *iteration = 0; - *status = PTPCOLL_GATHER_STARTED; - break; - case PTPCOLL_ROOT_SEND_STARTED: - case PTPCOLL_GATHER_STARTED: - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - PTPCOLL_VERBOSE(10, ("Not done, have to complete %d, Return %d", *active_requests, rc)); - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - ++(*iteration); /* start from next iteration */ - PTPCOLL_VERBOSE(10, ("Outstanding operation was comleted, starting next one ! %d", *iteration)); - break; - case PTPCOLL_EXTRA_SEND_STARTED: - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - PTPCOLL_VERBOSE(10, ("Not done, have to complete %d, Return %d", *active_requests, rc)); - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - return BCOL_FN_COMPLETE; - default: - PTPCOLL_VERBOSE(10, ("Unknown status %d", *status)); - return OMPI_ERROR; - } - - PTPCOLL_VERBOSE(10, ("Stating PR_GATHER")); - /* Gather, continue the recoursive doubling iterations */ - rc = bcol_ptpcoll_bcast_narray_knomial_gather(ptpcoll_module, - buffer_index, data_buffer, count, - relative_group_index); - if (BCOL_FN_COMPLETE != rc) { - assert(0 != *active_requests); - PTPCOLL_VERBOSE(10, ("Not done. Return %d", rc)); - return rc; - } - PTPCOLL_VERBOSE(10, ("PR_GATHER done")); - - /* it the process is proxy , it has to send full - message to remote peer */ - if ((PTPCOLL_PROXY & ptpcoll_module->narray_type) && - !input_args->root_flag) { - *status = PTPCOLL_EXTRA_SEND_STARTED; - rc = bcol_ptpcoll_send_n_extra( - ptpcoll_module, - data_buffer, count, tag - 1, - ptpcoll_module->narray_knomial_proxy_extra_index, - ptpcoll_module->narray_knomial_proxy_num, - input_args->root_route->rank, - comm, active_requests, requests); - if (BCOL_FN_COMPLETE != rc) { - return rc; - } - } - - /* return */ - return BCOL_FN_COMPLETE; -} - - -static int bcol_ptpcoll_bcast_narray_knomial_scatter_gatther_known_root(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - - int tag, rc, i; - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - int data_src, offset, - comm_root; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - void *curr_data_buffer; - int recv_count; - uint64_t sequence_number = input_args->sequence_num; - uint32_t buffer_index = input_args->buffer_index; - - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - int *iteration = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration); - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - size_t base_block_size = 0; - int *status = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].status); - int relative_group_index, - group_root_index; - int group_size = ptpcoll_module->full_narray_tree_size; - int completed = 0; - int virtual_root; - netpatterns_narray_knomial_tree_node_t *narray_knomial_node = NULL; - netpatterns_narray_knomial_tree_node_t *narray_node = NULL; - - PTPCOLL_VERBOSE(3, ("BCAST Anyroot, index_this_type %d, 
num_of_this_type %d", - const_args->index_of_this_type_in_collective + 1, - const_args->n_of_this_type_in_collective)); - - /* keep tag within the limit support by the pml */ - tag = (PTPCOLL_TAG_OFFSET + sequence_number * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - /* mark this as a collective tag, to avoid conflict with user-level flags */ - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag = tag = -tag; - /* reset radix mask, it used to keep last block size */ - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask = 1; - /* reset active requests */ - *active_requests = 0; - /* reset iteration counter */ - *iteration = -1; - /* set initial status */ - *status = PTPCOLL_NOT_STARTED; - - PTPCOLL_VERBOSE(8, ("bcol_ptpcoll_bcast_narray_knomial_scatter_gatther_known_root, buffer index: %d " - "tag: %d " - "tag_mask: %d " - "sn: %d " - "root: %d " - "buff: %p " - "radix: %d" - ,buffer_index, tag, - ptpcoll_module->tag_mask, sequence_number, - input_args->root_flag, - data_buffer, - ptpcoll_module->narray_knomial_proxy_num - )); - - /* we have a power 2 group */ - if (input_args->root_flag) { - PTPCOLL_VERBOSE(10, ("I'm root of the data")); - narray_knomial_node = &ptpcoll_module->narray_knomial_node[0]; - relative_group_index = 0; - group_root_index = my_group_index; - - /* for proxy we have little bit more work to do */ - if (PTPCOLL_PROXY & ptpcoll_module->narray_type) { - /* send the all data to your extra peer */ - for (i = 0; i < ptpcoll_module->narray_knomial_proxy_num; ++i) { - PTPCOLL_VERBOSE(9, ("Extra send %d, dst %d, tag %d", - i, ptpcoll_module->narray_knomial_proxy_extra_index[i], tag - 1)); - rc = MCA_PML_CALL(isend(data_buffer, count, MPI_BYTE, - group_list[ptpcoll_module->narray_knomial_proxy_extra_index[i]], - tag - 1, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - ++(*active_requests); - } - } - /* - * I'm root of the operation - * send data to radix_k neighbors - */ - base_block_size = NARRAY_BLOCK_SIZE(count, ptpcoll_module, - narray_knomial_node->level_size); - - NARRAY_SCATTER_B(narray_knomial_node, my_group_index, - group_size, data_buffer, - base_block_size, count, tag, comm, requests, - active_requests, completed); - if (0 == completed) { - *status = PTPCOLL_ROOT_SEND_STARTED; - return BCOL_FN_STARTED; - } - goto EXIT; - } - - /* <-- non root flow --> */ - group_root_index = input_args->root_route->rank; - - if (group_root_index >= group_size) { - /* calculate virtual root */ - virtual_root = - (group_root_index - group_size) / - mca_bcol_ptpcoll_component.narray_knomial_radix; - if (my_group_index == virtual_root) { - PTPCOLL_VERBOSE(10, ("I'm virtual root of the data")); - - rc = MCA_PML_CALL(irecv(data_buffer, count, MPI_BYTE, - group_list[group_root_index], - tag, comm, &requests[*active_requests])); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive data")); - return OMPI_ERROR; - } - ++(*active_requests); - /* act like a root */ - relative_group_index = 0; - group_root_index = my_group_index; - goto SCATTER; - } - group_root_index = virtual_root; - } - - relative_group_index = my_group_index - group_root_index; - if (relative_group_index < 0) { - relative_group_index += group_size; - } - - narray_node = &ptpcoll_module->narray_knomial_node[relative_group_index]; - - data_src = narray_node->parent_rank + group_root_index; - if (data_src >= group_size) { - data_src -= group_size; - } - - comm_root = 
group_list[data_src]; - - recv_count = NARRAY_BLOCK_SIZE(count, ptpcoll_module, narray_node->level_size); - offset = recv_count * narray_node->rank_on_level; - /* make sure that we do not overun memory */ - if (OPAL_UNLIKELY(offset + recv_count > count)) { - recv_count = count - offset; - if (0 >= recv_count) { - goto GATHER; - } - } - - curr_data_buffer = (void *)((unsigned char *)data_buffer + (size_t)offset); - PTPCOLL_VERBOSE(10, ("Bcast, receive data from %d[%d], count %d, tag %d, addr %p len %d offset %d", - comm_root, data_src, count, tag, data_buffer, recv_count, offset)); - - rc = MCA_PML_CALL(irecv(curr_data_buffer, recv_count, MPI_BYTE, comm_root, - tag, comm, &requests[*active_requests])); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive data")); - return OMPI_ERROR; - } - - ++(*active_requests); - -SCATTER: - *status = PTPCOLL_WAITING_FOR_DATA; - - rc = bcol_ptpcoll_bcast_narray_test_and_scatter_known_root(ptpcoll_module, - buffer_index, data_buffer, - count, group_root_index, relative_group_index); - - if (BCOL_FN_COMPLETE != rc) { - PTPCOLL_VERBOSE(10, ("Not done. Return %d", rc)); - return rc; - } - -GATHER: - /* recv operation is done */ - *iteration = 0; - *status = PTPCOLL_GATHER_STARTED; - rc = bcol_ptpcoll_bcast_narray_knomial_gather(ptpcoll_module, - buffer_index, data_buffer, count, - relative_group_index); - if (BCOL_FN_COMPLETE != rc) { - assert(0 != *active_requests); - PTPCOLL_VERBOSE(10, ("Not done. Return %d", rc)); - return rc; - } - - ++(*iteration); /* I need it for progress */ - - /* proxy case */ - if ((PTPCOLL_PROXY & ptpcoll_module->narray_type) && - ! input_args->root_flag) { - *status = PTPCOLL_EXTRA_SEND_STARTED; - rc = bcol_ptpcoll_send_n_extra( - ptpcoll_module, - data_buffer, count, tag - 1, - ptpcoll_module->narray_knomial_proxy_extra_index, - ptpcoll_module->narray_knomial_proxy_num, - input_args->root_route->rank, - comm, active_requests, requests); - if (BCOL_FN_COMPLETE != rc) { - return rc; - } - } - -EXIT: - return BCOL_FN_COMPLETE; -} - -/* Pasha : need to move this code to some common function */ -static int bcol_ptpcoll_bcast_narray_knomial_scatter_gatther_known_root_extra(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - - int tag; - int rc; - int completed = 0; /* not completed */ - uint32_t buffer_index = input_args->buffer_index; - - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - int *iteration = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration); - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - - PTPCOLL_VERBOSE(3, ("BCAST known root, index_this_type %d, num_of_this_type %d", - const_args->index_of_this_type_in_collective + 1, - const_args->n_of_this_type_in_collective)); - - /* keep tag within the limit support by the pml */ - tag = (PTPCOLL_TAG_OFFSET + input_args->sequence_num * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - /* mark this as a collective tag, to avoid conflict with user-level flags */ - tag = -tag; - /* reset active requests 
*/ - *active_requests = 0; - /* reset iteration counter */ - *iteration = -1; - - PTPCOLL_VERBOSE(8, ("bcol_ptpcoll_bcast_narray_knomial_scatter_gatther_known_root_extra, buffer index: %d " - "tag: %d " - "tag_mask: %d " - "sn: %d " - "root: %d " - "buff: %p " - ,buffer_index, tag, - ptpcoll_module->tag_mask, input_args->sequence_num, - input_args->root_flag, - data_buffer - )); - - /* we have a power 2 group */ - if (input_args->root_flag) { - PTPCOLL_VERBOSE(10, ("I'm EXTRA root of the data")); - /* send the all data to your proxy peer */ - rc = MCA_PML_CALL(isend(data_buffer, count, MPI_BYTE, - group_list[ptpcoll_module->narray_knomial_proxy_extra_index[0]], tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - ++(*active_requests); - - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - /* we have to store the iteration number somewhere */ - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - } else { - PTPCOLL_VERBOSE(9, ("Posting recive from %d tag %d", - ptpcoll_module->narray_knomial_proxy_extra_index[0], tag - 1)); - rc = MCA_PML_CALL(irecv(data_buffer, count, MPI_BYTE, - group_list[ptpcoll_module->narray_knomial_proxy_extra_index[0]], - tag - 1, comm, &requests[*active_requests])); - ++(*active_requests); - - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - PTPCOLL_VERBOSE(10, ("Test was not matched - %d", rc)); - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - } - - return BCOL_FN_COMPLETE; -} - -static int bcol_ptpcoll_bcast_known_root_extra_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - int rc; - int completed = 0; /* not completed */ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[input_args->buffer_index].requests; - uint32_t buffer_index = input_args->buffer_index; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - - PTPCOLL_VERBOSE(10, ("bcol_ptpcoll_bcast_binomial_known_root_extra_progress extra, was called\n")); - - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - return (OMPI_SUCCESS != rc) ? 
rc : BCOL_FN_STARTED; - } - - PTPCOLL_VERBOSE(10, ("Test was matched - %d", rc)); - return BCOL_FN_COMPLETE; -} - - -static int bcol_ptpcoll_bcast_narray_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - - int tag = -1; - int rc; - int group_size = ptpcoll_module->group_size; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - uint32_t buffer_index = input_args->buffer_index; - - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **send_requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - ompi_request_t **recv_request = - &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests[0]; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int matched = true; - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - int relative_group_index = 0; - netpatterns_tree_node_t *narray_node = NULL; - - PTPCOLL_VERBOSE(3, ("Bcast, Narray tree Progress")); - - - PTPCOLL_VERBOSE(8, ("bcol_ptpcoll_bcast_k_nomial_known_root, buffer index: %d " - "tag: %d " - "tag_mask: %d " - "sn: %d " - "root: %d [%d]" - "buff: %p ", - buffer_index, tag, - ptpcoll_module->tag_mask, input_args->sequence_num, - input_args->root_flag, input_args->root_route->rank, - data_buffer)); - - if (0 == *active_requests) { - int group_root_index = input_args->root_route->rank; - /* If the collective does not have any active requests, it - means the initial data was not received from parent. - Check if some data arrived - */ - if (0 == mca_bcol_ptpcoll_test_for_match(recv_request, &rc)) { - PTPCOLL_VERBOSE(10, ("Test was not matched - %d", rc)); - /* No data was received, return no match error */ - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - - /* set all paremetres */ - relative_group_index = my_group_index - group_root_index; - if (relative_group_index < 0) { - relative_group_index +=group_size; - } - narray_node = &ptpcoll_module->narray_node[relative_group_index]; - /* keep tag within the limit support by the pml */ - tag = (PTPCOLL_TAG_OFFSET + input_args->sequence_num * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - /* mark this as a collective tag, to avoid conflict with user-level flags */ - tag = -tag; - /* Bcast the data */ - NARRAY_BCAST_NB(narray_node, group_root_index, group_size, - data_buffer, count, tag, comm, send_requests, active_requests); - } - - /* All data was received and sent out. 
- Check if the completion arrived */ - matched = mca_bcol_ptpcoll_test_all_for_match - (active_requests, send_requests, &rc); - if (OMPI_SUCCESS != rc) { - return OMPI_ERROR; - } - - /* If this is the last call, we have to recycle memory */ - if(matched) { - return BCOL_FN_COMPLETE; - } else { - PTPCOLL_VERBOSE(10, ("bcast root is started")); - return BCOL_FN_STARTED; - } -} - -static int bcol_ptpcoll_bcast_narray(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - - int tag; - int rc; - int data_src; - int group_size = ptpcoll_module->group_size; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - uint32_t buffer_index = input_args->buffer_index; - - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **send_requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - ompi_request_t **recv_request = - &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests[0]; - void *data_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - int count = input_args->count * input_args->dtype->super.size; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int matched = true; - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - int group_root_index; - int relative_group_index = 0; - netpatterns_tree_node_t *narray_node = NULL; - - PTPCOLL_VERBOSE(3, ("Bcast, Narray tree")); - - /* reset active request counter */ - (*active_requests) = 0; - /* keep tag within the limit supported by the pml */ - tag = (PTPCOLL_TAG_OFFSET + input_args->sequence_num * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - /* mark this as a collective tag, to avoid conflict with user-level flags */ - tag = -tag; - - PTPCOLL_VERBOSE(8, ("bcol_ptpcoll_bcast_narray, buffer index: %d " - "tag: %d " - "tag_mask: %d " - "sn: %d " - "root: %d " - "buff: %p ", - buffer_index, tag, - ptpcoll_module->tag_mask, input_args->sequence_num, - input_args->root_flag, - data_buffer)); - - - if (input_args->root_flag) { - PTPCOLL_VERBOSE(10, ("I'm root of the data")); - narray_node = &ptpcoll_module->narray_node[0]; - group_root_index = my_group_index; - /* - * I'm root of the operation - * send data to N children - */ - goto NARRAY_BCAST_START; - } - - /* I'm not root */ - group_root_index = input_args->root_route->rank; - - relative_group_index = my_group_index - group_root_index; - if (relative_group_index < 0) { - relative_group_index += group_size; - } - - data_src = - ptpcoll_module->narray_node[relative_group_index].parent_rank + - group_root_index; - if (data_src >= group_size) { - data_src -= group_size; - } - - PTPCOLL_VERBOSE(10, ("Bcast, receive data from %d [%d], count %d, tag %d, addr %p", - group_list[data_src], data_src, - count, tag, data_buffer)); - - - rc = MCA_PML_CALL(irecv(data_buffer, count, MPI_BYTE, - group_list[data_src], - tag, comm, recv_request)); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive data")); - return OMPI_ERROR; - } - - /* We cannot block, so run a couple of tests for data arrival */ - if (0 == mca_bcol_ptpcoll_test_for_match(recv_request, &rc)) { - PTPCOLL_VERBOSE(10, ("Test was not matched - %d", rc)); - /* No data was received, return no match error */ - return (OMPI_SUCCESS != rc) ?
rc : BCOL_FN_STARTED; - } - - narray_node = &ptpcoll_module->narray_node[relative_group_index]; - -NARRAY_BCAST_START: - /* Bcast the data */ - NARRAY_BCAST_NB(narray_node, group_root_index, group_size, - data_buffer, count, tag, comm, send_requests, active_requests); - - matched = mca_bcol_ptpcoll_test_all_for_match - (active_requests, send_requests, &rc); - if (OMPI_SUCCESS != rc) { - return OMPI_ERROR; - } - - /* If it is last call, we have to recycle memory */ - if(matched) { - return BCOL_FN_COMPLETE; - } else { - PTPCOLL_VERBOSE(10, ("bcast root is started")); - return BCOL_FN_STARTED; - } -} - -int bcol_ptpcoll_bcast_init(mca_bcol_base_module_t *super) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = - (mca_bcol_ptpcoll_module_t *) super; - - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - comm_attribs.bcoll_type = BCOL_BCAST; - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - - comm_attribs.data_src = DATA_SRC_UNKNOWN; - - if(PTPCOLL_KN_EXTRA == ptpcoll_module->pow_ktype) { - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_bcast_k_nomial_extra_known_and_anyroot, - bcol_ptpcoll_bcast_k_nomial_extra_known_and_anyroot_progress); - } else { - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_bcast_k_nomial_anyroot, - bcol_ptpcoll_bcast_k_nomial_anyroot_progress); - } - - comm_attribs.data_src = DATA_SRC_KNOWN; - switch(mca_bcol_ptpcoll_component.bcast_small_messages_known_root_alg) { - case PTPCOLL_KNOMIAL: - if(PTPCOLL_KN_EXTRA == ptpcoll_module->pow_ktype) { - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_bcast_k_nomial_extra_known_and_anyroot, - bcol_ptpcoll_bcast_k_nomial_extra_known_and_anyroot_progress); - } else { - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_bcast_k_nomial_known_root, - bcol_ptpcoll_bcast_k_nomial_known_root_progress); - } - break; - case PTPCOLL_NARRAY: - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_bcast_narray, - bcol_ptpcoll_bcast_narray_progress); - break; - default: - PTPCOLL_ERROR(("Unknown algorithm index was selected %", - mca_bcol_ptpcoll_component.bcast_small_messages_known_root_alg)); - return OMPI_ERROR; - } - - comm_attribs.data_src = DATA_SRC_UNKNOWN; - inv_attribs.bcol_msg_min = 10000000; - inv_attribs.bcol_msg_max = 10485760; /* range 4 */ - - /* Anyroot large messages functions registration */ - - if (PTPCOLL_EXTRA == ptpcoll_module->pow_2type) { - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot_extra, - bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot_extra_progress); - } else { - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot, - bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot_progress); - } - - /* Known-root large messages functions registration */ - - comm_attribs.data_src = DATA_SRC_KNOWN; - switch(mca_bcol_ptpcoll_component.bcast_large_messages_known_root_alg) { - case PTPCOLL_BINOMIAL_SG: - if (PTPCOLL_EXTRA == ptpcoll_module->pow_2type) { - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - 
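/* extra ranks outside the power-of-two subgroup only exchange the whole message with their proxy, so the generic extra progress routine is registered for them */ -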
bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root_extra, - bcol_ptpcoll_bcast_known_root_extra_progress); - /* bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root_extra_progress); */ - } else { - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root, - bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root_progress); - } - break; - case PTPCOLL_NARRAY_KNOMIAL_SG: - if (PTPCOLL_EXTRA == ptpcoll_module->narray_type) { - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_bcast_narray_knomial_scatter_gatther_known_root_extra, - bcol_ptpcoll_bcast_known_root_extra_progress); - } else { - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_bcast_narray_knomial_scatter_gatther_known_root, - bcol_ptpcoll_bcast_narray_knomial_scatter_gatther_known_root_progress); - } - break; - default: - PTPCOLL_ERROR(("Unknown algorithm index was selected %", - mca_bcol_ptpcoll_component.bcast_large_messages_known_root_alg)); - return OMPI_ERROR; - } - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_bcast.h b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_bcast.h deleted file mode 100644 index 4e0581e350..0000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_bcast.h +++ /dev/null @@ -1,868 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2016 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_PTPCOLL_BCAST_H -#define MCA_BCOL_PTPCOLL_BCAST_H - -#include "ompi_config.h" -#include "bcol_ptpcoll.h" -#include "bcol_ptpcoll_utils.h" - -BEGIN_C_DECLS - -int bcol_ptpcoll_bcast_init(mca_bcol_base_module_t *super); - -int bcol_ptpcoll_bcast_k_nomial_anyroot (bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); -int bcol_ptpcoll_bcast_k_nomial_anyroot_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - -int bcol_ptpcoll_bcast_k_nomial_known_root(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); -int bcol_ptpcoll_bcast_k_nomial_known_root_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - -int bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); -int bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - -int bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot_extra(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); -int bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot_extra_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - -int bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); -int bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - -int bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root_extra(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); -int 
bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root_extra_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - - -/* macros */ -#define K_NOMIAL_ROOT_BCAST_NB_BINOMIAL_SCATTER( \ - radix_mask_pow, \ - my_group_index, group_size, group_list, \ - data_buffer, segment_size, count, tag, \ - comm, send_requests, num_pending_sends) \ -do { \ - int rc = OMPI_SUCCESS; \ - int dst; \ - int comm_dst; \ - int send_size; \ - int send_offset; \ - int delta; \ - int dst_boundary_rank; \ - int radix_mask = radix_mask_pow >= 0 ? 1 << radix_mask_pow : 0; \ - \ - while(radix_mask_pow >= 0) { \ - /* For each level of tree, do sends */ \ - dst = my_group_index ^ radix_mask; \ - comm_dst = group_list[dst]; \ - \ - dst_boundary_rank = dst & ((~(int)0) << (radix_mask_pow)); \ - \ - send_offset = segment_size * dst_boundary_rank; \ - /* Pasha: make sure that we handle the corner cases */ \ - delta = count - send_offset; \ - if (delta <= 0) { \ - send_size = 0; /* we have to send something, other way it will hang */ \ - } else { \ - /* the tail case */ \ - send_size = (int) \ - (delta - (int)segment_size * radix_mask) < 0 ? delta : \ - (int)segment_size * radix_mask; \ - } \ - \ - /* Non blocking send .... */ \ - PTPCOLL_VERBOSE(9 , \ - ("Bcast p2s, Isend to %d[%d],count %d,tag %d,addr %p [%p] send_size %d,send_offset %d, radix %d %d",\ - dst, comm_dst, count, tag, \ - data_buffer, (void *)((unsigned char *)data_buffer + (size_t)send_offset), \ - send_size, \ - send_offset, \ - radix_mask, \ - radix_mask_pow \ - )); \ - rc = MCA_PML_CALL(isend((void *)((unsigned char *)data_buffer + (size_t)send_offset), \ - send_size, MPI_BYTE, \ - comm_dst, tag, \ - MCA_PML_BASE_SEND_STANDARD, comm, \ - &(send_requests[*num_pending_sends]))); \ - PTPCOLL_VERBOSE(10, ("send request addr is %p", send_requests[*num_pending_sends])); \ - if( OMPI_SUCCESS != rc ) { \ - PTPCOLL_VERBOSE(10, ("Failed to isend data")); \ - return OMPI_ERROR; \ - } \ - ++(*num_pending_sends); \ - radix_mask >>= 1; \ - radix_mask_pow--; \ - } \ -} while(0) - -#define NARRAY_SCATTER_NB(narray_node, process_shift, group_size, \ - data_buffer, base_block_size, count, tag, comm, send_requests, \ - num_pending_sends) \ -do { \ - int n, rc = OMPI_SUCCESS; \ - int dst; \ - int comm_dst; \ - int offset; \ - int size_count = count; \ - \ - /* Send out data to all relevant childrens */ \ - for (n = 0; n < narray_node->n_children && size_count > 0; n++) { \ - \ - dst = narray_node->children_ranks[n] + process_shift; \ - if (dst >= group_size) { \ - dst -= group_size; \ - } \ - \ - comm_dst = group_list[dst]; \ - offset = n * base_block_size; \ - size_count -= base_block_size; \ - if (OPAL_UNLIKELY(size_count < 0)) { \ - count = base_block_size + size_count; \ - } else { \ - count = base_block_size; \ - } \ - \ - /* Non blocking send .... 
*/ \ - PTPCOLL_VERBOSE(9 , ("Bcast, Isend data to %d[%d], count %d, tag %d, addr %p", \ - dst, comm_dst, count, tag, \ - data_buffer)); \ - rc = MCA_PML_CALL(isend((void *)((char *)data_buffer + (size_t)offset), count, MPI_BYTE,\ - comm_dst, tag, \ - MCA_PML_BASE_SEND_STANDARD, comm, \ - &(send_requests[*num_pending_sends]))); \ - if( OMPI_SUCCESS != rc ) { \ - PTPCOLL_VERBOSE(10, ("Failed to isend data")); \ - return OMPI_ERROR; \ - } \ - ++(*num_pending_sends); \ - } \ -} while(0) - -#define NARRAY_SCATTER_B(narray_node, process_shift, group_size, \ - data_buffer, base_block_size, count, tag, comm, send_requests, \ - num_pending_sends, completed) \ -do { \ - NARRAY_SCATTER_NB(narray_node, process_shift, group_size, \ - data_buffer, base_block_size, count, tag, comm, send_requests, \ - num_pending_sends); \ - if (*num_pending_sends > 0) { \ - completed = mca_bcol_ptpcoll_test_all_for_match(num_pending_sends, send_requests, &rc); \ - if (OMPI_SUCCESS != rc) { \ - return OMPI_ERROR; \ - } \ - } else { \ - completed = 1; \ - } \ -} while (0) - -#define CHECK_IF_ROOT_OR_VROOT(module, i) \ - (module->pow_2 == module->ml_mem.ml_buf_desc[i].radix_mask_pow) - -/* inline functions */ -static inline __opal_attribute_always_inline__ -int bcol_ptpcoll_bcast_binomial_scatter_gatther_send_extra( - mca_bcol_ptpcoll_module_t *ptpcoll_module, - void *data_buffer, int count, int tag, - int extra_peer, ompi_communicator_t *comm, - int *active_requests, ompi_request_t **requests) -{ - int rc = OMPI_SUCCESS; - int completed = 0; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - - /* tag is -1 already */ - /* send the all data to your extra peer */ - PTPCOLL_VERBOSE(10, ("bcol_ptpcoll_bcast_binomial_scatter_gatther_send_extra to %d tag %d", - extra_peer, tag)); - rc = MCA_PML_CALL(isend(data_buffer, count, MPI_BYTE, - group_list[extra_peer], tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - - ++(*active_requests); - - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - PTPCOLL_VERBOSE(10, ("PR Extra send was not completed")); - /* we have to store the iteration number somewhere */ - return (OMPI_SUCCESS != rc) ? 
rc : BCOL_FN_STARTED; - } - - return BCOL_FN_COMPLETE; -} - -static inline __opal_attribute_always_inline__ -int bcol_ptpcoll_send_n_extra(mca_bcol_ptpcoll_module_t *ptpcoll_module, - void *data_buffer, int count, int tag, - int *extra_peers, int num_peers, int skip, - ompi_communicator_t *comm, - int *active_requests, ompi_request_t **requests) -{ - int rc = OMPI_SUCCESS; - int completed = 0; - int i; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - - /* send the all data to your extra peer */ - for (i = 0; i < num_peers; i++) { - PTPCOLL_VERBOSE(10, ("send_n_extra to %d tag %d", - extra_peers[i], tag)); - if (extra_peers[i] == skip) { - PTPCOLL_VERBOSE(10, ("SKIP")); - continue; - } - - rc = MCA_PML_CALL(isend(data_buffer, count, MPI_BYTE, - group_list[extra_peers[i]], tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - - ++(*active_requests); - } - - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - PTPCOLL_VERBOSE(10, ("PR Extra send was not completed")); - /* we have to store the iteration number somewhere */ - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - - return BCOL_FN_COMPLETE; -} - -static inline __opal_attribute_always_inline__ -int bcol_ptpcoll_bcast_binomial_gather_anyroot(mca_bcol_ptpcoll_module_t *ptpcoll_module, - int buffer_index, void *data_buffer, int count, int base_block_size) -{ - int rc; - int completed = 0; /* not completed */ - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int i; - int *iteration = - &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration; - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - void *curr_data_sbuffer = NULL, - *curr_data_rbuffer = NULL; - int radix_mask_pow = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask_pow; - int delta; - int tag = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag - 1; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - - PTPCOLL_VERBOSE(10, ("bcol_ptpcoll_bcast_binomial_gather_anyroot %d %d %d", - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration, - ptpcoll_module->pow_2, - 1 << ptpcoll_module->pow_2)); - - /* we assume the iteration #iteration already was completed with probe */ - for (i = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration; - i < ptpcoll_module->pow_2; i++) { - int pow2 = 1 << i; - int peer_index = my_group_index ^ pow2; - int comm_rank = group_list[peer_index]; - int slen, rlen, - send_offset, - recv_offset; - - if (i > radix_mask_pow) { - /* *active_requests = 0; */ - /* send - receive data from the peer */ - slen = rlen = pow2 * base_block_size; - send_offset = base_block_size * ((my_group_index) & ((~(int)0) << i)); - recv_offset = base_block_size * ((peer_index) & ((~(int)0) << i)); - curr_data_sbuffer = (void *)((unsigned char *)data_buffer + send_offset); - curr_data_rbuffer = (void *)((unsigned char *)data_buffer + recv_offset); - - delta = count - recv_offset; - if (delta > 0) { - if (delta < rlen) { - /* recv the tail */ - rlen = delta; - } - PTPCOLL_VERBOSE(10, ("[ pow2 %d, radix %d ] recv data %p (offset %d) , len %d , dest %d", - pow2, - 1 << 
ptpcoll_module->pow_2, - curr_data_rbuffer, - recv_offset, - rlen, - comm_rank)); - rc = MCA_PML_CALL(irecv(curr_data_rbuffer, rlen, MPI_BYTE, - comm_rank, tag, comm, &requests[*active_requests])); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive data")); - return OMPI_ERROR; - } - ++(*active_requests); - } - - delta = count - send_offset; - if (delta > 0) { - if (delta < slen) { - /* recv the tail */ - slen = delta; - } - PTPCOLL_VERBOSE(10, ("[ pow2 %d, radix %d ] sending data %p (offset %d) , len %d , dest %d", - pow2, - 1 << ptpcoll_module->pow_2, - curr_data_sbuffer, - send_offset, - slen, - comm_rank)); - rc = MCA_PML_CALL(isend(curr_data_sbuffer, slen, MPI_BYTE, - comm_rank, tag, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - ++(*active_requests); - } - - if (*active_requests > 0) { - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - *iteration = i; - /* we have to store the iteration number somewhere */ - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - } - } else if (i == radix_mask_pow) { - /* only receive data */ - rlen = pow2 * base_block_size; - recv_offset = base_block_size * ((peer_index) & ((~(int)0) << i)); - curr_data_rbuffer = (void *)((unsigned char *)data_buffer + recv_offset); - delta = count - recv_offset; - if (0 >= delta) { - /* we have nothing to send, skip the iteration */ - continue; - } - if (delta < rlen) { - /* recv the tail */ - rlen = delta; - } - /* receive data from the peer */ - PTPCOLL_VERBOSE(10, ("[ pow2 %d, radix %d ] recv data %p (offset %d) , len %d , dest %d", - pow2, - 1 << ptpcoll_module->pow_2, - curr_data_rbuffer, - recv_offset, - rlen, - comm_rank)); - rc = MCA_PML_CALL(irecv(curr_data_rbuffer, rlen, MPI_BYTE, - comm_rank, tag, comm, &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive data")); - return OMPI_ERROR; - } - ++(*active_requests); - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - *iteration = i; - PTPCOLL_VERBOSE(10, ("Recv was not completed")); - /* we have to store the iteration number somewhere */ - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - PTPCOLL_VERBOSE(10, ("Recv was completed")); - } else if (i < radix_mask_pow) { - /* Only send data */ - slen = pow2 * base_block_size; - send_offset = base_block_size * ((my_group_index) & ((~(int)0) << i)); - curr_data_sbuffer = (void *)((unsigned char *)data_buffer + send_offset); - delta = count - send_offset; - if (0 >= delta) { - /* we have nothing to send, skip the iteration */ - continue; - } - if (delta < slen) { - slen = delta; - } - PTPCOLL_VERBOSE(10, ("[ pow2 %d, radix %d ] sending data %p (offset %d) , len %d , dest %d", - pow2, - 1 << ptpcoll_module->pow_2, - curr_data_sbuffer, - send_offset, - slen, - comm_rank)); - rc = MCA_PML_CALL(isend(curr_data_sbuffer, slen, MPI_BYTE, - comm_rank, tag, MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - ++(*active_requests); - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - *iteration = i; - /* we have to store the iteration number somewhere */ - return (OMPI_SUCCESS != rc) ? 
rc : BCOL_FN_STARTED; - } - } - } - - return BCOL_FN_COMPLETE; -} - -static inline __opal_attribute_always_inline__ -int bcol_ptpcoll_bcast_binomial_probe_and_scatter_anyroot(mca_bcol_ptpcoll_module_t *ptpcoll_module, - int buffer_index, void *data_buffer, int count, int base_block_size) -{ - mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - int rc; - int completed = 0; /* not completed */ - int comm_root; - int i; - int *radix_mask_pow = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask_pow); - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_status_public_t status; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - int pow2_group_size = ptpcoll_module->pow_2num; - int pow2_distance; - int my_left_boundary_rank; - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - int group_root_index = 0; - void *curr_data_buffer = NULL; - int tag = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag; - int recv_count = 0; - int *coll_status = - &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].status; - - assert(0 == *active_requests); - - PTPCOLL_VERBOSE(10, ("Running bcol_ptpcoll_bcast_binomial_probe_and_scatter_anyroot")); - for (i = 0; i < cm->num_to_probe && - 0 == completed; i++) { - MCA_PML_CALL(iprobe(MPI_ANY_SOURCE, tag, - comm, &completed, &status)); - PTPCOLL_VERBOSE(10, ("Bcast, iprobe tag %d", - tag)); - } - - /* the function always returns OMPI_SUCCESS, so we don't check return code */ - if (0 == completed) { - PTPCOLL_VERBOSE(10, ("IPROBE was not matched")); - /* No data was received, return no match error */ - return BCOL_FN_NOT_STARTED; - } - - comm_root = status.MPI_SOURCE; - - - PTPCOLL_VERBOSE(9, ("IPROBE was matched, root of the data on communicator is %d", comm_root)); - - /* For proxy we have to check if we got something from extra node */ - if (PTPCOLL_PROXY & ptpcoll_module->pow_2type) { - if (group_list[ptpcoll_module->proxy_extra_index] == comm_root) { - PTPCOLL_VERBOSE(9, ("IPROBE was matched, root of the data on communicator is extra node %d", - comm_root)); - /* scatter the data among other peer in the pow2 group */ - *radix_mask_pow = ptpcoll_module->pow_2; - - pow2_distance = ptpcoll_module->pow_2 - 1; - curr_data_buffer = data_buffer; - recv_count = count; - goto PR_SCATTHER; - } - } - - /* Find group index for communicator root of the data */ - group_root_index = get_group_index_and_distance_for_binomial - (my_group_index, comm_root, pow2_group_size, group_list, &pow2_distance); - if (OPAL_UNLIKELY(group_root_index < 0)) { - PTPCOLL_ERROR(("Fatal error, no group root index found, my id %d, pow2_g_size %d comm_root %d", - my_group_index, pow2_group_size, comm_root)); - return OMPI_ERROR; - } - - PTPCOLL_VERBOSE(10, ("Group root index is %d distance is %d", - group_root_index, pow2_distance)); - - /* Use group_root_index to calculate the */ - - /* Post receive that will fetch the data */ - /* Pasha: Who is packing data ? - Should I assume that we get contiguous buffer ? - Or should I pack by myself - =================================================================================================== - === On this stage I assume that data is contiguous. 
So I use MPI_BYTE datatype and COUNT = size === - =================================================================================================== - */ - - recv_count = base_block_size * (1 << pow2_distance); /* we may receive larger data */ - - my_left_boundary_rank = my_group_index & ((~(int)0) << pow2_distance ); - - curr_data_buffer = (void *)((unsigned char *)data_buffer + - (size_t) base_block_size * my_left_boundary_rank); - - *radix_mask_pow = pow2_distance; - - pow2_distance--; - -PR_SCATTHER: - PTPCOLL_VERBOSE(10, ("Bcast, receive data from %d[%d], " - "recv_count %d, tag %d, addr %p, offset %d, pow2_distace %d", - comm_root, group_root_index, recv_count, - tag, curr_data_buffer, - my_group_index * base_block_size, pow2_distance)); - - rc = MCA_PML_CALL(recv(curr_data_buffer, recv_count, MPI_BYTE, - comm_root, tag, comm, MPI_STATUS_IGNORE)); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive data")); - return OMPI_ERROR; - } - - PTPCOLL_VERBOSE(10, ("Bcast, Data was received")); - - /* Sending forward the data over K-nomial tree */ - *coll_status = PTPCOLL_SCATTER_STARTED; - K_NOMIAL_ROOT_BCAST_NB_BINOMIAL_SCATTER( - pow2_distance, - my_group_index, group_size, group_list, - data_buffer, base_block_size, - count, tag, comm, requests, - active_requests); - - /* Since the next step (gather) does not really require - completion on scatter , we may return complete */ - return BCOL_FN_COMPLETE; -} - -static inline __opal_attribute_always_inline__ -int bcol_ptpcoll_binomial_root_to_src(int group_root, int my_rank, - int pow2_size, int group_size, int *distance) -{ - int root, relative_rank, src, - pow2_distance = 0, i; - - if (group_root < pow2_size) { - root = group_root; - } else { - /* the source of the data is extra node, - the real root it represented by some rank from - pow2 group */ - root = group_root - pow2_size; - /* shortcut for the case when my rank is root for the group */ - if (my_rank == root) { - *distance = -1; - return group_root; - } - } - - relative_rank = (my_rank - root) < 0 ? my_rank - root + pow2_size : - my_rank - root; - - for (i = 1; i < pow2_size; i<<=1, pow2_distance++) { - if (relative_rank & i) { - src = my_rank ^ i; - if (src >= pow2_size) - src -= pow2_size; - - *distance = pow2_distance; - return src; - } - } - - /* error case */ - *distance = -1; - return -1; -} - -static inline __opal_attribute_always_inline__ -int bcol_ptpcoll_bcast_binomial_test_and_scatter_known_root(mca_bcol_ptpcoll_module_t *ptpcoll_module, - int buffer_index, void *data_buffer, int count, int base_block_size) -{ - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - int rc; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - int tmp_radix_mask_pow = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask_pow - 1; - int tag = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag; - int *status = - &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].status; - - PTPCOLL_VERBOSE(10, ("Running bcol_ptpcoll_bcast_binomial_probe_and_scatter_anyroot")); - - if (0 == mca_bcol_ptpcoll_test_all_for_match(active_requests, - requests, &rc)) { - PTPCOLL_VERBOSE(10, ("Test was not matched - %d", rc)); - return (OMPI_SUCCESS != rc) ? 
rc : BCOL_FN_STARTED; - } - - PTPCOLL_VERBOSE(10, ("Bcast, Data was received")); - - /* Sending forward the data over binimial nomial tree */ - *status = PTPCOLL_SCATTER_STARTED; - K_NOMIAL_ROOT_BCAST_NB_BINOMIAL_SCATTER( - tmp_radix_mask_pow, - my_group_index, group_size, group_list, - data_buffer, base_block_size, - count, tag, comm, requests, - active_requests); - - - return BCOL_FN_COMPLETE; -} - -#define NARRAY_BLOCK_SIZE(size, module, level_size) \ - ((size + (module)->full_narray_tree_num_leafs - 1) / \ - (module)->full_narray_tree_num_leafs) * \ - ((module)->full_narray_tree_num_leafs / \ - ((0 == level_size) ? \ - mca_bcol_ptpcoll_component.narray_knomial_radix : \ - level_size)) - -static inline __opal_attribute_always_inline__ -int bcol_ptpcoll_bcast_narray_test_and_scatter_known_root(mca_bcol_ptpcoll_module_t *ptpcoll_module, - int buffer_index, void *data_buffer, int count, int process_shift, - int relative_group_index) -{ - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - int rc; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - int tag = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag; - int *status = - &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].status; - int scatter_count = 0; - int offset = 0; - int base_block_size = 0; - void *curr_data_buffer = NULL; - - PTPCOLL_VERBOSE(10, ("Running bcol_ptpcoll_bcast_narray_test_and_scatter_known_root")); - - if (0 == mca_bcol_ptpcoll_test_all_for_match(active_requests, - requests, &rc)) { - PTPCOLL_VERBOSE(10, ("Test was not matched - %d", rc)); - return (OMPI_SUCCESS != rc) ? 
rc : BCOL_FN_STARTED; - } - - /* Sending forward the data over binimial nomial tree */ - *status = PTPCOLL_SCATTER_STARTED; - if(0 == relative_group_index) { - scatter_count = count; - } else { - scatter_count = NARRAY_BLOCK_SIZE(count, ptpcoll_module, - ptpcoll_module->narray_knomial_node[relative_group_index].level_size); - } - - offset = scatter_count * - ptpcoll_module->narray_knomial_node[relative_group_index].rank_on_level; - - /* make sure that we do not overun memory */ - if (OPAL_UNLIKELY(offset + scatter_count > count)) { - scatter_count = count - offset; - } - - PTPCOLL_VERBOSE(10, ("Bcast, Data was received %d %d %d", - scatter_count, - ptpcoll_module->narray_knomial_node[relative_group_index].level_size, - ptpcoll_module->narray_knomial_node[relative_group_index].rank_on_level)); - - - curr_data_buffer = (void *)((unsigned char *)data_buffer + (size_t)offset); - - /* calculating scatter block size for next level of tree */ - base_block_size = NARRAY_BLOCK_SIZE(count, ptpcoll_module, - ptpcoll_module->narray_knomial_node[relative_group_index].level_size * - mca_bcol_ptpcoll_component.narray_knomial_radix); - - PTPCOLL_VERBOSE(10, ("scatter_known_rootaaa %d %d %d %d %d",scatter_count, offset, base_block_size, - ptpcoll_module->narray_knomial_node[relative_group_index].level_size /mca_bcol_ptpcoll_component.narray_knomial_radix, - ptpcoll_module->full_narray_tree_num_leafs)); - - NARRAY_SCATTER_NB((&ptpcoll_module->narray_knomial_node[relative_group_index]), - process_shift, ptpcoll_module->full_narray_tree_size, - curr_data_buffer, base_block_size, scatter_count, tag, comm, - requests, active_requests); - - /* Bummer, I tried to prevent this, special case for virtual root */ - if(0 == relative_group_index) { - if (0 == mca_bcol_ptpcoll_test_all_for_match(active_requests, - requests, &rc)) { - PTPCOLL_VERBOSE(10, ("Test was not matched - %d", rc)); - *status = PTPCOLL_ROOT_SEND_STARTED; - return (OMPI_SUCCESS != rc) ? 
rc : BCOL_FN_STARTED; - } - } - - return BCOL_FN_COMPLETE; -} - -static inline __opal_attribute_always_inline__ -int bcol_ptpcoll_bcast_narray_knomial_gather(mca_bcol_ptpcoll_module_t *ptpcoll_module, - const int buffer_index, void *data_buffer, const int count, - const int relative_group_index) -{ - int completed = 0; /* not completed */ - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - int blocks_in_step = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask; - int tag = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag - 1; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - int group_size = ptpcoll_module->full_narray_tree_size; - int i, k, - rc, - len, slen, rlen, - peer, group_peer; - size_t s_offset, - r_offset; - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **requests = - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests; - netpatterns_narray_knomial_tree_node_t *narray_node = - &ptpcoll_module->narray_knomial_node[relative_group_index]; - netpatterns_k_exchange_node_t *k_node = - &narray_node->k_node; - mca_bcol_ptpcoll_component_t *cm = - &mca_bcol_ptpcoll_component; - size_t base_block_size = - NARRAY_BLOCK_SIZE(count, ptpcoll_module, narray_node->level_size); - - PTPCOLL_VERBOSE(10, ("bcol_ptpcoll_bcast_narray_knomial_gather %d %d %d %d %d %d %d", - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration, - base_block_size, count, narray_node->level_size, - relative_group_index, k_node->n_exchanges, tag)); - - /* we assume the iteration #iteration already was completed with probe */ - for (i = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration; - i < k_node->n_exchanges; i++, blocks_in_step *= cm->narray_knomial_radix) { - - len = base_block_size * blocks_in_step; - - for (k = 0; k < cm->narray_knomial_radix - 1; k++) { - group_peer = my_group_index + - (k_node->rank_exchanges[i][k] - narray_node->rank_on_level); - if (group_peer >= group_size) { - group_peer -= group_size; - } else if (group_peer < 0) { - group_peer += group_size; - } - peer = group_list[group_peer]; - - r_offset = (size_t)k_node->rank_exchanges[i][k] / blocks_in_step * - len; - - /* check that we do not run out of message boundary */ - if (OPAL_UNLIKELY(r_offset + len > (size_t)count)) { - rlen = count - r_offset; - if (OPAL_UNLIKELY(rlen <= 0)) { - continue; - } - } else { - rlen = len; - } - PTPCOLL_VERBOSE(10, ("Recv data from %d, addr %p offset %d len %d %d %d tag %d", - peer, data_buffer, r_offset, rlen, len, blocks_in_step, tag)); - rc = MCA_PML_CALL(irecv((void *)((unsigned char *)data_buffer + r_offset), - rlen, MPI_BYTE, - peer, tag, comm, &requests[*active_requests])); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to receive data")); - return OMPI_ERROR; - } - ++(*active_requests); - } - - for (k = 0; k < cm->narray_knomial_radix - 1; k++) { - group_peer = my_group_index + - (k_node->rank_exchanges[i][k] - narray_node->rank_on_level); - if (group_peer >= group_size) { - group_peer -= group_size; - } else if (group_peer < 0) { - group_peer += group_size; - } - peer = group_list[group_peer]; - - s_offset = (size_t)narray_node->rank_on_level / blocks_in_step * - len; - - /* check that we do not run out of message boundary */ - if (OPAL_UNLIKELY(s_offset + len > (size_t)count)) { - slen = count - s_offset; - if (OPAL_UNLIKELY(slen <= 0)) { - continue; - } - } 
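/* the block lies fully inside the message boundary, send the full length */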
else { - slen = len; - } - - PTPCOLL_VERBOSE(10, ("Send data from %d, addr %p offset %d len %d %d %d tag %d", - peer, data_buffer, s_offset, slen, len, blocks_in_step, tag)); - rc = MCA_PML_CALL(isend((void *)((unsigned char *)data_buffer + s_offset), - slen, MPI_BYTE, - peer, tag, MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[*active_requests]))); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - ++(*active_requests); - } - - completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc); - if (0 == completed) { - /* cache data for next iteration */ - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration = - i; /* why not to store step for next iteration ?! */ - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask = - blocks_in_step * cm->narray_knomial_radix; - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - } - - return BCOL_FN_COMPLETE; -} - -END_C_DECLS - -#endif diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_component.c b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_component.c deleted file mode 100644 index 9f2107882d..0000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_component.c +++ /dev/null @@ -1,174 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#include "ompi_config.h" -#include -#include -#include -#include - -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/bcol/bcol.h" -#include "bcol_ptpcoll.h" -#include "ompi/mca/bcol/base/base.h" - -#include "bcol_ptpcoll_mca.h" -#include "bcol_ptpcoll_utils.h" - -/* - * Public string showing the bcol ptpcoll V2 component version number - */ -const char *mca_bcol_ptpcoll_component_version_string = - "Open MPI bcol - ptpcoll collective MCA component version " OMPI_VERSION; - - -/* - * Local functions - */ - -static int ptpcoll_open(void); -static int ptpcoll_close(void); - -/* - * Instantiate the public struct with all of our public information - * and pointers to our public functions in it - */ - -mca_bcol_ptpcoll_component_t mca_bcol_ptpcoll_component = { - - /* First, fill in the super */ - - { - /* First, the mca_component_t struct containing meta - information about the component itself */ - - .bcol_version = { - MCA_BCOL_BASE_VERSION_2_0_0, - - /* Component name and version */ - - .mca_component_name = "ptpcoll", - MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION), - - /* Component open and close functions */ - - .mca_open_component = ptpcoll_open, - .mca_close_component = ptpcoll_close, - .mca_register_component_params = mca_bcol_ptpcoll_register_mca_params, - }, - - /* Initialization / querying functions */ - - .collm_init_query = mca_bcol_ptpcoll_init_query, - .collm_comm_query = mca_bcol_ptpcoll_comm_query, - .init_done = false, - .need_ordering = false, - }, - - /* component specific */ - -}; - -static void -collreq_construct(mca_bcol_ptpcoll_collreq_t *collreq) -{ - collreq->requests = NULL; -} - -static void -collreq_destruct(mca_bcol_ptpcoll_collreq_t *collreq) -{ - if (NULL != collreq->requests) { - free(collreq->requests); - } -} - -OBJ_CLASS_INSTANCE(mca_bcol_ptpcoll_collreq_t, - 
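The send and receive paths of the gather above share one piece of offset arithmetic: each rank owns a block of base_block_size bytes, blocks are exchanged blocks_in_step at a time, and the final block is clamped to the message boundary. A minimal sketch of that computation, with a hypothetical helper name that is not part of this component:

/* Sketch: byte offset and clamped length of the blocks exchanged at one
 * narray-knomial gather iteration. */
static inline int knomial_gather_block(size_t base_block_size, int blocks_in_step,
                                       int block_owner, size_t total_len,
                                       size_t *offset, size_t *len)
{
    size_t step_len = base_block_size * blocks_in_step;

    /* owners are grouped blocks_in_step at a time; locate the group */
    *offset = (size_t) block_owner / blocks_in_step * step_len;
    if (*offset + step_len > total_len) {
        if (*offset >= total_len) {
            return 0;                 /* block lies entirely past the end */
        }
        *len = total_len - *offset;   /* clamp the final, partial block   */
    } else {
        *len = step_len;
    }
    return 1;                         /* there is data to send/receive    */
}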
opal_free_list_item_t, - collreq_construct, - collreq_destruct); - -/* - * Open the component - */ -static int ptpcoll_open(void) -{ - return OMPI_SUCCESS; -} - -/* - * Close the component - */ -static int ptpcoll_close(void) -{ - return OMPI_SUCCESS; -} - -/* query to see if the component is available for use, and can - * satisfy the thread and progress requirements - */ -int mca_bcol_ptpcoll_init_query(bool enable_progress_threads, - bool enable_mpi_threads) -{ - /* at this stage there is no reason to disqualify this component */ - - /* done */ - return OMPI_SUCCESS; -} - -/* memory management routines */ - -/* allocate memory - this is a no-op function intended to work with - * mpool2, which will use malloc for allocation, if no other allocator - * is available. - */ -void * bcol_ptpcoll_allocate_memory(size_t length, size_t alignment, - struct mca_bcol_base_module_t *bcol_module) -{ - /* do nothing */ - return NULL; -} - -/* - * register memory - nothing to do - */ -int bcol_ptpcoll_register_memory(void * in_ptr, size_t length, size_t alignment, - struct mca_bcol_base_module_t *bcol_module) -{ - /* nothing to do */ - return OMPI_SUCCESS; -} - -/* deregister memory - nothing to do - */ -int bcol_ptpcoll_deregister_memory( void * in_ptr, - struct mca_bcol_base_module_t *bcol_module) -{ - /* nothing to do */ - return OMPI_SUCCESS; -} - -/* free memory - since we don't allocate, we also don't free */ -int bcol_ptpcoll_free_memory(void *ptr, - struct mca_bcol_base_module_t *bcol_module) -{ - /* nothing to do */ - return OMPI_SUCCESS; -} diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_fanin.c b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_fanin.c deleted file mode 100644 index 57dafce7bd..0000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_fanin.c +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "ompi/include/ompi/constants.h" -#include "ompi/mca/bcol/ptpcoll/bcol_ptpcoll.h" - -/* - * Fanin routines - no user data - */ - -int bcol_ptpcoll_fanin( bcol_function_args_t *input_args, - struct mca_bcol_base_module_t *module) -{ - /* local variable */ - int ret=OMPI_SUCCESS; - /* mca_bcol_ptpcoll_module_t *ptp_module=(mca_bcol_ptpcoll_module_t *) module; */ - - /* done */ - return ret; -} diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_fanout.c b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_fanout.c deleted file mode 100644 index ae5739391b..0000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_fanout.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "ompi/include/ompi/constants.h" -#include "ompi/mca/bcol/ptpcoll/bcol_ptpcoll.h" - -/* - * Fanin routines - no user data - */ - -int bcol_ptpcoll_fanout( bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - /* local variable */ - int ret = OMPI_SUCCESS; - /* TBD: - mca_bcol_ptpcoll_module_t *ptp_module=(mca_bcol_ptpcoll_module_t *) const_args->bcol_module; - */ - - /* done */ - return ret; -} diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_mca.c b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_mca.c deleted file mode 100644 index 57caf7c110..0000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_mca.c +++ /dev/null @@ -1,197 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include -#include -#include -#include - -#include "bcol_ptpcoll_mca.h" -#include "bcol_ptpcoll.h" - -/* - * Local flags - */ -enum { - REGINT_NEG_ONE_OK = 0x01, - REGINT_GE_ZERO = 0x02, - REGINT_GE_ONE = 0x04, - REGINT_NONZERO = 0x08, - REGINT_MAX = 0x88 -}; - -enum { - REGSTR_EMPTY_OK = 0x01, - - REGSTR_MAX = 0x88 -}; - -#if 0 /* Pasha: we will be need this function in future */ -/* - * utility routine for string parameter registration - */ -static int reg_string(const char* param_name, - const char* deprecated_param_name, - const char* param_desc, - const char* default_value, char **storage, - int flags) -{ - int index; - - *storage = default_value; - index = mca_base_component_var_register(&mca_bcol_ptpcoll_component.super.bcol_version, - param_name, param_desc, MCA_BASE_VAR_TYPE_STRING, - NULL, 0, 0, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); - if (NULL != deprecated_param_name) { - (void) mca_base_var_register_synonym(index, "ompi", "bcol", "ptpcoll", - deprecated_param_name, - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - } - - if (0 != (flags & REGSTR_EMPTY_OK) && (NULL == *storage || 0 == strlen(*storage))) { - opal_output(0, "Bad parameter value for parameter \"%s\"", - param_name); - return OMPI_ERR_BAD_PARAM; - } - - return OMPI_SUCCESS; -} -#endif - -/* - * utility routine for integer parameter registration - */ -static int reg_int(const char* param_name, - const char* deprecated_param_name, - const char* param_desc, - int default_value, int *storage, int flags) -{ - int index; - - *storage = default_value; - index = mca_base_component_var_register(&mca_bcol_ptpcoll_component.super.bcol_version, - param_name, param_desc, MCA_BASE_VAR_TYPE_INT, - NULL, 0, 0, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); - if (NULL != deprecated_param_name) { - (void) mca_base_var_register_synonym(index, "ompi", "bcol", "ptpcoll", - deprecated_param_name, - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - } - - if (0 != (flags & REGINT_NEG_ONE_OK) && -1 == *storage) { - return OMPI_SUCCESS; - } - if ((0 != (flags & REGINT_GE_ZERO) && *storage < 0) || - (0 != (flags & REGINT_GE_ONE) && *storage < 1) || - (0 != (flags & REGINT_NONZERO) && 0 == *storage)) { - opal_output(0, "Bad parameter value for parameter \"%s\"", - param_name); - return OMPI_ERR_BAD_PARAM; - } - - return OMPI_SUCCESS; -} - -static int reg_bool(const char* param_name, - const char* 
deprecated_param_name, - const char* param_desc, - bool default_value, bool *storage) -{ - int index; - - *storage = default_value; - index = mca_base_component_var_register(&mca_bcol_ptpcoll_component.super.bcol_version, - param_name, param_desc, MCA_BASE_VAR_TYPE_BOOL, - NULL, 0, 0, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); - if (0 > index) { - return index; - } - - if (NULL != deprecated_param_name) { - (void) mca_base_var_register_synonym(index, "ompi", "bcol", "ptpcoll", - deprecated_param_name, - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - } - - return OMPI_SUCCESS; -} - -int mca_bcol_ptpcoll_register_mca_params(void) -{ - int ret, tmp; - mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component; - - ret = OMPI_SUCCESS; -#define CHECK(expr) do {\ - tmp = (expr); \ - if (OMPI_SUCCESS != tmp) ret = tmp; \ - } while (0) - - CHECK(reg_int("priority", NULL, - "PTPCOLL component priority " - "(from 0 (low) to 90 (high))", 90, &cm->super.priority, 0)); - - CHECK(reg_int("verbose", NULL, - "Output some verbose PTPCOLL information " - "(0 = no output, nonzero = output)", 0, &cm->verbose, REGINT_GE_ZERO)); - - CHECK(reg_int("k_nomial_radix", NULL, - "The radix of the K-nomial tree " - "(starts from 2)", 2, &cm->k_nomial_radix, REGINT_GE_ONE)); - - CHECK(reg_int("narray_radix", NULL, - "The radix of the N-array tree " - "(starts from 2)", 2, &cm->narray_radix, REGINT_GE_ONE)); - - CHECK(reg_int("narray_knomial_radix", NULL, - "The radix of the N-array/K-nomial tree for scatter-gather type algorithms " - "(starts from 2)", 2, &cm->narray_knomial_radix, REGINT_GE_ONE)); - - CHECK(reg_int("num_to_probe", NULL, - "Number of probe operations in a single source data check " - "(starts from 8)", 8, &cm->num_to_probe, REGINT_GE_ONE)); - - CHECK(reg_int("bcast_small_msg_known_root_alg", NULL, - "Algorithm selection for bcast of small messages with known root " - "(1 - K-nomial, 2 - N-array)", 1, &cm->bcast_small_messages_known_root_alg, - REGINT_GE_ZERO)); - - CHECK(reg_int("bcast_large_msg_known_root_alg", NULL, - "Algorithm selection for bcast of large messages with known root " - "(1 - Binomial scatter-gather, 2 - N-array scatter, K-nomial gather)", - 1, &cm->bcast_large_messages_known_root_alg, REGINT_GE_ZERO)); - - CHECK(reg_int("barrier_alg", NULL, - "Algorithm selection for Barrier " - "(1 - Recursive doubling, 2 - Recursive K-ing)", - 1, &cm->barrier_alg, REGINT_GE_ZERO)); - - /* register parameters controlling message fragmentation */ - CHECK(reg_int("min_frag_size", NULL, - "Minimum fragment size", - getpagesize(), &cm->super.min_frag_size, REGINT_GE_ONE)); - - CHECK(reg_int("max_frag_size", NULL, - "Maximum fragment size", - FRAG_SIZE_NO_LIMIT, &cm->super.max_frag_size, REGINT_NONZERO)); - - CHECK(reg_bool("can_use_user_buffers", NULL, - "User memory can be used by the collective algorithms", - 1, &cm->super.can_use_user_buffers)); - - return ret; -} diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_mca.h b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_mca.h deleted file mode 100644 index 4d1067d9e4..0000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_mca.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
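The CHECK wrapper in mca_bcol_ptpcoll_register_mca_params above deliberately keeps going after a failure: every parameter is still registered, and only the most recent error is reported at the end. A condensed illustration of the same pattern; the parameter names and the CHECK_PARAM macro here are made up:

static int radix_value, verbose_value;

static int register_params_sketch(void)
{
    int ret = OMPI_SUCCESS, tmp;
#define CHECK_PARAM(expr) do { tmp = (expr); if (OMPI_SUCCESS != tmp) ret = tmp; } while (0)
    CHECK_PARAM(reg_int("example_radix", NULL, "tree radix (>= 1)", 2,
                        &radix_value, REGINT_GE_ONE));
    CHECK_PARAM(reg_int("example_verbose", NULL, "verbosity (>= 0)", 0,
                        &verbose_value, REGINT_GE_ZERO));
#undef CHECK_PARAM
    return ret;   /* OMPI_SUCCESS only if every registration succeeded */
}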
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#ifndef MCA_BCOL_PTPCOLL_MCA_H -#define MCA_BCOL_PTPCOLL_MCA_H - -#include "ompi_config.h" - -BEGIN_C_DECLS - -int mca_bcol_ptpcoll_register_mca_params(void); - -END_C_DECLS -#endif diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_module.c b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_module.c deleted file mode 100644 index ca8c32ec8d..0000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_module.c +++ /dev/null @@ -1,760 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#include "ompi_config.h" -#include -#include -#include -#include -#include - -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/bcol/bcol.h" -#include "opal/util/show_help.h" -#include "ompi/mca/bcol/base/base.h" -#include "ompi/mca/pml/pml.h" /* need this for the max tag size */ - -#include "bcol_ptpcoll.h" -#include "bcol_ptpcoll_utils.h" -#include "bcol_ptpcoll_bcast.h" -#include "bcol_ptpcoll_allreduce.h" -#include "bcol_ptpcoll_reduce.h" - -#define BCOL_PTP_CACHE_LINE_SIZE 128 - -/* - * Local functions - */ -static int alloc_allreduce_offsets_array(mca_bcol_ptpcoll_module_t *ptpcoll_module) -{ - int rc = OMPI_SUCCESS, i = 0; - netpatterns_k_exchange_node_t *k_node = &ptpcoll_module->knomial_exchange_tree; - int n_exchanges = k_node->n_exchanges; - - /* Precalculate the allreduce offsets */ - if (0 < k_node->n_exchanges) { - ptpcoll_module->allgather_offsets = (int **) calloc (n_exchanges, sizeof(int *)); - - if (!ptpcoll_module->allgather_offsets) { - return OMPI_ERROR; - } - - for (i = 0; i < n_exchanges ; i++) { - ptpcoll_module->allgather_offsets[i] = (int *) calloc (NOFFSETS, sizeof(int)); - - if (!ptpcoll_module->allgather_offsets[i]){ - return OMPI_ERROR; - } - } - } - - return rc; -} - -static int free_allreduce_offsets_array(mca_bcol_ptpcoll_module_t *ptpcoll_module) -{ - int rc = OMPI_SUCCESS, i = 0; - netpatterns_k_exchange_node_t *k_node = &ptpcoll_module->knomial_exchange_tree; - int n_exchanges = k_node->n_exchanges; - - if (ptpcoll_module->allgather_offsets) { - for (i=0; i < n_exchanges; i++) { - free (ptpcoll_module->allgather_offsets[i]); - } - } - - free(ptpcoll_module->allgather_offsets); - ptpcoll_module->allgather_offsets = NULL; - return rc; -} - -static void -mca_bcol_ptpcoll_module_construct(mca_bcol_ptpcoll_module_t *ptpcoll_module) -{ - uint64_t i; - /* Pointer to component */ - ptpcoll_module->narray_node = NULL; - ptpcoll_module->allgather_offsets = NULL; - ptpcoll_module->super.bcol_component = (mca_bcol_base_component_t *) &mca_bcol_ptpcoll_component; - ptpcoll_module->super.list_n_connected = NULL; - ptpcoll_module->super.hier_scather_offset = 0; - /* no header support in ptp */ - ptpcoll_module->super.header_size = 0; - /* No network context */ - ptpcoll_module->super.network_context = NULL; - /* set the upper limit on the tag */ - i = 2; - ptpcoll_module->tag_mask = 1; - while ( i <= (uint64_t) mca_pml.pml_max_tag && i > 0) { - i <<= 1; - } - ptpcoll_module->ml_mem.ml_buf_desc = NULL; - 
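The loop just above doubles i until it passes the PML's maximum tag (the i > 0 test guards against 64-bit wraparound), and the next statement turns the result into a bit mask. When pml_max_tag is one below a power of two, which is the usual case, the mask equals pml_max_tag exactly. A worked trace under that assumption:

/* Assume mca_pml.pml_max_tag == 0x7fffffff (2^31 - 1). */
uint64_t i = 2;
while (i <= UINT64_C(0x7fffffff) && i > 0) {
    i <<= 1;                   /* 4, 8, ..., 2^31; exits once i > max tag */
}
uint64_t tag_mask = i - 1;     /* 0x7fffffff: (offset + sn * factor) is   */
                               /* folded into the legal tag range, then   */
                               /* negated to mark it as an internal tag   */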
ptpcoll_module->tag_mask = i - 1; -} - -static void -mca_bcol_ptpcoll_module_destruct(mca_bcol_ptpcoll_module_t *ptpcoll_module) -{ - int i; - mca_bcol_ptpcoll_local_mlmem_desc_t *ml_mem = &ptpcoll_module->ml_mem; - - if (NULL != ml_mem->ml_buf_desc) { - /* Release the memory structs that were cache ML memory data */ - uint32_t i, j, ci; - for (i = 0; i < ml_mem->num_banks; i++) { - for (j = 0; j < ml_mem->num_buffers_per_bank; j++) { - ci = i * ml_mem->num_buffers_per_bank + j; - if (NULL != ml_mem->ml_buf_desc[ci].requests) { - free(ml_mem->ml_buf_desc[ci].requests); - } - } - } - /* release the buffer descriptor */ - free(ml_mem->ml_buf_desc); - ml_mem->ml_buf_desc = NULL; - } - - if (NULL != ptpcoll_module->allgather_offsets) { - free_allreduce_offsets_array(ptpcoll_module); - } - - if (NULL != ptpcoll_module->narray_node) { - for (i = 0; i < ptpcoll_module->group_size; i++) { - if (NULL != ptpcoll_module->narray_node[i].children_ranks) { - free(ptpcoll_module->narray_node[i].children_ranks); - } - } - - free(ptpcoll_module->narray_node); - ptpcoll_module->narray_node = NULL; - } - - OBJ_DESTRUCT(&ptpcoll_module->collreqs_free); - - if (NULL != ptpcoll_module->super.list_n_connected) { - free(ptpcoll_module->super.list_n_connected); - ptpcoll_module->super.list_n_connected = NULL; - } - - for (i = 0; i < BCOL_NUM_OF_FUNCTIONS; i++){ - OPAL_LIST_DESTRUCT((&ptpcoll_module->super.bcol_fns_table[i])); - } - - - if (NULL != ptpcoll_module->kn_proxy_extra_index) { - free(ptpcoll_module->kn_proxy_extra_index); - ptpcoll_module->kn_proxy_extra_index = NULL; - } - - if (NULL != ptpcoll_module->alltoall_iovec) { - free(ptpcoll_module->alltoall_iovec); - ptpcoll_module->alltoall_iovec = NULL; - } - - if (NULL != ptpcoll_module->narray_knomial_proxy_extra_index) { - free(ptpcoll_module->narray_knomial_proxy_extra_index); - ptpcoll_module->narray_knomial_proxy_extra_index = NULL; - } - - if (NULL != ptpcoll_module->narray_knomial_node) { - for(i = 0; i < ptpcoll_module->full_narray_tree_size; i++) { - netpatterns_cleanup_narray_knomial_tree (ptpcoll_module->narray_knomial_node + i); - } - free(ptpcoll_module->narray_knomial_node); - ptpcoll_module->narray_knomial_node = NULL; - } - - netpatterns_cleanup_recursive_knomial_allgather_tree_node(&ptpcoll_module->knomial_allgather_tree); - netpatterns_cleanup_recursive_knomial_tree_node(&ptpcoll_module->knomial_exchange_tree); - -} - -OBJ_CLASS_INSTANCE(mca_bcol_ptpcoll_module_t, - mca_bcol_base_module_t, - mca_bcol_ptpcoll_module_construct, - mca_bcol_ptpcoll_module_destruct); - -static int init_ml_buf_desc(mca_bcol_ptpcoll_ml_buffer_desc_t **desc, void *base_addr, uint32_t num_banks, - uint32_t num_buffers_per_bank, uint32_t size_buffer, uint32_t header_size, int group_size, int pow_k) -{ - uint32_t i, j, ci; - mca_bcol_ptpcoll_ml_buffer_desc_t *tmp_desc = NULL; - int k_nomial_radix = mca_bcol_ptpcoll_component.k_nomial_radix; - int pow_k_val = (0 == pow_k) ? 1 : pow_k; - int num_to_alloc = - ((k_nomial_radix - 1) * pow_k_val * 2 + 1 > mca_bcol_ptpcoll_component.narray_radix) ? 
- (k_nomial_radix - 1) * pow_k_val * 2 + 1 : - mca_bcol_ptpcoll_component.narray_radix * 2; - - - *desc = (mca_bcol_ptpcoll_ml_buffer_desc_t *)calloc(num_banks * num_buffers_per_bank, - sizeof(mca_bcol_ptpcoll_ml_buffer_desc_t)); - if (NULL == *desc) { - PTPCOLL_ERROR(("Failed to allocate memory")); - return OMPI_ERROR; - } - - tmp_desc = *desc; - - for (i = 0; i < num_banks; i++) { - for (j = 0; j < num_buffers_per_bank; j++) { - ci = i * num_buffers_per_bank + j; - tmp_desc[ci].bank_index = i; - tmp_desc[ci].buffer_index = j; - /* *2 is for gather session +1 for extra peer */ - tmp_desc[ci].requests = (ompi_request_t **) - calloc(num_to_alloc, sizeof(ompi_request_t *)); - if (NULL == tmp_desc[ci].requests) { - PTPCOLL_ERROR(("Failed to allocate memory for requests")); - return OMPI_ERROR; - } - /* - * ptpcoll don't have any header, but other bcols may to have. So - * we need to take it in account. - */ - tmp_desc[ci].data_addr = (void *) - ((unsigned char*)base_addr + ci * size_buffer + header_size); - PTPCOLL_VERBOSE(10, ("ml memory cache setup %d %d - %p", i, j, tmp_desc[ci].data_addr)); - - /* init reduce implementation flags */ - tmp_desc[ci].reduce_init_called = false; - tmp_desc[ci].reduction_status = 0; - } - } - - return OMPI_SUCCESS; -} - -static void mca_bcol_ptpcoll_set_small_msg_thresholds(struct mca_bcol_base_module_t *super) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = - (mca_bcol_ptpcoll_module_t *) super; - mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component; - - /* Subtract out the maximum header size when calculating the thresholds. This - * will account for the headers used by the basesmuma component. If we do not - * take these headers into account we may overrun our buffer. */ - - /* Set the Allgather threshold equals to a ML buff size */ - super->small_message_thresholds[BCOL_ALLGATHER] = - (ptpcoll_module->ml_mem.size_buffer - BCOL_HEADER_MAX) / - ompi_comm_size(ptpcoll_module->super.sbgp_partner_module->group_comm); - - /* Set the Bcast threshold, all Bcast algths have the same threshold */ - super->small_message_thresholds[BCOL_BCAST] = - (ptpcoll_module->ml_mem.size_buffer - BCOL_HEADER_MAX); - - /* Set the Alltoall threshold, the Ring algth sets some limitation */ - super->small_message_thresholds[BCOL_ALLTOALL] = - (ptpcoll_module->ml_mem.size_buffer - BCOL_HEADER_MAX) / 2; - - /* Set the Allreduce threshold, NARRAY algth sets some limitation */ - super->small_message_thresholds[BCOL_ALLREDUCE] = - (ptpcoll_module->ml_mem.size_buffer - BCOL_HEADER_MAX) / ptpcoll_module->k_nomial_radix; - - /* Set the Reduce threshold, NARRAY algth sets some limitation */ - super->small_message_thresholds[BCOL_REDUCE] = - (ptpcoll_module->ml_mem.size_buffer - BCOL_HEADER_MAX) / cm->narray_radix; -} - -/* - * Cache information about ML memory - */ -static int mca_bcol_ptpcoll_cache_ml_memory_info(struct mca_bcol_base_memory_block_desc_t *payload_block, - uint32_t data_offset, - struct mca_bcol_base_module_t *bcol, - void *reg_data) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *) bcol; - mca_bcol_ptpcoll_local_mlmem_desc_t *ml_mem = &ptpcoll_module->ml_mem; - struct mca_bcol_base_memory_block_desc_t *desc = payload_block; - int group_size = ptpcoll_module->super.sbgp_partner_module->group_size; - - PTPCOLL_VERBOSE(10, ("mca_bcol_ptpcoll_init_buffer_memory was called")); - - /* cache ml mem desc tunings localy */ - ml_mem->num_banks = desc->num_banks; - ml_mem->num_buffers_per_bank = desc->num_buffers_per_bank; - ml_mem->size_buffer = 
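Each threshold assignment above reserves BCOL_HEADER_MAX bytes of the ML payload buffer for headers that other bcol components may add, then divides the remainder by the number of pieces the algorithm packs into one buffer. With purely illustrative, assumed numbers:

/* Assume size_buffer = 65536, BCOL_HEADER_MAX = 128, 16 ranks, radix 2. */
size_t usable           = 65536 - 128;   /* 65408 usable payload bytes   */
size_t allgather_thresh = usable / 16;   /* one slot per rank            */
size_t bcast_thresh     = usable;        /* bcast fills the whole buffer */
size_t alltoall_thresh  = usable / 2;    /* the ring keeps two halves    */
size_t allreduce_thresh = usable / 2;    /* one slot per k-nomial radix  */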
desc->size_buffer; - - PTPCOLL_VERBOSE(10, ("ML buffer configuration num banks %d num_per_bank %d size %d base addr %p", - desc->num_banks, desc->num_buffers_per_bank, desc->size_buffer, desc->block->base_addr)); - - /* Set first bank index for release */ - ml_mem->bank_index_for_release = 0; - - if (OMPI_SUCCESS != init_ml_buf_desc(&ml_mem->ml_buf_desc, - desc->block->base_addr, - ml_mem->num_banks, - ml_mem->num_buffers_per_bank, - ml_mem->size_buffer, - data_offset, - group_size, - ptpcoll_module->pow_k)) { - PTPCOLL_VERBOSE(10, ("Failed to allocate rdma memory descriptor\n")); - return OMPI_ERROR; - } - - PTPCOLL_VERBOSE(10, ("ptpcoll_module = %p, ml_mem_desc = %p.\n", - ptpcoll_module)); - - return OMPI_SUCCESS; -} - -/* - * Load ptpcoll bcol functions - */ -static void load_func(mca_bcol_ptpcoll_module_t *ptpcoll_module) -{ - int fnc; - - /* reset everything to NULL */ - for (fnc = 0; fnc < BCOL_NUM_OF_FUNCTIONS; fnc++) { - - /*ptpcoll_module->super.bcol_function_table[fnc] = NULL;*/ - ptpcoll_module->super.bcol_function_table[fnc] = NULL; - ptpcoll_module->super.bcol_function_init_table[fnc] = NULL; - } - - ptpcoll_module->super.bcol_function_init_table[BCOL_BARRIER] = bcol_ptpcoll_barrier_init; - - ptpcoll_module->super.bcol_function_init_table[BCOL_BCAST] = bcol_ptpcoll_bcast_init; - ptpcoll_module->super.bcol_function_init_table[BCOL_ALLREDUCE] = bcol_ptpcoll_allreduce_init; - ptpcoll_module->super.bcol_function_init_table[BCOL_ALLGATHER] = bcol_ptpcoll_allgather_init; - ptpcoll_module->super.bcol_function_table[BCOL_BCAST] = bcol_ptpcoll_bcast_k_nomial_anyroot; - ptpcoll_module->super.bcol_function_init_table[BCOL_ALLTOALL] = NULL; - ptpcoll_module->super.bcol_function_init_table[BCOL_SYNC] = mca_bcol_ptpcoll_memsync_init; - ptpcoll_module->super.bcol_function_init_table[BCOL_REDUCE] = bcol_ptpcoll_reduce_init; - - /* ML memory cacher */ - ptpcoll_module->super.bcol_memory_init = mca_bcol_ptpcoll_cache_ml_memory_info; - - /* Set thresholds */ - ptpcoll_module->super.set_small_msg_thresholds = mca_bcol_ptpcoll_set_small_msg_thresholds; - - /* setup recursive k-ing tree */ - ptpcoll_module->super.k_nomial_tree = mca_bcol_ptpcoll_setup_knomial_tree; -} - -int mca_bcol_ptpcoll_setup_knomial_tree(mca_bcol_base_module_t *super) -{ - mca_bcol_ptpcoll_module_t *p2p_module = (mca_bcol_ptpcoll_module_t *) super; - int rc = 0; - - rc = netpatterns_setup_recursive_knomial_allgather_tree_node( - p2p_module->super.sbgp_partner_module->group_size, - p2p_module->super.sbgp_partner_module->my_index, - mca_bcol_ptpcoll_component.k_nomial_radix, - super->list_n_connected, - &p2p_module->knomial_allgather_tree); - - return rc; -} - -/* The function used to calculate size */ -static int calc_full_tree_size(int radix, int group_size, int *num_leafs) -{ - int level_cnt = 1; - int total_cnt = 0; - - while( total_cnt < group_size ) { - total_cnt += level_cnt; - level_cnt *= radix; - } - - if (total_cnt > group_size) { - *num_leafs = level_cnt / radix; - return total_cnt - level_cnt / radix; - } else { - *num_leafs = level_cnt; - return group_size; - } -} - -/* Setup N-array scatter Knomial-gather static information */ -static int load_narray_knomial_tree (mca_bcol_ptpcoll_module_t *ptpcoll_module) -{ - int rc, i, peer; - mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component; - - ptpcoll_module->full_narray_tree_size = calc_full_tree_size( - cm->narray_knomial_radix, - ptpcoll_module->group_size, - &ptpcoll_module->full_narray_tree_num_leafs); - - ptpcoll_module->narray_knomial_proxy_extra_index 
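calc_full_tree_size above accumulates complete levels until the running total covers the group, then backs off to the largest complete radix-ary tree. Tracing the arithmetic for radix 3 and a group of 10:

int leafs = 0;
int size  = calc_full_tree_size(3, 10, &leafs);
/* level_cnt: 1 -> 3 -> 9 -> 27; total_cnt: 1 -> 4 -> 13 (>= 10, loop ends).
 * Since 13 > 10: leafs = 27 / 3 = 9 (the width of the level that was cut)
 * and size = 13 - 9 = 4, the largest complete 3-ary tree that fits.
 * The remaining 10 - 4 = 6 ranks attach to the tree as extras. */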
= (int *) - malloc(sizeof(int) * (cm->narray_knomial_radix)); - if (NULL == ptpcoll_module->narray_knomial_proxy_extra_index) { - PTPCOLL_ERROR(("Failed to allocate memory")); - goto Error; - } - - ptpcoll_module->narray_knomial_node = calloc( - ptpcoll_module->full_narray_tree_size, - sizeof(netpatterns_narray_knomial_tree_node_t)); - if(NULL == ptpcoll_module->narray_knomial_node) { - goto Error; - } - - PTPCOLL_VERBOSE(10 ,("My type is proxy, full tree size = %d [%d]", - ptpcoll_module->full_narray_tree_size, - cm->narray_knomial_radix - )); - - if (ptpcoll_module->super.sbgp_partner_module->my_index < - ptpcoll_module->full_narray_tree_size) { - if (ptpcoll_module->super.sbgp_partner_module->my_index < - ptpcoll_module->group_size - ptpcoll_module->full_narray_tree_size) { - ptpcoll_module->narray_type = PTPCOLL_PROXY; - for (i = 0; i < cm->narray_knomial_radix; i++) { - peer = - ptpcoll_module->super.sbgp_partner_module->my_index * - cm->narray_knomial_radix + i + - ptpcoll_module->full_narray_tree_size; - if (peer >= ptpcoll_module->group_size) { - break; - } - ptpcoll_module->narray_knomial_proxy_extra_index[i] = peer; - } - ptpcoll_module->narray_knomial_proxy_num = i; - } else { - ptpcoll_module->narray_type = PTPCOLL_IN_GROUP;; - } - /* Setting node info */ - for(i = 0; i < ptpcoll_module->full_narray_tree_size; i++) { - rc = netpatterns_setup_narray_knomial_tree( - cm->narray_knomial_radix, - i, - ptpcoll_module->full_narray_tree_size, - &ptpcoll_module->narray_knomial_node[i]); - if(OMPI_SUCCESS != rc) { - goto Error; - } - } - } else { - ptpcoll_module->narray_type = PTPCOLL_EXTRA; - ptpcoll_module->narray_knomial_proxy_extra_index[0] = - (ptpcoll_module->super.sbgp_partner_module->my_index - - ptpcoll_module->full_narray_tree_size) / - cm->narray_knomial_radix; - } - - return OMPI_SUCCESS; - -Error: - if (NULL != ptpcoll_module->narray_knomial_node) { - free(ptpcoll_module->narray_knomial_node); - } - if (NULL != ptpcoll_module->narray_knomial_proxy_extra_index) { - free(ptpcoll_module->narray_knomial_proxy_extra_index); - } - return OMPI_ERROR; -} - -/* Setup N-array static information */ -static int load_narray_tree(mca_bcol_ptpcoll_module_t *ptpcoll_module) -{ - int rc, i; - mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component; - - ptpcoll_module->narray_node = calloc(ptpcoll_module->group_size, - sizeof(netpatterns_tree_node_t)); - if(NULL == ptpcoll_module->narray_node ) { - goto Error; - } - - for(i = 0; i < ptpcoll_module->group_size; i++) { - rc = netpatterns_setup_narray_tree( - cm->narray_radix, - i, - ptpcoll_module->group_size, - &ptpcoll_module->narray_node[i]); - if(OMPI_SUCCESS != rc) { - goto Error; - } - } - - return OMPI_SUCCESS; - -Error: - if (NULL != ptpcoll_module->narray_node) { - free(ptpcoll_module->narray_node); - } - return OMPI_ERROR; -} - -static int load_knomial_info(mca_bcol_ptpcoll_module_t *ptpcoll_module) -{ - int i; - mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component; - - ptpcoll_module->k_nomial_radix = - cm->k_nomial_radix > ptpcoll_module->group_size ? 
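The adoption rule above is symmetric: an in-tree proxy p adopts the extra ranks p * radix + i + tree_size, and an extra rank recovers its proxy with the matching integer division. Continuing the tree-size 4, radix 3, group-of-10 example (illustrative numbers only):

/* proxy 0 adopts extras 0*3 + {0,1,2} + 4 = {4, 5, 6}
 * proxy 1 adopts extras 1*3 + {0,1,2} + 4 = {7, 8, 9}
 * and the inverse mapping, as used for PTPCOLL_EXTRA ranks: */
static inline int proxy_of_extra(int my_index, int tree_size, int radix)
{
    return (my_index - tree_size) / radix;   /* e.g. (8 - 4) / 3 == 1 */
}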
- ptpcoll_module->group_size : - cm->k_nomial_radix; - - ptpcoll_module->pow_k = pow_k_calc(ptpcoll_module->k_nomial_radix, - ptpcoll_module->group_size, - &ptpcoll_module->pow_knum); - - ptpcoll_module->kn_proxy_extra_index = (int *) - malloc(sizeof(int) * (ptpcoll_module->k_nomial_radix - 1)); - if (NULL == ptpcoll_module->kn_proxy_extra_index) { - PTPCOLL_ERROR(("Failed to allocate memory")); - goto Error; - } - - /* Setting peer type for the K-nomial algorithm */ - if (ptpcoll_module->super.sbgp_partner_module->my_index < ptpcoll_module->pow_knum ) { - if (ptpcoll_module->super.sbgp_partner_module->my_index < - ptpcoll_module->group_size - ptpcoll_module->pow_knum) { - for (i = 0; - i < (ptpcoll_module->k_nomial_radix - 1) && - ptpcoll_module->super.sbgp_partner_module->my_index * - (ptpcoll_module->k_nomial_radix - 1) + - i + ptpcoll_module->pow_knum < ptpcoll_module->group_size - ; i++) { - ptpcoll_module->pow_ktype = PTPCOLL_KN_PROXY; - ptpcoll_module->kn_proxy_extra_index[i] = - ptpcoll_module->super.sbgp_partner_module->my_index * - (ptpcoll_module->k_nomial_radix - 1) + - i + ptpcoll_module->pow_knum; - PTPCOLL_VERBOSE(10 ,("My type is proxy, pow_knum = %d [%d] my extra %d", - ptpcoll_module->pow_knum, - ptpcoll_module->pow_k, - ptpcoll_module->kn_proxy_extra_index[i])); - } - ptpcoll_module->kn_proxy_extra_num = i; - } else { - PTPCOLL_VERBOSE(10 ,("My type is in group, pow_knum = %d [%d]", ptpcoll_module->pow_knum, - ptpcoll_module->pow_k)); - ptpcoll_module->pow_ktype = PTPCOLL_KN_IN_GROUP; - } - } else { - ptpcoll_module->pow_ktype = PTPCOLL_KN_EXTRA; - ptpcoll_module->kn_proxy_extra_index[0] = (ptpcoll_module->super.sbgp_partner_module->my_index - - ptpcoll_module->pow_knum) / (ptpcoll_module->k_nomial_radix - 1); - PTPCOLL_VERBOSE(10 ,("My type is extra, pow_knum = %d [%d] my proxy %d", - ptpcoll_module->pow_knum, - ptpcoll_module->pow_k, - ptpcoll_module->kn_proxy_extra_index[0])); - } - - return OMPI_SUCCESS; - -Error: - if (NULL != ptpcoll_module->kn_proxy_extra_index) { - free(ptpcoll_module->kn_proxy_extra_index); - } - - return OMPI_ERROR; -} - -static int load_binomial_info(mca_bcol_ptpcoll_module_t *ptpcoll_module) -{ - ptpcoll_module->pow_2 = pow_k_calc(2, - ptpcoll_module->group_size, - &ptpcoll_module->pow_2num); - - assert(ptpcoll_module->pow_2num == 1 << ptpcoll_module->pow_2); - assert(ptpcoll_module->pow_2num <= ptpcoll_module->group_size); - - /* Setting peer type for the binomial algorithm */ - if (ptpcoll_module->super.sbgp_partner_module->my_index < ptpcoll_module->pow_2num ) { - if (ptpcoll_module->super.sbgp_partner_module->my_index < - ptpcoll_module->group_size - ptpcoll_module->pow_2num) { - PTPCOLL_VERBOSE(10 ,("My type is proxy, pow_2num = %d [%d]", ptpcoll_module->pow_2num, - ptpcoll_module->pow_2)); - ptpcoll_module->pow_2type = PTPCOLL_PROXY; - ptpcoll_module->proxy_extra_index = ptpcoll_module->super.sbgp_partner_module->my_index + - ptpcoll_module->pow_2num; - } else { - PTPCOLL_VERBOSE(10 ,("My type is in group, pow_2num = %d [%d]", ptpcoll_module->pow_2num, - ptpcoll_module->pow_2)); - ptpcoll_module->pow_2type = PTPCOLL_IN_GROUP; - } - } else { - PTPCOLL_VERBOSE(10 ,("My type is extra, pow_2num = %d [%d]", ptpcoll_module->pow_2num, - ptpcoll_module->pow_2)); - ptpcoll_module->pow_2type = PTPCOLL_EXTRA; - ptpcoll_module->proxy_extra_index = ptpcoll_module->super.sbgp_partner_module->my_index - - ptpcoll_module->pow_2num; - } - return OMPI_SUCCESS; -} - -static int load_recursive_knomial_info(mca_bcol_ptpcoll_module_t *ptpcoll_module) -{ - int rc =
OMPI_SUCCESS; - rc = netpatterns_setup_recursive_knomial_tree_node( - ptpcoll_module->group_size, - ptpcoll_module->super.sbgp_partner_module->my_index, - mca_bcol_ptpcoll_component.k_nomial_radix, - &ptpcoll_module->knomial_exchange_tree); - return rc; -} - -static int bcol_ptpcoll_collreq_init(opal_free_list_item_t *item, void* ctx) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module= (mca_bcol_ptpcoll_module_t *) ctx; - mca_bcol_ptpcoll_collreq_t *collreq = (mca_bcol_ptpcoll_collreq_t *) item; - - switch(mca_bcol_ptpcoll_component.barrier_alg) { - case 1: - collreq->requests = (ompi_request_t **) - calloc(2, sizeof(ompi_request_t *)); - break; - case 2: - collreq->requests = (ompi_request_t **) - calloc(2 * ptpcoll_module->k_nomial_radix, sizeof(ompi_request_t *)); - break; - } - - if (NULL == collreq->requests) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - - return OPAL_SUCCESS; -} - -/* query to see if the module is available for use on the given - * communicator, and if so, what it's priority is. This is where - * the backing shared-memory file is created. - */ -mca_bcol_base_module_t **mca_bcol_ptpcoll_comm_query(mca_sbgp_base_module_t *sbgp, - int *num_modules) -{ - int rc; - /* local variables */ - struct ompi_communicator_t *comm = sbgp->group_comm; - mca_bcol_ptpcoll_module_t *ptpcoll_module = NULL; - mca_bcol_base_module_t **ptpcoll_modules = NULL; - int iovec_size; - - /* initialize local variables */ - *num_modules = 0; - - /* - * This is activated only for intra-communicators - */ - if (OMPI_COMM_IS_INTER(comm) ) { - return NULL; - } - - /* allocate and initialize an sm-v2 module */ - ptpcoll_modules = (mca_bcol_base_module_t **) malloc(sizeof(mca_bcol_base_module_t *)); - if (NULL == ptpcoll_modules) { - return NULL; - } - - ptpcoll_module = OBJ_NEW(mca_bcol_ptpcoll_module_t); - if (NULL == ptpcoll_module) { - free(ptpcoll_modules); - return NULL; - } - - /* On this stage we support only one single module */ - ptpcoll_modules[*num_modules] = &(ptpcoll_module->super); - - (*num_modules)++; - /* set the subgroup */ - ptpcoll_module->super.sbgp_partner_module = sbgp; - /* caching some useful information */ - ptpcoll_module->group_size = - ptpcoll_module->super.sbgp_partner_module->group_size; - - rc = load_binomial_info(ptpcoll_module); - if (OMPI_SUCCESS != rc) { - PTPCOLL_VERBOSE(10, ("Failed to load knomial info")); - goto CLEANUP; - } - - rc = load_knomial_info(ptpcoll_module); - if (OMPI_SUCCESS != rc) { - PTPCOLL_VERBOSE(10, ("Failed to load knomial info")); - goto CLEANUP; - } - - rc = load_narray_tree(ptpcoll_module); - if (OMPI_SUCCESS != rc) { - PTPCOLL_VERBOSE(10, ("Failed to load narray tree")); - goto CLEANUP; - } - - rc = load_narray_knomial_tree(ptpcoll_module); - if (OMPI_SUCCESS != rc) { - PTPCOLL_VERBOSE(10, ("Failed to load narray-knomila tree")); - goto CLEANUP; - } - - rc = load_recursive_knomial_info(ptpcoll_module); - if (OMPI_SUCCESS != rc) { - PTPCOLL_VERBOSE(10, ("Failed to load recursive knomial tree")); - goto CLEANUP; - } - - /* creating collfrag free list */ - OBJ_CONSTRUCT(&ptpcoll_module->collreqs_free, opal_free_list_t); - rc = opal_free_list_init (&ptpcoll_module->collreqs_free, - sizeof(mca_bcol_ptpcoll_collreq_t), - BCOL_PTP_CACHE_LINE_SIZE, - OBJ_CLASS(mca_bcol_ptpcoll_collreq_t), - 0, BCOL_PTP_CACHE_LINE_SIZE, - 256 /* free_list_num */, - -1 /* free_list_max, -1 = infinite */, - 32 /* free_list_inc */, - NULL, 0, NULL, - bcol_ptpcoll_collreq_init, - ptpcoll_module); - if (OMPI_SUCCESS != rc) { - goto CLEANUP; - } - - load_func(ptpcoll_module); - - 
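The load_binomial_info step above folds a group that is not a power of two onto the largest power of two inside it: the low pow_2num ranks run the recursive exchange, and every leftover rank pairs with the proxy exactly pow_2num below it. For an assumed group of 10:

int pow_2num;
int pow_2 = pow_k_calc(2, 10, &pow_2num);   /* pow_2 = 3, pow_2num = 8 */
/* ranks 0..1: PTPCOLL_PROXY,    extra partner = rank + 8 -> ranks 8, 9
 * ranks 2..7: PTPCOLL_IN_GROUP, no partner
 * ranks 8..9: PTPCOLL_EXTRA,    proxy = rank - 8 -> ranks 0, 1 */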
rc = alloc_allreduce_offsets_array(ptpcoll_module); - if (OMPI_SUCCESS != rc) { - goto CLEANUP; - } - - /* Allocating iovec for PTP alltoall */ - iovec_size = ptpcoll_module->group_size / 2 + ptpcoll_module->group_size % 2; - ptpcoll_module->alltoall_iovec = (struct iovec *) malloc(sizeof(struct iovec) - * iovec_size); - ptpcoll_module->log_group_size = lognum(ptpcoll_module->group_size); - - rc = mca_bcol_base_bcol_fns_table_init(&(ptpcoll_module->super)); - if (OMPI_SUCCESS != rc) { - goto CLEANUP; - } - - /* Zero copy is supported */ - ptpcoll_module->super.supported_mode = MCA_BCOL_BASE_ZERO_COPY; - - /* return */ - return ptpcoll_modules; - -CLEANUP: - - OBJ_RELEASE(ptpcoll_module); - free(ptpcoll_modules); - return NULL; -} diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_reduce.c b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_reduce.c deleted file mode 100644 index d8fe566543..0000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_reduce.c +++ /dev/null @@ -1,405 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/include/ompi/constants.h" -#include "ompi/mca/bcol/bcol.h" -#include "bcol_ptpcoll_reduce.h" -#include "bcol_ptpcoll_utils.h" - -static int bcol_ptpcoll_reduce_narray_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - -static int bcol_ptpcoll_reduce_narray(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args); - - -#define NARRAY_RECV_NB(narray_node, process_shift, group_size, \ - recv_buffer, pack_len, tag, comm, recv_requests, \ - num_pending_recvs) \ -do { \ - int n, rc = OMPI_SUCCESS; \ - int dst; \ - int comm_dst; \ - int offset = 0 ; \ - \ - /* Recieve data from all relevant childrens */ \ - for (n = 0; n < narray_node->n_children; n++) { \ - \ - dst = narray_node->children_ranks[n] + process_shift; \ - if (dst >= group_size) { \ - dst -= group_size; \ - } \ - comm_dst = group_list[dst]; \ - \ - /* Non blocking send .... 
*/ \ - PTPCOLL_VERBOSE(1 , ("Reduce, Irecv data to %d[%d], count %d, tag %d, addr %p", \ - dst, comm_dst, pack_len, tag, \ - data_buffer)); \ - rc = MCA_PML_CALL(irecv((void *)((unsigned char*)recv_buffer + offset), pack_len, MPI_BYTE, \ - comm_dst, tag, comm, \ - &(recv_requests[*num_pending_recvs]))); \ - if( OMPI_SUCCESS != rc ) { \ - PTPCOLL_VERBOSE(10, ("Failed to start non-blocking receive")); \ - return OMPI_ERROR; \ - } \ - ++(*num_pending_recvs); \ - offset += pack_len; \ - } \ -} while(0) - - -static inline int narray_reduce(void *data_buffer, void *recv_buffer, - int nrecvs, int count, - struct ompi_datatype_t *dtype, struct ompi_op_t *op, - int *reduction_status) { - int pack_len = count * dtype->super.size; - int i = 0; - void *source_buffer = NULL, *result_buffer = NULL; - - source_buffer = data_buffer; - result_buffer = recv_buffer; - - for (i = 0; i < nrecvs; i++) { - ompi_op_reduce(op, (void*)((unsigned char*) source_buffer) , - (void*)((unsigned char*) result_buffer), - count,dtype); - - source_buffer = (void *)((unsigned char*)recv_buffer - + (i+1) * pack_len); - } - - *reduction_status = 1; - return OMPI_SUCCESS; -} -static int bcol_ptpcoll_reduce_narray_progress(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - - int tag = -1; - int rc; - int group_size = ptpcoll_module->group_size; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - uint32_t buffer_index = input_args->buffer_index; - struct ompi_op_t *op = input_args->op; - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **send_request = - &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests[0]; - ompi_request_t **recv_requests = - &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests[1]; - void *data_buffer = NULL; - void *src_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - void *recv_buffer = (void *) ( - (unsigned char *)input_args->rbuf + - (size_t)input_args->rbuf_offset); - int count = input_args->count; - struct ompi_datatype_t *dtype = input_args->dtype; - int pack_len = input_args->count * input_args->dtype->super.size; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int matched = false; - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - int relative_group_index = 0; - netpatterns_tree_node_t *narray_node = NULL; - bool not_sent = false; - int parent_rank = -1, comm_parent_rank = -1; - int group_root_index = input_args->root; - - if (!ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].reduce_init_called) { - bcol_ptpcoll_reduce_narray(input_args, const_args); - } - /* - * By default the src buffer is the data buffer, - * only after reduction, the recv buffer becomes the - * data buffer - */ - data_buffer = src_buffer; - - relative_group_index = my_group_index - group_root_index; - if (relative_group_index < 0) { - relative_group_index +=group_size; - } - - /* keep tag within the limit support by the pml */ - tag = (PTPCOLL_TAG_OFFSET + input_args->sequence_num * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - /* mark this as a collective tag, to avoid conflict with user-level tags */ - tag = -tag; - - narray_node = &ptpcoll_module->narray_node[relative_group_index]; - - PTPCOLL_VERBOSE(3, ("reduce, Narray tree Progress")); - - PTPCOLL_VERBOSE(8, 
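narray_reduce above accumulates everything into the first block of the receive buffer: pass 0 folds the caller's own data into child 0's block, and each later pass folds the next child's block into that same location, which is why the caller afterwards treats recv_buffer as the data buffer. The same loop restated as a standalone sketch:

/* Sketch: recv holds n child contributions, each pack_len bytes. */
static void narray_reduce_sketch(void *own, char *recv, int n, int count,
                                 struct ompi_datatype_t *dtype,
                                 struct ompi_op_t *op, size_t pack_len)
{
    void *src = own;                                 /* first operand    */
    for (int i = 0; i < n; i++) {
        ompi_op_reduce(op, src, recv, count, dtype); /* recv[0] op= src  */
        src = recv + (size_t)(i + 1) * pack_len;     /* next child block */
    }
}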
("bcol_ptpcoll_reduce_narray, buffer index: %d " - "tag: %d " - "tag_mask: %d " - "sn: %d " - "root: %d [%d]" - "buff: %p ", - buffer_index, tag, - ptpcoll_module->tag_mask, input_args->sequence_num, - input_args->root_flag, input_args->root_route->rank, - data_buffer)); - - /* - Check if the data was received - */ - if (0 != *active_requests) { - matched = mca_bcol_ptpcoll_test_all_for_match - (active_requests, recv_requests, &rc); - if (OMPI_SUCCESS != rc) { - return OMPI_ERROR; - } - - - /* All data was received, then do a reduction*/ - if(matched) { - narray_reduce(data_buffer, recv_buffer, narray_node->n_children, count, dtype, op, - &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].reduction_status); - - /* - * The reduction result is in the recv buffer, so it is the new data - * buffer - */ - data_buffer = recv_buffer; - - /* If not reduced, means also, you might not posted a send */ - not_sent = true; - } else { - PTPCOLL_VERBOSE(10, ("reduce root is started")); - return BCOL_FN_STARTED; - } - } - - /* I'm root, I'm done */ - if (input_args->root_flag) { - return BCOL_FN_COMPLETE; - } - - PTPCOLL_VERBOSE(1,("Testing Sending Match")); - - /* If send was not posted */ - /* Manju: Leaf node should never post in the progress logic */ - if (not_sent) { - parent_rank = - ptpcoll_module->narray_node[relative_group_index].parent_rank + - group_root_index; - if (parent_rank >= group_size) { - parent_rank -= group_size; - } - - comm_parent_rank = group_list[parent_rank]; - PTPCOLL_VERBOSE(1,("Sending data to %d ",comm_parent_rank)); - - rc = MCA_PML_CALL(isend(data_buffer, pack_len, MPI_BYTE, - comm_parent_rank, - tag, MCA_PML_BASE_SEND_STANDARD, comm, send_request)); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - } - - if (0 == mca_bcol_ptpcoll_test_for_match(send_request, &rc)) { - PTPCOLL_VERBOSE(10, ("Test was not matched - %d", rc)); - /* Data has not been sent. Return that the collective has been stated - * because we MUST call test on this request once it is finished to - * ensure that it is properly freed. */ - return (OMPI_SUCCESS != rc) ? 
rc : BCOL_FN_STARTED; - } - - return BCOL_FN_COMPLETE; -} - -static int bcol_ptpcoll_reduce_narray(bcol_function_args_t *input_args, - struct mca_bcol_base_function_t *const_args) -{ - mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module; - - int tag; - int rc; - int group_size = ptpcoll_module->group_size; - int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list; - uint32_t buffer_index = input_args->buffer_index; - - struct ompi_op_t *op = input_args->op; - ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm; - ompi_request_t **recv_requests = - &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests[1]; - ompi_request_t **send_request = - &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests[0]; - - void *data_buffer = NULL; - void *src_buffer = (void *) ( - (unsigned char *)input_args->sbuf + - (size_t)input_args->sbuf_offset); - void *recv_buffer = (void *) ( - (unsigned char *)input_args->rbuf + - (size_t)input_args->rbuf_offset); - int count = input_args->count; - struct ompi_datatype_t *dtype = input_args->dtype; - int pack_len = input_args->count * input_args->dtype->super.size; - int *active_requests = - &(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests); - int matched = true; - int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index; - int group_root_index = -1; - int relative_group_index = 0; - netpatterns_tree_node_t *narray_node = NULL; - int parent_rank = -1, comm_parent_rank = -1; - - - /* This is first function call that should be called, not progress. - * The fragmentation code does this, so switch from progress to here. - * The flag indicates whether, we have entered this code * - */ - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].reduce_init_called = true; - - PTPCOLL_VERBOSE(1, ("Reduce, Narray tree")); - /* reset active request counter */ - (*active_requests) = 0; - /* keep tag within the limit support by the pml */ - tag = (PTPCOLL_TAG_OFFSET + input_args->sequence_num * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask); - /* mark this as a collective tag, to avoid conflict with user-level flags */ - tag = -tag; - - PTPCOLL_VERBOSE(1, ("bcol_ptpcoll_reduce_narray, buffer index: %d " - "tag: %d " - "tag_mask: %d " - "sn: %d " - "root: %d " - "buff: %p ", - buffer_index, tag, - ptpcoll_module->tag_mask, input_args->sequence_num, - input_args->root_flag, - src_buffer)); - - /* Compute Root Index Shift */ - group_root_index = input_args->root; - relative_group_index = my_group_index - group_root_index; - if (relative_group_index < 0) { - relative_group_index += group_size; - } - - narray_node = &ptpcoll_module->narray_node[relative_group_index]; - - if (0 == narray_node->n_children) { - PTPCOLL_VERBOSE(10, ("I'm leaf of the data")); - /* - * I'm root of the operation - * send data to N childrens - */ - data_buffer = src_buffer; - goto NARRAY_SEND_DATA; - } - - /* Not leaf, either an internal node or root */ - NARRAY_RECV_NB(narray_node, group_root_index, group_size, - recv_buffer, pack_len, tag, comm, recv_requests, - active_requests); - - - /* We have not done reduction, yet */ - ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].reduction_status = 0; - - /* We can not block. 
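Both the first-call path and the progress path re-root the precomputed N-array tree by shifting every rank by the root index modulo the group size, so one static tree serves any root. Worked numbers for an assumed group of 8 with root 5:

/* my_index = 0, root = 5: relative = (0 - 5 + 8) % 8 = 3 -> narray_node[3];
 * the parent is shifted back: parent = (narray_node[3].parent_rank + 5) % 8.
 * Rank 5 itself maps to relative index 0, the root of the static tree. */
static inline int relative_index(int my_index, int root, int group_size)
{
    int rel = my_index - root;
    return (rel < 0) ? rel + group_size : rel;
}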
So run a couple of tests for data arrival */ - matched = mca_bcol_ptpcoll_test_all_for_match - (active_requests, recv_requests, &rc); - - /* Check if we received the data */ - if(matched) { - - narray_reduce(src_buffer, recv_buffer, narray_node->n_children, - count, dtype, op, &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].reduction_status); - PTPCOLL_VERBOSE(1, ("Reduce, received data from all children ")); - data_buffer = recv_buffer; - - } else { - - PTPCOLL_VERBOSE(1, ("reduce root is started")); - return BCOL_FN_STARTED; - } - - /* I'm root, I'm done */ - if (input_args->root_flag) { - return BCOL_FN_COMPLETE; - } - - -NARRAY_SEND_DATA: - - /* - * Send the data (the reduction result in the case of internal nodes, or just - * the local data in the case of leaf nodes) to the parent - */ - narray_node = &ptpcoll_module->narray_node[relative_group_index]; - - parent_rank = - ptpcoll_module->narray_node[relative_group_index].parent_rank + - group_root_index; - if (parent_rank >= group_size) { - parent_rank -= group_size; - } - - comm_parent_rank = group_list[parent_rank]; - PTPCOLL_VERBOSE(1,("Sending data to %d ",comm_parent_rank)); - - rc = MCA_PML_CALL(isend(data_buffer, pack_len, MPI_BYTE, - comm_parent_rank, - tag, MCA_PML_BASE_SEND_STANDARD, comm, send_request)); - if( OMPI_SUCCESS != rc ) { - PTPCOLL_VERBOSE(10, ("Failed to send data")); - return OMPI_ERROR; - } - - /* We cannot block, so run a couple of tests for completion */ - if (0 == mca_bcol_ptpcoll_test_for_match(send_request, &rc)) { - PTPCOLL_VERBOSE(10, ("Test was not matched - %d", rc)); - /* The send has not completed yet; report that the collective was started */ - return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED; - } - - return BCOL_FN_COMPLETE; -} - - -int bcol_ptpcoll_reduce_init(mca_bcol_base_module_t *super) -{ - mca_bcol_base_coll_fn_comm_attributes_t comm_attribs; - mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs; - - PTPCOLL_VERBOSE(1,("Initialization Reduce - Narray")); - comm_attribs.bcoll_type = BCOL_REDUCE; - comm_attribs.comm_size_min = 0; - comm_attribs.comm_size_max = 1024 * 1024; - comm_attribs.waiting_semantics = NON_BLOCKING; - - inv_attribs.bcol_msg_min = 0; - inv_attribs.bcol_msg_max = 20000; /* range 1 */ - - inv_attribs.datatype_bitmap = 0xffffffff; - inv_attribs.op_types_bitmap = 0xffffffff; - - - comm_attribs.data_src = DATA_SRC_KNOWN; - mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, - bcol_ptpcoll_reduce_narray, - bcol_ptpcoll_reduce_narray_progress); - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_reduce.h b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_reduce.h deleted file mode 100644 index 195ce7fad9..0000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_reduce.h +++ /dev/null @@ -1,25 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_PTPCOLL_REDUCE_H -#define MCA_BCOL_PTPCOLL_REDUCE_H - -#include "ompi_config.h" -#include "bcol_ptpcoll.h" -#include "bcol_ptpcoll_utils.h" - -BEGIN_C_DECLS - -int bcol_ptpcoll_reduce_init(mca_bcol_base_module_t *super); - -END_C_DECLS - -#endif /* MCA_BCOL_PTPCOLL_REDUCE_H */ diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_utils.c b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_utils.c deleted file mode 100644 index 9677c4ba93..0000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_utils.c +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "ompi_config.h" - -#include "bcol_ptpcoll.h" -#include "bcol_ptpcoll_utils.h" - -/* - * Return floor(log_k(number)): the exponent of the largest power of K that - * does not exceed the number; that power itself is stored in *out_number. - */ -int pow_k_calc(int k, int number, int *out_number) -{ - int power = 0; - int n = 1; - - while (n < number) { - n *= k; - ++power; - } - - if (n > number) { - n /= k; - --power; - } - if (NULL != out_number) { - *out_number = n; - } - - return power; -} - -/* - * Communicator rank to group index conversion function for K-nomial tree. - * Complexity: (K-1) log_K N - * - * Input: - * my_group_index - my process index in the group - * comm_source - the communicator rank of the source of data - * radix - radix of K-nomial tree - * group_size - the size of my group - * group_array[] - one to one map from group index to communicator rank - * - * Output: - * Group index for comm_source. - */ - -int get_group_index_and_distance_for_binomial(int my_group_index, int comm_source, - int group_size, int *group_array, int *pow_distance) -{ - int group_index; - int i; - *pow_distance = 0; - - for (i = 1; i < group_size; i<<=1, (*pow_distance)++) { - group_index = my_group_index ^ i; - if (comm_source == group_array[group_index]) { - return group_index; - } - } - - *pow_distance = -1; - return -1; -} - -int get_group_index_and_distance_for_k_nomial(int my_group_index, int comm_source, int radix, - int group_size, int *group_array, int *pow_distance) -{ - int group_index; - int offset = 1; /* offset equal to 1 (radix_power) */ - int radix_power = 1; /* radix power 0 */ - *pow_distance = 0; - - /* - * Go through the range of possible offsets from my rank; - * for each offset we calculate the k-nomial tree root.
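pow_k_calc above overshoots by factors of k and then steps back once if needed, so it returns floor(log_k(number)) together with the matching power of k. A worked call:

int nearest;
int power = pow_k_calc(2, 10, &nearest);
/* n: 1 -> 2 -> 4 -> 8 -> 16 (power 4); 16 > 10, so step back: n = 8,
 * power = 3. Result: power == 3 and nearest == 8, the largest power
 * of 2 that does not exceed 10. */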
- */ - while(offset < group_size) { - /* K-nomial tree root calculation for the offset */ - if (offset % (radix * radix_power)) { - group_index = my_group_index - offset; - /* wrap around if the group is negative */ - if (group_index < 0) { - group_index += group_size; - } - PTPCOLL_VERBOSE(10, ("Checking %d", group_index)); - if (comm_source == group_array[group_index]) { - return group_index; - } - offset += radix_power; - } else { - /* we done with the section of the tree, go to next one */ - radix_power *= radix; - (*pow_distance)++; - } - } - - /* No source was found, return -1 */ - *pow_distance = -1; - return -1; -} - -int get_group_index_for_k_nomial(int my_group_index, int comm_source, int radix, int group_size, int *group_array) -{ - int group_index; - int radix_power = 1; /* radix power 0 */ - int offset = 1; /* offset equal to 1 (radix_power) */ - - /* - * Go trough range of possible offsets from my rank, - * for each offset we calculate k-nomial tree root. - */ - while(offset < group_size) { - /* K-nomial tree root calculation for the offset */ - if (offset % (radix * radix_power)) { - group_index = my_group_index - offset; - /* wrap around if the group is negative */ - if (group_index < 0) { - group_index += group_size; - } - if (comm_source == group_array[group_index]) { - return group_index; - } - offset += radix_power; - } else { - /* we done with the section of the tree, go to next one */ - radix_power *= radix; - } - } - - /* No source was found, return -1 */ - return -1; -} diff --git a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_utils.h b/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_utils.h deleted file mode 100644 index 231a9f139b..0000000000 --- a/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_utils.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, LLC. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_PTPCOLL_UTILS_H -#define MCA_BCOL_PTPCOLL_UTILS_H - -#include "ompi_config.h" - -#include "ompi/mca/rte/rte.h" - -BEGIN_C_DECLS - -/* - * Return closet power of K, for the number - */ -int pow_k_calc(int k, int number, int *out_number); - -/* - * Communicator rank to group index conversion function for K-nomial tree. - */ -int get_group_index_for_k_nomial(int my_group_index, int comm_source, int radix, int group_size, int *group_array); - -/* the same like above, just more information on return */ -int get_group_index_and_distance_for_k_nomial(int my_group_index, int comm_source, int radix, - int group_size, int *group_array, int *pow_distance); - -int get_group_index_and_distance_for_binomial(int my_group_index, int comm_source, - int group_size, int *group_array, int *pow_distance); -/* - * Error and debug Macros/Functions - */ -static inline int mca_bcol_ptpcoll_err(const char* fmt, ...) 
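For contrast with the k-nomial walk above: the binomial variant earlier in this file needs no modular walk at all, because binomial partners differ from my_group_index in exactly one bit, so the search simply flips each bit in turn. The same logic restated compactly:

/* Binomial partner search: candidates are my_index ^ 2^d for each bit d. */
static int find_binomial_source(int my_index, int comm_source, int group_size,
                                const int *group_array, int *pow_distance)
{
    for (int i = 1, d = 0; i < group_size; i <<= 1, d++) {
        if (comm_source == group_array[my_index ^ i]) {
            *pow_distance = d;
            return my_index ^ i;       /* group index of the source     */
        }
    }
    *pow_distance = -1;                /* not a partner at any distance */
    return -1;
}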
-{ - va_list list; - int ret; - - va_start(list, fmt); - ret = vfprintf(stderr, fmt, list); - va_end(list); - return ret; -} - -#define PTPCOLL_ERROR(args) \ - do { \ - mca_bcol_ptpcoll_err("[%s]%s[%s:%d:%s] PTPCOLL ", \ - ompi_process_info.nodename, \ - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \ - __FILE__, __LINE__, __func__); \ - mca_bcol_ptpcoll_err args; \ - mca_bcol_ptpcoll_err("\n"); \ - } while(0) - -#if OPAL_ENABLE_DEBUG -#define PTPCOLL_VERBOSE(level, args) \ - do { \ - if (mca_bcol_ptpcoll_component.verbose >= level) { \ - mca_bcol_ptpcoll_err("[%s]%s[%s:%d:%s] PTPCOLL ", \ - ompi_process_info.nodename, \ - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \ - __FILE__, __LINE__, __func__); \ - mca_bcol_ptpcoll_err args; \ - mca_bcol_ptpcoll_err("\n"); \ - } \ - } while(0) -#else -#define PTPCOLL_VERBOSE(level, args) -#endif - -END_C_DECLS - -#endif diff --git a/ompi/mca/bcol/ptpcoll/owner.txt b/ompi/mca/bcol/ptpcoll/owner.txt deleted file mode 100644 index 1c86df367b..0000000000 --- a/ompi/mca/bcol/ptpcoll/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: ORNL -status: unmaintained diff --git a/ompi/mca/coll/ml/Makefile.am b/ompi/mca/coll/ml/Makefile.am deleted file mode 100644 index a4022ebf7d..0000000000 --- a/ompi/mca/coll/ml/Makefile.am +++ /dev/null @@ -1,89 +0,0 @@ -# -# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. -# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. -# Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights -# reserved. -# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2016 Research Organization for Information Science -# and Technology (RIST). All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -AM_LFLAGS = -Pcoll_ml_config_yy -LEX_OUTPUT_ROOT = lex.coll_ml_config_yy - -dist_ompidata_DATA = \ - mca-coll-ml.config \ - help-mpi-coll-ml.txt - -sources = coll_ml.h \ - coll_ml_inlines.h \ - coll_ml_module.c \ - coll_ml_allocation.h \ - coll_ml_allocation.c \ - coll_ml_barrier.c \ - coll_ml_bcast.c \ - coll_ml_colls.h \ - coll_ml_component.c \ - coll_ml_copy_fns.c \ - coll_ml_descriptors.c \ - coll_ml_functions.h \ - coll_ml_hier_algorithms.c \ - coll_ml_hier_algorithms_setup.c \ - coll_ml_hier_algorithms_bcast_setup.c \ - coll_ml_hier_algorithms_allreduce_setup.c \ - coll_ml_hier_algorithms_reduce_setup.c \ - coll_ml_hier_algorithms_common_setup.c \ - coll_ml_hier_algorithms_common_setup.h \ - coll_ml_hier_algorithms_allgather_setup.c \ - coll_ml_hier_algorithm_memsync_setup.c \ - coll_ml_custom_utils.h \ - coll_ml_custom_utils.c \ - coll_ml_progress.c \ - coll_ml_reduce.c \ - coll_ml_allreduce.c \ - coll_ml_allgather.c \ - coll_ml_mca.h \ - coll_ml_mca.c \ - coll_ml_lmngr.h \ - coll_ml_lmngr.c \ - coll_ml_hier_algorithms_barrier_setup.c \ - coll_ml_select.h \ - coll_ml_select.c \ - coll_ml_memsync.c \ - coll_ml_lex.h \ - coll_ml_lex.l \ - coll_ml_config.c \ - coll_ml_config.h - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -component_noinst = -component_install = -if MCA_BUILD_ompi_coll_ml_DSO -component_install += mca_coll_ml.la -else -component_noinst += libmca_coll_ml.la -endif - -# See ompi/mca/btl/ml/Makefile.am for an explanation of -# libmca_common_ml.la. 
- -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_coll_ml_la_SOURCES = $(sources) -mca_coll_ml_la_LDFLAGS = -module -avoid-version -mca_coll_ml_la_LIBADD = - - -noinst_LTLIBRARIES = $(component_noinst) -libmca_coll_ml_la_SOURCES =$(sources) -libmca_coll_ml_la_LDFLAGS = -module -avoid-version - -maintainer-clean-local: - rm -f coll_ml_lex.c diff --git a/ompi/mca/coll/ml/coll_ml.h b/ompi/mca/coll/ml/coll_ml.h deleted file mode 100644 index ab03c4f3e4..0000000000 --- a/ompi/mca/coll/ml/coll_ml.h +++ /dev/null @@ -1,1022 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file */ - -#ifndef MCA_COLL_ML_ML_H -#define MCA_COLL_ML_ML_H - -#include "ompi_config.h" - -#include "ompi/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/datatype/opal_convertor.h" -#include "opal/threads/mutex.h" - -#include "ompi/mca/coll/coll.h" -#include "ompi/request/request.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/sbgp/sbgp.h" -#include "ompi/op/op.h" -#include "opal/class/opal_free_list.h" - -#include "coll_ml_lmngr.h" -#include "coll_ml_functions.h" -#include "coll_ml_colls.h" -#include "coll_ml_allocation.h" -#include "coll_ml_config.h" - -BEGIN_C_DECLS - -/* macros for return status */ -enum { - ML_OMPI_COMPLETE = 1, - ML_OMPI_INCOMPLETE -}; - -enum { - ML_SMALL_MSG, - ML_LARGE_MSG, - ML_NUM_MSG -}; - -/* ML collectives IDs */ -enum { - /* blocking functions */ - ML_ALLGATHER, - ML_ALLGATHERV, - ML_ALLREDUCE, - ML_ALLTOALL, - ML_ALLTOALLV, - ML_ALLTOALLW, - ML_BARRIER, - ML_BCAST, - ML_EXSCAN, - ML_GATHER, - ML_GATHERV, - ML_REDUCE, - ML_REDUCE_SCATTER, - ML_SCAN, - ML_SCATTER, - ML_SCATTERV, - ML_FANIN, - ML_FANOUT, - - /* nonblocking functions */ - ML_IALLGATHER, - ML_IALLGATHERV, - ML_IALLREDUCE, - ML_IALLTOALL, - ML_IALLTOALLV, - ML_IALLTOALLW, - ML_IBARRIER, - ML_IBCAST, - ML_IEXSCAN, - ML_IGATHER, - ML_IGATHERV, - ML_IREDUCE, - ML_IREDUCE_SCATTER, - ML_ISCAN, - ML_ISCATTER, - ML_ISCATTERV, - ML_IFANIN, - ML_IFANOUT, - ML_NUM_OF_FUNCTIONS -}; - -/* ML broadcast algorithms */ -enum { - COLL_ML_STATIC_BCAST, - COLL_ML_SEQ_BCAST, - COLL_ML_UNKNOWN_BCAST, -}; - -struct mca_bcol_base_module_t; - -/* collective function arguments - gives - * one function signature for calling all collective setup - * routines, with the initial call to a collective function having - * the context to access the right parts of the data structure. - * this information is used by each of the setup functions to - * setup the correct information for each of the functions in the - * hierarchy that will be called. 
 */ - -/* RLG NOTE: Need to figure out what arguments to store here, - * and which ones directly in the message descriptor - */ -struct mpi_coll_fn_params_t { - union { - struct { - ompi_communicator_t *comm; - int n_fanin_steps; - int n_fanout_steps; - int n_recursive_doubling_steps; - } ibarrier_recursive_doubling; - - struct { - int root; - ompi_communicator_t *comm; - struct ompi_datatype_t *datatype; - } ibcast; - } coll_fn; -}; -typedef struct mpi_coll_fn_params_t mpi_coll_fn_params_t; - -/* algorithm parameters needed for the setup function */ -struct mpi_coll_algorithm_params_t { - union { - struct { - int n_fanin_steps; - int n_fanout_steps; - int n_recursive_doubling_steps; - } ibarrier_recursive_doubling; - - struct { - int place_holder; - } ibcast; - } coll_fn; -}; -typedef struct mpi_coll_algorithm_params_t mpi_coll_algorithm_params_t; - -/* setup function - used to set up each segment (or fragment) - * to be processed - */ -struct mca_coll_ml_module_t; -struct mca_coll_ml_topology_t; - -typedef int (*coll_fragment_comm_setup_fn)(struct mca_coll_ml_module_t *ml_module, - mpi_coll_fn_params_t *fn_params, mpi_coll_algorithm_params_t *algorithm_params); -/* full collective description */ -struct coll_ml_collective_description_t { - /* number of temp buffers */ - int n_buffers; - - /* description size */ - int n_functions; - - /* collective setup function - called for every non-blocking - * function, and for each fragment of such a message - */ - coll_fragment_comm_setup_fn *coll_fn_setup_fn; - - /* algorithm parameters */ - mpi_coll_algorithm_params_t alg_params; - - /* list of functions */ - mca_bcol_base_function_t *functions; - - /* function names - for debugging */ - char **function_names; - - /* Signalling collective completion */ - bool completion_flag; -}; - -typedef struct coll_ml_collective_description_t coll_ml_collective_description_t; - -/* Utility data structure */ -struct rank_properties_t { - int rank; - int leaf; - int num_of_ranks_represented; -}; typedef struct rank_properties_t rank_properties_t; - -/* data structure for holding node information for the nodes of the - * hierarchical communications tree. - */ -struct sub_group_params_t { - /* rank of root in the communicator */ - int root_rank_in_comm; - - /* index in subgroup */ - int root_index; - - /* number of ranks in subgroup */ - int n_ranks; - - /* index of the first element in the subgroup. The - * assumption is that - * ranks for all subgroups are stored in a single - * linear array - */ - int index_of_first_element; - - /* - * level in the hierarchy - subgroups at the same - * level don't overlap. May not be the same as the - * sbgp level. - */ - int level_in_hierarchy; - - /* - * Information on the ranks in the subgroup. This includes - * the rank, and whether or not the rank is a source/sink - * of data in this subgroup, or just a "pass through". - */ - rank_properties_t *rank_data; - - /* level one index - for example, - for( i = 0; i < level_one_index; i++) will loop - through all level one subgroups, this is significant - since level one is a disjoint partitioning of all ranks - i.e. all ranks appear once and only once at level one - */ - int level_one_index; -}; -typedef struct sub_group_params_t sub_group_params_t;
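
Because every subgroup's member list lives in one shared linear array, index_of_first_element and n_ranks are enough to walk the level-one partition. A self-contained sketch with pared-down stand-in types (hypothetical values; the real traversal of this layout appears later in coll_ml_allgather.c's unpack path):

#include <stdio.h>

/* pared-down stand-ins for the sub_group_params_t fields used in the walk */
struct subgroup {
    int n_ranks;
    int index_of_first_element; /* offset into the shared linear rank array */
    int level_one_index;
};

/* visit every (subgroup, member) pair of the level-one partition; since the
 * level-one subgroups are a disjoint cover of all ranks, each rank is
 * visited exactly once */
static void walk_level_one(const struct subgroup *sg, const int *linear_ranks)
{
    int n_level_one = sg[0].level_one_index;
    for (int i = 0; i < n_level_one; i++) {
        for (int j = 0; j < sg[i].n_ranks; j++) {
            int slot = sg[i].index_of_first_element + j;
            printf("subgroup %d member %d -> rank %d\n", i, j, linear_ranks[slot]);
        }
    }
}

int main(void)
{
    /* hypothetical two level-one subgroups covering ranks {0,1,2} and {3,4} */
    struct subgroup sg[2] = { {3, 0, 2}, {2, 3, 2} };
    int linear_ranks[5] = {0, 1, 2, 3, 4};
    walk_level_one(sg, linear_ranks);
    return 0;
}
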
 - -/* function to set up information on the order of a given bcol within - * a specific ML-level algorithm. - */ -int mca_coll_ml_setup_scratch_vals(mca_coll_ml_compound_functions_t *func_list, - int *scratch_indx, int *scratch_num, int n_hiers); - -/* driver for setting up collective communication description */ - -int ml_coll_schedule_setup(struct mca_coll_ml_module_t *ml_module); - -int ml_coll_up_and_down_hier_setup( - struct mca_coll_ml_module_t *ml_module, - struct mca_coll_ml_topology_t *topo_info, - int up_function_idx, - int top_function_idx, - int down_function_idx, - int collective); - -int ml_coll_barrier_constant_group_data_setup( - struct mca_coll_ml_topology_t *topo_info, - mca_coll_ml_collective_operation_description_t *schedule); - -/* Barrier */ -int ml_coll_hier_barrier_setup(struct mca_coll_ml_module_t *ml_module); - -/* allreduce */ -int ml_coll_hier_allreduce_setup(struct mca_coll_ml_module_t *ml_module); -int ml_coll_hier_allreduce_setup_new(struct mca_coll_ml_module_t *ml_module); -void ml_coll_hier_allreduce_cleanup_new(struct mca_coll_ml_module_t *ml_module); - -/* alltoall */ -int ml_coll_hier_alltoall_setup(struct mca_coll_ml_module_t *ml_module); -int ml_coll_hier_alltoall_setup_new(struct mca_coll_ml_module_t *ml_module); - -/* allgather */ -int ml_coll_hier_allgather_setup(struct mca_coll_ml_module_t *ml_module); -void ml_coll_hier_allgather_cleanup(struct mca_coll_ml_module_t *ml_module); - -/* gather */ -int ml_coll_hier_gather_setup(struct mca_coll_ml_module_t *ml_module); - -/* broadcast */ -int ml_coll_hier_bcast_setup(struct mca_coll_ml_module_t *ml_module); -void ml_coll_hier_bcast_cleanup(struct mca_coll_ml_module_t *ml_module); - -/* reduce */ -int ml_coll_hier_reduce_setup(struct mca_coll_ml_module_t *ml_module); -void ml_coll_hier_reduce_cleanup(struct mca_coll_ml_module_t *ml_module); - -/* scatter */ -int ml_coll_hier_scatter_setup(struct mca_coll_ml_module_t *ml_module); - -/* alltoall */ -int mca_coll_ml_alltoall(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); - -int mca_coll_ml_alltoall_nb(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - ompi_request_t **req, - mca_coll_base_module_t *module); - - -/* allgather */ -int mca_coll_ml_allgather(const void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); - -/* non-blocking allgather */ -int mca_coll_ml_allgather_nb(const void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - ompi_request_t **req, - mca_coll_base_module_t *module); - -/* gather */ -int mca_coll_ml_gather(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); - -/* nonblocking Barrier */ -int ml_coll_hier_nonblocking_barrier_setup(struct mca_coll_ml_module_t *ml_module, struct mca_coll_ml_topology_t *topo_info); - -/* Memory synchronization collective setup */ -int ml_coll_memsync_setup(struct mca_coll_ml_module_t *ml_module); - -/* Fragment descriptor */ -struct mca_coll_ml_descriptor_t; -struct mca_coll_ml_fragment_t { - opal_list_item_t super; - - struct mca_coll_ml_descriptor_t *full_msg_descriptor; - int offset;
/* offset for progress pointer */ - int length; /* fragment length */ - opal_convertor_t convertor; /* convertor for copy/pack data */ - - /* current function index */ - int current_fn_index; - - /* array of function arguments */ - struct bcol_function_args_t *fn_args; - -}; -typedef struct mca_coll_ml_fragment_t mca_coll_ml_fragment_t; -OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_coll_ml_fragment_t); - -#define MCA_COLL_ML_NO_BUFFER -1 - -#define MCA_COLL_IBOFFLOAD_SET_ML_BUFFER_INFO(coll_op, index, desc) \ -do { \ - (coll_op)->variable_fn_params.buffer_index = index; \ - (coll_op)->fragment_data.buffer_desc = desc; \ - /* pasha - why do we duplicate it ? */ \ - (coll_op)->variable_fn_params.src_desc = desc; \ - (coll_op)->variable_fn_params.hier_factor = 1; \ - (coll_op)->variable_fn_params.need_dt_support = false; \ -} while (0) - -/* Full message descriptor */ -struct mca_coll_ml_descriptor_t { - ompi_request_t super; /* base request */ - struct ompi_datatype_t *datatype; /* ompi datatype */ - size_t count; /* count of user datatype elements */ - uint32_t sequence_num; /* sequence number for collective operation */ - size_t frags_limit; /* upper limit on # of fragments */ - size_t frags_start; /* number of fragments started */ - - /* number of fragments completed */ - size_t frags_complete; - - /* number of fragments needed to process this message */ - size_t n_fragments; - - volatile bool free_resource; /* signals release resource */ - - /* pointer to reduction operation, e.g. MPI_MIN - need to handle - * user defined functions also */ - /* ompi_predefined_op_t *operation; */ - - /* pointer to a communication schedule, data struct undefined */ - struct coll_ml_collective_description_t *local_comm_description; - - /* fragment descriptor - we always have a fragment descriptor - * if we get a full message descriptor. Optimization for - * small messages */ - mca_coll_ml_fragment_t fragment; - /* The ML memory buffer index that contains the send and - receive information; if the index is -1, no buffer was allocated */ - uint64_t buffer_index; -}; -typedef struct mca_coll_ml_descriptor_t mca_coll_ml_descriptor_t; -OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_coll_ml_descriptor_t); - -/* sbgp and bcol module pairs */ -struct hierarchy_pairs { - mca_sbgp_base_module_t *subgroup_module; - struct mca_bcol_base_module_t **bcol_modules; - int num_bcol_modules; - int bcol_index; - mca_bcol_base_component_t *bcol_component; -}; -typedef struct hierarchy_pairs hierarchy_pairs; - -/* list of ranks in each group */ -struct ml_level_t { - int n_modules; - hierarchy_pairs *modules; -}; - -typedef struct ml_level_t ml_level_t; - -enum { - COLL_ML_HR_FULL, /* Full hierarchy topology; all bcols and sbgps participate in discovery */ - COLL_ML_HR_ALLREDUCE, - COLL_ML_HR_NBS, /* All hierarchy except base socket */ - COLL_ML_HR_SINGLE_PTP, /* Single flat ptp hierarchy */ - COLL_ML_HR_SINGLE_IBOFFLOAD, /* Single flat iboffload hierarchy */ - COLL_ML_TOPO_MAX -}; - -/* Topology-hierarchy discovery function */ -struct mca_coll_ml_module_t; /* forward declaration for the function */ - -typedef int (* mca_coll_topo_discovery_fn_t) - (struct mca_coll_ml_module_t *ml_module, int n_hierarchies); - -typedef enum { - COLL_ML_TOPO_DISABLED = 0, - COLL_ML_TOPO_ENABLED = 1 -} topo_status_t;
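
Each topology index gets its own discovery hook through a function-pointer table (topo_discovery_fn[COLL_ML_TOPO_MAX] in the component struct below). A simplified, self-contained sketch of that dispatch pattern, with hypothetical names standing in for the mca_coll_ml_*_discovery functions declared further down:

#include <stdio.h>

enum { TOPO_FULL, TOPO_ALLREDUCE, TOPO_MAX };

typedef int (*discovery_fn_t)(void *module, int n_hierarchies);

static int full_discovery(void *module, int n_hierarchies)
{
    (void) module;
    printf("full-tree discovery over %d hierarchies\n", n_hierarchies);
    return 0;
}

static int allreduce_discovery(void *module, int n_hierarchies)
{
    (void) module;
    printf("allreduce-specific discovery over %d hierarchies\n", n_hierarchies);
    return 0;
}

int main(void)
{
    /* one entry per topology index, as in topo_discovery_fn[COLL_ML_TOPO_MAX] */
    discovery_fn_t table[TOPO_MAX] = { full_discovery, allreduce_discovery };

    for (int topo = 0; topo < TOPO_MAX; topo++) {
        table[topo](NULL, 2); /* each topology runs its own discovery pass */
    }
    return 0;
}
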
 - -/** - * Structure to hold the ML coll component. First it holds the - * base coll component, and then holds a bunch of - * ML-component-specific stuff (e.g., current MCA parameter - * values). - */ -struct mca_coll_ml_component_t { - /** Base coll component */ - mca_coll_base_component_2_0_0_t super; - - /** MCA parameter: Priority of this component */ - int ml_priority; - - /** MCA parameter: subgrouping components to use */ - char *subgroups_string; - - /** MCA parameter: basic collectives components to use */ - char *bcols_string; - - /** verbosity level */ - int verbose; - - /** maximum number of communicators allowed to run ML */ - unsigned int max_comm; - - /** minimum communicator size for ML to be used */ - int min_comm_size; - - /* base sequence number to use - the expectation is that - * this will be used as a basis for generating IDs for - * specific collective operations - */ - int64_t base_sequence_number; - - /** memory pool */ - mca_coll_ml_lmngr_t memory_manager; - - /* We need it because some bcols cannot - support all possible allreduce data types */ - bool need_allreduce_support; - - int use_knomial_allreduce; - - /* use hdl_framework */ - bool use_hdl_bcast; - - /* Enable / Disable fragmentation (0 - off, 1 - on, 2 - auto) */ - int enable_fragmentation; - - /* Broadcast algorithm */ - int bcast_algorithm; - - /* frag size that is used by list memory_manager */ - size_t lmngr_block_size; - - /* alignment that is used by list memory_manager */ - size_t lmngr_alignment; - - /* list size for memory_manager */ - size_t lmngr_size; - - /* number of payload memory banks */ - int n_payload_mem_banks; - - /* number of payload buffers per bank */ - int n_payload_buffs_per_bank; - - /* size of payload buffer */ - unsigned long long payload_buffer_size; - - /* pipeline depth for msg fragmentation */ - int pipeline_depth; - - /* Free list tunings */ - int free_list_init_size; - - int free_list_grow_size; - - int free_list_max_size; - - /* - * queues for asynchronous collective progress - */ - /* tasks that have not started, either because dependencies are not - * satisfied, or resources are lacking - */ - opal_list_t pending_tasks; - opal_mutex_t pending_tasks_mutex; - - /* active incomplete tasks */ - opal_list_t active_tasks; - opal_mutex_t active_tasks_mutex; - - /* sequential collectives to progress */ - opal_list_t sequential_collectives; - opal_mutex_t sequential_collectives_mutex; - - bool progress_is_busy; - - /* Temporary hack for IMB test - not all bcols have allgather */ - bool disable_allgather; - - /* Temporary hack for IMB test - not all bcols have alltoall */ - bool disable_alltoall; - - /* Disable Reduce */ - bool disable_reduce; - - /* Brucks alltoall mca and other params */ - int use_brucks_smsg_alltoall; - - mca_coll_topo_discovery_fn_t topo_discovery_fn[COLL_ML_TOPO_MAX]; - - /* Configure file for collectives */ - char *config_file_name; - - per_collective_configuration_t coll_config[ML_NUM_OF_FUNCTIONS][ML_NUM_MSG]; -}; - -/** - * Convenience typedef - */ -typedef struct mca_coll_ml_component_t mca_coll_ml_component_t; - -/** - * Global component instance - */ -OMPI_MODULE_DECLSPEC extern mca_coll_ml_component_t mca_coll_ml_component; - -struct mca_coll_ml_leader_offset_info_t { - size_t offset; - int level_one_index; - bool leader; -}; -typedef struct mca_coll_ml_leader_offset_info_t mca_coll_ml_leader_offset_info_t;
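
The three mutex-guarded lists in the component struct above (pending_tasks, active_tasks, sequential_collectives) all follow the same deferred-start pattern that the sources express with OPAL_THREAD_LOCK / opal_list_append / OPAL_THREAD_UNLOCK. In plain pthreads terms, with a toy list standing in for opal_list_t, the pattern is just:

#include <pthread.h>
#include <stdio.h>

/* toy intrusive list node standing in for opal_list_item_t */
struct task {
    struct task *next;
    int id;
};

static struct task *pending_tasks;                 /* cf. cm->pending_tasks */
static pthread_mutex_t pending_tasks_mutex = PTHREAD_MUTEX_INITIALIZER;

/* park a task that could not start yet; the progress loop drains it later */
static void defer_task(struct task *t)
{
    pthread_mutex_lock(&pending_tasks_mutex);
    t->next = pending_tasks;
    pending_tasks = t;
    pthread_mutex_unlock(&pending_tasks_mutex);
}

int main(void)
{
    struct task t = { NULL, 42 };
    defer_task(&t);
    printf("task %d parked on the pending list\n", pending_tasks->id);
    return 0;
}
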
 - -/* Topology data structure */ -struct mca_coll_ml_topology_t { - topo_status_t status; /* COLL_ML_TOPO_ENABLED or COLL_ML_TOPO_DISABLED */ - /* information on the selected groups - needed for collective - ** algorithms */ - int32_t global_lowest_hier_group_index; - int32_t global_highest_hier_group_index; - int number_of_all_subgroups; - int n_levels; - /* bcols - bits that describe supported features/modes */ - uint64_t all_bcols_mode; - mca_bcol_base_route_info_t *route_vector; - coll_ml_collective_description_t *hierarchical_algorithms[BCOL_NUM_OF_FUNCTIONS]; - sub_group_params_t *array_of_all_subgroups; - /* (sbgp, bcol) pairs */ - hierarchy_pairs *component_pairs; - /* ordering of ranks when I am the root of the operation. - * This ordering guarantees that data need to be re-ordered - * only at the first or last step in rooted operations, - * depending on whether the operation is a scatter or - * gather operation. - */ - int *sort_list; - mca_coll_ml_leader_offset_info_t *hier_layout_info; - /* are ranks laid out contiguously */ - bool ranks_contiguous; - struct ordering_info_t { - int next_inorder; - int next_order_num; - int num_bcols_need_ordering; - } topo_ordering_info; -}; -typedef struct mca_coll_ml_topology_t mca_coll_ml_topology_t; - -struct mca_coll_ml_bcol_list_item_t { - opal_list_item_t super; - mca_bcol_base_module_t *bcol_module; -}; -typedef struct mca_coll_ml_bcol_list_item_t mca_coll_ml_bcol_list_item_t; -OPAL_DECLSPEC OBJ_CLASS_DECLARATION(mca_coll_ml_bcol_list_item_t); - -#define MCA_COLL_MAX_NUM_COLLECTIVES 40 /* upper bound; the exact number of collectives is not tracked here */ -#define MCA_COLL_MAX_NUM_SUBTYPES 15 /* Maximum number of algorithms per collective */ - -struct mca_coll_ml_module_t { - /* base structure */ - mca_coll_base_module_t super; - - /* ML module status: false - not initialized, true - initialized */ - bool initialized; - /* communicator */ - struct ompi_communicator_t *comm; - - /* reference convertor */ - opal_convertor_t *reference_convertor; - - mca_coll_ml_topology_t topo_list[COLL_ML_TOPO_MAX]; - - /* Collectives - Topology map */ - int collectives_topology_map - [MCA_COLL_MAX_NUM_COLLECTIVES][MCA_COLL_MAX_NUM_SUBTYPES]; - - /* largest number of function calls for the collective routines. - * This is used to allocate resources */ - int max_fn_calls; - - /* collective sequence number - unique id for barrier type operations */ - int32_t no_data_collective_sequence_num; - - /* collective sequence number - unique id for each collective */ - int32_t collective_sequence_num; - - /** ompi free list of full message descriptors **/ - opal_free_list_t message_descriptors; - - /** ompi free list of message fragment descriptors **/ - opal_free_list_t fragment_descriptors; - - /** pointer to the payload memory block **/ - struct mca_bcol_base_memory_block_desc_t *payload_block; - - /** the maximum size of collective function description */ - int max_dag_size; - - /** data used to initialize coll_ml_collective_descriptors */ - struct coll_desc_init { - int max_dag_size; - size_t max_n_bytes_per_proc_total; - mca_coll_base_module_t *bcol_base_module; - } coll_desc_init_data; - - /** collective operation descriptor free list - used to manage a single - * collective operation. */ - opal_free_list_t coll_ml_collective_descriptors; - - /** multiple function collective operation support */ - /** broadcast */ - mca_coll_ml_collective_operation_description_t * - coll_ml_bcast_functions[ML_NUM_BCAST_FUNCTIONS]; - - /* bcast size selection criteria - cutoff for the largest size of - * data for which to apply the specified collective operation.
 - * This gives us the ability to choose the algorithm based on size */ - size_t bcast_cutoff_size[ML_N_DATASIZE_BINS]; - - /** Allreduce functions */ - mca_coll_ml_collective_operation_description_t * - coll_ml_allreduce_functions[ML_NUM_ALLREDUCE_FUNCTIONS]; - - /** Reduce functions */ - mca_coll_ml_collective_operation_description_t * - coll_ml_reduce_functions[ML_NUM_REDUCE_FUNCTIONS]; - - - /** scatter */ - mca_coll_ml_collective_operation_description_t * - coll_ml_scatter_functions[ML_NUM_SCATTER_FUNCTIONS]; - - /** alltoall */ - mca_coll_ml_collective_operation_description_t * - coll_ml_alltoall_functions[ML_NUM_ALLTOALL_FUNCTIONS]; - - /** allgather */ - mca_coll_ml_collective_operation_description_t * - coll_ml_allgather_functions[ML_NUM_ALLGATHER_FUNCTIONS]; - - /** gather */ - mca_coll_ml_collective_operation_description_t * - coll_ml_gather_functions[ML_NUM_GATHER_FUNCTIONS]; - - /** Barrier */ - mca_coll_ml_collective_operation_description_t * - coll_ml_barrier_function; - - /** ML Memory Synchronization collective operation */ - mca_coll_ml_collective_operation_description_t * - coll_ml_memsync_function; - - /** The table of allreduce functions for specific type and op **/ - bool allreduce_matrix[OMPI_OP_NUM_OF_TYPES][OMPI_DATATYPE_MAX_PREDEFINED][BCOL_NUM_OF_ELEM_TYPES]; - - /* data offset from ML */ - int32_t data_offset; - - int small_message_thresholds[BCOL_NUM_OF_FUNCTIONS]; - - /* fragmentation parameters */ - int use_user_buffers; - uint64_t fragment_size; - uint32_t ml_fragment_size; - - /* Bcast index table. Pasha: Do we need to define something more generic ? - the table x 2 (large/small) */ - int bcast_fn_index_table[2]; - - /* List of pointers to bcols that have been initialized and used. - * So far we use it only for ML memory management */ - opal_list_t active_bcols_list; - - /* Buffer size required for Bruck's algorithm */ - int brucks_buffer_threshold_const; - - /* log comm size */ - /* We require this for the alltoall algorithm */ - int log_comm_size; - /* On this list we keep coll_op descriptors that could not - * start because no ML buffers were available */ - opal_list_t waiting_for_memory_list; - - /* fallback collectives */ - mca_coll_base_comm_coll_t fallback; -}; - -typedef struct mca_coll_ml_module_t mca_coll_ml_module_t; -OBJ_CLASS_DECLARATION(mca_coll_ml_module_t); - - -/* query to see if the component is available for use, and can - * satisfy the thread and progress requirements - */ -int mca_coll_ml_init_query(bool enable_progress_threads, - bool enable_mpi_threads);
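
The small_message_thresholds array in the module above drives a three-way message-size dispatch that coll_ml_allgather.c (further below) spells out in full. Schematically, with hypothetical cutoff constants in place of the module fields and MCA parameters:

#include <stddef.h>
#include <stdio.h>

/* hypothetical cutoffs; the real values come from the module and MCA params */
#define SMALL_THRESHOLD ((size_t) 8 * 1024)
#define LARGE_CUTOFF    ((size_t) 1 << 20)

enum path { SINGLE_FRAGMENT, FRAGMENTED, ZERO_COPY };

/* mirrors the branch structure of mca_coll_ml_allgather_start below */
static enum path choose_path(size_t pack_len, int comm_size, int enable_fragmentation)
{
    if (pack_len <= SMALL_THRESHOLD) {
        return SINGLE_FRAGMENT;            /* whole message fits one ML buffer */
    }
    if (enable_fragmentation || pack_len * comm_size < LARGE_CUTOFF) {
        return FRAGMENTED;                 /* pipeline of threshold-sized frags */
    }
    return ZERO_COPY;                      /* large data: work from user buffers */
}

int main(void)
{
    printf("%d %d %d\n",
           choose_path(1024, 16, 1),       /* SINGLE_FRAGMENT */
           choose_path(64 * 1024, 4, 1),   /* FRAGMENTED */
           choose_path(4 << 20, 64, 0));   /* ZERO_COPY */
    return 0;
}
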
 - -/* query to see if the module is available for use on the given - * communicator and, if so, what its priority is. This is where - * the backing shared-memory file is created. - */ -mca_coll_base_module_t * -mca_coll_ml_comm_query(struct ompi_communicator_t *comm, int *priority); - -/* Barrier - blocking */ -int mca_coll_ml_barrier_intra(struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); - -/* Barrier - non-blocking */ -int mca_coll_ml_ibarrier_intra(struct ompi_communicator_t *comm, - ompi_request_t **req, - mca_coll_base_module_t *module); - -/* Allreduce using the EXTRA TOPO - blocking */ -int mca_coll_ml_allreduce_dispatch(const void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, struct ompi_op_t *op, - struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - -/* Allreduce using the EXTRA TOPO - non-blocking */ -int mca_coll_ml_allreduce_dispatch_nb(const void *sbuf, void *rbuf, int count, - ompi_datatype_t *dtype, ompi_op_t *op, - ompi_communicator_t *comm, - ompi_request_t **req, - mca_coll_base_module_t *module); - -/* Allreduce - blocking */ -int mca_coll_ml_allreduce(const void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, struct ompi_op_t *op, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); - -/* Allreduce - non-blocking */ -int mca_coll_ml_allreduce_nb(const void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, struct ompi_op_t *op, - struct ompi_communicator_t *comm, - ompi_request_t **req, - mca_coll_base_module_t *module); - -/* Reduce - blocking */ -int mca_coll_ml_reduce(const void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, struct ompi_op_t *op, - int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); - -int mca_coll_ml_reduce_nb(const void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, struct ompi_op_t *op, - int root, struct ompi_communicator_t *comm, - ompi_request_t **req, - mca_coll_base_module_t *module); - -int mca_coll_ml_memsync_intra(mca_coll_ml_module_t *module, int bank_index); - - -int coll_ml_progress_individual_message(mca_coll_ml_fragment_t *frag_descriptor); - -/* - * the ml entry point for the broadcast function - */ -int mca_coll_ml_parallel_bcast(void *buf, int count, struct ompi_datatype_t *dtype, - int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); -int mca_coll_ml_parallel_bcast_nb(void *buf, int count, struct ompi_datatype_t *dtype, - int root, struct ompi_communicator_t *comm, - ompi_request_t **req, - mca_coll_base_module_t *module); -int mca_coll_ml_bcast_sequential_root(void *buf, int count, struct ompi_datatype_t *dtype, - int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); - -/* - * The ml function interface for non-blocking routines - */ -int mca_coll_ml_bcast_unknown_root_nb(void *buf, int count, struct ompi_datatype_t *dtype, - int root, struct ompi_communicator_t *comm, - ompi_request_t **req, - mca_coll_base_module_t *module); - -int mca_coll_ml_bcast_known_root_nb(void *buf, int count, struct ompi_datatype_t *dtype, - int root, struct ompi_communicator_t *comm, - ompi_request_t **req, - mca_coll_base_module_t *module); -OMPI_DECLSPEC int mca_coll_ml_bcast_unknown_root_with_frags_nb(void *buf, int count, - struct ompi_datatype_t *dtype, - int root, struct ompi_communicator_t *comm, - ompi_request_t **req, mca_coll_base_module_t *module); - -/* This routine sets up a sequential hierarchical scatter algorithm. The - * assumptions are that each rank knows in which sub-group the data will show - * up first, and that the scatter is executed sequentially, one subgroup at a - * time.
This is needed when the full collective must be specified before - * the collective operation starts up. The algorithm handles all data sizes - * and data types. - */ - -OMPI_DECLSPEC int mca_coll_ml_scatter_sequential( - void *sbuf, int scount, struct ompi_datatype_t *sdtype, - void *rbuf, int rcount, struct ompi_datatype_t *rdtype, - int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); - -#if 0 -int mca_coll_ml_bcast_small_dynamic_root(void *buf, int count, struct ompi_datatype_t *dtype, - int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); -int mca_coll_ml_bcast_small_known_root(void *buf, int count, struct ompi_datatype_t *dtype, - int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); -#endif - -/* Topology discovery functions */ - -int mca_coll_ml_fulltree_hierarchy_discovery(mca_coll_ml_module_t *ml_module, - int n_hierarchies); -int mca_coll_ml_allreduce_hierarchy_discovery(mca_coll_ml_module_t *ml_module, - int n_hierarchies); -int mca_coll_ml_fulltree_exclude_basesmsocket_hierarchy_discovery(mca_coll_ml_module_t *ml_module, - int n_hierarchies); -int mca_coll_ml_fulltree_ptp_only_hierarchy_discovery(mca_coll_ml_module_t *ml_module, - int n_hierarchies); -int mca_coll_ml_fulltree_iboffload_only_hierarchy_discovery(mca_coll_ml_module_t *ml_module, - int n_hierarchies); - -void mca_coll_ml_allreduce_matrix_init(mca_coll_ml_module_t *ml_module, - const mca_bcol_base_component_2_0_0_t *bcol_component); -static inline int mca_coll_ml_err(const char* fmt, ...) -{ - va_list list; - int ret; - - va_start(list, fmt); - ret = vfprintf(stderr, fmt, list); - va_end(list); - return ret; -} - - -#define ML_ERROR(args) \ -do { \ - mca_coll_ml_err("[%s]%s[%s:%d:%s] COLL-ML ", \ - ompi_process_info.nodename, \ - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \ - __FILE__, __LINE__, __func__); \ - mca_coll_ml_err args; \ - mca_coll_ml_err("\n"); \ -} while(0) - -#if OPAL_ENABLE_DEBUG -#define ML_VERBOSE(level, args) \ -do { \ - if(mca_coll_ml_component.verbose >= level) { \ - mca_coll_ml_err("[%s]%s[%s:%d:%s] COLL-ML ", \ - ompi_process_info.nodename, \ - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \ - __FILE__, __LINE__, __func__); \ - mca_coll_ml_err args; \ - mca_coll_ml_err("\n"); \ - } \ -} while(0) - -#else -#define ML_VERBOSE(level, args) -#endif - -#define IS_BCOL_TYPE_IDENTICAL(bcol1, bcol2) \ - ( (NULL != bcol1 && NULL != bcol2) && \ - ( /* check if the lengths are the same */ \ - (strlen(((mca_base_component_t *)((bcol1)->bcol_component))->mca_component_name) == \ - strlen(((mca_base_component_t *)((bcol2)->bcol_component))->mca_component_name)) \ - && /* check if the strings are identical */ \ - (0 == strncmp(((mca_base_component_t *)((bcol1)->bcol_component))->mca_component_name, \ - ((mca_base_component_t *)((bcol2)->bcol_component))->mca_component_name, \ - strlen(((mca_base_component_t *)((bcol2)->bcol_component))->mca_component_name))) \ - ) ? true : false) - -#define GET_BCOL(module, indx) ((module)->component_pairs[(indx)].bcol_modules[0]) - -#define GET_BCOL_SYNC_FN(bcol) ((bcol)->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING] \ - [BCOL_SYNC][1][0][0]) - -/* Allocator macros */ -#define BUFFER_INDEX(bank,nbuffs,buffer) (bank*nbuffs+buffer)
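
The ML_GET_FRAG_SIZE macro that follows is dense, but it simply computes min(bytes remaining, per-collective small-message threshold). An equivalent plain function, in a hypothetical standalone form, for clarity:

#include <stddef.h>

/* what ML_GET_FRAG_SIZE (below) computes: the next fragment is either the
 * remaining payload or the per-collective threshold, whichever is smaller */
static size_t ml_frag_size(size_t n_bytes_total, size_t n_bytes_scheduled,
                           size_t small_message_threshold)
{
    size_t remaining = n_bytes_total - n_bytes_scheduled;
    return remaining < small_message_threshold ? remaining : small_message_threshold;
}
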
 - -#define ML_GET_FRAG_SIZE(op, coll) \ - ((op)->fragment_data.message_descriptor->n_bytes_total - \ - (op)->fragment_data.message_descriptor->n_bytes_scheduled < \ - (size_t) OP_ML_MODULE((op))->small_message_thresholds[coll] ? \ - (op)->fragment_data.message_descriptor->n_bytes_total - \ - (op)->fragment_data.message_descriptor->n_bytes_scheduled : \ - (size_t) OP_ML_MODULE((op))->small_message_thresholds[coll]) - -/* Abort the MPI process in case of a fatal error */ -void mca_coll_ml_abort_ml(char *message); - -#define ML_SET_VARIABLE_PARAMS_BCAST(op, ml, cnt, datatype, b_desc, \ - s_offset, r_offset, frag_len, buf) \ -do { \ - op->variable_fn_params.sequence_num = \ - OPAL_THREAD_ADD32(&((ml)->collective_sequence_num), 1); \ - op->variable_fn_params.count = cnt; \ - op->variable_fn_params.dtype = datatype; \ - op->variable_fn_params.buffer_index = (b_desc)->buffer_index; \ - op->variable_fn_params.src_desc = (b_desc); \ - op->variable_fn_params.sbuf_offset = s_offset; \ - op->variable_fn_params.rbuf_offset = r_offset; \ - op->variable_fn_params.frag_size = frag_len; \ - op->variable_fn_params.sbuf = buf; \ -} while (0) - -#define MCA_COLL_ML_OP_BASIC_SETUP(op, total_bytes, offset_into_user_buff, src, dst, collective_schedule) \ - do { \ - op->coll_schedule = collective_schedule; \ - op->process_fn = NULL; \ - op->full_message.n_bytes_total = total_bytes; \ - op->full_message.n_bytes_delivered = 0; \ - op->full_message.n_bytes_scheduled = 0; \ - op->full_message.dest_user_addr = dst; \ - op->full_message.src_user_addr = src; \ - op->full_message.n_active = 0; \ - op->full_message.n_bytes_per_proc_total = 0; \ - op->full_message.send_count = 0; \ - op->full_message.recv_count = 0; \ - op->full_message.send_extent = 0; \ - op->full_message.recv_extent = 0; \ - op->full_message.offset_into_send_buffer = 0; \ - op->full_message.offset_into_recv_buffer = 0; \ - op->full_message.send_data_type = 0; \ - op->full_message.recv_data_type = 0; \ - op->full_message.fragment_launcher = 0; \ - op->sequential_routine.current_active_bcol_fn = 0; \ - op->sequential_routine.current_bcol_status = SEQ_TASK_NOT_STARTED; \ - \ - op->fragment_data.offset_into_user_buffer = offset_into_user_buff; \ - /* Pasha, is it constant ? what to put here */ \ - op->fragment_data.fragment_size = total_bytes; \ - op->fragment_data.message_descriptor = &op->full_message; \ - op->fragment_data.current_coll_op = -1; \ - } while (0) - -/* This routine re-orders and packs user data. The assumptions are that - * there is per-process data, the amount of data is the same for all ranks, - * and the user data is contiguous. - */ -int mca_coll_ml_pack_reorder_contiguous_data( - mca_coll_ml_collective_operation_progress_t *coll_op); - -/* This routine re-orders and packs user data. The assumptions are that - * there is per-process data, the amount of data is the same for all ranks, - * and the user data is noncontiguous. - */ -int mca_coll_ml_pack_reorder_noncontiguous_data( - mca_coll_ml_collective_operation_progress_t *coll_op); - -END_C_DECLS - - -#endif /* MCA_COLL_ML_ML_H */
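
The two pack/reorder entry points declared just above assume equal-sized per-rank blocks. Under that assumption, the contiguous case amounts to a block copy in sort_list order; one plausible standalone reading (hypothetical helper, not the library routine itself):

#include <string.h>
#include <stddef.h>

/* illustration of the contiguous re-order pack described above: per-rank
 * blocks of equal size are copied from the user buffer into the ML buffer
 * in sort_list order, so rooted operations only re-order once */
static void pack_reorder_contiguous(char *ml_buf, const char *user_buf,
                                    const int *sort_list, int n_ranks,
                                    size_t bytes_per_rank)
{
    for (int i = 0; i < n_ranks; i++) {
        /* the block placed at position i comes from rank sort_list[i] */
        memcpy(ml_buf + (size_t) i * bytes_per_rank,
               user_buf + (size_t) sort_list[i] * bytes_per_rank,
               bytes_per_rank);
    }
}
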
 diff --git a/ompi/mca/coll/ml/coll_ml_allgather.c b/ompi/mca/coll/ml/coll_ml_allgather.c deleted file mode 100644 index a1c71322cd..0000000000 --- a/ompi/mca/coll/ml/coll_ml_allgather.c +++ /dev/null @@ -1,633 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file */ - -#include "ompi_config.h" - -#include - -#include "ompi/constants.h" -#include "opal/threads/mutex.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/coll/coll.h" -#include "ompi/mca/bcol/bcol.h" -#include "opal/sys/atomic.h" -#include "coll_ml.h" -#include "coll_ml_select.h" -#include "coll_ml_allocation.h" - -static int mca_coll_ml_allgather_small_unpack_data(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - bool rcontig = coll_op->full_message.recv_data_continguous; - int n_ranks_in_comm = ompi_comm_size(OP_ML_MODULE(coll_op)->comm); - - void *dest = (void *)((uintptr_t)coll_op->full_message.dest_user_addr + - (uintptr_t)coll_op->full_message.n_bytes_delivered); - void *src = (void *)((uintptr_t)coll_op->fragment_data.buffer_desc->data_addr + - (size_t)coll_op->variable_fn_params.rbuf_offset); - - if (rcontig) { - memcpy(dest, src, n_ranks_in_comm * coll_op->full_message.n_bytes_scheduled); - } else { - mca_coll_ml_convertor_unpack(src, n_ranks_in_comm * coll_op->full_message.n_bytes_scheduled, - &coll_op->fragment_data.message_descriptor->recv_convertor); - } - - return OMPI_SUCCESS; -} - -static inline void copy_data (mca_coll_ml_collective_operation_progress_t *coll_op, rank_properties_t *rank_props, int soffset) { - bool rcontig = coll_op->fragment_data.message_descriptor->recv_data_continguous; - size_t total_bytes = coll_op->fragment_data.message_descriptor->n_bytes_total; - size_t pack_len = coll_op->fragment_data.fragment_size; - int doffset = rank_props->rank; - void *dest, *src; - - src = (void *) ((uintptr_t)coll_op->fragment_data.buffer_desc->data_addr + - (size_t)coll_op->variable_fn_params.rbuf_offset + soffset * pack_len); - - if (rcontig) { - dest = (void *) ((uintptr_t) coll_op->full_message.dest_user_addr + - (uintptr_t) coll_op->fragment_data.offset_into_user_buffer + - doffset * total_bytes); - - memcpy(dest, src, pack_len); - } else { - size_t position; - opal_convertor_t *recv_convertor = - &coll_op->fragment_data.message_descriptor->recv_convertor; - - position = (size_t) coll_op->fragment_data.offset_into_user_buffer + - doffset * total_bytes; - - opal_convertor_set_position(recv_convertor, &position); - mca_coll_ml_convertor_unpack(src, pack_len, recv_convertor); - } -} - -static int mca_coll_ml_allgather_noncontiguous_unpack_data(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - int i, j, n_level_one_sbgps; - size_t soffset; - - mca_coll_ml_topology_t *topo_info = coll_op->coll_schedule->topo_info; - sub_group_params_t *array_of_all_subgroup_ranks = topo_info->array_of_all_subgroups; - - n_level_one_sbgps = array_of_all_subgroup_ranks->level_one_index; - - for (i = 0 ; i < n_level_one_sbgps; i++) { - /* determine where in the source buffer the data can be found */ - soffset = array_of_all_subgroup_ranks[i].index_of_first_element; - for (j = 0 ; j < array_of_all_subgroup_ranks[i].n_ranks; j++, ++soffset) { - copy_data (coll_op, array_of_all_subgroup_ranks[i].rank_data + j, soffset); - } - } - - return OMPI_SUCCESS; -} - -/* Allgather dependencies are straightforward: everyone works from the "bottom up". - * Following Pasha, I too will put in the simplest dependency graph and change it later - * when we add hierarchy. Basically, allgather has the same dependency profile as the - * sequential broadcast except that there is only a single ordering of tasks.
- */ -static int mca_coll_ml_allgather_task_setup(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - int fn_idx, h_level, my_index, root; - mca_sbgp_base_module_t *sbgp; - mca_coll_ml_topology_t *topo = coll_op->coll_schedule->topo_info; - - fn_idx = coll_op->sequential_routine.current_active_bcol_fn; - h_level = coll_op->coll_schedule->component_functions[fn_idx].h_level; - sbgp = topo->component_pairs[h_level]. - subgroup_module; - my_index = sbgp->my_index; - - /* In the case of allgather, the local leader is always the root */ - root = 0; - if (my_index == root) { - coll_op->variable_fn_params.root_flag = true; - coll_op->variable_fn_params.root_route = NULL; - } else { - coll_op->variable_fn_params.root_flag = false; - coll_op->variable_fn_params.root_route = &topo->route_vector[root]; - } - - return OMPI_SUCCESS; -} - -static int mca_coll_ml_allgather_frag_progress(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - /* local variables */ - int ret; - size_t frag_len, dt_size; - - const void *buf; - mca_bcol_base_payload_buffer_desc_t *src_buffer_desc; - mca_coll_ml_collective_operation_progress_t *new_op; - - mca_coll_ml_module_t *ml_module = OP_ML_MODULE(coll_op); - bool scontig = coll_op->fragment_data.message_descriptor->send_data_continguous; - - ompi_datatype_type_size(coll_op->variable_fn_params.dtype, &dt_size); - /* Keep the pipeline filled with fragments */ - while (coll_op->fragment_data.message_descriptor->n_active < - coll_op->fragment_data.message_descriptor->pipeline_depth) { - /* If an active fragment happens to have completed the collective during - * a hop into the progress engine, then don't launch a new fragment, - * instead break and return. - */ - if (coll_op->fragment_data.message_descriptor->n_bytes_scheduled - == coll_op->fragment_data.message_descriptor->n_bytes_total) { - break; - } - /* Get an ml buffer */ - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - if (NULL == src_buffer_desc) { - /* If there exist outstanding fragments, then break out - * and let an active fragment deal with this later, - * there are no buffers available. 
 - */ - if (0 < coll_op->fragment_data.message_descriptor->n_active) { - return OMPI_SUCCESS; - } else { - /* The fragment is already on the list and - * we still have no ML resources; - * return busy */ - if (coll_op->pending & REQ_OUT_OF_MEMORY) { - ML_VERBOSE(10,("Out of resources %p", coll_op)); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - coll_op->pending |= REQ_OUT_OF_MEMORY; - opal_list_append(&((OP_ML_MODULE(coll_op))->waiting_for_memory_list), - (opal_list_item_t *)coll_op); - ML_VERBOSE(10,("Out of resources %p adding to pending queue", coll_op)); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - } - - /* Get a new collective descriptor and initialize it */ - new_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_allgather_functions[ML_SMALL_DATA_ALLGATHER], - coll_op->fragment_data.message_descriptor->src_user_addr, - coll_op->fragment_data.message_descriptor->dest_user_addr, - coll_op->fragment_data.message_descriptor->n_bytes_total, - coll_op->fragment_data.message_descriptor->n_bytes_scheduled); - - new_op->fragment_data.current_coll_op = coll_op->fragment_data.current_coll_op; - new_op->fragment_data.message_descriptor = coll_op->fragment_data.message_descriptor; - - /* set the task setup callback */ - new_op->sequential_routine.seq_task_setup = mca_coll_ml_allgather_task_setup; - - /* - MCA_COLL_IBOFFLOAD_SET_ML_BUFFER_INFO(new_op, - src_buffer_desc->buffer_index, src_buffer_desc); - */ - - /* We need this address for pointer arithmetic in memcpy */ - buf = coll_op->fragment_data.message_descriptor->src_user_addr; - - if (!scontig) { - frag_len = ml_module->small_message_thresholds[BCOL_ALLGATHER]; - mca_coll_ml_convertor_get_send_frag_size( - ml_module, &frag_len, - coll_op->fragment_data.message_descriptor); - - mca_coll_ml_convertor_pack( - (void *) ((uintptr_t) src_buffer_desc->data_addr + - frag_len * coll_op->coll_schedule->topo_info->hier_layout_info[0].offset + - frag_len * coll_op->coll_schedule->topo_info->hier_layout_info[0].level_one_index), - frag_len, &coll_op->fragment_data.message_descriptor->send_convertor); - } else { - /* calculate the new frag length; there are some issues here */ - frag_len = (coll_op->fragment_data.message_descriptor->n_bytes_total - - coll_op->fragment_data.message_descriptor->n_bytes_scheduled < - coll_op->fragment_data.fragment_size ?
- coll_op->fragment_data.message_descriptor->n_bytes_total - - coll_op->fragment_data.message_descriptor->n_bytes_scheduled : - coll_op->fragment_data.fragment_size); - - /* everybody copies in, based on the new values */ - memcpy((void *) ((uintptr_t)src_buffer_desc->data_addr + - frag_len * new_op->coll_schedule->topo_info->hier_layout_info[0].offset + - frag_len * new_op->coll_schedule->topo_info->hier_layout_info[0].level_one_index), - (void *) ((uintptr_t) buf + (uintptr_t) - coll_op->fragment_data.message_descriptor->n_bytes_scheduled), frag_len); - } - - new_op->variable_fn_params.sbuf = (void *) src_buffer_desc->data_addr; - new_op->variable_fn_params.rbuf = (void *) src_buffer_desc->data_addr; - - /* update the number of bytes scheduled */ - new_op->fragment_data.message_descriptor->n_bytes_scheduled += frag_len; - /* everyone needs an unpack function */ - new_op->process_fn = mca_coll_ml_allgather_noncontiguous_unpack_data; - - new_op->fragment_data.fragment_size = frag_len; - new_op->fragment_data.buffer_desc = src_buffer_desc; - - /* Setup fragment specific data */ - ++(new_op->fragment_data.message_descriptor->n_active); - - ML_VERBOSE(10, ("Start more, My index %d ", - new_op->fragment_data.buffer_desc->buffer_index)); - - /* this is a bit buggy */ - ML_SET_VARIABLE_PARAMS_BCAST( - new_op, - OP_ML_MODULE(new_op), - frag_len /* yes, we have consistent units, so this makes sense */, - MPI_BYTE /* we fragment according to buffer size - * we don't reduce the data thus we needn't - * keep "whole" datatypes, we may freely - * fragment without regard for multiples - * of any specific datatype - */, - src_buffer_desc, - 0, - 0, - frag_len, - src_buffer_desc->data_addr); - /* initialize first coll */ - ret = new_op->sequential_routine.seq_task_setup(new_op); - if (OMPI_SUCCESS != ret) { - ML_VERBOSE(3, ("Fragment failed to initialize itself")); - return ret; - } - - new_op->variable_fn_params.buffer_size = frag_len; - new_op->variable_fn_params.hier_factor = coll_op->variable_fn_params.hier_factor; - new_op->variable_fn_params.root = 0; - - MCA_COLL_ML_SET_NEW_FRAG_ORDER_INFO(new_op); - - /* append this collective !! 
 */ - OPAL_THREAD_LOCK(&(mca_coll_ml_component.sequential_collectives_mutex)); - opal_list_append(&mca_coll_ml_component.sequential_collectives, - (opal_list_item_t *)new_op); - OPAL_THREAD_UNLOCK(&(mca_coll_ml_component.sequential_collectives_mutex)); - } - - return OMPI_SUCCESS; -} - -static inline __opal_attribute_always_inline__ -int mca_coll_ml_allgather_start (const void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module, - ompi_request_t **req) -{ - size_t pack_len, sdt_size; - int ret, n_fragments = 1, comm_size; - - mca_coll_ml_topology_t *topo_info; - mca_bcol_base_payload_buffer_desc_t *src_buffer_desc; - - mca_coll_ml_component_t *cm = &mca_coll_ml_component; - - mca_coll_ml_collective_operation_progress_t *coll_op; - mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *) module; - - ptrdiff_t lb, extent; - bool scontig, rcontig, in_place = false; - - /* check for in place setting */ - if (MPI_IN_PLACE == sbuf) { - in_place = true; - sdtype = rdtype; - scount = rcount; - } - - /* scontig may differ from rcontig */ - scontig = ompi_datatype_is_contiguous_memory_layout(sdtype, scount); - rcontig = ompi_datatype_is_contiguous_memory_layout(rdtype, rcount); - - comm_size = ompi_comm_size(comm); - - ML_VERBOSE(10, ("Starting allgather")); - - assert(NULL != sdtype); - /* Calculate the size of the data; - * at this stage, only contiguous data is supported */ - - /* this is valid for allgather */ - ompi_datatype_type_size(sdtype, &sdt_size); - pack_len = scount * sdt_size; - - if (in_place) { - sbuf = (char *) rbuf + ompi_comm_rank(comm) * pack_len; - } - - /* Allocate collective schedule and pack message */ - /* this is the total ending message size that will need to fit in the ml-buffer */ - if (pack_len <= (size_t) ml_module->small_message_thresholds[BCOL_ALLGATHER]) { - /* The length of the message cannot be larger than the ML buffer size */ - ML_VERBOSE(10, ("Single frag %d %d %d", pack_len, comm_size, ml_module->payload_block->size_buffer)); - assert(pack_len * comm_size <= ml_module->payload_block->size_buffer); - - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - while (NULL == src_buffer_desc) { - opal_progress(); - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - } - - /* change 1 */ - coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_allgather_functions[ML_SMALL_DATA_ALLGATHER], - sbuf, rbuf, pack_len, 0 /* offset for first pack */); - - MCA_COLL_IBOFFLOAD_SET_ML_BUFFER_INFO(coll_op, - src_buffer_desc->buffer_index, src_buffer_desc); - - coll_op->fragment_data.current_coll_op = ML_SMALL_DATA_ALLGATHER; - /* task setup callback function */ - coll_op->sequential_routine.seq_task_setup = mca_coll_ml_allgather_task_setup; - - /* change 2 */ - if (!scontig) { - coll_op->full_message.n_bytes_scheduled = - mca_coll_ml_convertor_prepare(sdtype, scount, sbuf, - &coll_op->full_message.send_convertor, MCA_COLL_ML_NET_STREAM_SEND); - - mca_coll_ml_convertor_pack( - (void *) ((uintptr_t) src_buffer_desc->data_addr + pack_len * - (coll_op->coll_schedule->topo_info->hier_layout_info[0].offset + - coll_op->coll_schedule->topo_info->hier_layout_info[0].level_one_index)), - pack_len, &coll_op->full_message.send_convertor); - } else { - /* change 3 */ - memcpy((void *)((uintptr_t) src_buffer_desc->data_addr + pack_len * - (coll_op->coll_schedule->topo_info->hier_layout_info[0].offset + - coll_op->coll_schedule->topo_info->hier_layout_info[0].level_one_index)), - sbuf, pack_len);
 - - coll_op->full_message.n_bytes_scheduled = pack_len; - } - - if (!rcontig) { - mca_coll_ml_convertor_prepare(rdtype, rcount * comm_size, rbuf, - &coll_op->full_message.recv_convertor, MCA_COLL_ML_NET_STREAM_RECV); - } - - if (coll_op->coll_schedule->topo_info->ranks_contiguous) { - coll_op->process_fn = mca_coll_ml_allgather_small_unpack_data; - } else { - coll_op->process_fn = mca_coll_ml_allgather_noncontiguous_unpack_data; - } - - /* the whole ml-buffer is used to send AND receive */ - coll_op->variable_fn_params.sbuf = (void *) src_buffer_desc->data_addr; - coll_op->variable_fn_params.rbuf = (void *) src_buffer_desc->data_addr; - - /* we can set the initial offset here */ - coll_op->variable_fn_params.sbuf_offset = 0; - coll_op->variable_fn_params.rbuf_offset = 0; - - coll_op->variable_fn_params.count = scount; - coll_op->fragment_data.fragment_size = - coll_op->full_message.n_bytes_scheduled; - - /* For small CINCO, we may use the native datatype */ - coll_op->variable_fn_params.dtype = sdtype; - coll_op->variable_fn_params.buffer_size = pack_len; - coll_op->variable_fn_params.root = 0; - } else if (cm->enable_fragmentation || pack_len * comm_size < (1 << 20)) { - /* calculate the number of fragments and the size of each frag */ - size_t n_dts_per_frag, frag_len; - int pipeline_depth = mca_coll_ml_component.pipeline_depth; - - /* Calculate the number of fragments required for this message; - * be careful - watch the integer division! */ - frag_len = (pack_len <= (size_t) ml_module->small_message_thresholds[BCOL_ALLGATHER] ? - pack_len : (size_t) ml_module->small_message_thresholds[BCOL_ALLGATHER]); - - n_dts_per_frag = frag_len / sdt_size; - n_fragments = (pack_len + sdt_size * n_dts_per_frag - 1) / (sdt_size * n_dts_per_frag); - pipeline_depth = (n_fragments < pipeline_depth ?
n_fragments : pipeline_depth); - - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - while (NULL == src_buffer_desc) { - opal_progress(); - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - } - - /* change 4 */ - coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_allgather_functions[ML_SMALL_DATA_ALLGATHER], - sbuf, rbuf, pack_len, - 0 /* offset for first pack */); - - MCA_COLL_IBOFFLOAD_SET_ML_BUFFER_INFO(coll_op, - src_buffer_desc->buffer_index, src_buffer_desc); - topo_info = coll_op->coll_schedule->topo_info; - - /* task setup callback function */ - coll_op->sequential_routine.seq_task_setup = mca_coll_ml_allgather_task_setup; - - if (!scontig) { - coll_op->full_message.send_converter_bytes_packed = - mca_coll_ml_convertor_prepare( - sdtype, scount, NULL, - &coll_op->full_message.dummy_convertor, - MCA_COLL_ML_NET_STREAM_SEND); - - coll_op->full_message.dummy_conv_position = 0; - mca_coll_ml_convertor_get_send_frag_size( - ml_module, &frag_len, - &coll_op->full_message); - - /* change 5 */ - mca_coll_ml_convertor_prepare(sdtype, scount, sbuf, - &coll_op->full_message.send_convertor, MCA_COLL_ML_NET_STREAM_SEND); - - mca_coll_ml_convertor_pack( - (void *) ((uintptr_t) src_buffer_desc->data_addr + frag_len * - (topo_info->hier_layout_info[0].offset + - topo_info->hier_layout_info[0].level_one_index)), - frag_len, &coll_op->full_message.send_convertor); - } else { - /* change 6 */ - memcpy((void *)((uintptr_t)src_buffer_desc->data_addr + frag_len * - (topo_info->hier_layout_info[0].offset + - topo_info->hier_layout_info[0].level_one_index)), - sbuf, frag_len); - } - - if (!rcontig) { - mca_coll_ml_convertor_prepare(rdtype, rcount * comm_size, rbuf, - &coll_op->full_message.recv_convertor, MCA_COLL_ML_NET_STREAM_RECV); - } - - coll_op->process_fn = mca_coll_ml_allgather_noncontiguous_unpack_data; - - /* hopefully this doesn't royally screw things up; the idea is that the - * whole ml-buffer is used to send and receive - */ - coll_op->variable_fn_params.sbuf = (void *) src_buffer_desc->data_addr; - coll_op->variable_fn_params.rbuf = (void *) src_buffer_desc->data_addr; - - /* we can set the initial offset here */ - coll_op->variable_fn_params.sbuf_offset = 0; - coll_op->variable_fn_params.rbuf_offset = 0; - - coll_op->fragment_data.buffer_desc = src_buffer_desc; - - coll_op->fragment_data.fragment_size = frag_len; - coll_op->fragment_data.message_descriptor->n_active = 1; - - coll_op->full_message.n_bytes_scheduled = frag_len; - coll_op->full_message.fragment_launcher = mca_coll_ml_allgather_frag_progress; - - coll_op->full_message.pipeline_depth = pipeline_depth; - coll_op->fragment_data.current_coll_op = ML_SMALL_DATA_ALLGATHER; - - /* remember this is different for frags! It caused data corruption when - * not properly set. Be sure you have consistent units. - */ - coll_op->variable_fn_params.count = frag_len; - coll_op->variable_fn_params.dtype = MPI_BYTE; /* for fragmented data, we work in - * units of bytes.
This means that - * all of our arithmetic is done - * in terms of bytes - */ - - coll_op->variable_fn_params.root = 0; - coll_op->variable_fn_params.frag_size = frag_len; - coll_op->variable_fn_params.buffer_size = frag_len; - } else { - /* change 7 */ - ML_VERBOSE(10, ("ML_ALLGATHER_LARGE_DATA_KNOWN case.")); - coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_allgather_functions[ML_LARGE_DATA_ALLGATHER], - sbuf, rbuf, pack_len, 0 /* offset for first pack */); - topo_info = coll_op->coll_schedule->topo_info; - if (MCA_BCOL_BASE_NO_ML_BUFFER_FOR_LARGE_MSG & topo_info->all_bcols_mode) { - MCA_COLL_IBOFFLOAD_SET_ML_BUFFER_INFO(coll_op, MCA_COLL_ML_NO_BUFFER, NULL); - } else { - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - while (NULL == src_buffer_desc) { - opal_progress(); - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - } - - MCA_COLL_IBOFFLOAD_SET_ML_BUFFER_INFO(coll_op, src_buffer_desc->buffer_index, src_buffer_desc); - } - - /* not sure if I really need this here */ - coll_op->sequential_routine.seq_task_setup = mca_coll_ml_allgather_task_setup; - coll_op->process_fn = NULL; - /* probably the most important piece */ - coll_op->variable_fn_params.sbuf = sbuf; - coll_op->variable_fn_params.rbuf = rbuf; - coll_op->variable_fn_params.sbuf_offset = 0; - coll_op->variable_fn_params.rbuf_offset = 0; - coll_op->variable_fn_params.count = scount; - coll_op->variable_fn_params.dtype = sdtype;/* for zero copy, we want the - * native datatype and actual count - */ - coll_op->variable_fn_params.root = 0; - - /* you still need to copy in your own data into the rbuf */ - /* don't need to do this if you have in place data */ - if (!in_place) { - memcpy((char *) rbuf + ompi_comm_rank(comm) * pack_len, sbuf, pack_len); - } - } - - coll_op->full_message.send_count = scount; - coll_op->full_message.recv_count = rcount; - - coll_op->full_message.send_data_continguous = scontig; - coll_op->full_message.recv_data_continguous = rcontig; - - ompi_datatype_get_extent(sdtype, &lb, &extent); - coll_op->full_message.send_extent = (size_t) extent; - - ompi_datatype_get_extent(rdtype, &lb, &extent); - coll_op->full_message.recv_extent = (size_t) extent; - - - /* Fill in the function arguments */ - coll_op->variable_fn_params.sequence_num = - OPAL_THREAD_ADD32(&(ml_module->collective_sequence_num), 1); - coll_op->variable_fn_params.hier_factor = comm_size; - - MCA_COLL_ML_SET_ORDER_INFO(coll_op, n_fragments); - - - ret = mca_coll_ml_launch_sequential_collective (coll_op); - if (OMPI_SUCCESS != ret) { - ML_VERBOSE(10, ("Failed to launch")); - return ret; - } - - *req = &coll_op->full_message.super; - - return OMPI_SUCCESS; -} - -int mca_coll_ml_allgather(const void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - ompi_request_t *req; - int ret; - - ML_VERBOSE(10, ("Starting blocking allgather")); - - ret = mca_coll_ml_allgather_start (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module, &req); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - return ret; - } - - ret = ompi_request_wait (&req, MPI_STATUS_IGNORE); - - ML_VERBOSE(10, ("Blocking allgather is complete")); - - return ret; -} - -int mca_coll_ml_allgather_nb(const void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - ompi_request_t **req, - mca_coll_base_module_t 
*module) -{ - int ret; - - ML_VERBOSE(10, ("Starting non-blocking allgather")); - - ret = mca_coll_ml_allgather_start (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module, req); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - return ret; - } - - ML_VERBOSE(10, ("Non-blocking allgather started")); - - return ret; -} diff --git a/ompi/mca/coll/ml/coll_ml_allocation.c b/ompi/mca/coll/ml/coll_ml_allocation.c deleted file mode 100644 index ac0ebbebc0..0000000000 --- a/ompi/mca/coll/ml/coll_ml_allocation.c +++ /dev/null @@ -1,213 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "ompi_config.h" -#include - -#include "coll_ml.h" -#include "coll_ml_inlines.h" -#include "coll_ml_allocation.h" - -mca_bcol_base_memory_block_desc_t *mca_coll_ml_allocate_block(struct mca_coll_ml_component_t *ml_component, - mca_bcol_base_memory_block_desc_t *ml_memblock) -{ - mca_bcol_base_memory_block_desc_t *ret = NULL; - mca_bcol_base_memory_block_desc_t *memory_block = NULL; - mca_coll_ml_lmngr_t *memory_manager = NULL; - - if (ml_memblock) { - ML_ERROR(("Memory already allocated - expecting NULL pointer")); - return ret; - } - memory_block = (mca_bcol_base_memory_block_desc_t*) calloc(1, sizeof(mca_bcol_base_memory_block_desc_t)); - - if (NULL == memory_block){ - ML_ERROR(("Couldn't allocate memory for ml_memblock")); - return ret; - } - - memory_manager = &ml_component->memory_manager; - memory_block->block = mca_coll_ml_lmngr_alloc(memory_manager); - memory_block->size_block = memory_manager->list_block_size; - - if (!memory_block->block){ - ML_VERBOSE(1, ("lmngr failed.")); - free(memory_block); - return NULL; - } - - return memory_block; -} - -void mca_coll_ml_free_block (mca_bcol_base_memory_block_desc_t *ml_memblock) -{ - if (!ml_memblock) - return; - - if (ml_memblock->buffer_descs){ - free(ml_memblock->buffer_descs); - } - - mca_coll_ml_lmngr_free(ml_memblock->block); - free(ml_memblock->bank_release_counters); - free(ml_memblock->ready_for_memsync); - free(ml_memblock->bank_is_busy); - free(ml_memblock); -} - -int mca_coll_ml_initialize_block(mca_bcol_base_memory_block_desc_t *ml_memblock, - uint32_t num_buffers, - uint32_t num_banks, - uint32_t buffer_size, - int32_t data_offset, - opal_list_t *bcols_in_use) -{ - int ret = OMPI_SUCCESS; - uint32_t bank_loop, buff_loop; - uint64_t addr_offset = 0; - mca_bcol_base_payload_buffer_desc_t *pbuff_descs = NULL,*pbuff_desc = NULL; - - if (0 == num_banks || 0 == num_buffers || 0 == buffer_size) { - return OMPI_ERR_BAD_PARAM; - } - - if (NULL == ml_memblock){ - ML_ERROR(("Memory block not initialized")); - ret = OMPI_ERROR; - goto exit_ERROR; - } - - if (ml_memblock->size_block < (num_buffers * num_banks * buffer_size) ){ - ML_ERROR(("Not enough memory for all buffers and banks in the memory block")); - ret = OMPI_ERROR; - goto exit_ERROR; - } - - pbuff_descs = (mca_bcol_base_payload_buffer_desc_t*) malloc(sizeof(mca_bcol_base_payload_buffer_desc_t) - * num_banks * num_buffers); - if (NULL == pbuff_descs) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - for(bank_loop = 0; bank_loop < num_banks; bank_loop++) - for(buff_loop = 0; buff_loop < num_buffers; buff_loop++){ - pbuff_desc = 
&pbuff_descs[bank_loop*num_buffers + buff_loop]; - - pbuff_desc->base_data_addr = (void *) - ((char *)ml_memblock->block->base_addr + addr_offset); - pbuff_desc->data_addr = (void *) - ((char *)pbuff_desc->base_data_addr + (size_t)data_offset); - - addr_offset+=buffer_size; - pbuff_desc->buffer_index = BUFFER_INDEX(bank_loop,num_buffers,buff_loop); - - pbuff_desc->bank_index=bank_loop; - pbuff_desc->generation_number=0; - } - - /* Initialize ml memory block */ - /* gvm FIX:This counter when zero indicates that the bank is ready for - * recycle. This is initialized to number of bcol components as each bcol is responsible for - * releasing the buffers of a bank. This initialization will have - * faulty behavior, example in case of multiple interfaces, when more than - * one bcol module of the component type is in use. - */ - ml_memblock->bank_release_counters = (uint32_t *) calloc(num_banks, sizeof(uint32_t)); - if (NULL == ml_memblock->bank_release_counters) { - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - - ml_memblock->ready_for_memsync = (bool *) calloc(num_banks, sizeof(bool)); - if (NULL == ml_memblock->ready_for_memsync) { - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - - ml_memblock->bank_is_busy = (bool *) calloc(num_banks, sizeof(bool)); - if (NULL == ml_memblock->bank_is_busy) { - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - - /* Set index for first bank to sync */ - ml_memblock->memsync_counter = 0; - - /* use first bank and first buffer */ - ml_memblock->next_free_buffer = 0; - - ml_memblock->block_addr_offset = addr_offset; - ml_memblock->num_buffers_per_bank = num_buffers; - ml_memblock->num_banks = num_banks; - ml_memblock->size_buffer = buffer_size; - ml_memblock->buffer_descs = pbuff_descs; - - return ret; - -exit_ERROR: - /* Free all buffer descriptors */ - if (pbuff_descs){ - free(pbuff_descs); - } - - return ret; -} - -mca_bcol_base_payload_buffer_desc_t *mca_coll_ml_alloc_buffer (mca_coll_ml_module_t *module) -{ - uint64_t bindex; - uint32_t bank, buffer, num_buffers; - mca_bcol_base_memory_block_desc_t *ml_memblock = module->payload_block; - mca_bcol_base_payload_buffer_desc_t *pbuff_descs = NULL, - *ml_membuffer = NULL; - - /* Return a buffer */ - num_buffers = ml_memblock->num_buffers_per_bank; - pbuff_descs = ml_memblock->buffer_descs; - bindex = ml_memblock->next_free_buffer; - buffer = bindex % num_buffers; - bank = bindex/num_buffers; - - ML_VERBOSE(10, ("ML allocator: allocating buffer index %d, bank index %d", buffer, bank)); - - /* First buffer in bank, use next bank */ - if (0 == buffer) { - if(!ml_memblock->bank_is_busy[bank]) { - /* the bank is free, mark it busy */ - ml_memblock->bank_is_busy[bank] = true; - ML_VERBOSE(10, ("ML allocator: reset bank %d to value %d", bank, - ml_memblock->bank_release_counters[bank])); - } else { - /* the bank is busy, return NULL and upper layer will handle it */ - ML_VERBOSE(10, ("No free payload buffers are available for use." - " Next memory bank is still used by one of bcols")); - return NULL; - } - } - - assert(true == ml_memblock->bank_is_busy[bank]); - - ml_membuffer = &pbuff_descs[bindex]; - ML_VERBOSE(10, ("ML allocator: ml buffer index %d", bindex)); - - /* Compute next free buffer */ - buffer = (buffer == num_buffers - 1) ? 0 : buffer + 1; - if (0 == buffer) { - bank = (bank == ml_memblock->num_banks - 1) ? 
0 : bank + 1; - } - - ml_memblock->next_free_buffer = BUFFER_INDEX(bank,num_buffers,buffer); - - return ml_membuffer; -} diff --git a/ompi/mca/coll/ml/coll_ml_allocation.h b/ompi/mca/coll/ml/coll_ml_allocation.h deleted file mode 100644 index 7bb7f63242..0000000000 --- a/ompi/mca/coll/ml/coll_ml_allocation.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_ML_ALLOC_H -#define MCA_ML_ALLOC_H - -#include "ompi_config.h" -#include "ompi/include/ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/coll/coll.h" -#include "ompi/mca/bcol/base/base.h" -#include "opal/sys/atomic.h" -#include "opal/mca/mpool/base/base.h" -#include "coll_ml_lmngr.h" - -/* - Returns a block of memory from mpool - - ARGS: - IN ml_component: component descriptor - OUT ml_memblock: block_addr - Starting address of the memory block - size - Size of the block - register_info - Register information passed from the mpool - - Return - On Sucess : Returns size of memory block - On Failure: Returns -1 - - */ - -struct mca_coll_ml_component_t; -struct mca_coll_ml_module_t; - -mca_bcol_base_memory_block_desc_t *mca_coll_ml_allocate_block( - struct mca_coll_ml_component_t *ml_component, - struct mca_bcol_base_memory_block_desc_t *ml_memblock - ); - /* Allocate the memory from mpool */ - /* Register the memory block with bcols */ - -void mca_coll_ml_free_block( - mca_bcol_base_memory_block_desc_t *ml_memblock - ); - - - - -/* - Initialize the memory block and map into buffers and memory banks, and - also buffer descriptors are initialized. - - IN ml_memblock: Memory block descriptor - IN num_buffers: number of buffers - IN num_banks: number of banks - Return - On Sucess: OMPI_SUCCESS - On Failure: OMPI_ERROR - */ -int mca_coll_ml_initialize_block( - mca_bcol_base_memory_block_desc_t *ml_memblock, - uint32_t num_buffers, - uint32_t num_banks, - uint32_t buffer_size, - int32_t data_offset, - opal_list_t *bcols_in_use - ); - /* Map blocks into buffers and banks */ - /* Initialize the descriptors */ - - - -/* - Allocate a memory buffer from the block - IN ml_memblock: Memory block descriptor - OUT ml_membuffer: Buffer allocated for data from the block - - Return - On Sucess: OMPI_SUCCESS - On Failure: OMPI_ERROR - */ -mca_bcol_base_payload_buffer_desc_t *mca_coll_ml_alloc_buffer( - struct mca_coll_ml_module_t *module); - -int mca_coll_ml_free_buffer( - mca_bcol_base_memory_block_desc_t *ml_memblock, - struct mca_bcol_base_payload_buffer_desc_t *ml_membuffer - ); - -/* - Register the memory block with bcol component - - IN ml_memblock: Memory block descriptor - OUT registerations (ml_memblock) - - Return - On Sucess: OMPI_SUCCESS - On Failure: OMPI_ERROR - - */ -int mca_coll_ml_register_block_bcol( - mca_bcol_base_memory_block_desc_t *ml_memblock - ); - -#endif /* MCA_ML_ALLOC_H */ diff --git a/ompi/mca/coll/ml/coll_ml_allreduce.c b/ompi/mca/coll/ml/coll_ml_allreduce.c deleted file mode 100644 index 85457254b8..0000000000 --- a/ompi/mca/coll/ml/coll_ml_allreduce.c +++ /dev/null @@ -1,553 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. 
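/*
 * A minimal, self-contained model of the bank/buffer cycling implemented by
 * mca_coll_ml_alloc_buffer() above: payload memory is carved into num_banks
 * banks of num_buffers_per_bank buffers, handed out buffer-first, and a bank
 * may only be re-entered once it has been recycled. The struct and names
 * below are sketch stand-ins, not the real coll/ml types; the real allocator
 * also tracks generation numbers, release counters and memsync state.
 */
#include <stdbool.h>

#define SKETCH_BUFFER_INDEX(bank, nbuf, buf) ((bank) * (nbuf) + (buf))

struct sketch_block {
    unsigned num_banks;
    unsigned num_buffers_per_bank;
    unsigned next_free;        /* flat index of the next candidate buffer */
    bool bank_is_busy[8];      /* sized arbitrarily for the sketch */
};

/* Returns a flat buffer index, or -1 when the next bank is still busy
 * (the caller is expected to progress and retry, as the code above does). */
static int sketch_alloc(struct sketch_block *b)
{
    unsigned nbuf = b->num_buffers_per_bank;
    unsigned buf  = b->next_free % nbuf;
    unsigned bank = b->next_free / nbuf;
    int index = (int) b->next_free;

    /* First buffer of a bank: the whole bank must be idle before reuse */
    if (0 == buf) {
        if (b->bank_is_busy[bank]) {
            return -1;
        }
        b->bank_is_busy[bank] = true;
    }

    /* Advance buffer-first, wrapping into the next bank */
    buf = (buf == nbuf - 1) ? 0 : buf + 1;
    if (0 == buf) {
        bank = (bank == b->num_banks - 1) ? 0 : bank + 1;
    }
    b->next_free = SKETCH_BUFFER_INDEX(bank, nbuf, buf);

    return index;
}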
All rights - * reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file */ - -#include "ompi_config.h" - -#include - -#include "ompi/constants.h" -#include "opal/threads/mutex.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/coll/coll.h" -#include "ompi/mca/bcol/bcol.h" -#include "opal/sys/atomic.h" -#include "coll_ml.h" -#include "coll_ml_select.h" -#include "coll_ml_allocation.h" - -static int mca_coll_ml_allreduce_small_unpack(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - int ret; - /* need to put in more */ - int count = coll_op->variable_fn_params.count; - ompi_datatype_t *dtype = coll_op->variable_fn_params.dtype; - - void *dest = (void *)((uintptr_t)coll_op->full_message.dest_user_addr + - (uintptr_t)coll_op->fragment_data.offset_into_user_buffer); - void *src = (void *)((uintptr_t)coll_op->fragment_data.buffer_desc->data_addr + - (size_t)coll_op->variable_fn_params.rbuf_offset); - - ret = ompi_datatype_copy_content_same_ddt(dtype, (int32_t) count, (char *) dest, - (char *) src); - if (ret < 0) { - return OMPI_ERROR; - } - - ML_VERBOSE(10, ("sbuf addr %p, sbuf offset %d, rbuf addr %p, rbuf offset %d.", - src, coll_op->variable_fn_params.sbuf_offset, dest, - coll_op->variable_fn_params.rbuf_offset)); - - return OMPI_SUCCESS; -} - -static int mca_coll_ml_allreduce_task_setup(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - int fn_idx, h_level, my_index, root; - mca_sbgp_base_module_t *sbgp; - mca_coll_ml_topology_t *topo = coll_op->coll_schedule->topo_info; - - fn_idx = coll_op->sequential_routine.current_active_bcol_fn; - h_level = coll_op->coll_schedule->component_functions[fn_idx].h_level; - sbgp = topo->component_pairs[h_level].subgroup_module; - my_index = sbgp->my_index; - - /* In the case of allreduce, the local leader is always the root */ - root = 0; - if (my_index == root) { - coll_op->variable_fn_params.root_flag = true; - coll_op->variable_fn_params.root_route = NULL; - } else { - coll_op->variable_fn_params.root_flag = false; - coll_op->variable_fn_params.root_route = &topo->route_vector[root]; - } - - /* NTH: This was copied from the old allreduce launcher. */ - if (0 < fn_idx) { - coll_op->variable_fn_params.sbuf = coll_op->variable_fn_params.rbuf; - coll_op->variable_fn_params.userbuf = coll_op->variable_fn_params.rbuf; - } - - return OMPI_SUCCESS; -} - -static int mca_coll_ml_allreduce_frag_progress(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - /* local variables */ - const void *buf; - - size_t dt_size; - int ret, frag_len, count; - - ptrdiff_t lb, extent; - - mca_bcol_base_payload_buffer_desc_t *src_buffer_desc; - mca_coll_ml_collective_operation_progress_t *new_op; - - mca_coll_ml_module_t *ml_module = OP_ML_MODULE(coll_op); - - ret = ompi_datatype_get_extent(coll_op->variable_fn_params.dtype, &lb, &extent); - if (ret < 0) { - return OMPI_ERROR; - } - - dt_size = (size_t) extent; - - /* Keep the pipeline filled with fragments */ - while (coll_op->fragment_data.message_descriptor->n_active < - coll_op->fragment_data.message_descriptor->pipeline_depth) { - /* If an active fragment happens to have completed the collective during - * a hop into the progress engine, then don't launch a new fragment, - * instead break and return. 
- */ - if (coll_op->fragment_data.message_descriptor->n_bytes_scheduled - == coll_op->fragment_data.message_descriptor->n_bytes_total) { - break; - } - - /* Get an ml buffer */ - src_buffer_desc = mca_coll_ml_alloc_buffer(OP_ML_MODULE(coll_op)); - if (NULL == src_buffer_desc) { - /* If there exist outstanding fragments, then break out - * and let an active fragment deal with this later, - * there are no buffers available. - */ - if (0 < coll_op->fragment_data.message_descriptor->n_active) { - return OMPI_SUCCESS; - } - - /* It is useless to call progress from here, since - * ml progress can't be executed as result ml memsync - * call will not be completed and no memory will be - * recycled. So we put the element on the list, and we will - * progress it later when memsync will recycle some memory*/ - - /* The fragment is already on list and - * the we still have no ml resources - * Return busy */ - if (!(coll_op->pending & REQ_OUT_OF_MEMORY)) { - coll_op->pending |= REQ_OUT_OF_MEMORY; - opal_list_append(&((OP_ML_MODULE(coll_op))->waiting_for_memory_list), - (opal_list_item_t *)coll_op); - ML_VERBOSE(10,("Out of resources %p adding to pending queue", coll_op)); - } else { - ML_VERBOSE(10,("Out of resources %p", coll_op)); - } - - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - /* Get a new collective descriptor and initialize it */ - new_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_allreduce_functions[coll_op->fragment_data.current_coll_op], - coll_op->fragment_data.message_descriptor->src_user_addr, - coll_op->fragment_data.message_descriptor->dest_user_addr, - coll_op->fragment_data.message_descriptor->n_bytes_total, - coll_op->fragment_data.message_descriptor->n_bytes_scheduled); - - MCA_COLL_IBOFFLOAD_SET_ML_BUFFER_INFO(new_op, - src_buffer_desc->buffer_index, src_buffer_desc); - - new_op->fragment_data.current_coll_op = coll_op->fragment_data.current_coll_op; - new_op->fragment_data.message_descriptor = coll_op->fragment_data.message_descriptor; - - /* set the task setup callback */ - new_op->sequential_routine.seq_task_setup = mca_coll_ml_allreduce_task_setup; - /* We need this address for pointer arithmetic in memcpy */ - buf = coll_op->fragment_data.message_descriptor->src_user_addr; - /* calculate the number of data types in this packet */ - count = (coll_op->fragment_data.message_descriptor->n_bytes_total - - coll_op->fragment_data.message_descriptor->n_bytes_scheduled < - (size_t) OP_ML_MODULE(coll_op)->small_message_thresholds[BCOL_ALLREDUCE] ? 
- (coll_op->fragment_data.message_descriptor->n_bytes_total - - coll_op->fragment_data.message_descriptor->n_bytes_scheduled) / dt_size : - (size_t) coll_op->variable_fn_params.count); - - /* calculate the fragment length */ - frag_len = count*dt_size; - - ret = ompi_datatype_copy_content_same_ddt(coll_op->variable_fn_params.dtype, count, - (char *) src_buffer_desc->data_addr, (char *) ((uintptr_t) buf + (uintptr_t) - coll_op->fragment_data.message_descriptor->n_bytes_scheduled)); - if (ret < 0) { - return OMPI_ERROR; - } - - /* No unpack for root */ - new_op->process_fn = mca_coll_ml_allreduce_small_unpack; - - /* Setup fragment specific data */ - new_op->fragment_data.message_descriptor->n_bytes_scheduled += frag_len; - new_op->fragment_data.buffer_desc = src_buffer_desc; - new_op->fragment_data.fragment_size = frag_len; - (new_op->fragment_data.message_descriptor->n_active)++; - - ML_SET_VARIABLE_PARAMS_BCAST( - new_op, - OP_ML_MODULE(new_op), - count, - MPI_BYTE, - src_buffer_desc, - 0, - 0, - frag_len, - src_buffer_desc->data_addr); - /* Fill in bcast specific arguments */ - /* TBD: remove buffer_size */ - new_op->variable_fn_params.buffer_size = frag_len; - new_op->variable_fn_params.count = count; - new_op->variable_fn_params.hier_factor = coll_op->variable_fn_params.hier_factor; - new_op->variable_fn_params.op = coll_op->variable_fn_params.op; - new_op->variable_fn_params.dtype = coll_op->variable_fn_params.dtype; - new_op->variable_fn_params.root = 0; - new_op->variable_fn_params.sbuf = src_buffer_desc->data_addr; - new_op->variable_fn_params.rbuf = src_buffer_desc->data_addr; - new_op->sequential_routine.current_bcol_status = SEQ_TASK_PENDING; - - MCA_COLL_ML_SET_NEW_FRAG_ORDER_INFO(new_op); - - ML_VERBOSE(10,("FFFF Contig + fragmentation [0-sk, 1-lk, 3-su, 4-lu] %d %d %d", - new_op->variable_fn_params.buffer_size, - new_op->fragment_data.fragment_size, - new_op->fragment_data.message_descriptor->n_bytes_scheduled)); - /* initialize first coll */ - ret = new_op->sequential_routine.seq_task_setup(new_op); - if (OMPI_SUCCESS != ret) { - ML_VERBOSE(3,("Fragment failed to initialize itself")); - return ret; - } - - /* append this collective !! 
*/ - OPAL_THREAD_LOCK(&(mca_coll_ml_component.sequential_collectives_mutex)); - opal_list_append(&mca_coll_ml_component.sequential_collectives, - (opal_list_item_t *)new_op); - OPAL_THREAD_UNLOCK(&(mca_coll_ml_component.sequential_collectives_mutex)); - - } - - return OMPI_SUCCESS; -} - -static inline __opal_attribute_always_inline__ -int parallel_allreduce_start(const void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, struct ompi_op_t *op, - struct ompi_communicator_t *comm, - mca_coll_ml_module_t *ml_module, - ompi_request_t **req, - int small_data_allreduce, - int large_data_allreduce) -{ - int ret, n_fragments = 1, frag_len, - pipeline_depth, n_dts_per_frag; - - ptrdiff_t lb, extent; - size_t pack_len, dt_size; - - mca_bcol_base_payload_buffer_desc_t *src_buffer_desc; - mca_coll_ml_collective_operation_progress_t *coll_op; - - mca_coll_ml_component_t *cm = &mca_coll_ml_component; - - bool contiguous = ompi_datatype_is_contiguous_memory_layout(dtype, count); - - if (MPI_IN_PLACE == sbuf) { - sbuf = rbuf; - } - - ret = ompi_datatype_get_extent(dtype, &lb, &extent); - if (ret < 0) { - return OMPI_ERROR; - } - - dt_size = (size_t) extent; - pack_len = count * dt_size; - - ML_VERBOSE(1,("Allreduce requested %d bytes, fragmentation enabled %d", - pack_len, - cm->enable_fragmentation)); - if (pack_len <= (size_t) ml_module->small_message_thresholds[BCOL_ALLREDUCE]) { - /* The message length cannot be larger than the ML buffer size */ - assert(pack_len <= ml_module->payload_block->size_buffer); - - ML_VERBOSE(1,("Using small data allreduce (threshold = %d)", - ml_module->small_message_thresholds[BCOL_ALLREDUCE])); - - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - while (OPAL_UNLIKELY(NULL == src_buffer_desc)) { - opal_progress(); - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - } - - coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_allreduce_functions[small_data_allreduce], - sbuf, rbuf, pack_len, 0); - - coll_op->variable_fn_params.rbuf = src_buffer_desc->data_addr; - coll_op->variable_fn_params.sbuf = src_buffer_desc->data_addr; - coll_op->variable_fn_params.count = count; - - ret = ompi_datatype_copy_content_same_ddt(dtype, count, - (void *) (uintptr_t) src_buffer_desc->data_addr, (char *) sbuf); - if (ret < 0) { - return OMPI_ERROR; - } - - /* unpack function */ - coll_op->process_fn = mca_coll_ml_allreduce_small_unpack; - } else if (cm->enable_fragmentation || !contiguous) { - ML_VERBOSE(1,("Using Fragmented Allreduce")); - - /* fragment the data */ - /* reject datatypes too large to fit even one element per fragment */ - if (dt_size > (size_t) ml_module->small_message_thresholds[BCOL_ALLREDUCE]) { - ML_ERROR(("Datatype size exceeds the ML fragmentation threshold")); - return OMPI_ERROR; - } - - /* calculate the number of data types that can fit per ml-buffer */ - n_dts_per_frag = ml_module->small_message_thresholds[BCOL_ALLREDUCE] / dt_size; - - /* calculate the number of fragments */ - n_fragments = (count + n_dts_per_frag - 1) / n_dts_per_frag; /* round up */ - - /* calculate the actual pipeline depth */ - pipeline_depth = n_fragments < cm->pipeline_depth ?
n_fragments : cm->pipeline_depth; - - /* calculate the fragment size */ - frag_len = n_dts_per_frag * dt_size; - - /* allocate an ml buffer */ - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - while (NULL == src_buffer_desc) { - opal_progress(); - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - } - - coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_allreduce_functions[small_data_allreduce], - sbuf, rbuf, pack_len, 0 /* offset for first pack */); - - /* task setup callback function */ - coll_op->sequential_routine.seq_task_setup = mca_coll_ml_allreduce_task_setup; - - coll_op->process_fn = mca_coll_ml_allreduce_small_unpack; - - coll_op->variable_fn_params.sbuf = (void *) src_buffer_desc->data_addr; - coll_op->variable_fn_params.rbuf = (void *) src_buffer_desc->data_addr; - - coll_op->fragment_data.message_descriptor->n_active = 1; - coll_op->full_message.n_bytes_scheduled = frag_len; - coll_op->full_message.fragment_launcher = mca_coll_ml_allreduce_frag_progress; - coll_op->full_message.pipeline_depth = pipeline_depth; - coll_op->fragment_data.current_coll_op = small_data_allreduce; - coll_op->fragment_data.fragment_size = frag_len; - - coll_op->variable_fn_params.count = n_dts_per_frag; /* seems fishy */ - coll_op->variable_fn_params.buffer_size = frag_len; - - /* copy into the ml-buffer */ - ret = ompi_datatype_copy_content_same_ddt(dtype, n_dts_per_frag, - (char *) src_buffer_desc->data_addr, (char *) sbuf); - if (ret < 0) { - return OMPI_ERROR; - } - } else { - ML_VERBOSE(1,("Using zero-copy ptp allreduce")); - coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_allreduce_functions[large_data_allreduce], - sbuf, rbuf, pack_len, 0); - - coll_op->variable_fn_params.userbuf = - coll_op->variable_fn_params.sbuf = sbuf; - - coll_op->variable_fn_params.rbuf = rbuf; - - /* The ML buffer is used for testing. 
Later, when we - * switch to use knem/mmap/portals this should be replaced - * appropriately - */ - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - while (NULL == src_buffer_desc) { - opal_progress(); - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - } - - coll_op->variable_fn_params.count = count; - } - - MCA_COLL_IBOFFLOAD_SET_ML_BUFFER_INFO(coll_op, src_buffer_desc->buffer_index, - src_buffer_desc); - - /* set the offset */ - coll_op->variable_fn_params.sbuf_offset = 0; - coll_op->variable_fn_params.rbuf_offset = 0; - - /* Fill in the function arguments */ - coll_op->variable_fn_params.sequence_num = - OPAL_THREAD_ADD32(&(ml_module->collective_sequence_num), 1); - coll_op->sequential_routine.current_active_bcol_fn = 0; - coll_op->variable_fn_params.dtype = dtype; - coll_op->variable_fn_params.op = op; - coll_op->variable_fn_params.root = 0; - coll_op->sequential_routine.seq_task_setup = mca_coll_ml_allreduce_task_setup; /* invoked after each level in sequential - * progress call - */ - MCA_COLL_ML_SET_ORDER_INFO(coll_op, n_fragments); - - ret = mca_coll_ml_launch_sequential_collective (coll_op); - if (ret != OMPI_SUCCESS) { - ML_VERBOSE(10, ("Failed to launch")); - return ret; - } - - *req = &coll_op->full_message.super; - - return OMPI_SUCCESS; -} - -int mca_coll_ml_allreduce(const void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, struct ompi_op_t *op, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t*)module; - ompi_request_t *req; - int ret; - - if (OPAL_UNLIKELY(!ompi_op_is_commute(op))) { - /* coll/ml does not handle non-communative operations at this time. fallback - * on another collective module */ - return ml_module->fallback.coll_allreduce (sbuf, rbuf, count, dtype, op, comm, - ml_module->fallback.coll_allreduce_module); - } - - ret = parallel_allreduce_start(sbuf, rbuf, count, dtype, op, comm, - (mca_coll_ml_module_t *) module, &req, - ML_SMALL_DATA_ALLREDUCE, - ML_LARGE_DATA_ALLREDUCE); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_ERROR(("Failed to launch")); - return ret; - } - - ompi_request_wait_completion(req); - ompi_request_free(&req); - - ML_VERBOSE(10, ("Blocking NB allreduce is done")); - - return OMPI_SUCCESS; -} - -int mca_coll_ml_allreduce_nb(const void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, struct ompi_op_t *op, - struct ompi_communicator_t *comm, - ompi_request_t **req, - mca_coll_base_module_t *module) -{ - mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t*)module; - int ret; - - if (OPAL_UNLIKELY(!ompi_op_is_commute(op))) { - /* coll/ml does not handle non-communative operations at this time. 
fallback - * on another collective module */ - return ml_module->fallback.coll_iallreduce (sbuf, rbuf, count, dtype, op, comm, req, - ml_module->fallback.coll_iallreduce_module); - } - - ret = parallel_allreduce_start(sbuf, rbuf, count, dtype, op, comm, - (mca_coll_ml_module_t *) module, req, - ML_SMALL_DATA_ALLREDUCE, - ML_LARGE_DATA_ALLREDUCE); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_ERROR(("Failed to launch")); - return ret; - } - - ML_VERBOSE(10, ("Blocking NB allreduce is done")); - - return OMPI_SUCCESS; -} - -int mca_coll_ml_allreduce_dispatch(const void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, struct ompi_op_t *op, - struct ompi_communicator_t *comm, mca_coll_base_module_t *module) -{ - int rc; - bool use_extra_topo; - ompi_request_t *req; - - mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *) module; - - use_extra_topo = (count > 1) ? - !ml_module->allreduce_matrix[op->op_type][dtype->id][BCOL_MULTI_ELEM_TYPE] : - !ml_module->allreduce_matrix[op->op_type][dtype->id][BCOL_SINGLE_ELEM_TYPE]; - - if (use_extra_topo) { - rc = parallel_allreduce_start(sbuf, rbuf, count, dtype, - op, comm, ml_module, &req, - ML_SMALL_DATA_EXTRA_TOPO_ALLREDUCE, - ML_LARGE_DATA_EXTRA_TOPO_ALLREDUCE); - } else { - rc = parallel_allreduce_start(sbuf, rbuf, count, dtype, - op, comm, ml_module, &req, - ML_SMALL_DATA_ALLREDUCE, - ML_LARGE_DATA_ALLREDUCE); - } - - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - ML_ERROR(("Failed to launch")); - return rc; - } - - ompi_request_wait_completion(req); - ompi_request_free(&req); - - return OMPI_SUCCESS; -} - -int mca_coll_ml_allreduce_dispatch_nb(const void *sbuf, void *rbuf, int count, - ompi_datatype_t *dtype, ompi_op_t *op, - ompi_communicator_t *comm, - ompi_request_t **req, - mca_coll_base_module_t *module) -{ - int rc; - bool use_extra_topo; - - mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *) module; - - use_extra_topo = (count > 1) ? - !ml_module->allreduce_matrix[op->op_type][dtype->id][BCOL_MULTI_ELEM_TYPE] : - !ml_module->allreduce_matrix[op->op_type][dtype->id][BCOL_SINGLE_ELEM_TYPE]; - - if (use_extra_topo) { - rc = parallel_allreduce_start(sbuf, rbuf, count, dtype, - op, comm, ml_module, req, - ML_SMALL_DATA_EXTRA_TOPO_ALLREDUCE, - ML_LARGE_DATA_EXTRA_TOPO_ALLREDUCE); - } else { - rc = parallel_allreduce_start(sbuf, rbuf, count, dtype, - op, comm, ml_module, req, - ML_SMALL_DATA_ALLREDUCE, - ML_LARGE_DATA_ALLREDUCE); - } - - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - ML_ERROR(("Failed to launch")); - return rc; - } - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/coll/ml/coll_ml_barrier.c b/ompi/mca/coll/ml/coll_ml_barrier.c deleted file mode 100644 index 6748d30054..0000000000 --- a/ompi/mca/coll/ml/coll_ml_barrier.c +++ /dev/null @@ -1,146 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file */ - -#include "ompi_config.h" - -#include "ompi/constants.h" -#include "opal/threads/mutex.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/coll/coll.h" -#include "opal/sys/atomic.h" -#include "ompi/mca/coll/ml/coll_ml.h" -#include "ompi/mca/coll/ml/coll_ml_inlines.h" - -static void mca_coll_ml_barrier_task_setup( - mca_coll_ml_task_status_t *task_status, - int index, mca_coll_ml_compound_functions_t *func) -{ - task_status->rt_num_dependencies = func->num_dependencies; - task_status->rt_num_dependent_tasks = func->num_dependent_tasks; - task_status->rt_dependent_task_indices = func->dependent_task_indices; -} - -static int mca_coll_ml_barrier_launch(mca_coll_ml_module_t *ml_module, - ompi_request_t **req) -{ - opal_free_list_item_t *item; - mca_coll_ml_collective_operation_progress_t *coll_op; - mca_bcol_base_payload_buffer_desc_t *src_buffer_desc = NULL; - - /* allocate an ml buffer for signaling purposes */ - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - - while (NULL == src_buffer_desc) { - opal_progress(); - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - } - - - /* Blocking call on fragment allocation (Maybe we want to make it non blocking ?) */ - item = opal_free_list_wait (&(ml_module->coll_ml_collective_descriptors)); - - coll_op = (mca_coll_ml_collective_operation_progress_t *) item; - assert(NULL != coll_op); - - ML_VERBOSE(10, ("Get coll request %p", coll_op)); - - MCA_COLL_ML_OP_BASIC_SETUP(coll_op, 0, 0, NULL, NULL, ml_module->coll_ml_barrier_function); - - coll_op->fragment_data.buffer_desc = src_buffer_desc; - coll_op->dag_description.num_tasks_completed = 0; - - coll_op->variable_fn_params.buffer_index = src_buffer_desc->buffer_index; - - coll_op->variable_fn_params.sequence_num = - OPAL_THREAD_ADD32(&(ml_module->collective_sequence_num), 1); - - /* Pointer to a coll finalize function */ - coll_op->process_fn = NULL; - - (*req) = &coll_op->full_message.super; - - OMPI_REQUEST_INIT((*req), false); - - (*req)->req_status._cancelled = 0; - (*req)->req_state = OMPI_REQUEST_ACTIVE; - (*req)->req_status.MPI_ERROR = OMPI_SUCCESS; - - /* Set order info if there is a bcol needs ordering */ - MCA_COLL_ML_SET_ORDER_INFO(coll_op, 1); - - return mca_coll_ml_generic_collectives_launcher(coll_op, mca_coll_ml_barrier_task_setup); -} - -/** - * Hierarchical blocking barrier - */ -int mca_coll_ml_barrier_intra(struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - int rc; - ompi_request_t *req; - - mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *) module; - -#if OPAL_ENABLE_DEBUG - static int barriers_count = 0; -#endif - - ML_VERBOSE(10, ("Barrier num %d start.", ++barriers_count)); - - rc = mca_coll_ml_barrier_launch(ml_module, &req); - if (OPAL_UNLIKELY(rc != OMPI_SUCCESS)) { - ML_ERROR(("Failed to launch a barrier.")); - return rc; - } - - /* Blocking barrier */ - ompi_request_wait_completion(req); - ompi_request_free(&req); - - ML_VERBOSE(10, ("Barrier num %d was done.", barriers_count)); - - return OMPI_SUCCESS; -} - -/** - * Hierarchical non-blocking barrier - */ -int mca_coll_ml_ibarrier_intra(struct ompi_communicator_t *comm, - ompi_request_t **req, - mca_coll_base_module_t *module) -{ - int rc; - mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *) module; - -#if OPAL_ENABLE_DEBUG - static int barriers_count = 0; -#endif - - ML_VERBOSE(10, ("IBarrier num %d start.", ++barriers_count)); - 
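/*
 * The blocking buffer-allocation idiom used by mca_coll_ml_barrier_launch()
 * above, and wrapped by the ML_BUFFER_ALLOC_WAIT macro in coll_ml_bcast.c:
 * spin on opal_progress() until the ML allocator can hand back a payload
 * buffer (progress lets an in-flight memsync recycle a bank). A hedged
 * sketch of the idiom as a helper, assuming the coll_ml headers are in
 * scope; coll/ml itself repeats the loop inline, and no function with this
 * name exists in the tree.
 */
static inline mca_bcol_base_payload_buffer_desc_t *
sketch_alloc_buffer_blocking(mca_coll_ml_module_t *ml_module)
{
    mca_bcol_base_payload_buffer_desc_t *desc = mca_coll_ml_alloc_buffer(ml_module);

    while (NULL == desc) {
        opal_progress();   /* drive completions so a bank can be recycled */
        desc = mca_coll_ml_alloc_buffer(ml_module);
    }

    return desc;
}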
- rc = mca_coll_ml_barrier_launch(ml_module, req); - if (OPAL_UNLIKELY(rc != OMPI_SUCCESS)) { - ML_ERROR(("Failed to launch a barrier.")); - return rc; - } - - ML_VERBOSE(10, ("IBarrier num %d was done.", barriers_count)); - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/coll/ml/coll_ml_bcast.c b/ompi/mca/coll/ml/coll_ml_bcast.c deleted file mode 100644 index 891838f944..0000000000 --- a/ompi/mca/coll/ml/coll_ml_bcast.c +++ /dev/null @@ -1,849 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file */ - -#include "ompi_config.h" - -#include -#include - -#include "opal/threads/mutex.h" -#include "opal/sys/atomic.h" - -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/coll/coll.h" -#include "ompi/mca/bcol/bcol.h" - -#include "coll_ml.h" -#include "coll_ml_inlines.h" -#include "coll_ml_colls.h" -#include "coll_ml_allocation.h" - -#define ML_BUFFER_ALLOC_WAIT(ml, buffer) \ -do { \ - buffer = mca_coll_ml_alloc_buffer(ml); \ - while (NULL == buffer) { \ - opal_progress(); \ - buffer = mca_coll_ml_alloc_buffer(ml); \ - } \ -} while (0) - -#define COLL_ML_SETUP_ORDERING_INFO(op, last, prev) \ -do { \ - /* Don't change order of commands !!!! */ \ - (op)->prev_frag = prev; \ - (op)->fragment_data.message_descriptor->last_started_frag = last; \ - /* op->next_to_process_frag = NULL; */ \ -} while (0) - -#define ALLOCATE_AND_PACK_CONTIG_BCAST_FRAG(ml_module, op, coll_index, root, \ - total_len, frag_len, buf, ml_buff_desc) \ -do { \ - op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, \ - ml_module->coll_ml_bcast_functions[coll_index], \ - buf, buf, \ - total_len, \ - 0 /* offset for first pack */); \ - if (OPAL_LIKELY(frag_len > 0)) { \ - if (ompi_comm_rank(ml_module->comm) == root) { \ - /* single frag, pack the data */ \ - memcpy((void *)(uintptr_t)(ml_buff_desc)->data_addr, \ - buf, frag_len); \ - /* No unpack for root */ \ - op->process_fn = NULL; \ - } else { \ - op->process_fn = mca_coll_ml_bcast_small_unpack_data; \ - } \ - } \ - op->full_message.n_bytes_scheduled = frag_len; \ -} while (0) - -#define SMALL_BCAST 0 -#define LARGE_BCAST (SMALL_BCAST + 1) - -/* bcast data unpack */ -static int mca_coll_ml_bcast_converter_unpack_data(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - struct iovec iov; - uint32_t iov_count = 1; - size_t max_data = 0; - - mca_coll_ml_collective_operation_progress_t *next_op; - mca_coll_ml_module_t *ml_module = - (mca_coll_ml_module_t *) coll_op->coll_module; - - size_t max_index = - ml_module->payload_block->num_banks * ml_module->payload_block->num_buffers_per_bank; - - bool is_first = true; - int ret; - - /* Check if the fragment delivered in order */ - if (coll_op->fragment_data.buffer_desc->buffer_index != - coll_op->fragment_data.message_descriptor->next_expected_index) { - mca_coll_ml_collective_operation_progress_t *prev_coll_op = coll_op->prev_frag; - assert(NULL == prev_coll_op->next_to_process_frag); - /* make sure that previous process will have pointer to the out - of order process */ - prev_coll_op->next_to_process_frag = coll_op; - assert(!(coll_op->pending & REQ_OUT_OF_ORDER)); - coll_op->pending |= REQ_OUT_OF_ORDER; - /* we will unpack it later */ - 
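/*
 * A simplified model of the in-order delivery chain used by
 * mca_coll_ml_bcast_converter_unpack_data() below: a fragment that arrives
 * with an unexpected buffer index is parked on its predecessor's
 * next_to_process_frag pointer, and every in-order unpack then drains the
 * chain of parked successors. The types and names below are sketch
 * stand-ins for the coll/ml descriptors, not the real structures.
 */
#include <stddef.h>

struct sketch_frag {
    unsigned buffer_index;
    struct sketch_frag *next_to_process;   /* parked out-of-order successor */
};

/* Unpacks 'frag' and any parked successors that are now in order;
 * returns the number of fragments delivered. */
static unsigned sketch_deliver_in_order(struct sketch_frag *frag,
                                        unsigned *next_expected,
                                        unsigned max_index)
{
    unsigned delivered = 0;

    if (frag->buffer_index != *next_expected) {
        return 0;   /* out of order: caller parks it on its predecessor */
    }

    do {
        struct sketch_frag *next = frag->next_to_process;
        frag->next_to_process = NULL;

        /* ... unpack this fragment's payload here ... */
        delivered++;

        /* advance the expected buffer index, wrapping at max_index */
        if (++(*next_expected) >= max_index) {
            *next_expected = 0;
        }

        frag = next;
    } while (NULL != frag);

    return delivered;
}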
ML_VERBOSE(10, ("Get %d expecting %d previous %d", - coll_op->fragment_data.buffer_desc->buffer_index, - coll_op->fragment_data.message_descriptor->next_expected_index, - prev_coll_op->fragment_data.buffer_desc->buffer_index)); - return ORTE_ERR_NO_MATCH_YET; - } - - do { - iov.iov_len = coll_op->fragment_data.fragment_size; - iov.iov_base = (void *)((uintptr_t) coll_op->fragment_data.buffer_desc->data_addr); - - ML_VERBOSE(10, ("Data unpack with convertern index %d", - coll_op->fragment_data.buffer_desc->buffer_index)); - - opal_convertor_unpack(&coll_op->fragment_data.message_descriptor->recv_convertor, - &iov, &iov_count, &max_data); - - /* update next index */ - ++coll_op->fragment_data.message_descriptor->next_expected_index; - if (coll_op->fragment_data.message_descriptor->next_expected_index >= max_index) { - coll_op->fragment_data.message_descriptor->next_expected_index = 0; - } - - /* Return to queue if the packet is done, - the exeption is first packet, we release it later. - */ - next_op = coll_op->next_to_process_frag; - coll_op->next_to_process_frag = NULL; - if ((!is_first) && - (0 != coll_op->fragment_data.offset_into_user_buffer)) { - assert(coll_op->pending & REQ_OUT_OF_ORDER); - coll_op->pending ^= REQ_OUT_OF_ORDER; - /* Pasha: On one hand - I'm not sure that conceptually it is right place to call buffer recycling. Potentially, - coll_ml_fragment_completion_processing() sounds like right place for out of order unpack/sync handling. - * On the other hand - non contiguous data is not supper common and we would like to minimize effect on critical pass - * for non contiguous data types. */ - ret = mca_coll_ml_buffer_recycling(coll_op); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - return OMPI_ERROR; - } - - CHECK_AND_RECYCLE(coll_op); - } - - coll_op = next_op; - is_first = false; - } while (NULL != coll_op); - - return OMPI_SUCCESS; -} - -static int mca_coll_ml_bcast_small_unpack_data(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - void * dest = (void *)((uintptr_t) coll_op->full_message.dest_user_addr + - (uintptr_t) coll_op->full_message.n_bytes_delivered); - void * src = (void *)((uintptr_t) coll_op->fragment_data.buffer_desc->data_addr); - - memcpy(dest, src, coll_op->fragment_data.fragment_size); - return OMPI_SUCCESS; -} - -static int mca_coll_ml_bcast_large_unpack_data(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - void * dest = (void *)((uintptr_t) coll_op->fragment_data.message_descriptor->dest_user_addr + - (uintptr_t) coll_op->fragment_data.offset_into_user_buffer); - void * src = (void *)((uintptr_t) coll_op->fragment_data.buffer_desc->data_addr); - - memcpy(dest, src, coll_op->fragment_data.fragment_size); - return OMPI_SUCCESS; -} - -static int mca_coll_ml_bcast_frag_converter_progress(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - /* local variables */ - int ret, frag_len; - size_t max_data = 0; - - mca_bcol_base_payload_buffer_desc_t *src_buffer_desc = NULL; - mca_coll_ml_collective_operation_progress_t *new_op = NULL; - mca_coll_ml_task_setup_fn_t task_setup = NULL; - mca_coll_ml_module_t *ml_module = OP_ML_MODULE(coll_op); - - /* Keep the pipeline filled with fragments */ - while (coll_op->fragment_data.message_descriptor->n_active < - mca_coll_ml_component.pipeline_depth) { - /* If an active fragment happens to have completed the collective during - * a hop into the progress engine, then don't launch a new fragment, - * instead break and return. 
- */ - if (coll_op->fragment_data.message_descriptor->n_bytes_scheduled - == coll_op->fragment_data.message_descriptor->n_bytes_total) { - break; - } - - /* Get an ml buffer */ - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - if (OPAL_UNLIKELY(NULL == src_buffer_desc)) { - /* If there exist outstanding fragments, then break out - * and let an active fragment deal with this later, - * there are no buffers available. - */ - if (0 < coll_op->fragment_data.message_descriptor->n_active) { - return OMPI_SUCCESS; - } - - /* It is useless to call progress from here, since - * ml progress can't be executed as result ml memsync - * call will not be completed and no memory will be - * recycled. So we put the element on the list, and we will - * progress it later when memsync will recycle some memory*/ - - /* The fragment is already on list and - * the we still have no ml resources - * Return busy */ - if (!(coll_op->pending & REQ_OUT_OF_MEMORY)) { - coll_op->pending |= REQ_OUT_OF_MEMORY; - opal_list_append(&ml_module->waiting_for_memory_list, - (opal_list_item_t *)coll_op); - } - - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - /* Get a new collective descriptor and initialize it */ - new_op = mca_coll_ml_duplicate_op_prog_single_frag_dag - (ml_module, coll_op); - /* We need this address for pointer arithmetic in memcpy */ - frag_len = ML_GET_FRAG_SIZE(coll_op, BCOL_BCAST); - /* Decide based on global flag, not variable one */ - if (coll_op->fragment_data.message_descriptor->root) { - struct iovec iov; - uint32_t iov_count = 1; - - /* OBJ_RETAIN(new_op->variable_fn_params.dtype); */ - iov.iov_base = (IOVBASE_TYPE*) src_buffer_desc->data_addr; - iov.iov_len = ml_module->small_message_thresholds[BCOL_BCAST]; - assert(0 != iov.iov_len); - - max_data = ml_module->small_message_thresholds[BCOL_BCAST]; - opal_convertor_pack(&new_op->fragment_data.message_descriptor->send_convertor, - &iov, &iov_count, &max_data); - - new_op->process_fn = NULL; - new_op->variable_fn_params.root_flag = true; - new_op->variable_fn_params.root_route = NULL; - - task_setup = OP_ML_MODULE(new_op)-> - coll_ml_bcast_functions[new_op->fragment_data.current_coll_op]-> - task_setup_fn[COLL_ML_ROOT_TASK_FN]; - } else { - new_op->process_fn = mca_coll_ml_bcast_converter_unpack_data; - new_op->variable_fn_params.root_flag = false; - new_op->variable_fn_params.root_route = coll_op->variable_fn_params.root_route; - - task_setup = OP_ML_MODULE(new_op)-> - coll_ml_bcast_functions[new_op->fragment_data.current_coll_op]-> - task_setup_fn[COLL_ML_GENERAL_TASK_FN]; - - max_data = ml_module->small_message_thresholds[BCOL_BCAST]; - mca_coll_ml_convertor_get_send_frag_size( - ml_module, &max_data, - new_op->fragment_data.message_descriptor); - } - - new_op->fragment_data.message_descriptor->n_bytes_scheduled += max_data; - new_op->fragment_data.fragment_size = max_data; - new_op->fragment_data.buffer_desc = src_buffer_desc; - - /* Setup fragment specific data */ - ++(new_op->fragment_data.message_descriptor->n_active); - - COLL_ML_SETUP_ORDERING_INFO(new_op, new_op, - new_op->fragment_data.message_descriptor->last_started_frag); - ML_VERBOSE(10, ("Start more, My index %d my prev %d", - new_op->fragment_data.buffer_desc->buffer_index, - new_op->prev_frag->fragment_data.buffer_desc->buffer_index)); - - ML_SET_VARIABLE_PARAMS_BCAST( - new_op, - OP_ML_MODULE(new_op), - frag_len, - MPI_BYTE, - src_buffer_desc, - 0, - 0, - frag_len, - src_buffer_desc->data_addr); - - /* TBD: remove buffer_size */ - new_op->variable_fn_params.buffer_size = 
coll_op->variable_fn_params.buffer_size; - new_op->variable_fn_params.hier_factor = coll_op->variable_fn_params.hier_factor; - - /* Set order info for new frag if there is a bcol needs ordering */ - MCA_COLL_ML_SET_NEW_FRAG_ORDER_INFO(new_op); - - /* Launch this collective !! */ - ret = mca_coll_ml_generic_collectives_append_to_queue(new_op, task_setup); - - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_ERROR(("Failed to launch")); - return ret; - } - } - - return OMPI_SUCCESS; -} - -static int mca_coll_ml_bcast_frag_progress(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - /* local variables */ - int ret; - int frag_len, current_coll_op = coll_op->fragment_data.current_coll_op; - size_t dt_size; - void *buf; - - mca_bcol_base_payload_buffer_desc_t *src_buffer_desc = NULL; - mca_coll_ml_collective_operation_progress_t *new_op = NULL; - mca_coll_ml_task_setup_fn_t task_setup = NULL; - - ompi_datatype_type_size(coll_op->variable_fn_params.dtype, &dt_size); - - /* Keep the pipeline filled with fragments */ - while (coll_op->fragment_data.message_descriptor->n_active < - coll_op->fragment_data.message_descriptor->pipeline_depth) { - /* If an active fragment happens to have completed the collective during - * a hop into the progress engine, then don't launch a new fragment, - * instead break and return. - */ - if (coll_op->fragment_data.message_descriptor->n_bytes_scheduled - == coll_op->fragment_data.message_descriptor->n_bytes_total) { - break; - } - - /* Get an ml buffer */ - src_buffer_desc = mca_coll_ml_alloc_buffer(OP_ML_MODULE(coll_op)); - if (NULL == src_buffer_desc) { - /* If there exist outstanding fragments, then break out - * and let an active fragment deal with this later, - * there are no buffers available. - */ - if (0 < coll_op->fragment_data.message_descriptor->n_active) { - return OMPI_SUCCESS; - } - - /* It is useless to call progress from here, since - * ml progress can't be executed as result ml memsync - * call will not be completed and no memory will be - * recycled. 
So we put the element on the list, and we will - * progress it later when memsync will recycle some memory*/ - - /* The fragment is already on list and - * the we still have no ml resources - * Return busy */ - if (!(coll_op->pending & REQ_OUT_OF_MEMORY)) { - ML_VERBOSE(10,("Out of resources %p adding to pending queue", coll_op)); - coll_op->pending |= REQ_OUT_OF_MEMORY; - opal_list_append(&((OP_ML_MODULE(coll_op))->waiting_for_memory_list), - (opal_list_item_t *) coll_op); - } else { - ML_VERBOSE(10,("Out of resources %p", coll_op)); - } - - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - /* Get a new collective descriptor and initialize it */ - new_op = mca_coll_ml_duplicate_op_prog_single_frag_dag - (OP_ML_MODULE(coll_op), coll_op); - /* We need this address for pointer arithmetic in memcpy */ - buf = coll_op->fragment_data.message_descriptor->dest_user_addr; - frag_len = ML_GET_FRAG_SIZE(coll_op, BCOL_BCAST); - - /* Decide based on global flag, not variable one */ - if (coll_op->fragment_data.message_descriptor->root) { - memcpy((void *)(uintptr_t)src_buffer_desc->data_addr, - (void *) ((uintptr_t) buf + (uintptr_t) coll_op-> - fragment_data.message_descriptor->n_bytes_scheduled) , frag_len); - - /* No unpack for root */ - new_op->process_fn = NULL; - new_op->variable_fn_params.root_flag = true; - new_op->variable_fn_params.root_route = NULL; - task_setup = OP_ML_MODULE(new_op)->coll_ml_bcast_functions[current_coll_op]-> - task_setup_fn[COLL_ML_ROOT_TASK_FN]; - - } else { - new_op->process_fn = mca_coll_ml_bcast_large_unpack_data; - new_op->variable_fn_params.root_flag = false; - new_op->variable_fn_params.root_route = coll_op->variable_fn_params.root_route; - task_setup = OP_ML_MODULE(new_op)->coll_ml_bcast_functions[current_coll_op]-> - task_setup_fn[COLL_ML_GENERAL_TASK_FN]; - } - - /* Setup fragment specific data */ - new_op->fragment_data.message_descriptor->n_bytes_scheduled += frag_len; - new_op->fragment_data.buffer_desc = src_buffer_desc; - new_op->fragment_data.fragment_size = frag_len; - new_op->fragment_data.message_descriptor->n_active++; - - ML_SET_VARIABLE_PARAMS_BCAST( - new_op, - OP_ML_MODULE(new_op), - frag_len, - MPI_BYTE, - src_buffer_desc, - 0, - 0, - frag_len, - src_buffer_desc->data_addr); - - /* Fill in bcast specific arguments */ - /* TBD: remove buffer_size */ - new_op->variable_fn_params.buffer_size = coll_op->variable_fn_params.buffer_size; - new_op->variable_fn_params.hier_factor = coll_op->variable_fn_params.hier_factor; - - /* Set order info for new frag if there is a bcol needs ordering */ - MCA_COLL_ML_SET_NEW_FRAG_ORDER_INFO(new_op); - - ML_VERBOSE(10, ("FFFF Contig + fragmentation [0-sk, 1-lk, 3-su, 4-lu] %d %d %d", - new_op->variable_fn_params.buffer_size , - new_op->fragment_data.fragment_size, - new_op->fragment_data.message_descriptor->n_bytes_scheduled)); - - /* Launch this collective !! 
*/ - ret = mca_coll_ml_generic_collectives_append_to_queue(new_op, task_setup); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("Failed to launch")); - return ret; - } - } - - return OMPI_SUCCESS; -} - -#define BCAST_FRAGMENTATION_IS_ENABLED(module) \ - (module->bcast_fn_index_table[LARGE_BCAST] < ML_BCAST_LARGE_DATA_KNOWN) - -static inline __opal_attribute_always_inline__ - int parallel_bcast_start(void *buf, int count, struct ompi_datatype_t *dtype, - int root, mca_coll_base_module_t *module, ompi_request_t **req) -{ - size_t pack_len = 0; - size_t dt_size = 0; - bool contig = false; - int bcast_index, n_fragments = 1; - - mca_coll_ml_collective_operation_progress_t * coll_op = NULL; - mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *) module; - mca_bcol_base_payload_buffer_desc_t *src_buffer_desc = NULL; - mca_coll_ml_task_setup_fn_t task_setup; - OPAL_PTRDIFF_TYPE lb, extent; - - /* actual starting place of the user buffer (lb added) */ - void *actual_buf; - - ML_VERBOSE(10, ("Starting bcast, mca_coll_ml_bcast_uknown_root buf: %p", buf)); - - ompi_datatype_type_size(dtype, &dt_size); - pack_len = count * dt_size; - - /* Setup data buffer */ - ML_BUFFER_ALLOC_WAIT(ml_module, src_buffer_desc); - /* Get information about memory layout */ - contig = opal_datatype_is_contiguous_memory_layout((opal_datatype_t *)dtype, count); - - ompi_datatype_get_extent (dtype, &lb, &extent); - - actual_buf = (void *) ((uintptr_t) buf + lb); - - /* Allocate collective schedule and pack message */ - if (contig) { - if (pack_len <= (size_t) ml_module->small_message_thresholds[BCOL_BCAST]) { - assert(pack_len <= ml_module->payload_block->size_buffer); - bcast_index = ml_module->bcast_fn_index_table[SMALL_BCAST]; - - ML_VERBOSE(10, ("Contig + small message %d [0-sk, 1-lk, 3-su, 4-lu]", bcast_index)); - ALLOCATE_AND_PACK_CONTIG_BCAST_FRAG(ml_module, coll_op, bcast_index, root, pack_len, - pack_len, actual_buf, src_buffer_desc); - - ML_SET_VARIABLE_PARAMS_BCAST(coll_op, ml_module, count, dtype, - src_buffer_desc, 0, 0, ml_module->payload_block->size_buffer, - (src_buffer_desc->data_addr)); - } else if (BCAST_FRAGMENTATION_IS_ENABLED(ml_module)) { - /* We moved the fragmentation decision from communication creation time to - runtime, since for large messages the if latency is not so critical */ - size_t n_dts_per_frag; - int frag_len, pipeline_depth = mca_coll_ml_component.pipeline_depth; - bcast_index = ml_module->bcast_fn_index_table[LARGE_BCAST]; - - ML_VERBOSE(10, ("Contig + fragmentation %d [0-sk, 1-lk, 3-su, 4-lu]", bcast_index)); - - /* Calculate the number of fragments required for this message */ - frag_len = (pack_len < (size_t) ml_module->small_message_thresholds[BCOL_BCAST] ? - pack_len : (size_t) ml_module->small_message_thresholds[BCOL_BCAST]); - - n_dts_per_frag = frag_len/dt_size; - n_fragments = (pack_len + dt_size*n_dts_per_frag - 1)/(dt_size*n_dts_per_frag); - pipeline_depth = (n_fragments < pipeline_depth ? 
n_fragments : pipeline_depth); - - ALLOCATE_AND_PACK_CONTIG_BCAST_FRAG(ml_module, coll_op, bcast_index, root, pack_len, - frag_len, actual_buf, src_buffer_desc); - ML_SET_VARIABLE_PARAMS_BCAST(coll_op, ml_module, (frag_len/dt_size), dtype, - src_buffer_desc, 0, 0, frag_len, (src_buffer_desc->data_addr)); - - coll_op->full_message.fragment_launcher = mca_coll_ml_bcast_frag_progress; - coll_op->full_message.pipeline_depth = pipeline_depth; - /* Initialize fragment specific information */ - coll_op->fragment_data.current_coll_op = bcast_index; - /* coll_op->fragment_data.message_descriptor->n_bytes_scheduled += frag_len; */ - coll_op->fragment_data.fragment_size = frag_len; - coll_op->fragment_data.message_descriptor->n_active++; - /* should be removed */ - coll_op->variable_fn_params.buffer_size = frag_len; - - ML_VERBOSE(10, ("Contig + fragmentation [0-sk, 1-lk, 3-su, 4-lu] %d %d", - coll_op->variable_fn_params.buffer_size, - coll_op->fragment_data.fragment_size)); - } else { - bcast_index = ml_module->bcast_fn_index_table[LARGE_BCAST]; - ML_VERBOSE(10, ("Contig + zero copy %d [0-sk, 1-lk, 3-su, 4-lu]", bcast_index)); - - coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_bcast_functions[bcast_index], - actual_buf, actual_buf, pack_len, - 0 /* offset for first pack */); - /* For large messages (bcast) this points to userbuf */ - /* Pasha: temporary work around for basesmuma, userbuf should - be removed */ - coll_op->variable_fn_params.userbuf = buf; - coll_op->process_fn = NULL; - coll_op->full_message.n_bytes_scheduled = pack_len; - - ML_SET_VARIABLE_PARAMS_BCAST(coll_op, ml_module, count, dtype, - src_buffer_desc, 0, 0, - ml_module->payload_block->size_buffer, buf); - } - } else { - /* Non contiguous data type */ - bcast_index = ml_module->bcast_fn_index_table[SMALL_BCAST]; - ML_VERBOSE(10, ("NON Contig + fragmentation %d [0-sk, 1-lk, 3-su, 4-lu]", bcast_index)); - - coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_bcast_functions[bcast_index], - actual_buf, actual_buf, pack_len, - 0 /* offset for first pack */); - if (OPAL_LIKELY(pack_len > 0)) { - size_t max_data = 0; - - if (ompi_comm_rank(ml_module->comm) == root) { - struct iovec iov; - uint32_t iov_count = 1; - - opal_convertor_copy_and_prepare_for_send( - ompi_mpi_local_convertor, - &dtype->super, count, buf, 0, - &coll_op->full_message.send_convertor); - - opal_convertor_get_packed_size(&coll_op->full_message.send_convertor, - &coll_op->full_message.send_converter_bytes_packed); - - coll_op->full_message.n_bytes_total = - coll_op->full_message.send_converter_bytes_packed; - - iov.iov_base = (IOVBASE_TYPE*) src_buffer_desc->data_addr; - iov.iov_len = ml_module->small_message_thresholds[BCOL_BCAST]; - max_data = ml_module->small_message_thresholds[BCOL_BCAST]; - opal_convertor_pack(&coll_op->full_message.send_convertor, - &iov, &iov_count, &max_data); - coll_op->process_fn = NULL; - coll_op->full_message.n_bytes_scheduled = max_data; - - /* We need prepare the data for future pipe line comunication */ - coll_op->full_message.fragment_launcher = mca_coll_ml_bcast_frag_converter_progress; - coll_op->full_message.pipeline_depth = mca_coll_ml_component.pipeline_depth; - coll_op->full_message.root = true; - - } else { - opal_convertor_copy_and_prepare_for_send( - ompi_mpi_local_convertor, - &dtype->super, count, NULL, 0, - &coll_op->full_message.dummy_convertor); - - /* In non-root case we use it for #bytes remaining to receive */ - 
opal_convertor_get_packed_size(&coll_op->full_message.dummy_convertor, - &coll_op->full_message.send_converter_bytes_packed); - - opal_convertor_copy_and_prepare_for_recv( - ompi_mpi_local_convertor, - &dtype->super, count, buf, 0, - &coll_op->full_message.recv_convertor); - - opal_convertor_get_unpacked_size(&coll_op->full_message.recv_convertor, - &coll_op->full_message.recv_converter_bytes_packed); - - coll_op->full_message.root = false; - coll_op->full_message.n_bytes_total = - coll_op->full_message.recv_converter_bytes_packed; - coll_op->process_fn = mca_coll_ml_bcast_converter_unpack_data; - - coll_op->full_message.fragment_launcher = mca_coll_ml_bcast_frag_converter_progress; - coll_op->full_message.pipeline_depth = mca_coll_ml_component.pipeline_depth; - - max_data = ml_module->small_message_thresholds[BCOL_BCAST]; - coll_op->full_message.dummy_conv_position = 0; - mca_coll_ml_convertor_get_send_frag_size( - ml_module, &max_data, - &coll_op->full_message); - - coll_op->full_message.n_bytes_scheduled = max_data; - } - } - coll_op->fragment_data.current_coll_op = bcast_index; - coll_op->fragment_data.message_descriptor->n_active++; - coll_op->fragment_data.fragment_size = coll_op->full_message.n_bytes_scheduled; - - /* Set initial index */ - coll_op->full_message.next_expected_index = src_buffer_desc->buffer_index; - - /* Prepare linking information for future frags */ - COLL_ML_SETUP_ORDERING_INFO(coll_op, coll_op, NULL); - - /* Since the data is already packed we will use MPI_BYTE and byte count as datatype */ - ML_SET_VARIABLE_PARAMS_BCAST(coll_op, ml_module, coll_op->full_message.n_bytes_scheduled, MPI_BYTE, - src_buffer_desc, 0, 0, ml_module->payload_block->size_buffer,(src_buffer_desc->data_addr)); - - n_fragments = (coll_op->full_message.n_bytes_total + - ml_module->small_message_thresholds[BCOL_BCAST] - 1) / ml_module->small_message_thresholds[BCOL_BCAST]; - } - - coll_op->variable_fn_params.hier_factor = 1; - coll_op->fragment_data.buffer_desc = src_buffer_desc; - - /* Set order info if there is a bcol needs ordering */ - MCA_COLL_ML_SET_ORDER_INFO(coll_op, n_fragments); - - if (ompi_comm_rank(ml_module->comm) == root) { - coll_op->full_message.root = - coll_op->variable_fn_params.root_flag = true; - coll_op->variable_fn_params.root_route = NULL; - task_setup = ml_module->coll_ml_bcast_functions[bcast_index]-> - task_setup_fn[COLL_ML_ROOT_TASK_FN]; - } else { - coll_op->full_message.root = - coll_op->variable_fn_params.root_flag = false; - - coll_op->variable_fn_params.root_route = - (NULL == coll_op->coll_schedule->topo_info->route_vector ? 
- NULL : &coll_op->coll_schedule->topo_info->route_vector[root]); - - task_setup = ml_module->coll_ml_bcast_functions[bcast_index]-> - task_setup_fn[COLL_ML_GENERAL_TASK_FN]; - } - - *req = &coll_op->full_message.super; - return mca_coll_ml_generic_collectives_launcher(coll_op, task_setup); -} - -int mca_coll_ml_parallel_bcast(void *buf, int count, struct ompi_datatype_t *dtype, - int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - int ret; - ompi_request_t *req; - - ret = parallel_bcast_start(buf, count, dtype, root, module, &req); - if (OPAL_UNLIKELY(ret != OMPI_SUCCESS)) { - ML_VERBOSE(10, ("Failed to launch")); - return ret; - } - - /* Blocking bcast */ - ompi_request_wait_completion(req); - ompi_request_free(&req); - - ML_VERBOSE(10, ("Bcast is done mca_coll_ml_bcast_known")); - - return OMPI_SUCCESS; -} - -int mca_coll_ml_parallel_bcast_nb(void *buf, int count, struct ompi_datatype_t *dtype, - int root, struct ompi_communicator_t *comm, - ompi_request_t **req, - mca_coll_base_module_t *module) -{ - int ret; - - ret = parallel_bcast_start(buf, count, dtype, root, module, req); - if (OPAL_UNLIKELY(ret != OMPI_SUCCESS)) { - ML_VERBOSE(10, ("Failed to launch")); - return ret; - } - - ML_VERBOSE(10, ("Bcast is done mca_coll_ml_bcast_known")); - - return OMPI_SUCCESS; -} - -int mca_coll_ml_bcast_sequential_root(void *buf, int count, struct ompi_datatype_t *dtype, - int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - - /* local variables */ - int ret, fn_idx; - size_t pack_len = 0; - size_t dt_size = 0; - - mca_coll_ml_collective_operation_progress_t * coll_op = NULL; - mca_coll_ml_compound_functions_t *fixed_schedule; - mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *) module; - mca_bcol_base_payload_buffer_desc_t *src_buffer_desc = NULL; - mca_bcol_base_coll_fn_desc_t *func; - OPAL_PTRDIFF_TYPE lb, extent; - - /* actual starting place of the user buffer (lb added) */ - void *actual_buf; - - ML_VERBOSE(10, ("Starting static bcast, small messages")); - - assert(NULL != dtype); - /* Calculate size of the data, - * on this stage only contiguous data is supported */ - ompi_datatype_type_size(dtype, &dt_size); - pack_len = count * dt_size; - ompi_datatype_get_extent (dtype, &lb, &extent); - - actual_buf = (void *) ((uintptr_t) buf + lb); - - /* Setup data buffer */ - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - while (NULL == src_buffer_desc) { - opal_progress(); - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - } - - /* Allocate collective schedule and pack message */ - if (pack_len <= (size_t) ml_module->small_message_thresholds[BCOL_BCAST]) { - /* The len of the message can not be larger than ML buffer size */ - assert(pack_len <= ml_module->payload_block->size_buffer); - - coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_bcast_functions[ML_BCAST_SMALL_DATA_SEQUENTIAL], - actual_buf, actual_buf, pack_len, - 0 /* offset for first pack */); - if (ompi_comm_rank(comm) == root) { - /* single frag, pack the data */ - memcpy((void *)(uintptr_t)src_buffer_desc->data_addr, - buf, pack_len); - /* No unpack for root */ - coll_op->process_fn = NULL; - } else { - coll_op->process_fn = mca_coll_ml_bcast_small_unpack_data; - } - - coll_op->variable_fn_params.sbuf = - src_buffer_desc->data_addr; - } else { - ML_VERBOSE(10, ("ML_BCAST_LARGE_DATA_KNOWN case.")); - coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - 
ml_module->coll_ml_bcast_functions[ML_BCAST_LARGE_DATA_SEQUENTIAL], - actual_buf, actual_buf, pack_len, - 0 /* offset for first pack */); - /* For large messages (bcast) this points to userbuf */ - /* Pasha: temporary work around for basesmuma, userbuf should - be removed */ - coll_op->variable_fn_params.userbuf = - coll_op->variable_fn_params.sbuf = actual_buf; - - coll_op->process_fn = NULL; - } - - /* Fill in the function arguments */ - coll_op->variable_fn_params.sequence_num = - OPAL_THREAD_ADD32(&(ml_module->collective_sequence_num), 1); - coll_op->variable_fn_params.count = count; - coll_op->variable_fn_params.dtype = dtype; - - coll_op->variable_fn_params.buffer_index = src_buffer_desc->buffer_index; - coll_op->variable_fn_params.src_desc = src_buffer_desc; - coll_op->variable_fn_params.sbuf_offset = 0; - coll_op->variable_fn_params.rbuf_offset = 0; - - /* pasha - why we duplicate it ? */ - coll_op->fragment_data.buffer_desc = src_buffer_desc; - - /* pack data into payload buffer - NOTE: assume no fragmenation at this stage */ - if (ompi_comm_rank(comm) == root) { - coll_op->variable_fn_params.root_flag = true; - coll_op->variable_fn_params.root_route = - &coll_op->coll_schedule->topo_info->route_vector[root]; - - coll_op->full_message.n_bytes_scheduled = pack_len; - } else { - coll_op->variable_fn_params.root_flag = false; - coll_op->variable_fn_params.root_route = - &coll_op->coll_schedule->topo_info->route_vector[root]; - } - - /* seems like we should fix a schedule here and now */ - fixed_schedule = coll_op->coll_schedule-> - comp_fn_arr[coll_op->variable_fn_params.root_route->level]; - - /* now we set this schedule as the compound function list */ - coll_op->coll_schedule->component_functions = fixed_schedule; - - coll_op->sequential_routine.current_active_bcol_fn = 0; - - while (true) { - /* ready, aim, fire collective(s)!! */ - fn_idx = coll_op->sequential_routine.current_active_bcol_fn; - - func = fixed_schedule[fn_idx].bcol_function; - ret = func->coll_fn(&coll_op->variable_fn_params, - (struct mca_bcol_base_function_t *) &fixed_schedule[fn_idx].constant_group_data); - /* set the coll_fn_started flag to true */ - if (BCOL_FN_COMPLETE == ret) { - /* done with this routine, bump the active counter */ - coll_op->sequential_routine.current_active_bcol_fn++; - coll_op->variable_fn_params.root_flag = true; - /* check for collective completion */ - if (coll_op->sequential_routine.current_active_bcol_fn == - coll_op->coll_schedule->n_fns) { - /* handle fragment completion */ - ret = coll_ml_fragment_completion_processing(coll_op); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - mca_coll_ml_abort_ml("Failed to run coll_ml_fragment_completion_processing"); - } - - /* break out of while loop */ - break; - } - } else { - /* put entire collective opperation onto sequential queue */ - opal_list_append(&mca_coll_ml_component.sequential_collectives, - (opal_list_item_t *) coll_op); - break; - } - } - - /* Blocking bcast */ - ompi_request_wait_completion(&coll_op->full_message.super); - ompi_request_free((ompi_request_t **) &coll_op); - - ML_VERBOSE(10, ("Bcast is done")); - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/coll/ml/coll_ml_colls.h b/ompi/mca/coll/ml/coll_ml_colls.h deleted file mode 100644 index fcefa19b44..0000000000 --- a/ompi/mca/coll/ml/coll_ml_colls.h +++ /dev/null @@ -1,552 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. 
- * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_COLL_ML_COLLS_H -#define MCA_COLL_ML_COLLS_H - -#include "ompi_config.h" -#include "ompi/mca/bcol/bcol.h" - -#define COLL_ML_FN_NAME_LEN 256 - - -/* utility information used to coordinate activities, such as resource - * management between different functions in the hierarchy - */ -struct mca_coll_ml_utility_data_t { - - /* RLG - temp fix !!!! - really need to remove this, but right now - do not want to change the signature of the collective primitives to - use coll_ml_utility_data_t rather than mca_bcol_base_function_t */ - int dummy; - - /* module */ - struct mca_bcol_base_module_t *bcol_module; - - /* */ - int index_in_consecutive_same_bcol_calls; - - /* number of times functions from this bcol are called in order */ - int n_of_this_type_in_a_row; - - /* number of times functions from this module are called - * in the collective operation. */ - int n_of_this_type_in_collective; - int index_of_this_type_in_collective; - -}; -typedef struct mca_coll_ml_utility_data_t mca_coll_ml_utility_data_t; - - -/* forward declaration */ -struct mca_coll_ml_collective_operation_progress_t; -struct mca_coll_ml_task_status_t; - -typedef int (* mca_coll_ml_process_op_fn_t) - (struct mca_coll_ml_collective_operation_progress_t *coll_op); - -typedef int (* mca_coll_ml_task_comp_fn_t) - (struct mca_coll_ml_task_status_t *coll_op); - -typedef int (* mca_coll_ml_fragment_launch_fn_t) - ( struct mca_coll_ml_collective_operation_progress_t *coll_op); - -typedef int (* mca_coll_ml_sequential_task_setup_fn_t) - ( struct mca_coll_ml_collective_operation_progress_t *coll_op); -/* This data structure defines the dependencies for a given - * compound operation. We will use this as a basis for implementing - * collective operations. - */ -struct mca_coll_ml_compound_functions_t { - /* label */ - char fn_name[COLL_ML_FN_NAME_LEN]; - - /* hierarchy level that is used for this bcol */ - int h_level; - - /* the list of functions that make up this task */ - /* coll_bcol_collective_description_t *bcol_function; */ - mca_bcol_base_coll_fn_desc_t *bcol_function; - /* task completion function for this compound function */ - mca_coll_ml_task_comp_fn_t task_comp_fn; - - /* module specific information that is a constant on a per group - * basis - */ - mca_coll_ml_utility_data_t constant_group_data; - - /* number of dependencies to be satified before these function can be - * started */ - int num_dependencies; - - /* - * number of notifications to perform on completion. The assumption - * is that a counter will be incremented. - */ - int num_dependent_tasks; - - /* - * pointers to counters that need be updated. This assumes - * an array of tasks is used to describe the ML level - * collective operation, with these indecies referencing elements - * in this array. 
- */ - int *dependent_task_indices; - -}; - -typedef struct mca_coll_ml_compound_functions_t mca_coll_ml_compound_functions_t; - -/* Forward declaration for operation_description_t */ -struct mca_coll_ml_module_t; - -enum { - COLL_ML_GENERAL_TASK_FN, - COLL_ML_ROOT_TASK_FN, - COLL_ML_MAX_TASK_FN -}; - -enum { - SEQ_TASK_NOT_STARTED, - SEQ_TASK_PENDING, - SEQ_TASK_IN_PROG -}; - -typedef void (*mca_coll_ml_task_setup_fn_t) (struct mca_coll_ml_task_status_t *task_status, int index, struct mca_coll_ml_compound_functions_t *func); - -/* - * Collective operation definition - */ -struct mca_coll_ml_collective_operation_description_t { - - /* - * Type of collective opeartion - there are two types: - * 1) sequential progress through the collectives is sufficient - * 2) general treatment, popping tasks onto execution queus is needed. - */ - int progress_type; - - struct mca_coll_ml_topology_t *topo_info; - - /* - * number of functions in collective operation - */ - int n_fns; - - /* - * list of functions - */ - mca_coll_ml_compound_functions_t *component_functions; - - /* - * array of lists of functions - */ - mca_coll_ml_compound_functions_t **comp_fn_arr; - - /* - * indices into the list - fixes a sequential schedule - */ - int *sch_idx; - - /* - * Task setup functions, so far we have only 3 - root and non-root - */ - mca_coll_ml_task_setup_fn_t task_setup_fn[COLL_ML_MAX_TASK_FN]; - - /* number of functions are called for bcols need ordering */ - int n_fns_need_ordering; -}; -typedef struct mca_coll_ml_collective_operation_description_t - mca_coll_ml_collective_operation_description_t; - -/* Data structure used to track the state of individual bcol - * functions. This is used to track dependencies and completion - * to progress the ML level function correctly. - * - * mca_coll_ml_task_status_t will be associated with an - * mca_coll_ml_collective_operation_progress_t structure for - * the duration of the lifetime of a communicator. - * An array of task statuses will be stored with - * the mca_coll_ml_collective_operation_progress_t data structure, so - * that the taks status elements do not need to be moved back to - * a free list before they are re-used. When the ML level function - * is complete, all mca_coll_ml_task_status_t are available for - * re-use. - */ -struct mca_coll_ml_task_status_t{ - /* need to move this between lists to progress this correctly */ - opal_list_item_t item; - - /* number of dependencies satisfied */ - int n_dep_satisfied; - - /* *************************************************************** - * Pasha: - * I'm adding to the status: num_dependencies, num_dependent_tasks and - * dependent_task_indices. The information originally resided on mca_coll_ml_compound_functions_t. - * For collective operation with static nature it is not problem. - * But for Bcast operation, where run time parameters, like root, actually - * define the dependency. rt prefix mean run-time. - */ - - /* number of dependencies to be satisfied before these function can be - * started */ - int rt_num_dependencies; - - /* - * number of notifications to perform on completion. The assumption - * is that a counter will be incremented. - */ - int rt_num_dependent_tasks; - - /* - * pointers to counters that need be updated. This assumes - * an array of tasks is used to describe the ML level - * collective operation, with these indecies referencing elements - * in this array. 
- */ - int *rt_dependent_task_indices; - /* - * - * ***************************************************************/ - - /* index in collective schedule */ - int my_index_in_coll_schedule; - - /* function pointers */ - mca_bcol_base_coll_fn_desc_t *bcol_fn; - - /* association with a specific collective task - the ML - * mca_coll_ml_collective_operation_progress_t stores the - * specific function parameters */ - struct mca_coll_ml_collective_operation_progress_t *ml_coll_operation; - - mca_coll_ml_task_comp_fn_t task_comp_fn; -}; -typedef struct mca_coll_ml_task_status_t mca_coll_ml_task_status_t; - -typedef enum mca_coll_ml_pending_type_t { - REQ_OUT_OF_ORDER = 1, - REQ_OUT_OF_MEMORY = 1 << 1 -} mca_coll_ml_pending_type_t; - -/* Forward declaration */ -struct mca_bcol_base_payload_buffer_desc_t; -/* Data structure used to track ML level collective operation - * progress. - */ -struct mca_coll_ml_collective_operation_progress_t { - /* need this to put on a list properly */ - /* Full message information */ - struct full_message_t { - /* make this a list item */ - ompi_request_t super; - /* Next expected fragment. - * It used for controling order of converter unpack operation */ - size_t next_expected_index; - /* Pointer to last intilized fragment. - * It used for controling order of converter unpack operation */ - struct mca_coll_ml_collective_operation_progress_t *last_started_frag; - /* destination data address in user memory */ - void *dest_user_addr; - /* source data address in user memory */ - const void *src_user_addr; - /* total message size */ - size_t n_bytes_total; - /* per-process total message size - relevant for operations - * such as gather and scatter, where each rank has it's - * own unique data - */ - size_t n_bytes_per_proc_total; - size_t max_n_bytes_per_proc_total; - /* data processes - from a local perspective */ - size_t n_bytes_delivered; - /* current offset - where to continue with next fragment */ - size_t n_bytes_scheduled; - /* number of fragments needed to process this message */ - size_t n_fragments; - /* number of active frags */ - int n_active; - /* actual pipeline depth */ - int pipeline_depth; - /* am I the real root of the collective ? */ - bool root; - /* collective fragment launcher */ - mca_coll_ml_fragment_launch_fn_t fragment_launcher; - /* is data contingous */ - bool send_data_continguous; - bool recv_data_continguous; - /* data type count */ - int64_t send_count; - int64_t recv_count; - /* extent of the data types */ - size_t send_extent; - size_t recv_extent; - /* send data type */ - struct ompi_datatype_t * send_data_type; - /* needed for non-contigous buffers */ - size_t offset_into_send_buffer; - /* receive data type */ - struct ompi_datatype_t * recv_data_type; - /* needed for non-contigous buffers */ - size_t offset_into_recv_buffer; - /* Convertors for non contigous data */ - opal_convertor_t send_convertor; - opal_convertor_t recv_convertor; - /* Will be used by receiver for #bytes calc in the next frag */ - opal_convertor_t dummy_convertor; - size_t dummy_conv_position; - /* Size of packed data */ - size_t send_converter_bytes_packed; - size_t recv_converter_bytes_packed; - /* In case if ordering is needed: order num for next frag */ - int next_frag_num; - /* The variable is used by non-blocking memory synchronization code - * for caching bank index */ - int bank_index_to_recycle; - /* need a handle for collective progress e.g. 
alltoall*/ - bcol_fragment_descriptor_t frag_info; - } full_message; - - /* collective operation being progressed */ - mca_coll_ml_collective_operation_description_t *coll_schedule; - /* */ - mca_coll_ml_process_op_fn_t process_fn; - - mca_coll_base_module_t *coll_module; - - /* If not null , we have to release next fragment */ - struct mca_coll_ml_collective_operation_progress_t *next_to_process_frag; - /* pointer to previous fragment */ - struct mca_coll_ml_collective_operation_progress_t *prev_frag; - /* This flag marks that the fragment is pending on the waiting - * to be processed prior to recycling - */ - enum mca_coll_ml_pending_type_t pending; - - /* Fragment data */ - struct fragment_data_t { - /* current buffer pointer - offset (in bytes) into the user data */ - size_t offset_into_user_buffer; - size_t offset_into_user_buffer_per_proc; - - /* amount of data (in bytes) in this fragment - amount of data - * actually processed */ - size_t fragment_size; - size_t per_rank_fragment_size; - size_t data_type_count_per_frag; - - /* pointer to full message progress data */ - struct full_message_t *message_descriptor; - - /* ML buffer descriptor attached to this buffer */ - struct mca_bcol_base_payload_buffer_desc_t *buffer_desc; - /* handle for collective progress, e.g. alltoall */ - bcol_fragment_descriptor_t bcol_fragment_desc; - - /* Which collective algorithm */ - int current_coll_op; - } fragment_data; - - /* specific function parameters */ - /* the assumption is that the variable parameters passed into - * the ML level function will persist until the collective operation - * is complete. For a blocking function this is until the collective - * function is exited, and for nonblocking collective functions this - * is until test or wait completes the collective. - */ - int global_root; - bcol_function_args_t variable_fn_params; - - struct{ - /* current active function - for sequential algorithms */ - int current_active_bcol_fn; - - /* current function status - not started, or in progress. - * When the routine has completed, the active bcol index is - * incremented, so no need to keep track of a completed - * status. - */ - int current_bcol_status; - - /* use this call back to setup algorithm specific info - after each level necessary - */ - mca_coll_ml_sequential_task_setup_fn_t seq_task_setup; - - } sequential_routine; - - struct{ - /* - * BCOL function status - individual elements will be posted to - * ml level component queues, as appropriate. - */ - mca_coll_ml_task_status_t *status_array; - - /* number of completed tasks - need this for collective completion. - * Resource completion is tracked by each BCOL module . 
- */ - int num_tasks_completed; - } dag_description; -}; -typedef struct mca_coll_ml_collective_operation_progress_t -mca_coll_ml_collective_operation_progress_t; -OBJ_CLASS_DECLARATION(mca_coll_ml_collective_operation_progress_t); - -#define OP_ML_MODULE(op) ((mca_coll_ml_module_t *)((op)->coll_module)) -#define GET_COMM(op) ((OP_ML_MODULE(op))->comm) -#define IS_COLL_SYNCMEM(op) (ML_MEMSYNC == op->fragment_data.current_coll_op) - -#define CHECK_AND_RECYCLE(op) \ -do { \ - if (0 == (op)->pending) { \ - /* Caching 2 values that we can't to touch on op after returing it */ \ - /* back to the free list (free list may release memory on distruct )*/ \ - struct ompi_communicator_t *comm = GET_COMM(op); \ - bool is_coll_sync = IS_COLL_SYNCMEM(op); \ - ML_VERBOSE(10, ("Releasing %p", op)); \ - OMPI_REQUEST_FINI(&(op)->full_message.super); \ - opal_free_list_return (&(((mca_coll_ml_module_t *)(op)->coll_module)-> \ - coll_ml_collective_descriptors), \ - (opal_free_list_item_t *)op); \ - /* Special check for memory synchronization completion */ \ - /* We have to return it first to free list, since the communicator */ \ - /* release potentially may trigger ML module distraction and having */ \ - /* the element not on the list may cause memory leak. */ \ - if (OPAL_UNLIKELY(is_coll_sync)) { \ - if (OMPI_COMM_IS_INTRINSIC(comm)) { \ - opal_show_help("help-mpi-coll-ml.txt", \ - "coll-ml-check-fatal-error", true, \ - comm->c_name); \ - ompi_mpi_abort(comm, 6); \ - } else { \ - opal_show_help("help-mpi-coll-ml.txt", \ - "coll-ml-check-error", true, \ - comm->c_name); \ - /* After this point it is UNSAFE to touch ml module */ \ - /* or communicator */ \ - OBJ_RELEASE(comm); \ - } \ - } \ - } \ -} while (0) - -#define MCA_COLL_ML_SET_ORDER_INFO(coll_progress, num_frags) \ -do { \ - mca_coll_ml_topology_t *topo = (coll_progress)->coll_schedule->topo_info; \ - bcol_function_args_t *variable_params = &(coll_progress)->variable_fn_params; \ - if (topo->topo_ordering_info.num_bcols_need_ordering > 0) { \ - variable_params->order_info.bcols_started = 0; \ - variable_params->order_info.order_num = \ - topo->topo_ordering_info.next_order_num; \ - variable_params->order_info.n_fns_need_ordering = \ - (coll_progress)->coll_schedule->n_fns_need_ordering; \ - topo->topo_ordering_info.next_order_num += num_frags; \ - (coll_progress)->fragment_data.message_descriptor->next_frag_num = \ - variable_params->order_info.order_num + 1; \ - } \ -} while (0) - -#define MCA_COLL_ML_SET_NEW_FRAG_ORDER_INFO(coll_progress) \ -do { \ - mca_coll_ml_topology_t *topo = (coll_progress)->coll_schedule->topo_info; \ - if (topo->topo_ordering_info.num_bcols_need_ordering > 0) { \ - bcol_function_args_t *variable_params = &(coll_progress)->variable_fn_params; \ - struct fragment_data_t *frag_data = &(coll_progress)->fragment_data; \ - variable_params->order_info.bcols_started = 0; \ - variable_params->order_info.order_num = frag_data->message_descriptor->next_frag_num; \ - variable_params->order_info.n_fns_need_ordering = \ - (coll_progress)->coll_schedule->n_fns_need_ordering; \ - frag_data->message_descriptor->next_frag_num++; \ - } \ -} while (0) - -#define MCA_COLL_ML_SET_SCHEDULE_ORDER_INFO(schedule) \ -do { \ - int i; \ - (schedule)->n_fns_need_ordering = 0; \ - for (i = 0; i < (schedule)->n_fns; ++i) { \ - mca_bcol_base_module_t *current_bcol = \ - (schedule)->component_functions[i].constant_group_data.bcol_module; \ - assert (NULL != current_bcol); \ - if (current_bcol->bcol_component->need_ordering) { \ - 
(schedule)->n_fns_need_ordering++; \ - } \ - } \ -} while (0) - -enum { - MCA_COLL_ML_NET_STREAM_SEND, - MCA_COLL_ML_NET_STREAM_RECV -}; - -static inline __opal_attribute_always_inline__ - int mca_coll_ml_convertor_prepare(ompi_datatype_t *dtype, int count, const void *buff, - opal_convertor_t *convertor, int stream) -{ - size_t bytes_packed; - - if (MCA_COLL_ML_NET_STREAM_SEND == stream) { - opal_convertor_copy_and_prepare_for_send( - ompi_mpi_local_convertor, - &dtype->super, count, buff, 0, - convertor); - } else { - opal_convertor_copy_and_prepare_for_recv( - ompi_mpi_local_convertor, - &dtype->super, count, buff, 0, - convertor); - } - - opal_convertor_get_packed_size(convertor, &bytes_packed); - - return bytes_packed; -} - -static inline __opal_attribute_always_inline__ - int mca_coll_ml_convertor_pack(void *data_addr, size_t buff_size, - opal_convertor_t *convertor) -{ - struct iovec iov; - - size_t max_data = 0; - uint32_t iov_count = 1; - - iov.iov_base = (IOVBASE_TYPE*) data_addr; - iov.iov_len = buff_size; - - opal_convertor_pack(convertor, &iov, &iov_count, &max_data); - - return max_data; -} - -static inline __opal_attribute_always_inline__ - int mca_coll_ml_convertor_unpack(void *data_addr, size_t buff_size, - opal_convertor_t *convertor) -{ - struct iovec iov; - - size_t max_data = 0; - uint32_t iov_count = 1; - - iov.iov_base = (void *) (uintptr_t) data_addr; - iov.iov_len = buff_size; - - opal_convertor_unpack(convertor, &iov, &iov_count, &max_data); - - return max_data; -} -#endif /* MCA_COLL_ML_COLLS_H */ - diff --git a/ompi/mca/coll/ml/coll_ml_component.c b/ompi/mca/coll/ml/coll_ml_component.c deleted file mode 100644 index 4b4cf277c1..0000000000 --- a/ompi/mca/coll/ml/coll_ml_component.c +++ /dev/null @@ -1,449 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - * - * Most of the description of the data layout is in the - * coll_sm_module.c file. 
- */ - -#include "ompi_config.h" - -#include -#include -#include -#include - -#include "ompi/constants.h" -#include "ompi/mca/coll/base/base.h" -#include "opal/mca/mpool/base/base.h" -#include "opal/mca/mpool/mpool.h" -#include "ompi/mca/bcol/base/base.h" -#include "ompi/mca/sbgp/base/base.h" - -#include "coll_ml.h" -#include "coll_ml_inlines.h" - -#include "ompi/patterns/net/netpatterns.h" -#include "coll_ml_mca.h" -#include "coll_ml_custom_utils.h" - - -/* - * Public string showing the coll ompi_ml V2 component version number - */ -const char *mca_coll_ml_component_version_string = -"Open MPI ml-V2 collective MCA component version " OMPI_VERSION; - -/* - * Local functions - */ - -static int ml_open(void); -static int ml_close(void); -static int coll_ml_progress(void); - -/* - * Instantiate the public struct with all of our public information - * and pointers to our public functions in it - */ - -mca_coll_ml_component_t mca_coll_ml_component = { - - /* First, fill in the super */ - - .super = { - /* First, the mca_component_t struct containing meta - information about the component itself */ - - .collm_version = { - MCA_COLL_BASE_VERSION_2_0_0, - - /* Component name and version */ - - .mca_component_name = "ml", - MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION), - - /* Component open, close, and register functions */ - - .mca_open_component = ml_open, - .mca_close_component = ml_close, - .mca_register_component_params = mca_coll_ml_register_params - }, - .collm_data = { - /* The component is not checkpoint ready */ - MCA_BASE_METADATA_PARAM_NONE - }, - - /* Initialization / querying functions */ - .collm_init_query = mca_coll_ml_init_query, - .collm_comm_query = mca_coll_ml_comm_query, - }, -}; - -void mca_coll_ml_abort_ml(char *message) -{ - ML_ERROR(("ML Collective FATAL ERROR: %s", message)); - /* shutdown the MPI */ - ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_INTERN); -} -/* - * progress function - */ - -#define INDEX(task) ((task)->my_index_in_coll_schedule) -#define ACTIVE_L (&mca_coll_ml_component.active_tasks) -#define PENDING_L (&mca_coll_ml_component.pending_tasks) -#define SEQ_L (&mca_coll_ml_component.sequential_collectives) - -static int coll_ml_progress() -{ - - int rc = OMPI_SUCCESS; - int fn_idx; - - mca_coll_ml_task_status_t *task_status, *task_status_tmp; - mca_coll_ml_collective_operation_progress_t *seq_coll_op; - mca_coll_ml_collective_operation_progress_t *seq_coll_op_tmp; - - mca_bcol_base_module_collective_fn_primitives_t progress_fn, - coll_fn; - mca_coll_ml_utility_data_t *const_args; - mca_coll_ml_component_t *cm = &mca_coll_ml_component; - - /* Pasha: Not sure that is it correct way to resolve the problem. - Iprobe call for progress engine. The progress engine calls for our - progress and as result the first element on the list is progressed again - and so we call for Iprobe again.... as result we get HUGE stack. - - One way to prevent it - remove the item from the list, and once you finish - to process it - put it back. - - Other way - put flag on component, if the progress is running - exit immediate. 
- */ - if (cm->progress_is_busy) { - /* We are already working...*/ - return OMPI_SUCCESS; - } else { - cm->progress_is_busy = true; - } - - /* progress sequential collective operations */ - /* RLG - need to do better here for parallel progress */ - OPAL_THREAD_LOCK(&(cm->sequential_collectives_mutex)); - OPAL_LIST_FOREACH_SAFE(seq_coll_op, seq_coll_op_tmp, SEQ_L, mca_coll_ml_collective_operation_progress_t) { - do { - fn_idx = seq_coll_op->sequential_routine.current_active_bcol_fn; - /* initialize the task */ - - if (SEQ_TASK_IN_PROG == seq_coll_op->sequential_routine.current_bcol_status){ - progress_fn = seq_coll_op->coll_schedule-> - component_functions[fn_idx].bcol_function->progress_fn; - } else { - /* PPP Pasha - apparently task setup should be called only here. see linr 190 */ - progress_fn = seq_coll_op->coll_schedule-> - component_functions[fn_idx].bcol_function->coll_fn; - } - - const_args = &seq_coll_op->coll_schedule->component_functions[fn_idx].constant_group_data; - /* RLG - note need to move to useing coll_ml_utility_data_t as - * collective argument, rather than mca_bcol_base_function_t - */ - rc = progress_fn(&(seq_coll_op->variable_fn_params), (mca_bcol_base_function_t *)const_args); - if (BCOL_FN_COMPLETE == rc) { - /* done with this routine */ - seq_coll_op->sequential_routine.current_active_bcol_fn++; - /* this is totally hardwired for bcast, need a general call-back */ - - fn_idx = seq_coll_op->sequential_routine.current_active_bcol_fn; - if (fn_idx == seq_coll_op->coll_schedule->n_fns) { - /* done with this collective - recycle descriptor */ - - /* remove from the progress list */ - (void) opal_list_remove_item(SEQ_L, (opal_list_item_t *)seq_coll_op); - - /* handle fragment completion */ - rc = coll_ml_fragment_completion_processing(seq_coll_op); - - if (OMPI_SUCCESS != rc) { - mca_coll_ml_abort_ml("Failed to run coll_ml_fragment_completion_processing"); - } - } else { - rc = seq_coll_op->sequential_routine.seq_task_setup(seq_coll_op); - if (OMPI_SUCCESS != rc) { - mca_coll_ml_abort_ml("Failed to run sequential task setup"); - } - - seq_coll_op->sequential_routine.current_bcol_status = SEQ_TASK_PENDING; - continue; - } - } else if (BCOL_FN_NOT_STARTED == rc) { - seq_coll_op->sequential_routine.current_bcol_status = SEQ_TASK_PENDING; - } else if (BCOL_FN_STARTED == rc) { - seq_coll_op->sequential_routine.current_bcol_status = SEQ_TASK_IN_PROG; - } - - break; - } while (true); - } - OPAL_THREAD_UNLOCK(&(cm->sequential_collectives_mutex)); - - /* general dag's */ - /* see if active tasks can be progressed */ - OPAL_THREAD_LOCK(&(cm->active_tasks_mutex)); - OPAL_LIST_FOREACH(task_status, ACTIVE_L, mca_coll_ml_task_status_t) { - /* progress task */ - progress_fn = task_status->bcol_fn->progress_fn; - const_args = &task_status->ml_coll_operation->coll_schedule-> - component_functions[INDEX(task_status)].constant_group_data; - rc = progress_fn(&(task_status->ml_coll_operation->variable_fn_params), - (mca_bcol_base_function_t *)const_args); - if (BCOL_FN_COMPLETE == rc) { - ML_VERBOSE(3, ("GOT BCOL_COMPLETED!!!!")); - rc = mca_coll_ml_task_completion_processing(&task_status, ACTIVE_L); - if (OMPI_SUCCESS != rc) { - mca_coll_ml_abort_ml("Failed to run mca_coll_ml_task_completion_processing"); - } - } else if (BCOL_FN_STARTED == rc) { - /* nothing to do */ - } else { - mca_coll_ml_abort_ml("Failed to run mca_coll_ml_task_completion_processing"); - } - } - OPAL_THREAD_UNLOCK(&(cm->active_tasks_mutex)); - - /* see if new tasks can be initiated */ - 
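The sequential-collective loop above is, stripped of its bookkeeping, a small per-operation state machine: run or progress the current bcol function, advance on BCOL_FN_COMPLETE, and yield back to the progress engine otherwise. A self-contained model of that control flow (every name here is illustrative, none comes from the file):

enum { FN_NOT_STARTED, FN_STARTED, FN_COMPLETE };

struct seq_op {
    int cur_fn;                              /* models current_active_bcol_fn */
    int n_fns;                               /* models coll_schedule->n_fns   */
    int (*step)(struct seq_op *op, int idx); /* models coll_fn / progress_fn  */
};

/* Returns 1 when the whole collective is finished, 0 to retry on the
 * next progress call. */
static int progress_seq_op(struct seq_op *op)
{
    while (op->cur_fn < op->n_fns) {
        if (FN_COMPLETE != op->step(op, op->cur_fn)) {
            return 0;   /* NOT_STARTED or STARTED: leave it on the queue */
        }
        op->cur_fn++;   /* this level completed, fire the next one */
    }
    return 1;
}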
OPAL_THREAD_LOCK(&(cm->pending_tasks_mutex)); - OPAL_LIST_FOREACH_SAFE(task_status, task_status_tmp, PENDING_L, mca_coll_ml_task_status_t) { - /* check to see if dependencies are satisfied */ - int n_dependencies = task_status->rt_num_dependencies; - int n_dependencies_satisfied = task_status->n_dep_satisfied; - - if (n_dependencies == n_dependencies_satisfied) { - /* initiate the task */ - coll_fn = task_status->bcol_fn->coll_fn; - const_args = &task_status->ml_coll_operation->coll_schedule-> - component_functions[INDEX(task_status)].constant_group_data; - rc = coll_fn(&(task_status->ml_coll_operation->variable_fn_params), - (mca_bcol_base_function_t *)const_args); - if (BCOL_FN_COMPLETE == rc) { - ML_VERBOSE(3, ("GOT BCOL_COMPLETED!")); - rc = mca_coll_ml_task_completion_processing(&task_status, PENDING_L); - if (OMPI_SUCCESS != rc) { - mca_coll_ml_abort_ml("Failed to run mca_coll_ml_task_completion_processing"); - } - } else if ( BCOL_FN_STARTED == rc ) { - ML_VERBOSE(3, ("GOT BCOL_STARTED!")); - (void) opal_list_remove_item(PENDING_L, (opal_list_item_t *)task_status); - /* RLG - is there potential for deadlock here ? Need to - * look at this closely - */ - OPAL_THREAD_LOCK(&(cm->active_tasks_mutex)); - opal_list_append(ACTIVE_L, (opal_list_item_t *)task_status); - OPAL_THREAD_UNLOCK(&(cm->active_tasks_mutex)); - } else if( BCOL_FN_NOT_STARTED == rc ) { - /* nothing to do */ - ML_VERBOSE(10, ("GOT BCOL_FN_NOT_STARTED!")); - } else { - OPAL_THREAD_UNLOCK(&(cm->pending_tasks_mutex)); - /* error will be returned - RLG : need to reconsider return - * types - we have no way to convey error information - * the way the code is implemented now */ - ML_VERBOSE(3, ("GOT error !")); - rc = OMPI_ERROR; - OMPI_ERRHANDLER_RETURN(rc,MPI_COMM_WORLD,rc,"Error returned from bcol function: aborting"); - break; - } - } - } - OPAL_THREAD_UNLOCK(&(cm->pending_tasks_mutex)); - - /* return */ - cm->progress_is_busy = false; - - return rc; -} - - -static void adjust_coll_config_by_mca_param(void) -{ - /* setting bcast mca params */ - if (COLL_ML_STATIC_BCAST == mca_coll_ml_component.bcast_algorithm) { - mca_coll_ml_component.coll_config[ML_BCAST][ML_SMALL_MSG].algorithm_id = ML_BCAST_SMALL_DATA_KNOWN; - mca_coll_ml_component.coll_config[ML_BCAST][ML_LARGE_MSG].algorithm_id = ML_BCAST_LARGE_DATA_KNOWN; - } else if (COLL_ML_SEQ_BCAST == mca_coll_ml_component.bcast_algorithm) { - mca_coll_ml_component.coll_config[ML_BCAST][ML_SMALL_MSG].algorithm_id = ML_BCAST_SMALL_DATA_SEQUENTIAL; - mca_coll_ml_component.coll_config[ML_BCAST][ML_LARGE_MSG].algorithm_id = ML_BCAST_LARGE_DATA_SEQUENTIAL; - } else { /* Unknown root */ - mca_coll_ml_component.coll_config[ML_BCAST][ML_SMALL_MSG].algorithm_id = ML_BCAST_SMALL_DATA_UNKNOWN; - mca_coll_ml_component.coll_config[ML_BCAST][ML_LARGE_MSG].algorithm_id = ML_BCAST_LARGE_DATA_UNKNOWN; - } -} - -/* - * Open the component - */ -static int ml_open(void) -{ - /* local variables */ - int rc, c_idx, m_idx; - mca_coll_ml_component_t *cs = &mca_coll_ml_component; - - /* set the starting sequence number */ - cs->base_sequence_number = -1; - cs->progress_is_busy = false; - - /* If the priority is zero (default) disable the component */ - if (mca_coll_ml_component.ml_priority <= 0) { - return OMPI_ERR_NOT_AVAILABLE; - } - - /* Init memory structures (no real memory is allocated) */ - OBJ_CONSTRUCT(&cs->memory_manager, mca_coll_ml_lmngr_t); - - if (OMPI_SUCCESS != (rc = mca_base_framework_open(&ompi_sbgp_base_framework, 0))) { - fprintf(stderr," failure in open mca_sbgp_base_open \n"); - 
return rc; - } - if (OMPI_SUCCESS != (rc = mca_base_framework_open(&ompi_bcol_base_framework, 0))) { - fprintf(stderr," failure in open mca_bcol_base_open \n"); - return rc; - } - - /* Reset the collective tunings cache */ - for (c_idx = 0; c_idx < ML_NUM_OF_FUNCTIONS; c_idx++) { - for (m_idx = 0; m_idx < ML_NUM_MSG; m_idx++) { - mca_coll_ml_reset_config(&cs->coll_config[c_idx][m_idx]); - } - } - - adjust_coll_config_by_mca_param(); - - /* Load the configuration file and cache the configuration on the component */ - rc = mca_coll_ml_config_file_init(); - if (OMPI_SUCCESS != rc) { - return OMPI_ERROR; - } - - /* register the progress function */ - rc = opal_progress_register(coll_ml_progress); - if (OMPI_SUCCESS != rc ) { - fprintf(stderr," failed to register the ml progress function \n"); - fflush(stderr); - return rc; - } - - OBJ_CONSTRUCT(&(cs->pending_tasks_mutex), opal_mutex_t); - OBJ_CONSTRUCT(&(cs->pending_tasks), opal_list_t); - OBJ_CONSTRUCT(&(cs->active_tasks_mutex), opal_mutex_t); - OBJ_CONSTRUCT(&(cs->active_tasks), opal_list_t); - OBJ_CONSTRUCT(&(cs->sequential_collectives_mutex), opal_mutex_t); - OBJ_CONSTRUCT(&(cs->sequential_collectives), opal_list_t); - - rc = netpatterns_init(); - if (OMPI_SUCCESS != rc) { - return rc; - } - - cs->topo_discovery_fn[COLL_ML_HR_FULL] = - mca_coll_ml_fulltree_hierarchy_discovery; - - cs->topo_discovery_fn[COLL_ML_HR_ALLREDUCE] = - mca_coll_ml_allreduce_hierarchy_discovery; - - cs->topo_discovery_fn[COLL_ML_HR_NBS] = - mca_coll_ml_fulltree_exclude_basesmsocket_hierarchy_discovery; - - cs->topo_discovery_fn[COLL_ML_HR_SINGLE_PTP] = - mca_coll_ml_fulltree_ptp_only_hierarchy_discovery; - - cs->topo_discovery_fn[COLL_ML_HR_SINGLE_IBOFFLOAD] = - mca_coll_ml_fulltree_iboffload_only_hierarchy_discovery; - - cs->need_allreduce_support = false; - - return OMPI_SUCCESS; -} - -/* - * Close the component - */ -static int ml_close(void) -{ - int ret; - - mca_coll_ml_component_t *cs = &mca_coll_ml_component; - - /* There is no need to release/close resources if the - * priority was set to zero */ - if (cs->ml_priority <= 0) { - return OMPI_SUCCESS; - } - - OBJ_DESTRUCT(&cs->memory_manager); - OBJ_DESTRUCT(&cs->pending_tasks_mutex); - OBJ_DESTRUCT(&cs->pending_tasks); - OBJ_DESTRUCT(&cs->active_tasks_mutex); - OBJ_DESTRUCT(&cs->active_tasks); - OBJ_DESTRUCT(&cs->sequential_collectives_mutex); - OBJ_DESTRUCT(&cs->sequential_collectives); - - /* deregister the progress function */ - ret = opal_progress_unregister(coll_ml_progress); - if (OMPI_SUCCESS != ret ) { - OMPI_ERROR_LOG(ret); - return ret; - } - - /* close the sbgp and bcol frameworks */ - if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_sbgp_base_framework))) { - OMPI_ERROR_LOG(ret); - return ret; - } - - if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_bcol_base_framework))) { - OMPI_ERROR_LOG(ret); - return ret; - } - - return OMPI_SUCCESS; -} - -/* query to see if the component is available for use, and can - * satisfy the thread and progress requirements - */ -int mca_coll_ml_init_query(bool enable_progress_threads, - bool enable_mpi_threads) -{ - int ret; - - /* at this stage there is no reason to disqualify this component */ - /* Add the bcol and sbgp init here */ - ret = mca_sbgp_base_init(enable_progress_threads, enable_mpi_threads); - if (OMPI_SUCCESS != ret) { - return ret; - } - - ret = mca_bcol_base_init(enable_progress_threads, enable_mpi_threads); - if (OMPI_SUCCESS != ret) { - return ret; - } - - /* done */ - return OMPI_SUCCESS; -} diff --git a/ompi/mca/coll/ml/coll_ml_config.c b/ompi/mca/coll/ml/coll_ml_config.c deleted file mode 100644 index c7556ca104..0000000000 --- a/ompi/mca/coll/ml/coll_ml_config.c +++ /dev/null @@ -1,613 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#include -#include - -#ifdef HAVE_UNISTD_H -#include -#endif - -#include "coll_ml.h" -#include "coll_ml_inlines.h" -#include "coll_ml_config.h" -#include "coll_ml_lex.h" - -static char *key_buffer = NULL; -static size_t key_buffer_len = 0; - -typedef struct section_config_t { - char *section_name; - int section_id; - per_collective_configuration_t config; -} section_config_t; - -typedef struct coll_config_t { - char *coll_name; - int coll_id; - section_config_t section; -} coll_config_t; - -static int algorithm_name_to_id(char *name) -{ - assert (NULL != name); - if (!strcasecmp(name,"ML_BCAST_SMALL_DATA_KNOWN")) - return ML_BCAST_SMALL_DATA_KNOWN; - if (!strcasecmp(name,"ML_BCAST_SMALL_DATA_UNKNOWN")) - return ML_BCAST_SMALL_DATA_UNKNOWN; - if (!strcasecmp(name,"ML_BCAST_SMALL_DATA_SEQUENTIAL")) - return ML_BCAST_SMALL_DATA_SEQUENTIAL; - if (!strcasecmp(name,"ML_BCAST_LARGE_DATA_KNOWN")) - return ML_BCAST_LARGE_DATA_KNOWN; - if (!strcasecmp(name,"ML_BCAST_LARGE_DATA_UNKNOWN")) - return ML_BCAST_LARGE_DATA_UNKNOWN; - if (!strcasecmp(name,"ML_BCAST_LARGE_DATA_SEQUENTIAL")) - return ML_BCAST_LARGE_DATA_SEQUENTIAL; - if (!strcasecmp(name,"ML_N_DATASIZE_BINS")) - return ML_N_DATASIZE_BINS; - if (!strcasecmp(name,"ML_NUM_BCAST_FUNCTIONS")) - return ML_NUM_BCAST_FUNCTIONS; - if (!strcasecmp(name,"ML_SCATTER_SMALL_DATA_KNOWN")) - return ML_SCATTER_SMALL_DATA_KNOWN; - if (!strcasecmp(name,"ML_SCATTER_N_DATASIZE_BINS")) - return ML_SCATTER_N_DATASIZE_BINS; - if (!strcasecmp(name,"ML_SCATTER_SMALL_DATA_UNKNOWN")) - return ML_SCATTER_SMALL_DATA_UNKNOWN; - if (!strcasecmp(name,"ML_SCATTER_SMALL_DATA_SEQUENTIAL")) - return ML_SCATTER_SMALL_DATA_SEQUENTIAL; - if (!strcasecmp(name,"ML_NUM_SCATTER_FUNCTIONS")) - return ML_NUM_SCATTER_FUNCTIONS; - if (!strcasecmp(name,"ML_SMALL_DATA_ALLREDUCE")) - return ML_SMALL_DATA_ALLREDUCE; - if (!strcasecmp(name,"ML_LARGE_DATA_ALLREDUCE")) - return ML_LARGE_DATA_ALLREDUCE; - if (!strcasecmp(name,"ML_SMALL_DATA_REDUCE")) - return ML_SMALL_DATA_REDUCE; - if (!strcasecmp(name,"ML_LARGE_DATA_REDUCE")) - return ML_LARGE_DATA_REDUCE; - if (!strcasecmp(name,"ML_NUM_ALLREDUCE_FUNCTIONS")) - return ML_NUM_ALLREDUCE_FUNCTIONS; - if (!strcasecmp(name,"ML_SMALL_DATA_ALLTOALL")) - return ML_SMALL_DATA_ALLTOALL; - if (!strcasecmp(name,"ML_LARGE_DATA_ALLTOALL")) - return ML_LARGE_DATA_ALLTOALL; - if (!strcasecmp(name,"ML_NUM_ALLTOALL_FUNCTIONS")) - return ML_NUM_ALLTOALL_FUNCTIONS; - if (!strcasecmp(name,"ML_SMALL_DATA_ALLGATHER")) - return ML_SMALL_DATA_ALLGATHER; - if (!strcasecmp(name,"ML_LARGE_DATA_ALLGATHER")) - return ML_LARGE_DATA_ALLGATHER; - if (!strcasecmp(name,"ML_NUM_ALLGATHER_FUNCTIONS")) - return ML_NUM_ALLGATHER_FUNCTIONS; - if (!strcasecmp(name,"ML_SMALL_DATA_GATHER")) - return ML_SMALL_DATA_GATHER; - if (!strcasecmp(name,"ML_LARGE_DATA_GATHER")) - return ML_LARGE_DATA_GATHER; - if (!strcasecmp(name,"ML_NUM_GATHER_FUNCTIONS")) - return ML_NUM_GATHER_FUNCTIONS; - if (!strcasecmp(name,"ML_BARRIER_DEFAULT")) - return ML_BARRIER_DEFAULT; - - /* ERROR */ - return ML_UNDEFINED; -} - -static int hierarchy_name_to_id(char *name) -{ - assert (NULL != name); - if (!strcasecmp(name, "FULL_HR")) { - return COLL_ML_HR_FULL; - } - if (!strcasecmp(name, "FULL_HR_NO_BASESOCKET")) { - return COLL_ML_HR_NBS; - } - if (!strcasecmp(name, "PTP_ONLY")) { - return COLL_ML_HR_SINGLE_PTP; - } - if (!strcasecmp(name, "IBOFFLOAD_ONLY")) { - return COLL_ML_HR_SINGLE_IBOFFLOAD; - } - /* Error */ - return ML_UNDEFINED; -} - -static int section_name_to_id(char *name) -{ - assert (NULL != name); - if (!strcasecmp(name, "SMALL")) { - return ML_SMALL_MSG; - } - - if (!strcasecmp(name, "LARGE")) { - return ML_LARGE_MSG; - } - /* Error */ - return ML_UNDEFINED; -} - -static int coll_name_to_id(char *name) -{ - assert (NULL != name); - if (!strcasecmp(name, "ALLGATHER")) { - return ML_ALLGATHER; - } - if (!strcasecmp(name, "ALLGATHERV")) { - return ML_ALLGATHERV; - } - if (!strcasecmp(name, "ALLREDUCE")) { - return ML_ALLREDUCE; - } - if (!strcasecmp(name, "ALLTOALL")) { - return ML_ALLTOALL; - } - if (!strcasecmp(name, "ALLTOALLV")) { - return ML_ALLTOALLV; - } - if (!strcasecmp(name, "ALLTOALLW")) { - return ML_ALLTOALLW; - } - if (!strcasecmp(name, "BARRIER")) { - return ML_BARRIER; - } - if (!strcasecmp(name, "BCAST")) { - return ML_BCAST; - } - if (!strcasecmp(name, "EXSCAN")) { - return ML_EXSCAN; - } - if (!strcasecmp(name, "GATHER")) { - return ML_GATHER; - } - if (!strcasecmp(name, "GATHERV")) { - return ML_GATHERV; - } - if (!strcasecmp(name, "REDUCE")) { - return ML_REDUCE; - } - if (!strcasecmp(name, "REDUCE_SCATTER")) { - return ML_REDUCE_SCATTER; - } - if (!strcasecmp(name, "SCAN")) { - return ML_SCAN; - } - if (!strcasecmp(name, "SCATTER")) { - return ML_SCATTER; - } - if (!strcasecmp(name, "SCATTERV")) { - return ML_SCATTERV; - } - - /* nonblocking functions */ - - if (!strcasecmp(name, "IALLGATHER")) { - return ML_IALLGATHER; - } - if (!strcasecmp(name, "IALLGATHERV")) { - return ML_IALLGATHERV; - } - if (!strcasecmp(name, "IALLREDUCE")) { - return ML_IALLREDUCE; - } - if (!strcasecmp(name, "IALLTOALL")) { - return ML_IALLTOALL; - } - if (!strcasecmp(name, "IALLTOALLV")) { - return ML_IALLTOALLV; - } - if (!strcasecmp(name, "IALLTOALLW")) { - return ML_IALLTOALLW; - } - if (!strcasecmp(name, "IBARRIER")) { - return ML_IBARRIER; - } - if (!strcasecmp(name, "IBCAST")) { - return ML_IBCAST; - } - if (!strcasecmp(name, "IEXSCAN")) { - return ML_IEXSCAN; - } - if (!strcasecmp(name, "IGATHER")) { - return ML_IGATHER; - } - if (!strcasecmp(name, "IGATHERV")) { - return ML_IGATHERV; - } - if (!strcasecmp(name, "IREDUCE")) { - return ML_IREDUCE; - } - if (!strcasecmp(name, "IREDUCE_SCATTER")) { - return ML_IREDUCE_SCATTER; - } - if (!strcasecmp(name, "ISCAN")) { - return ML_ISCAN; - } - if (!strcasecmp(name, "ISCATTER")) { - return ML_ISCATTER; - } - if (!strcasecmp(name, "ISCATTERV")) { - return ML_ISCATTERV; - } - - /* Error - collective name was not matched */ - return ML_UNDEFINED; -} -static int set_collective_name(coll_config_t *coll_config) -{ - int coll_id = - coll_name_to_id(coll_ml_config_yytext); - - if (ML_UNDEFINED == coll_id) { - return OMPI_ERROR; - } - -
coll_config->coll_id = coll_id; - coll_config->coll_name = strdup(coll_ml_config_yytext); - - return OMPI_SUCCESS; -} - -static int set_section_name(section_config_t *section_config) -{ - int section_id; - - section_id = section_name_to_id(coll_ml_config_yytext); - - if (ML_UNDEFINED == section_id) { - return OMPI_ERROR; - } - - section_config->section_id = section_id; - section_config->section_name = strdup(coll_ml_config_yytext); - - return OMPI_SUCCESS; -} - -void mca_coll_ml_reset_config(per_collective_configuration_t *config) -{ - config->topology_id = ML_UNDEFINED; - config->threshold = ML_UNDEFINED; - config->algorithm_id = ML_UNDEFINED; - config->fragmentation_enabled = ML_UNDEFINED; -} - -static void reset_section(section_config_t *section_cf) -{ - if (section_cf->section_name) { - free (section_cf->section_name); - section_cf->section_name = NULL; - } - - section_cf->section_id = ML_UNDEFINED; - mca_coll_ml_reset_config(&section_cf->config); -} - -static void reset_collective(coll_config_t *coll_cf) -{ - if (coll_cf->coll_name) { - free (coll_cf->coll_name); - coll_cf->coll_name = NULL; - } - - coll_cf->coll_id = ML_UNDEFINED; - reset_section(&coll_cf->section); -} - -/* - * String to integer; - */ -static int string_to_int(char *str) -{ - while (isspace(*str)) { - ++str; - } - - /* Nope -- just decimal, so use atoi() */ - return atoi(str); -} - -static int parse_algorithm_key(section_config_t *section, char *value) -{ - int ret; - ret = algorithm_name_to_id(value); - if (ML_UNDEFINED == ret) { - return OMPI_ERROR; - } else { - section->config.algorithm_id = ret; - } - - return OMPI_SUCCESS; -} - -static int parse_threshold_key(section_config_t *section, char *value) -{ - assert (NULL != value); - - if(!strcasecmp(value, "unlimited")) { - section->config.threshold = -1; - } else { - section->config.threshold = string_to_int(value); - } - - return OMPI_SUCCESS; -} - -static int parse_hierarchy_key(section_config_t *section, char *value) -{ - int ret; - - ret = hierarchy_name_to_id(value); - if (ML_UNDEFINED == ret) { - return OMPI_ERROR; - } - - section->config.topology_id = ret; - - return OMPI_SUCCESS; -} - -static int parse_fragmentation_key(section_config_t *section, char *value) -{ - assert (NULL != value); - - if(!strcasecmp(value, "enable")) { - section->config.fragmentation_enabled = 1; - } else if (!strcasecmp(value, "disable")) { - section->config.fragmentation_enabled = 0; - } else { - ML_ERROR(("Line %d, unexpected fragmentation value %s. Legal values are: enable/disable", - coll_ml_config_yynewlines, value)); - return OMPI_ERROR; - } - return OMPI_SUCCESS; -} - -/* Save the configuration that has been collected so far */ -static int save_settings(coll_config_t *coll_config) -{ - per_collective_configuration_t *cf; - - if (ML_UNDEFINED == coll_config->coll_id || ML_UNDEFINED == coll_config->section.section_id) { - return OMPI_ERROR; - } - - cf = &mca_coll_ml_component.coll_config[coll_config->coll_id][coll_config->section.section_id]; - - cf->topology_id = coll_config->section.config.topology_id; - cf->threshold = coll_config->section.config.threshold; - cf->algorithm_id = coll_config->section.config.algorithm_id; - cf->fragmentation_enabled = coll_config->section.config.fragmentation_enabled; - - return OMPI_SUCCESS; -} - -/* - * Parse a single line - */ -static int parse_line(section_config_t *section) -{ - int val, ret = OMPI_SUCCESS; - char *value = NULL; - - /* Save the key name */ - if (key_buffer_len < strlen(coll_ml_config_yytext) + 1) { - char *tmp; - key_buffer_len = strlen(coll_ml_config_yytext) + 1; - tmp = (char *) realloc(key_buffer, key_buffer_len); - if (NULL == tmp) { - free(key_buffer); - key_buffer_len = 0; - key_buffer = NULL; - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - key_buffer = tmp; - } - strncpy(key_buffer, coll_ml_config_yytext, key_buffer_len); - - /* The first thing we have to see is an "=" */ - val = coll_ml_config_yylex(); - if (coll_ml_config_parse_done || COLL_ML_CONFIG_PARSE_EQUAL != val) { - ML_ERROR(("Line %d, expected = before key: %s", - coll_ml_config_yynewlines, - key_buffer)); - return OMPI_ERROR; - } - - /* Next we get the value */ - val = coll_ml_config_yylex(); - if (COLL_ML_CONFIG_PARSE_SINGLE_WORD == val || - COLL_ML_CONFIG_PARSE_VALUE == val) { - value = strdup(coll_ml_config_yytext); - if (NULL == value) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* Now we need to see the newline */ - val = coll_ml_config_yylex(); - if (COLL_ML_CONFIG_PARSE_NEWLINE != val && - COLL_ML_CONFIG_PARSE_DONE != val) { - ML_ERROR(("Line %d, expected new line after %s", - coll_ml_config_yynewlines, - key_buffer)); - free(value); - return OMPI_ERROR; - } - } - - /* If we did not get EOL or EOF, something is wrong */ - else if (COLL_ML_CONFIG_PARSE_DONE != val && - COLL_ML_CONFIG_PARSE_NEWLINE != val) { - ML_ERROR(("Line %d, expected new line or end of line", - coll_ml_config_yynewlines)); - return OMPI_ERROR; - } else { - ML_ERROR(("Line %d malformed", coll_ml_config_yynewlines)); - return OMPI_ERROR; - } - - /* Line parsing is done, read the values */ - if (!strcasecmp(key_buffer, "algorithm")) { - ret = parse_algorithm_key(section, value); - } else if (!strcasecmp(key_buffer, "threshold")) { - ret = parse_threshold_key(section, value); - } else if (!strcasecmp(key_buffer, "hierarchy")) { - ret = parse_hierarchy_key(section, value); - } else if (!strcasecmp(key_buffer, "fragmentation")) { - ret = parse_fragmentation_key(section, value); - /* Failed to parse the key */ - } else { - ML_ERROR(("Line %d, unknown key %s", - coll_ml_config_yynewlines, key_buffer)); - } - - /* All done */ - free(value); - - return ret; -} - -/**************************************************************************/ - -/* - * Parse a single file - */ -static int parse_file(char *filename) -{ - int val; - int ret = OMPI_SUCCESS; - bool first_section = true, first_coll = true; - coll_config_t coll_config; - - memset (&coll_config, 0, sizeof (coll_config)); - reset_collective(&coll_config); - - /* Open the file */ -
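parse_line above accepts exactly one key = value assignment per input line: a key token, an equals sign, a value (or bare word), then end of line or end of file. A compact model of that acceptor, with invented token codes standing in for the lexer's COLL_ML_CONFIG_PARSE_* constants:

enum { T_EQUAL, T_VALUE, T_WORD, T_NEWLINE, T_DONE, T_OTHER };

/* next_tok stands in for coll_ml_config_yylex(); the key token itself is
 * assumed to have been consumed by the caller, as in parse_line. */
static int accept_kv_line(int (*next_tok)(void))
{
    int t;

    if (T_EQUAL != next_tok()) {
        return -1;                      /* '=' must follow the key */
    }
    t = next_tok();
    if (T_VALUE != t && T_WORD != t) {
        return -1;                      /* then a value token */
    }
    t = next_tok();
    if (T_NEWLINE != t && T_DONE != t) {
        return -1;                      /* then EOL or EOF */
    }
    return 0;                           /* well-formed key = value line */
}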
coll_ml_config_yyin = fopen(filename, "r"); - if (NULL == coll_ml_config_yyin) { - ML_ERROR(("Failed to open config file %s", filename)); - ret = OMPI_ERR_NOT_FOUND; - goto cleanup; - } - - /* Do the parsing */ - coll_ml_config_parse_done = false; - coll_ml_config_yynewlines = 1; - coll_ml_config_init_buffer(coll_ml_config_yyin); - while (!coll_ml_config_parse_done) { - val = coll_ml_config_yylex(); - switch (val) { - case COLL_ML_CONFIG_PARSE_DONE: - case COLL_ML_CONFIG_PARSE_NEWLINE: - break; - case COLL_ML_CONFIG_PARSE_COLLECTIVE: - /* dump all the information to last section that was defined */ - if (!first_coll) { - ret = save_settings(&coll_config); - - if (OMPI_SUCCESS != ret) { - ML_ERROR(("Error in syntax for collective %s", coll_config.coll_name)); - goto cleanup; - } - } - - /* reset collective config */ - reset_collective(&coll_config); - - first_coll = false; - first_section = true; - - ret = set_collective_name(&coll_config); - if (OMPI_SUCCESS != ret) { - goto cleanup; - } - break; - case COLL_ML_CONFIG_PARSE_SECTION: - if (ML_UNDEFINED == coll_config.coll_id) { - ML_ERROR(("Collective section wasn't defined !")); - ret = OMPI_ERROR; - goto cleanup; - } - - if (!first_section) { - /* dump all the information to last section that was defined */ - ret = save_settings(&coll_config); - if (OMPI_SUCCESS != ret) { - ML_ERROR(("Error in syntax for collective %s section %s", coll_config.coll_name, - coll_config.section.section_name)); - goto cleanup; - } - } - - first_section = false; - - /* reset all section values */ - reset_section(&coll_config.section); - - /* set new section name */ - ret = set_section_name(&coll_config.section); - if (OMPI_SUCCESS != ret) { - goto cleanup; - } - break; - case COLL_ML_CONFIG_PARSE_SINGLE_WORD: - if (ML_UNDEFINED == coll_config.coll_id || - ML_UNDEFINED == coll_config.section.section_id) { - ML_ERROR(("Collective section or sub-section was not defined !")); - ret = OMPI_ERROR; - goto cleanup; - } else { - parse_line(&coll_config.section); - } - break; - - default: - /* anything else is an error */ - ML_ERROR(("Unexpected token!")); - ret = OMPI_ERROR; - goto cleanup; - break; - } - } - - save_settings(&coll_config); - fclose(coll_ml_config_yyin); - coll_ml_config_yylex_destroy (); - ret = OMPI_SUCCESS; - -cleanup: - reset_collective(&coll_config); - if (NULL != key_buffer) { - free(key_buffer); - key_buffer = NULL; - key_buffer_len = 0; - } - return ret; -} - -int mca_coll_ml_config_file_init(void) -{ - return parse_file(mca_coll_ml_component.config_file_name); -} - diff --git a/ompi/mca/coll/ml/coll_ml_config.h b/ompi/mca/coll/ml/coll_ml_config.h deleted file mode 100644 index 15ad7dff2a..0000000000 --- a/ompi/mca/coll/ml/coll_ml_config.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef COLL_ML_CONFIG_H_ -#define COLL_ML_CONFIG_H_ - -#include "opal_config.h" -#include - -BEGIN_C_DECLS - -#define ML_UNDEFINED -1 - -struct per_collective_configuration_t { - int topology_id; - int threshold; - int algorithm_id; - int fragmentation_enabled; -}; -typedef struct per_collective_configuration_t per_collective_configuration_t; - -void mca_coll_ml_reset_config(per_collective_configuration_t *config); -int mca_coll_ml_config_file_init(void); - -END_C_DECLS -#endif diff --git a/ompi/mca/coll/ml/coll_ml_copy_fns.c b/ompi/mca/coll/ml/coll_ml_copy_fns.c deleted file mode 100644 index a3d41b06c0..0000000000 --- a/ompi/mca/coll/ml/coll_ml_copy_fns.c +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. 
- * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file */ - -#include "ompi_config.h" - -#include "ompi/constants.h" -#include "opal/threads/mutex.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/coll/coll.h" -#include "ompi/mca/bcol/bcol.h" -#include "opal/sys/atomic.h" -#include "ompi/mca/coll/ml/coll_ml.h" -#include "ompi/mca/coll/ml/coll_ml_inlines.h" -#include "ompi/mca/coll/ml/coll_ml_allocation.h" -#include "coll_ml_colls.h" -#include -#include - - - -/* This routine re-orders and packs user data. The assumption is that - * there is per-process data, the amount of data is the same for all - * ranks, and the user data is contigous. - */ -int mca_coll_ml_pack_reorder_contiguous_data(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - int i, rank; - void *user_buf, *library_buf; - size_t bytes_per_proc; - mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *) - coll_op->coll_module; - mca_coll_ml_topology_t *topo_info = coll_op->coll_schedule->topo_info; - ptrdiff_t ptr_dif; - - /* get the offset into each processes data. The assumption is that - * we are manipulating the same amount of data for each process. - */ - - /* figure out how much data per-proc to copy */ - bytes_per_proc=coll_op->fragment_data.per_rank_fragment_size; - - /* loop over all the ranks in the communicator */ - for( i=0 ; i < ompi_comm_size(ml_module->comm) ; i++ ) { - - /* look up the rank of the i'th element in the sorted list */ - rank = topo_info->sort_list[i]; - - /* get the pointer to user data */ - user_buf=(void *)coll_op->full_message.src_user_addr; - /* compute offset into the user buffer */ - - /* offset for data already processed */ - ptr_dif=rank*coll_op->full_message.n_bytes_per_proc_total+ - coll_op->fragment_data.offset_into_user_buffer_per_proc; - user_buf=(void *) ((char *)user_buf+ptr_dif); - /* - rank*coll_op->full_message.n_bytes_per_proc_total+ - coll_op->fragment_data.offset_into_user_buffer_per_proc); - */ - - /* get the pointer to the ML buffer */ - library_buf= (void *) - ((char *)coll_op->variable_fn_params.src_desc->data_addr+i*bytes_per_proc); - - /* copy the data */ - memcpy(library_buf, user_buf, bytes_per_proc); - - } - - return OMPI_SUCCESS; -} - -/* This routine re-orders and packs user data. The assumption is that - * there is per-process data, the amount of data is the same for all - * ranks, and the user data is contigous. - */ -int mca_coll_ml_pack_reorder_noncontiguous_data(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - int i, rank; - void *user_buf, *library_buf; - size_t bytes_per_proc; - ptrdiff_t ptr_dif; - mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *) - coll_op->coll_module; - mca_coll_ml_topology_t *topo_info = coll_op->coll_schedule->topo_info; - - /* get the offset into each processes data. The assumption is that - * we are manipulating the same amount of data for each process. 
- */ - - /* figure out how much data per-proc to copy */ - bytes_per_proc = coll_op->fragment_data.per_rank_fragment_size; - - /* loop over all the ranks in the communicator */ - for(i = 0; i < ompi_comm_size(ml_module->comm); i++ ) { - - /* look up the rank of the i'th element in the sorted list */ - rank = topo_info->sort_list[i]; - - /* get the pointer to user data */ - user_buf=(void *)coll_op->full_message.src_user_addr; - /* compute offset into the user buffer */ - - /* offset for data already processed */ - ptr_dif=rank*coll_op->full_message.send_count* - coll_op->full_message.send_extent+ - coll_op->fragment_data.offset_into_user_buffer_per_proc; - user_buf=(void *) ((char *)user_buf+ptr_dif); - - /* get the pointer to the ML buffer */ - library_buf= (void *) - ((char *)coll_op->variable_fn_params.src_desc->data_addr+i*bytes_per_proc); - - /* copy the data */ - memcpy(library_buf, user_buf, bytes_per_proc); - - } - - return OMPI_SUCCESS; -} - diff --git a/ompi/mca/coll/ml/coll_ml_custom_utils.c b/ompi/mca/coll/ml/coll_ml_custom_utils.c deleted file mode 100644 index c00c4a5439..0000000000 --- a/ompi/mca/coll/ml/coll_ml_custom_utils.c +++ /dev/null @@ -1,139 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file */ - -#include "ompi_config.h" - -#include -#include -#include - -#include "opal/util/output.h" -#include "opal/class/opal_list.h" -#include "opal/class/opal_object.h" -#include "ompi/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/threads/mutex.h" -#include "opal/sys/atomic.h" - -#include "ompi/op/op.h" -#include "ompi/constants.h" -#include "ompi/mca/coll/coll.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/coll/base/base.h" -#include "ompi/mca/coll/ml/coll_ml.h" -#include "ompi/mca/coll/ml/coll_ml_inlines.h" -#include "ompi/patterns/comm/coll_ops.h" - -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/communicator/communicator.h" - -#include "ompi/mca/bcol/base/base.h" -#include "coll_ml_custom_utils.h" - -/* - * Local types - */ - -struct avail_coll_t { - opal_list_item_t super; - int ac_priority; - mca_coll_base_module_2_1_0_t *ac_module; -}; -typedef struct avail_coll_t avail_coll_t; - -/* - * Stuff for the OBJ interface - * If topo_index == COLL_ML_TOPO_MAX it looks over all possilbe topologies, otherwhise it looks - * in the topology that was specified. 
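The one-topology-or-all choice described here comes down to picking half-open loop bounds from a sentinel value. A small stand-alone illustration, with TABLE_MAX as a placeholder for COLL_ML_TOPO_MAX:

/* Sketch: scan one slot, or every slot when the ALL sentinel is passed. */
#define TABLE_MAX 4 /* placeholder for COLL_ML_TOPO_MAX */

static int table_contains(const int *table, int index, int wanted)
{
    int tp, max_tp;

    if (TABLE_MAX == index) { /* sentinel: search all slots */
        tp = 0;
        max_tp = TABLE_MAX;
    } else {                  /* search only the requested slot */
        tp = index;
        max_tp = index + 1;
    }

    for (; tp < max_tp; tp++) {
        if (wanted == table[tp]) {
            return 1;
        }
    }

    return 0;
}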
- */ - -int mca_coll_ml_check_if_bcol_is_used(const char *bcol_name, const mca_coll_ml_module_t *ml_module, - int topo_index) -{ - int i, rc, hier, *ranks_in_comm, - is_used = 0, - comm_size = ompi_comm_size(ml_module->comm); - int n_hier, tp , max_tp; - const mca_coll_ml_topology_t *topo_info; - - ranks_in_comm = (int *) malloc(comm_size * sizeof(int)); - if (OPAL_UNLIKELY(NULL == ranks_in_comm)) { - ML_ERROR(("Memory allocation failed.")); - ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_NO_MEM); - /* not reached but causes a clang warning to not return here */ - return OMPI_ERR_OUT_OF_RESOURCE; - } - - for (i = 0; i < comm_size; ++i) { - ranks_in_comm[i] = i; - } - - if (COLL_ML_TOPO_MAX == topo_index) { - tp = 0; - max_tp = COLL_ML_TOPO_MAX; - } else { - tp = topo_index; - max_tp = topo_index + 1; - } - - for (; tp < max_tp; tp++) { - topo_info = &ml_module->topo_list[tp]; - n_hier = topo_info->n_levels; - for (hier = 0; hier < n_hier; ++hier) { - hierarchy_pairs *pair = &topo_info->component_pairs[hier]; - mca_bcol_base_component_t *b_cm = pair->bcol_component; - if(0 == strcmp(bcol_name, - b_cm->bcol_version.mca_component_name)) { - is_used = 1; - break; - } - } - } - - rc = comm_allreduce_pml(&is_used, &is_used, 1, MPI_INT, - ompi_comm_rank(ml_module->comm), MPI_MAX, - comm_size, ranks_in_comm, ml_module->comm); - - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - ML_ERROR(("comm_allreduce_pml failed.")); - ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_OP); - } - - free(ranks_in_comm); - - return is_used; -} - -/* The function is very different from the above function */ -int mca_coll_ml_check_if_bcol_is_requested(const char *component_name) -{ - mca_base_component_list_item_t *bcol_comp; - - ML_VERBOSE(10, ("Loop over bcol components")); - OPAL_LIST_FOREACH(bcol_comp, &mca_bcol_base_components_in_use, mca_base_component_list_item_t) { - if(0 == strcmp(component_name, - ((mca_bcol_base_component_2_0_0_t *) - bcol_comp->cli_component)->bcol_version.mca_component_name)) { - return true; - } - } - - /* the component was not resquested */ - return false; -} diff --git a/ompi/mca/coll/ml/coll_ml_custom_utils.h b/ompi/mca/coll/ml/coll_ml_custom_utils.h deleted file mode 100644 index 7d6a8feb00..0000000000 --- a/ompi/mca/coll/ml/coll_ml_custom_utils.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file */ - -#ifndef MCA_COLL_ML_CUSTOM_UTILS_H -#define MCA_COLL_ML_CUSTOM_UTILS_H - -#include "ompi_config.h" - -#include "coll_ml.h" - -/* the function is used to check if the bcol name is used in this ml module */ -int mca_coll_ml_check_if_bcol_is_used(const char *bcol_name, const mca_coll_ml_module_t *ml_module, - int topo_index); - -/* The function is used to check if the bcol component was REQUESTED by user */ -int mca_coll_ml_check_if_bcol_is_requested(const char *component_name); - -END_C_DECLS - -#endif /* MCA_COLL_ML_ML_H */ diff --git a/ompi/mca/coll/ml/coll_ml_descriptors.c b/ompi/mca/coll/ml/coll_ml_descriptors.c deleted file mode 100644 index 4060c27ed7..0000000000 --- a/ompi/mca/coll/ml/coll_ml_descriptors.c +++ /dev/null @@ -1,60 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
- * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "ompi_config.h" -#include "coll_ml.h" -#include "coll_ml_inlines.h" - - -static inline void mca_coll_ml_fragment_constructor(mca_coll_ml_fragment_t *frag) -{ - frag->fn_args = NULL; -} - -static inline void mca_coll_ml_fragment_destructor(mca_coll_ml_fragment_t *frag) -{ - if (frag->fn_args) { - free(frag->fn_args); - frag->fn_args = NULL; - } -} - -static inline void mca_coll_ml_descriptor_constructor(mca_coll_ml_descriptor_t *descriptor) -{ - - OBJ_CONSTRUCT(&(descriptor->fragment),mca_coll_ml_fragment_t); - - /* this fragment is alway associated with this message descriptor */ - descriptor->fragment.full_msg_descriptor=descriptor; - -} - - -static inline void mca_coll_ml_descriptor_destructor(mca_coll_ml_descriptor_t *descriptor) -{ - OBJ_DESTRUCT(&(descriptor->fragment)); -} - -OBJ_CLASS_INSTANCE( - mca_coll_ml_fragment_t, - opal_list_item_t, - mca_coll_ml_fragment_constructor, - mca_coll_ml_fragment_destructor); - -OBJ_CLASS_INSTANCE( - mca_coll_ml_descriptor_t, - ompi_request_t, - mca_coll_ml_descriptor_constructor, - mca_coll_ml_descriptor_destructor); - diff --git a/ompi/mca/coll/ml/coll_ml_functions.h b/ompi/mca/coll/ml/coll_ml_functions.h deleted file mode 100644 index 5d0d0d7b1a..0000000000 --- a/ompi/mca/coll/ml/coll_ml_functions.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file */ - -#ifndef MCA_COLL_ML_FUNCTIONS_H -#define MCA_COLL_ML_FUNCTIONS_H - -#include "ompi_config.h" - -BEGIN_C_DECLS - -#define ML_MEMSYNC -100 - -enum { - ML_BARRIER_DEFAULT -}; - /* small data algorithm */ -/* broadcast functions */ -enum { - /* small data algorithm */ - ML_BCAST_SMALL_DATA_KNOWN, - /* small data - dynamic decision making supported */ - ML_BCAST_SMALL_DATA_UNKNOWN, - /* Sequential algorithm */ - ML_BCAST_SMALL_DATA_SEQUENTIAL, - - ML_BCAST_LARGE_DATA_KNOWN, - - ML_BCAST_LARGE_DATA_UNKNOWN, - - ML_BCAST_LARGE_DATA_SEQUENTIAL, - - /* marker - all routines about this are expected to be used in - * selection logic that is based on size of the data */ - ML_N_DATASIZE_BINS, - - /* number of functions - also counts some markers, but ... */ - ML_NUM_BCAST_FUNCTIONS -}; - - -/* scatter functions */ -enum { - /* small data algorithm */ - ML_SCATTER_SMALL_DATA_KNOWN, - - /* marker - all routines about this are expected to be used in - * selection logic that is based on size of the data */ - ML_SCATTER_N_DATASIZE_BINS, - - /* small data - dynamic decision making supported */ - ML_SCATTER_SMALL_DATA_UNKNOWN, - - /* Sequential algorithm */ - ML_SCATTER_SMALL_DATA_SEQUENTIAL, - - /* number of functions - also counts some markers, but ... 
*/ - ML_NUM_SCATTER_FUNCTIONS -}; - - -/* Allreduce functions */ -enum { - /* small data algorithm */ - ML_SMALL_DATA_ALLREDUCE, - - /* Large data algorithm */ - ML_LARGE_DATA_ALLREDUCE, - - /* If some of bcols doesn't support - all possibles types, use these extra algthms */ - /* small data algorithm */ - ML_SMALL_DATA_EXTRA_TOPO_ALLREDUCE, - - /* large data algorithm */ - ML_LARGE_DATA_EXTRA_TOPO_ALLREDUCE, - - /* number of functions */ - ML_NUM_ALLREDUCE_FUNCTIONS -}; - -/* Reduce functions */ -enum { - /* small data algorithm */ - ML_SMALL_DATA_REDUCE, - - /* Large data algorithm */ - ML_LARGE_DATA_REDUCE, - - /* number of functions */ - ML_NUM_REDUCE_FUNCTIONS -}; -/* Alltoall functions */ -enum { - /* small data algorithm */ - ML_SMALL_DATA_ALLTOALL, - /* large all to all */ - ML_LARGE_DATA_ALLTOALL, - /* number of functions */ - ML_NUM_ALLTOALL_FUNCTIONS -}; - -/* Allgather functions */ -enum { - /* small data */ - ML_SMALL_DATA_ALLGATHER, - /* large data */ - ML_LARGE_DATA_ALLGATHER, - /* number of functions */ - ML_NUM_ALLGATHER_FUNCTIONS -}; - -/* gather functions */ -enum { - /* small data */ - ML_SMALL_DATA_GATHER, - /* large data */ - ML_LARGE_DATA_GATHER, - /* number of functions */ - ML_NUM_GATHER_FUNCTIONS -}; - -END_C_DECLS - -#endif /* MCA_COLL_ML_FUNCTIONS_H */ diff --git a/ompi/mca/coll/ml/coll_ml_hier_algorithm_memsync_setup.c b/ompi/mca/coll/ml/coll_ml_hier_algorithm_memsync_setup.c deleted file mode 100644 index f50d040f61..0000000000 --- a/ompi/mca/coll/ml/coll_ml_hier_algorithm_memsync_setup.c +++ /dev/null @@ -1,195 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mca/coll/ml/coll_ml.h" -#include "ompi/mca/coll/ml/coll_ml_inlines.h" -#include "ompi/include/ompi/constants.h" -#include "ompi/mca/coll/ml/coll_ml_functions.h" - -static int mca_coll_ml_build_memsync_schedule( - mca_coll_ml_topology_t *topo_info, - mca_coll_ml_collective_operation_description_t **coll_desc) -{ - int i_hier, rc, i_fn, n_fcns, i, - n_hiers = topo_info->n_levels; - - bool call_for_top_func; - mca_bcol_base_module_t *bcol_module; - - mca_coll_ml_compound_functions_t *comp_fn; - mca_coll_ml_collective_operation_description_t *schedule; - - *coll_desc = (mca_coll_ml_collective_operation_description_t *) - calloc(1, sizeof(mca_coll_ml_collective_operation_description_t)); - - schedule = *coll_desc; - if (OPAL_UNLIKELY(NULL == schedule)) { - ML_ERROR(("Can't allocate memory.")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - if (topo_info->global_highest_hier_group_index == - topo_info->component_pairs[n_hiers - 1].bcol_index) { - /* The process that is member of highest level subgroup - should call for top algorithms in addition to fan-in/out steps */ - call_for_top_func = true; - n_fcns = 2 * n_hiers - 1; /* Up + Top + Down */ - } else { - /* The process is not member of highest level subgroup, - as result it does not call for top algorithm, - but it calls for all fan-in/out steps */ - call_for_top_func = false; - n_fcns = 2 * n_hiers; - } - - /* Set dependencies equal to number of hierarchies */ - schedule->n_fns = n_fcns; - schedule->topo_info = topo_info; - - /* Allocated the component function */ - schedule->component_functions = (struct mca_coll_ml_compound_functions_t *) - calloc(n_fcns, sizeof(struct mca_coll_ml_compound_functions_t)); - - if (OPAL_UNLIKELY(NULL == schedule->component_functions)) { - ML_ERROR(("Can't allocate memory.")); - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto Barrier_Setup_Error; - } - - for (i_fn = 0; i_fn < n_fcns; ++i_fn) { - i_hier = (i_fn < n_hiers ? i_fn : n_fcns - i_fn - 1); - comp_fn = &schedule->component_functions[i_fn]; - - /* The hierarchial level */ - comp_fn->h_level = i_hier; - bcol_module = GET_BCOL(topo_info, i_hier); - - /* The UP direction */ - if (1 + i_fn < n_hiers || (1 + i_fn == n_hiers && !call_for_top_func)) { - /* Pasha: We do not have memory syncronization FANIN function, instead I use barrier. 
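A worked example may help with the up/top/down sizing at the top of this routine: with n_hiers = 3 and call_for_top_func true, n_fcns = 2*3 - 1 = 5, i.e. fan-in at levels 0 and 1, one top-level step at level 2, then fan-out at levels 1 and 0. A sketch of just the sizing and level mapping, assuming nothing beyond those two inputs:

#include <stdbool.h>
#include <stdio.h>

/* Sketch: schedule length and per-step hierarchy level, mirroring
 * n_fcns and the i_hier = (i_fn < n_hiers ? i_fn : n_fcns - i_fn - 1)
 * mapping used in the loop below. */
static void print_schedule_shape(int n_hiers, bool call_for_top_func)
{
    /* up steps + optional top step + down steps */
    int n_fcns = call_for_top_func ? 2 * n_hiers - 1 : 2 * n_hiers;

    for (int i_fn = 0; i_fn < n_fcns; i_fn++) {
        int i_hier = (i_fn < n_hiers) ? i_fn : n_fcns - i_fn - 1;
        printf("step %d runs at hierarchy level %d\n", i_fn, i_hier);
    }
}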
- * In future we have to replace it with memsync fan-in function - * comp_fn->bcol_function = - * bcol_module->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING][BCOL_FANIN][1][0][0]; - */ - comp_fn->bcol_function = GET_BCOL_SYNC_FN(bcol_module); - - - assert(NULL != comp_fn->bcol_function); - - /* Each function call with index K is depended of all K-1 previous indices - - in simple words we will do sequential Fan-In calls */ - comp_fn->num_dependencies = i_fn; - comp_fn->num_dependent_tasks = n_fcns - i_fn - 1; - - /* Init component function */ - strcpy(comp_fn->fn_name, "MEMSYNC-FANIN"); - - /* On the highest level */ - } else if ((1 + i_fn == n_hiers && call_for_top_func)) { - comp_fn->bcol_function = GET_BCOL_SYNC_FN(bcol_module); - - /* Each function call with index K is depended of all K-1 previous indices - - in simple words we do sequential calls */ - comp_fn->num_dependencies = n_hiers - 1; /* All Fan-Ins */ - comp_fn->num_dependent_tasks = n_fcns - n_hiers; /* All Fan-Outs */ - - /* Init component function */ - strcpy(comp_fn->fn_name, "MEMSYNC-BARRIER"); - - assert(NULL != comp_fn->bcol_function); - ML_VERBOSE(10, ("func indx %d set to BARRIER %p", i_fn, comp_fn->bcol_function)); - - /* The DOWN direction */ - } else { - /* Pasha: We do not have memory syncronization FANOUT function, instead I use barrier. - * In future we have to replace it with memsync fan-out function - * comp_fn->bcol_function = - * bcol_module->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING][BCOL_FANOUT][1][0][0]; - */ - comp_fn->bcol_function = GET_BCOL_SYNC_FN(bcol_module); - - /* Each function call with index K is depended of all UP and TOP algths */ - comp_fn->num_dependencies = n_hiers; - comp_fn->num_dependent_tasks = 0; - - /* Init component function */ - strcpy(comp_fn->fn_name, "MEMSYNC-FANOUT"); - } - - assert(NULL != comp_fn->bcol_function); - ML_VERBOSE(10, ("func indx %d set to %p", i_fn, comp_fn->bcol_function)); - - if (comp_fn->num_dependent_tasks > 0) { - comp_fn->dependent_task_indices = (int *) calloc(comp_fn->num_dependent_tasks, sizeof(int)); - if (OPAL_UNLIKELY(NULL == comp_fn->dependent_task_indices)) { - ML_ERROR(("Can't allocate memory.")); - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto Barrier_Setup_Error; - } - - /* All indexes follow after this one */ - for (i = 0; i < comp_fn->num_dependent_tasks; ++i) { - comp_fn->dependent_task_indices[i] = i_fn + i + 1; - } - } - - /* No need completion func for Barrier */ - comp_fn->task_comp_fn = NULL; - - ML_VERBOSE(10, ("Setting collective [Barrier] fn_idx %d, n_of_this_type_in_a_row %d, " - "index_in_consecutive_same_bcol_calls %d.", - i_fn, comp_fn->constant_group_data.n_of_this_type_in_a_row, - comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls)); - } - - rc = ml_coll_barrier_constant_group_data_setup(topo_info, schedule); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - ML_ERROR(("Failed to init const group data.")); - goto Barrier_Setup_Error; - } - - schedule->progress_type = 0; - - return OMPI_SUCCESS; - -Barrier_Setup_Error: - if (NULL != schedule->component_functions) { - free(schedule->component_functions); - schedule->component_functions = NULL; - } - - free (schedule); - *coll_desc = NULL; - - return rc; -} - -int ml_coll_memsync_setup(mca_coll_ml_module_t *ml_module) -{ - int ret; - /* For barrier syncronization we use barrier topology */ - mca_coll_ml_topology_t *topo_info = - &ml_module->topo_list[ml_module->collectives_topology_map[ML_BARRIER][ML_SMALL_MSG]]; - - ret = mca_coll_ml_build_memsync_schedule(topo_info, - 
&ml_module->coll_ml_memsync_function); - - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("Failed to setup static bcast")); - return ret; - } - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/coll/ml/coll_ml_hier_algorithms.c b/ompi/mca/coll/ml/coll_ml_hier_algorithms.c deleted file mode 100644 index 179557dafb..0000000000 --- a/ompi/mca/coll/ml/coll_ml_hier_algorithms.c +++ /dev/null @@ -1,188 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi/include/ompi/constants.h" -#include "ompi/mca/coll/ml/coll_ml.h" -#include "ompi/mca/coll/ml/coll_ml_inlines.h" -#include "ompi/mca/coll/ml/coll_ml_allocation.h" - -/* collective managment descriptor initialization - called right after - * the constructor by opal_free_list code - */ -static int mca_coll_ml_collective_operation_progress_init - (opal_free_list_item_t* item, void* ctx) -{ - int i; - int max_dag_size = ((struct coll_desc_init *)ctx)->max_dag_size; - size_t max_n_bytes_per_proc_total = - ((struct coll_desc_init *)ctx)->max_n_bytes_per_proc_total; - mca_coll_ml_collective_operation_progress_t *coll_op = - (mca_coll_ml_collective_operation_progress_t *) item; - - coll_op->dag_description.status_array = - (mca_coll_ml_task_status_t *) - calloc(max_dag_size, sizeof(mca_coll_ml_task_status_t)); - assert(coll_op->dag_description.status_array); - - /* initialize individual elements */ - for (i = 0; i < max_dag_size; i++ ) { - /* Pasha: We assume here index syncronization between - task indexes and indexes in component_function array - (mca_coll_ml_collective_operation_description) - */ - coll_op->dag_description.status_array[i]. - my_index_in_coll_schedule = i; - coll_op->dag_description.status_array[i]. 
- ml_coll_operation = coll_op; - - OBJ_CONSTRUCT(&coll_op->dag_description.status_array[i].item, opal_list_item_t); - } - - /* set the size per proc of the ML buffer */ - coll_op->full_message.max_n_bytes_per_proc_total= - max_n_bytes_per_proc_total; - - /* set the pointer to the bcol module */ - coll_op->coll_module = - ((struct coll_desc_init *)ctx)->bcol_base_module; - - return OPAL_SUCCESS; -} - -int ml_coll_schedule_setup(mca_coll_ml_module_t *ml_module) -{ - /* local variables */ - int ret = OMPI_SUCCESS, comm_size; - mca_coll_ml_component_t *cm = &mca_coll_ml_component; - size_t ml_per_proc_buffer_size; - - /* Barrier */ - ret = ml_coll_hier_barrier_setup(ml_module); - if( OMPI_SUCCESS != ret ) { - return ret; - } - - /* Broadcast */ - ret = ml_coll_hier_bcast_setup(ml_module); - if( OMPI_SUCCESS != ret ) { - return ret; - } - - /* Allreduce */ - if (!mca_coll_ml_component.use_knomial_allreduce) { - ret = ml_coll_hier_allreduce_setup(ml_module); - } else { - ret = ml_coll_hier_allreduce_setup_new(ml_module); - } - - if( OMPI_SUCCESS != ret ) { - return ret; - } - - - /* Alltoall */ - /* - ret = ml_coll_hier_alltoall_setup_new(ml_module); - - if( OMPI_SUCCESS != ret ) { - return ret; - } - */ - - /* Allgather */ - ret = ml_coll_hier_allgather_setup(ml_module); - - if( OMPI_SUCCESS != ret ) { - return ret; - } - - /* Gather */ - /* - ret = ml_coll_hier_gather_setup(ml_module); - - if( OMPI_SUCCESS != ret ) { - return ret; - } - */ - - /* Reduce */ - ret = ml_coll_hier_reduce_setup(ml_module); - if( OMPI_SUCCESS != ret ) { - return ret; - } - - /* Scatter */ - /* - ret = ml_coll_hier_scatter_setup(ml_module); - if( OMPI_SUCCESS != ret ) { - return ret; - } - */ - - ret = ml_coll_memsync_setup(ml_module); - if( OMPI_SUCCESS != ret ) { - return ret; - } - - /* nonblocking Reduce */ - - /* Alltoall */ - - /* nonblocking alltoall */ - - /* max_dag_size will be set here, so initialize it */ - - /* Pasha: Do we have to keep the max_dag_size ? - In most generic case, it will be equal to max_fn_calls */ - ml_module->max_dag_size = ml_module->max_fn_calls; - - assert(ml_module->max_dag_size > 0); - - /* initialize the mca_coll_ml_collective_operation_progress_t free list */ - /* NOTE: as part of initialization each routine needs to make sure that - * the module element max_dag_size is set large enough - space for - * tracking collective progress is allocated based on this value. 
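The free-list idiom used just below — allocate a pool of fixed-size items and run a one-time initializer, with a context pointer, on each — can be shown without any OPAL types. In this sketch init_fn and ctx play the roles of mca_coll_ml_collective_operation_progress_init and coll_desc_init_data; it illustrates the pattern, not the opal_free_list_init API:

#include <stdlib.h>

typedef int (*item_init_fn_t)(void *item, void *ctx);

/* Sketch: pool allocation with a per-item init callback. */
static void *alloc_initialized_pool(size_t n_items, size_t item_size,
                                    item_init_fn_t init_fn, void *ctx)
{
    char *pool = calloc(n_items, item_size);
    if (NULL == pool) {
        return NULL;
    }

    for (size_t i = 0; i < n_items; i++) {
        if (0 != init_fn(pool + i * item_size, ctx)) {
            free(pool);  /* an initializer failed: release the whole pool */
            return NULL;
        }
    }

    return pool;
}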
*/ - - /* figure out what the size of the ml buffer is */ - ml_per_proc_buffer_size=ml_module->payload_block->size_buffer; - comm_size=ompi_comm_size(ml_module->comm); - ml_per_proc_buffer_size/=comm_size; - ml_module->coll_desc_init_data.max_dag_size=ml_module->max_dag_size; - ml_module->coll_desc_init_data.max_n_bytes_per_proc_total=ml_per_proc_buffer_size; - ml_module->coll_desc_init_data.bcol_base_module=(mca_coll_base_module_t *) - ml_module; - - ret = opal_free_list_init ( - &(ml_module->coll_ml_collective_descriptors), - sizeof(mca_coll_ml_collective_operation_progress_t), - /* no special alignment needed */ - 8, - OBJ_CLASS(mca_coll_ml_collective_operation_progress_t), - /* no payload data */ - 0, 0, - /* NOTE: hack - need to parametrize this */ - cm->free_list_init_size, - cm->free_list_max_size, - cm->free_list_grow_size, - /* No Mpool */ - NULL, 0, NULL, - mca_coll_ml_collective_operation_progress_init, - (void *)&(ml_module->coll_desc_init_data) - ); - if (OMPI_SUCCESS != ret) { - return ret; - } - - /* done */ - return ret; -} diff --git a/ompi/mca/coll/ml/coll_ml_hier_algorithms_allgather_setup.c b/ompi/mca/coll/ml/coll_ml_hier_algorithms_allgather_setup.c deleted file mode 100644 index cd964d41dd..0000000000 --- a/ompi/mca/coll/ml/coll_ml_hier_algorithms_allgather_setup.c +++ /dev/null @@ -1,240 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mca/coll/ml/coll_ml.h" -#include "ompi/include/ompi/constants.h" -#include "ompi/mca/coll/ml/coll_ml_functions.h" -#include "ompi/mca/coll/ml/coll_ml_hier_algorithms_common_setup.h" -#include "ompi/patterns/net/netpatterns_knomial_tree.h" - -#define SMALL_MSG_RANGE 1 -#define LARGE_MSG_RANGE 5 - -static int mca_coll_ml_build_allgather_schedule(mca_coll_ml_topology_t *topo_info, - mca_coll_ml_collective_operation_description_t **coll_desc, int bcol_func_index) -{ - int ret; /* exit code in case of error */ - int nfn = 0; - int i; - int *scratch_indx = NULL, - *scratch_num = NULL; - - mca_coll_ml_collective_operation_description_t *schedule = NULL; - mca_coll_ml_compound_functions_t *comp_fn; - mca_coll_ml_schedule_hier_info_t h_info; - - ML_VERBOSE(9, ("Setting hierarchy, inputs : n_levels %d, hiest %d ", - topo_info->n_levels, topo_info->global_highest_hier_group_index)); - MCA_COLL_ML_INIT_HIER_INFO(h_info, topo_info->n_levels, - topo_info->global_highest_hier_group_index, topo_info); - - ret = mca_coll_ml_schedule_init_scratch(topo_info, &h_info, - &scratch_indx, &scratch_num); - if (OMPI_SUCCESS != ret) { - ML_ERROR(("Can't mca_coll_ml_schedule_init_scratch.")); - goto Error; - } - assert(NULL != scratch_indx); - assert(NULL != scratch_num); - - schedule = *coll_desc = - mca_coll_ml_schedule_alloc(&h_info); - if (NULL == schedule) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Error; - } - /* Setting topology information */ - schedule->topo_info = topo_info; - - /* Set dependencies equal to number of hierarchies */ - for (i = 0; i < h_info.num_up_levels; i++) { - int 
query_conf[MCA_COLL_ML_QUERY_SIZE]; - MCA_COLL_ML_SET_QUERY(query_conf, DATA_SRC_KNOWN, BLOCKING, BCOL_GATHER, bcol_func_index, 0, 0); - comp_fn = &schedule->component_functions[i]; - MCA_COLL_ML_SET_COMP_FN(comp_fn, i, topo_info, - i, scratch_indx, scratch_num, query_conf, "GATHER_DATA"); - } - - nfn = i; - if (h_info.call_for_top_function) { - int query_conf[MCA_COLL_ML_QUERY_SIZE]; - MCA_COLL_ML_SET_QUERY(query_conf, DATA_SRC_KNOWN, NON_BLOCKING, BCOL_ALLGATHER, bcol_func_index, 0, 0); - comp_fn = &schedule->component_functions[nfn]; - MCA_COLL_ML_SET_COMP_FN(comp_fn, nfn, topo_info, - nfn, scratch_indx, scratch_num, query_conf, "ALLGATHER_DATA"); - ++nfn; - } - - /* coming down the hierarchy */ - for (i = h_info.num_up_levels - 1; i >= 0; i--, nfn++) { - int query_conf[MCA_COLL_ML_QUERY_SIZE]; - MCA_COLL_ML_SET_QUERY(query_conf, DATA_SRC_KNOWN, NON_BLOCKING, BCOL_BCAST, bcol_func_index, 0, 0); - comp_fn = &schedule->component_functions[nfn]; - MCA_COLL_ML_SET_COMP_FN(comp_fn, i, topo_info, - nfn, scratch_indx, scratch_num, query_conf, "BCAST_DATA"); - } - - /* Fill the rest of constant data */ - mca_coll_ml_call_types(&h_info, schedule); - - MCA_COLL_ML_SET_SCHEDULE_ORDER_INFO(schedule); - - free(scratch_num); - free(scratch_indx); - - return OMPI_SUCCESS; - - Error: - if (NULL != scratch_indx) { - free(scratch_indx); - } - if (NULL != scratch_num) { - free(scratch_num); - } - - return ret; -} - -int ml_coll_hier_allgather_setup(mca_coll_ml_module_t *ml_module) -{ - /* Hierarchy Setup */ - int ret, topo_index, alg; - mca_coll_ml_topology_t *topo_info = ml_module->topo_list; - - ML_VERBOSE(10,("entering allgather setup")); - -#if 0 - /* used to validate the recursive k - ing allgather tree */ - { - /* debug print */ - int ii, jj; - netpatterns_k_exchange_node_t exchange_node; - - ret = netpatterns_setup_recursive_knomial_allgather_tree_node(8, 3, 3, &exchange_node); - fprintf(stderr,"log tree order %d tree_order %d\n", exchange_node.log_tree_order,exchange_node.tree_order); - if( EXCHANGE_NODE == exchange_node.node_type){ - if( exchange_node.n_extra_sources > 0){ - fprintf(stderr,"Receiving data from extra rank %d\n",exchange_node.rank_extra_sources_array[0]); - } - for( ii = 0; ii < exchange_node.log_tree_order; ii++){ - for( jj = 0; jj < (exchange_node.tree_order-1); jj++) { - if( exchange_node.rank_exchanges[ii][jj] >= 0){ - fprintf(stderr,"level %d I send %d bytes to %d from offset %d \n",ii+1, - exchange_node.payload_info[ii][jj].s_len, - exchange_node.rank_exchanges[ii][jj], - exchange_node.payload_info[ii][jj].s_offset); - fprintf(stderr,"level %d I receive %d bytes from %d at offset %d\n",ii+1, - exchange_node.payload_info[ii][jj].r_len, - exchange_node.rank_exchanges[ii][jj], - exchange_node.payload_info[ii][jj].r_offset); - } - } - } - fprintf(stderr,"exchange_node.n_extra_sources %d\n",exchange_node.n_extra_sources); - fprintf(stderr,"exchange_node.myid_reindex %d\n",exchange_node.reindex_myid); - if( exchange_node.n_extra_sources > 0){ - fprintf(stderr,"Sending back data to extra rank %d\n",exchange_node.rank_extra_sources_array[0]); - } - } else { - fprintf(stderr,"I am an extra and send to proxy %d\n", - exchange_node.rank_extra_sources_array[0]); - } - } -#endif - - alg = mca_coll_ml_component.coll_config[ML_ALLGATHER][ML_SMALL_MSG].algorithm_id; - topo_index = ml_module->collectives_topology_map[ML_ALLGATHER][alg]; - if (ML_UNDEFINED == alg || ML_UNDEFINED == topo_index) { - ML_ERROR(("No topology index or algorithm was defined")); - 
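/* With no valid (algorithm, topology) pair there is nothing to build:
 * the slot is cleared below so cleanup code will not walk an
 * uninitialized schedule, and the setup fails as a whole. */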
topo_info->hierarchical_algorithms[ML_ALLGATHER] = NULL; - return OMPI_ERROR; - } - - ret = mca_coll_ml_build_allgather_schedule(&ml_module->topo_list[topo_index], - &ml_module->coll_ml_allgather_functions[alg], - SMALL_MSG_RANGE); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("Failed to setup static alltoall")); - return ret; - } - - alg = mca_coll_ml_component.coll_config[ML_ALLGATHER][ML_LARGE_MSG].algorithm_id; - topo_index = ml_module->collectives_topology_map[ML_ALLGATHER][alg]; - if (ML_UNDEFINED == alg || ML_UNDEFINED == topo_index) { - ML_ERROR(("No topology index or algorithm was defined")); - topo_info->hierarchical_algorithms[ML_ALLGATHER] = NULL; - return OMPI_ERROR; - } - - ret = mca_coll_ml_build_allgather_schedule(&ml_module->topo_list[topo_index], - &ml_module->coll_ml_allgather_functions[alg], - LARGE_MSG_RANGE); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("Failed to setup static alltoall")); - return ret; - } - - return OMPI_SUCCESS; -} - -void ml_coll_hier_allgather_cleanup(mca_coll_ml_module_t *ml_module) -{ - /* Hierarchy Setup */ - int topo_index, alg; - mca_coll_ml_topology_t *topo_info = ml_module->topo_list; - - alg = mca_coll_ml_component.coll_config[ML_ALLGATHER][ML_SMALL_MSG].algorithm_id; - topo_index = ml_module->collectives_topology_map[ML_ALLGATHER][alg]; - if (ML_UNDEFINED == alg || ML_UNDEFINED == topo_index) { - ML_ERROR(("No topology index or algorithm was defined")); - topo_info->hierarchical_algorithms[ML_ALLGATHER] = NULL; - return; - } - - if (NULL == ml_module->coll_ml_allgather_functions[alg]) { - return; - } - - if (ml_module->coll_ml_allgather_functions[alg]->component_functions) { - free(ml_module->coll_ml_allgather_functions[alg]->component_functions); - ml_module->coll_ml_allgather_functions[alg]->component_functions = NULL; - } - - if (ml_module->coll_ml_allgather_functions[alg]) { - free(ml_module->coll_ml_allgather_functions[alg]); - ml_module->coll_ml_allgather_functions[alg] = NULL; - } - - alg = mca_coll_ml_component.coll_config[ML_ALLGATHER][ML_LARGE_MSG].algorithm_id; - topo_index = ml_module->collectives_topology_map[ML_ALLGATHER][alg]; - if (ML_UNDEFINED == alg || ML_UNDEFINED == topo_index) { - ML_ERROR(("No topology index or algorithm was defined")); - topo_info->hierarchical_algorithms[ML_ALLGATHER] = NULL; - return; - } - - if (ml_module->coll_ml_allgather_functions[alg]->component_functions) { - free(ml_module->coll_ml_allgather_functions[alg]->component_functions); - ml_module->coll_ml_allgather_functions[alg]->component_functions = NULL; - } - - if (ml_module->coll_ml_allgather_functions[alg]) { - free(ml_module->coll_ml_allgather_functions[alg]); - ml_module->coll_ml_allgather_functions[alg] = NULL; - } -} diff --git a/ompi/mca/coll/ml/coll_ml_hier_algorithms_allreduce_setup.c b/ompi/mca/coll/ml/coll_ml_hier_algorithms_allreduce_setup.c deleted file mode 100644 index a371d51b7a..0000000000 --- a/ompi/mca/coll/ml/coll_ml_hier_algorithms_allreduce_setup.c +++ /dev/null @@ -1,434 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mca/coll/ml/coll_ml.h" -#include "ompi/include/ompi/constants.h" -#include "ompi/mca/coll/ml/coll_ml_functions.h" - -#define ALLREDUCE_SMALL 1 -#define ALLREDUCE_LARGE 5 -#define SMALL_MSG_RANGE 1 -#define LARGE_MSG_RANGE 5 - -static int mca_coll_ml_build_allreduce_schedule( - mca_coll_ml_topology_t *topo_info, - mca_coll_ml_collective_operation_description_t **coll_desc, int bcol_func_index) -{ - - bool call_for_top_function, prev_is_zero; - int n_hiers = topo_info->n_levels; - int i_hier, j_hier; - int cnt, value_to_set = 0; - int ret; /* exit code in case of error */ - int nfn=0; - int *scratch_indx = NULL, - *scratch_num = NULL; - int global_high_hierarchy_index = - topo_info->global_highest_hier_group_index; - - mca_coll_ml_collective_operation_description_t *schedule; - mca_coll_ml_compound_functions_t *comp_fn; - mca_bcol_base_module_t *prev_bcol, - *bcol_module; - int num_up_levels,nbcol_functions,i; - - if (global_high_hierarchy_index == - topo_info->component_pairs[n_hiers - 1].bcol_index) { - /* The process that is member of highest level subgroup - should call for top algorithms in addition to fan-in/out steps*/ - call_for_top_function = true; - /* hier level run only top algorithm, so we deduct 1 */ - num_up_levels = n_hiers - 1; - /* Top algorithm is called only once, so we deduct 1 */ - nbcol_functions = 2 * n_hiers - 1; - } else { - /* The process is not member of highest level subgroup, - as result it does not call for top algorithm, - but it calls for all fan-in/out steps */ - call_for_top_function = false; - num_up_levels = n_hiers; - nbcol_functions = 2 * n_hiers; - } - - *coll_desc = (mca_coll_ml_collective_operation_description_t *) - calloc(1, sizeof(mca_coll_ml_collective_operation_description_t)); - schedule = *coll_desc; - if (NULL == schedule) { - ML_ERROR(("Can't allocate memory.")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - scratch_indx = (int *) calloc(n_hiers * 2, sizeof (int)); - if (NULL == scratch_indx) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Allreduce_Setup_Error; - } - - scratch_num = (int *) malloc(sizeof(int) * (n_hiers * 2)); - if (NULL == scratch_num) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Allreduce_Setup_Error; - } - - prev_bcol = NULL; - - for (i = 0, cnt = 0; i < num_up_levels; ++i, ++cnt) { - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, i))) { - scratch_indx[cnt] = scratch_indx[cnt - 1] + 1; - } else { - prev_bcol = GET_BCOL(topo_info, i); - } - } - - /* top - only if the proc arrive to highest_level_is_global_highest_level */ - if (call_for_top_function) { - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, n_hiers - 1))) { - scratch_indx[cnt] = scratch_indx[cnt - 1] + 1; - } else { - prev_bcol = GET_BCOL(topo_info, n_hiers - 1); - } - - ++cnt; - } - - /* going down */ - for (i = num_up_levels - 1; i >= 0; --i, ++cnt) { - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, i))) { - scratch_indx[cnt] = scratch_indx[cnt - 1] + 1; - } else { - prev_bcol = GET_BCOL(topo_info, i); - } - } - - i = cnt - 1; - prev_is_zero = true; - - do { - if (prev_is_zero) { - value_to_set = scratch_indx[i] + 1; - prev_is_zero = false; - } - - if (0 == scratch_indx[i]) { - prev_is_zero = true; - } - - scratch_num[i] = value_to_set; - --i; - } while(i >= 0); - - /* Set dependencies equal to number of hierarchies */ - schedule->n_fns 
= nbcol_functions; - schedule->topo_info = topo_info; - schedule->progress_type = 0; - - /* Allocated the component function */ - schedule->component_functions = (struct mca_coll_ml_compound_functions_t *) - calloc(nbcol_functions, sizeof(struct mca_coll_ml_compound_functions_t)); - - if (NULL == schedule->component_functions) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Allreduce_Setup_Error; - } - - for (i = 0; i < num_up_levels; i++) { - comp_fn = &schedule->component_functions[i]; - comp_fn->h_level = i; /* hierarchy level */ - bcol_module = GET_BCOL(topo_info, i); - - /* strcpy (comp_fn->fn_name, "ALLREDUCE_SMALL_DATA"); */ - - comp_fn->num_dependent_tasks = 0; - comp_fn->num_dependencies = 0; - - comp_fn->bcol_function = - bcol_module->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING][BCOL_REDUCE][bcol_func_index][0][0]; - if (NULL == comp_fn->bcol_function) { - /* if there isn't a bcol function for this then we can't continue */ - ret = OMPI_ERR_NOT_SUPPORTED; - goto Allreduce_Setup_Error; - } - - comp_fn->task_comp_fn = NULL; - - comp_fn->constant_group_data.bcol_module = bcol_module; - comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls = scratch_indx[i]; - comp_fn->constant_group_data.n_of_this_type_in_a_row = scratch_num[i]; - comp_fn->constant_group_data.n_of_this_type_in_collective = 0; - comp_fn->constant_group_data.index_of_this_type_in_collective = 0; - } - - nfn = i; - if (call_for_top_function) { - comp_fn = &schedule->component_functions[nfn]; - comp_fn->h_level = nfn; /* hierarchy level */ - bcol_module = GET_BCOL(topo_info, nfn); - - assert (NULL != bcol_module); - - /* strcpy (comp_fn->fn_name, "ALLREDUCE_SMALL_DATA"); */ - - /* The allreduce should depend on the reduce */ - comp_fn->num_dependent_tasks = 0; - comp_fn->num_dependencies = 0; - comp_fn->bcol_function = - bcol_module->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING][BCOL_ALLREDUCE][bcol_func_index][0][0]; - if (NULL == comp_fn->bcol_function) { - /* if there isn't a bcol function for this then we can't continue */ - ret = OMPI_ERR_NOT_SUPPORTED; - goto Allreduce_Setup_Error; - } - - comp_fn->task_comp_fn = NULL; - - comp_fn->constant_group_data.bcol_module = bcol_module; - comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls = scratch_indx[nfn]; - comp_fn->constant_group_data.n_of_this_type_in_a_row = scratch_num[nfn]; - comp_fn->constant_group_data.n_of_this_type_in_collective = 0; - comp_fn->constant_group_data.index_of_this_type_in_collective = 0; - - ++nfn; - } - - for (i = num_up_levels - 1; i >= 0; i--) { - comp_fn = &schedule->component_functions[nfn]; - comp_fn->h_level = i; /* hierarchy level */ - bcol_module = GET_BCOL(topo_info, i); - - assert (NULL != bcol_module); - - /* strcpy (comp_fn->fn_name, "ALLREDUCE_SMALL_DATA"); */ - - comp_fn->num_dependent_tasks = 0; - comp_fn->num_dependencies = 0; - - comp_fn->bcol_function = - bcol_module->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING][BCOL_BCAST][bcol_func_index][0][0]; - if (NULL == comp_fn->bcol_function) { - /* if there isn't a bcol function for this then we can't continue */ - ret = OMPI_ERR_NOT_SUPPORTED; - goto Allreduce_Setup_Error; - } - - comp_fn->task_comp_fn = NULL; - - comp_fn->constant_group_data.bcol_module = bcol_module; - comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls = scratch_indx[nfn]; - comp_fn->constant_group_data.n_of_this_type_in_a_row = scratch_num[nfn]; - comp_fn->constant_group_data.n_of_this_type_in_collective = 0; - 
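/* These two collective-wide counters are placeholders at this point;
 * the pass right after this loop walks the finished schedule and fills
 * in how many times each bcol module appears
 * (n_of_this_type_in_collective) and at which position
 * (index_of_this_type_in_collective). */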
comp_fn->constant_group_data.index_of_this_type_in_collective = 0; - - ++nfn; - } - - /* Fill the rest of constant data */ - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - mca_bcol_base_module_t *current_bcol = - schedule->component_functions[i_hier]. - constant_group_data.bcol_module; - cnt = 0; - for (j_hier = 0; j_hier < n_hiers; j_hier++) { - if (current_bcol == - schedule->component_functions[j_hier]. - constant_group_data.bcol_module) { - schedule->component_functions[j_hier]. - constant_group_data.index_of_this_type_in_collective = cnt; - cnt++; - } - } - - schedule->component_functions[i_hier]. - constant_group_data.n_of_this_type_in_collective = cnt; - } - - MCA_COLL_ML_SET_SCHEDULE_ORDER_INFO(schedule); - - free(scratch_num); - free(scratch_indx); - - return OMPI_SUCCESS; - -Allreduce_Setup_Error: - - if (NULL != scratch_indx) { - free(scratch_indx); - } - - if (NULL != scratch_num) { - free(scratch_num); - } - - if (NULL != schedule->component_functions) { - free(schedule->component_functions); - } - *coll_desc = NULL; - free (schedule); - - return ret; -} - -int ml_coll_hier_allreduce_setup_new(mca_coll_ml_module_t *ml_module) -{ - /* Hierarchy Setup */ - int ret; - int topo_index; - int alg; - mca_coll_ml_topology_t *topo_info = ml_module->topo_list; - - alg = mca_coll_ml_component.coll_config[ML_ALLREDUCE][ML_SMALL_MSG].algorithm_id; - topo_index = ml_module->collectives_topology_map[ML_ALLREDUCE][alg]; - if (ML_UNDEFINED == alg || ML_UNDEFINED == topo_index) { - ML_ERROR(("No topology index or algorithm was defined")); - topo_info->hierarchical_algorithms[ML_ALLREDUCE] = NULL; - return OMPI_ERROR; - } - - ret = mca_coll_ml_build_allreduce_schedule( - &ml_module->topo_list[topo_index], - &ml_module->coll_ml_allreduce_functions[alg], - SMALL_MSG_RANGE); - - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("Failed to setup Small Message Allreduce")); - return ret; - } - - alg = mca_coll_ml_component.coll_config[ML_ALLREDUCE][ML_LARGE_MSG].algorithm_id; - topo_index = ml_module->collectives_topology_map[ML_ALLREDUCE][alg]; - if (ML_UNDEFINED == alg || ML_UNDEFINED == topo_index) { - ML_ERROR(("No topology index or algorithm was defined")); - topo_info->hierarchical_algorithms[ML_ALLREDUCE] = NULL; - return OMPI_ERROR; - } - - ret = mca_coll_ml_build_allreduce_schedule( - &ml_module->topo_list[topo_index], - &ml_module->coll_ml_allreduce_functions[alg], - LARGE_MSG_RANGE); - - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("Failed to setup Large Message Allreduce")); - return ret; - } - - if (true == mca_coll_ml_component.need_allreduce_support) { - topo_index = ml_module->collectives_topology_map[ML_ALLREDUCE][ML_SMALL_DATA_EXTRA_TOPO_ALLREDUCE]; - if (ML_UNDEFINED == topo_index) { - ML_ERROR(("No topology index was defined")); - topo_info->hierarchical_algorithms[ML_ALLREDUCE] = NULL; - return OMPI_ERROR; - } - - ret = mca_coll_ml_build_allreduce_schedule( - &ml_module->topo_list[topo_index], - &ml_module->coll_ml_allreduce_functions[ML_SMALL_DATA_EXTRA_TOPO_ALLREDUCE], - SMALL_MSG_RANGE); - - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("Failed to setup Extra Small Message Allreduce")); - return ret; - } - - topo_index = ml_module->collectives_topology_map[ML_ALLREDUCE][ML_LARGE_DATA_EXTRA_TOPO_ALLREDUCE]; - if (ML_UNDEFINED == topo_index) { - ML_ERROR(("No topology index was defined")); - topo_info->hierarchical_algorithms[ML_ALLREDUCE] = NULL; - return OMPI_ERROR; - } - - ret = mca_coll_ml_build_allreduce_schedule( - 
&ml_module->topo_list[topo_index], - &ml_module->coll_ml_allreduce_functions[ML_LARGE_DATA_EXTRA_TOPO_ALLREDUCE], - LARGE_MSG_RANGE); - - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("Failed to setup Extra Large Message Allreduce")); - return ret; - } - } - - return OMPI_SUCCESS; -} - -void ml_coll_hier_allreduce_cleanup_new(mca_coll_ml_module_t *ml_module) -{ - /* Hierarchy Setup */ - int topo_index; - int alg; - mca_coll_ml_topology_t *topo_info = ml_module->topo_list; - - alg = mca_coll_ml_component.coll_config[ML_ALLREDUCE][ML_SMALL_MSG].algorithm_id; - topo_index = ml_module->collectives_topology_map[ML_ALLREDUCE][alg]; - if (ML_UNDEFINED == alg || ML_UNDEFINED == topo_index) { - ML_ERROR(("No topology index or algorithm was defined")); - topo_info->hierarchical_algorithms[ML_ALLREDUCE] = NULL; - return; - } - - if (NULL == ml_module->coll_ml_allreduce_functions[alg]) { - return; - } - - free(ml_module->coll_ml_allreduce_functions[alg]->component_functions); - ml_module->coll_ml_allreduce_functions[alg]->component_functions = NULL; - free(ml_module->coll_ml_allreduce_functions[alg]); - ml_module->coll_ml_allreduce_functions[alg] = NULL; - - alg = mca_coll_ml_component.coll_config[ML_ALLREDUCE][ML_LARGE_MSG].algorithm_id; - topo_index = ml_module->collectives_topology_map[ML_ALLREDUCE][alg]; - if (ML_UNDEFINED == alg || ML_UNDEFINED == topo_index) { - ML_ERROR(("No topology index or algorithm was defined")); - topo_info->hierarchical_algorithms[ML_ALLREDUCE] = NULL; - return; - } - - free(ml_module->coll_ml_allreduce_functions[alg]->component_functions); - ml_module->coll_ml_allreduce_functions[alg]->component_functions = NULL; - free(ml_module->coll_ml_allreduce_functions[alg]); - ml_module->coll_ml_allreduce_functions[alg] = NULL; - - if (true == mca_coll_ml_component.need_allreduce_support) { - topo_index = ml_module->collectives_topology_map[ML_ALLREDUCE][ML_SMALL_DATA_EXTRA_TOPO_ALLREDUCE]; - if (ML_UNDEFINED == topo_index) { - ML_ERROR(("No topology index was defined")); - topo_info->hierarchical_algorithms[ML_ALLREDUCE] = NULL; - return; - } - - alg = ML_SMALL_DATA_EXTRA_TOPO_ALLREDUCE; - free(ml_module->coll_ml_allreduce_functions[alg]->component_functions); - ml_module->coll_ml_allreduce_functions[alg]->component_functions = NULL; - free(ml_module->coll_ml_allreduce_functions[alg]); - ml_module->coll_ml_allreduce_functions[alg] = NULL; - - topo_index = ml_module->collectives_topology_map[ML_ALLREDUCE][ML_LARGE_DATA_EXTRA_TOPO_ALLREDUCE]; - if (ML_UNDEFINED == topo_index) { - ML_ERROR(("No topology index was defined")); - topo_info->hierarchical_algorithms[ML_ALLREDUCE] = NULL; - return; - } - - alg = ML_LARGE_DATA_EXTRA_TOPO_ALLREDUCE; - free(ml_module->coll_ml_allreduce_functions[alg]->component_functions); - ml_module->coll_ml_allreduce_functions[alg]->component_functions = NULL; - free(ml_module->coll_ml_allreduce_functions[alg]); - ml_module->coll_ml_allreduce_functions[alg] = NULL; - } -} diff --git a/ompi/mca/coll/ml/coll_ml_hier_algorithms_barrier_setup.c b/ompi/mca/coll/ml/coll_ml_hier_algorithms_barrier_setup.c deleted file mode 100644 index 2b4a0c2a9f..0000000000 --- a/ompi/mca/coll/ml/coll_ml_hier_algorithms_barrier_setup.c +++ /dev/null @@ -1,206 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. 
All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mca/coll/ml/coll_ml.h" -#include "ompi/mca/coll/ml/coll_ml_inlines.h" -#include "ompi/include/ompi/constants.h" -#include "ompi/mca/coll/ml/coll_ml_functions.h" - -static int mca_coll_ml_build_barrier_schedule( - mca_coll_ml_topology_t *topo_info, - mca_coll_ml_collective_operation_description_t - **coll_desc, - mca_coll_ml_module_t *ml_module) -{ - int i_hier, rc, i_fn, n_fcns, i, - n_hiers = topo_info->n_levels; - - bool call_for_top_func; - mca_bcol_base_module_t *bcol_module; - - mca_coll_ml_compound_functions_t *comp_fn; - mca_coll_ml_collective_operation_description_t *schedule; - - *coll_desc = (mca_coll_ml_collective_operation_description_t *) - malloc(sizeof(mca_coll_ml_collective_operation_description_t)); - - schedule = *coll_desc; - if (OPAL_UNLIKELY(NULL == schedule)) { - ML_ERROR(("Can't allocate memory.")); - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto Barrier_Setup_Error; - } - - if (topo_info->global_highest_hier_group_index == - topo_info->component_pairs[n_hiers - 1].bcol_index) { - /* The process that is member of highest level subgroup - should call for top algorithms in addition to fan-in/out steps */ - call_for_top_func = true; - n_fcns = 2 * n_hiers - 1; /* Up + Top + Down */ - } else { - /* The process is not member of highest level subgroup, - as result it does not call for top algorithm, - but it calls for all fan-in/out steps */ - call_for_top_func = false; - n_fcns = 2 * n_hiers; - } - - if( ml_module->max_fn_calls < n_fcns ) { - ml_module->max_fn_calls = n_fcns; - } - - /* Set dependencies equal to number of hierarchies */ - schedule->n_fns = n_fcns; - schedule->topo_info = topo_info; - - /* Allocated the component function */ - schedule->component_functions = (struct mca_coll_ml_compound_functions_t *) - calloc(n_fcns, sizeof(struct mca_coll_ml_compound_functions_t)); - - if (OPAL_UNLIKELY(NULL == schedule->component_functions)) { - ML_ERROR(("Can't allocate memory.")); - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto Barrier_Setup_Error; - } - for (i_fn = 0; i_fn < n_fcns; ++i_fn) { - i_hier = (i_fn < n_hiers ? i_fn : n_fcns - i_fn - 1); - comp_fn = &schedule->component_functions[i_fn]; - - /* The hierarchial level */ - comp_fn->h_level = i_hier; - bcol_module = GET_BCOL(topo_info, i_hier); - - /* The UP direction */ - if (1 + i_fn < n_hiers || (1 + i_fn == n_hiers && !call_for_top_func)) { - comp_fn->bcol_function = - bcol_module->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING][BCOL_FANIN][1][0][0]; - - if (NULL == comp_fn->bcol_function) { - ML_VERBOSE(10, ("no function available for BCOL_FANIN, NON_BLOCKING, DATA_SRC_KNOWN")); - rc = OMPI_ERR_NOT_AVAILABLE; - goto Barrier_Setup_Error; - } - - /* Each function call with index K is depended of all K-1 previous indices - - in simple words we will do sequential Fan-In calls */ - comp_fn->num_dependencies = (0 == i_fn) ? 
0 : 1; - comp_fn->num_dependent_tasks = 1; - /* Init component function */ - strcpy(comp_fn->fn_name, "FANIN"); - /* On the highest level */ - } else if ((1 + i_fn == n_hiers && call_for_top_func)) { - comp_fn->bcol_function = - bcol_module->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING][BCOL_BARRIER][1][0][0]; - - if (NULL == comp_fn->bcol_function) { - ML_VERBOSE(10, ("no function available for BCOL_BARRIER, NON_BLOCKING, DATA_SRC_KNOWN")); - rc = OMPI_ERR_NOT_AVAILABLE; - goto Barrier_Setup_Error; - } - - /* Each function call with index K is depended of all K-1 previous indices - - in simple words we do sequential calls */ - comp_fn->num_dependencies = (1 == n_hiers) ? 0 : 1; /* All Fan-Ins */ - comp_fn->num_dependent_tasks = n_fcns - n_hiers; /* All Fan-Outs */ - - /* Init component function */ - strcpy(comp_fn->fn_name, "BARRIER"); - - ML_VERBOSE(10, ("func indx %d set to BARRIER %p", i_fn, comp_fn->bcol_function)); - - /* The DOWN direction */ - } else { - comp_fn->bcol_function = - bcol_module->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING][BCOL_FANOUT][1][0][0]; - - if (NULL == comp_fn->bcol_function) { - ML_VERBOSE(10, ("no function available for BCOL_FANOUT, NON_BLOCKING, DATA_SRC_KNOWN")); - rc = OMPI_ERR_NOT_AVAILABLE; - goto Barrier_Setup_Error; - } - - /* Each function call with index K is depended of all UP and TOP algths */ - comp_fn->num_dependencies = 1; - comp_fn->num_dependent_tasks = call_for_top_func ? 0 : - (i_fn + 1 == n_fcns ? 0 : 1); - - /* Init component function */ - strcpy(comp_fn->fn_name, "FANOUT"); - } - - ML_VERBOSE(10, ("func indx %d set to %p", i_fn, comp_fn->bcol_function)); - - if (comp_fn->num_dependent_tasks > 0) { - comp_fn->dependent_task_indices = (int *) calloc(comp_fn->num_dependent_tasks, sizeof(int)); - if (OPAL_UNLIKELY(NULL == comp_fn->dependent_task_indices)) { - ML_ERROR(("Can't allocate memory.")); - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto Barrier_Setup_Error; - } - - /* All indexes follow after this one */ - for (i = 0; i < comp_fn->num_dependent_tasks; ++i) { - comp_fn->dependent_task_indices[i] = i_fn + i + 1; - } - } else { - comp_fn->dependent_task_indices = NULL; - } - - - /* No need completion func for Barrier */ - comp_fn->task_comp_fn = NULL; - - ML_VERBOSE(10, ("Setting collective [Barrier] fn_idx %d, n_of_this_type_in_a_row %d, " - "index_in_consecutive_same_bcol_calls %d.", - i_fn, comp_fn->constant_group_data.n_of_this_type_in_a_row, - comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls)); - } - - rc = ml_coll_barrier_constant_group_data_setup(topo_info, schedule); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - ML_ERROR(("Failed to init const group data.")); - goto Barrier_Setup_Error; - } - - schedule->progress_type = 0; - - return OMPI_SUCCESS; - -Barrier_Setup_Error: - if (NULL != schedule->component_functions) { - free(schedule->component_functions); - schedule->component_functions = NULL; - } - - return rc; -} - -int ml_coll_hier_barrier_setup(mca_coll_ml_module_t *ml_module) -{ - int rc; - mca_coll_ml_topology_t *topo_info = - &ml_module->topo_list[ml_module->collectives_topology_map[ML_BARRIER][ML_SMALL_MSG]]; - - rc = mca_coll_ml_build_barrier_schedule(topo_info, - &ml_module->coll_ml_barrier_function, ml_module); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - /* Make sure to reset the barrier pointer to NULL */ - topo_info->hierarchical_algorithms[BCOL_BARRIER] = NULL; - - return rc; - } - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/coll/ml/coll_ml_hier_algorithms_bcast_setup.c 
b/ompi/mca/coll/ml/coll_ml_hier_algorithms_bcast_setup.c deleted file mode 100644 index 314a6f4655..0000000000 --- a/ompi/mca/coll/ml/coll_ml_hier_algorithms_bcast_setup.c +++ /dev/null @@ -1,851 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mca/coll/ml/coll_ml.h" -#include "ompi/mca/coll/ml/coll_ml_inlines.h" -#include "ompi/include/ompi/constants.h" -#include "ompi/mca/coll/ml/coll_ml_functions.h" - -static int mca_coll_ml_task_comp_dynamic_root_small_message - (struct mca_coll_ml_task_status_t *task) { - - task->ml_coll_operation->variable_fn_params.root_flag = true; - - return OMPI_SUCCESS; -} - - -int mca_coll_ml_setup_scratch_vals(mca_coll_ml_compound_functions_t *func_list, - int *scratch_indx, int *scratch_num, int n_hiers) -{ - int i_hier, j_hier; - int cnt, value_to_set = 0; - bool prev_is_zero; - mca_coll_ml_compound_functions_t *comp_fn; - mca_bcol_base_module_t *prev_bcol = NULL, - *bcol_module; - - /* Calculate scratch numbers */ - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - bcol_module = func_list[i_hier].constant_group_data.bcol_module; - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, bcol_module)) { - scratch_indx[i_hier] = scratch_indx[i_hier - 1] + 1; - } else { - scratch_indx[i_hier] = 0; - prev_bcol = bcol_module; - } - } - - --i_hier; - prev_is_zero = true; - - do { - if (prev_is_zero) { - value_to_set = scratch_indx[i_hier] + 1; - prev_is_zero = false; - } - - if (0 == scratch_indx[i_hier]) { - prev_is_zero = true; - } - - scratch_num[i_hier] = value_to_set; - --i_hier; - } while(i_hier >= 0); - - - /* Each hierarchy has one function to be implemented */ - /* this is the basic setup required of the bcol function */ - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - /* We want to be generic, but on this stage we support only single - * bcol per hierarchy level - */ - comp_fn = &func_list[i_hier]; - comp_fn->h_level = i_hier; /* hierarchy level */ - - /* we can change this */ - comp_fn->task_comp_fn = mca_coll_ml_task_comp_dynamic_root_small_message; - /* assert(NULL != comp_fn->bcol_function); */ - /* Constants */ - comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls = scratch_indx[i_hier]; - comp_fn->constant_group_data.n_of_this_type_in_a_row = scratch_num[i_hier]; - comp_fn->constant_group_data.n_of_this_type_in_collective = 0; - comp_fn->constant_group_data.index_of_this_type_in_collective = 0; - - ML_VERBOSE(10, ("Setting collective [bcast] fn_idx %d, index_in_consecutive_same_bcol_calls %d, n_of_this_type_in_a_row %d", - i_hier, - comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls, - comp_fn->constant_group_data.n_of_this_type_in_a_row)); - } - - /* Fill the rest of constant data */ - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - mca_bcol_base_module_t *current_bcol = - func_list[i_hier]. - constant_group_data.bcol_module; - cnt = 0; - for (j_hier = 0; j_hier < n_hiers; j_hier++) { - if (current_bcol == - func_list[j_hier]. - constant_group_data.bcol_module) { - func_list[j_hier].constant_group_data. 
- index_of_this_type_in_collective = cnt; - - cnt++; - } - } - func_list[i_hier].constant_group_data.n_of_this_type_in_collective = cnt; - } - - return OMPI_SUCCESS; - -} - -static void mca_coll_ml_zero_dep_bcast(mca_coll_ml_task_status_t *task_status, int index, mca_coll_ml_compound_functions_t *func) -{ - /* no real dependency, set everything to zero */ - task_status->rt_num_dependencies = 0; - task_status->rt_num_dependent_tasks = 0; - task_status->rt_dependent_task_indices = NULL; -} - -/* - * Build schedule without runtime attributes - */ -static int mca_coll_ml_build_bcast_dynamic_schedule_no_attributes( - mca_coll_ml_topology_t *topo_info, - mca_coll_ml_collective_operation_description_t **coll_desc, int bcol_func_index) -{ - - int n_hiers = topo_info->n_levels; - int i_hier, j_hier; - int cnt, value_to_set = 0; - int ret; /* exit code in case of error */ - bool prev_is_zero; - int *scratch_indx = NULL, - *scratch_num = NULL; - - mca_coll_ml_collective_operation_description_t *schedule; - mca_coll_ml_compound_functions_t *comp_fn; - mca_bcol_base_module_t *prev_bcol, - *bcol_module; - - *coll_desc = (mca_coll_ml_collective_operation_description_t *) - calloc(1, sizeof(mca_coll_ml_collective_operation_description_t)); - schedule = *coll_desc; - if (NULL == schedule) { - ML_ERROR(("Can't allocate memory.")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - scratch_indx = (int *) calloc(n_hiers, sizeof (int)); - if (NULL == scratch_indx) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Bcast_Setup_Error; - } - - scratch_num = (int *) malloc(sizeof(int) * (n_hiers)); - if (NULL == scratch_num) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Bcast_Setup_Error; - } - - prev_bcol = NULL; - - /* Calculate scratch numbers */ - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, i_hier))) { - scratch_indx[i_hier] = scratch_indx[i_hier - 1] + 1; - } else { - scratch_indx[i_hier] = 0; - prev_bcol = GET_BCOL(topo_info, i_hier); - } - } - - --i_hier; - prev_is_zero = true; - - do { - if (prev_is_zero) { - value_to_set = scratch_indx[i_hier] + 1; - prev_is_zero = false; - } - - if (0 == scratch_indx[i_hier]) { - prev_is_zero = true; - } - - scratch_num[i_hier] = value_to_set; - --i_hier; - } while(i_hier >= 0); - - /* Set dependencies equal to number of hierarchies */ - schedule->n_fns = n_hiers; - schedule->topo_info = topo_info; - schedule->progress_type = 0; /* Pasha: Not really defined, puting zero */ - - /* Allocated the component function */ - schedule->component_functions = (struct mca_coll_ml_compound_functions_t *) - calloc(n_hiers, sizeof(struct mca_coll_ml_compound_functions_t)); - if (NULL == schedule->component_functions) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Bcast_Setup_Error; - } - - /* Each hierarchy has one function to be implemented */ - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - /* We want to be generic, but on this stage we support only single - * bcol per hierarchy level - */ - comp_fn = &schedule->component_functions[i_hier]; - comp_fn->h_level = i_hier; /* hierarchy level */ - bcol_module = GET_BCOL(topo_info, i_hier); - /* Init component function */ - strcpy (comp_fn->fn_name, "BCAST_TEST_SMALL_DYNAMIC"); - comp_fn->num_dependent_tasks = 0; - comp_fn->num_dependencies = 0; - comp_fn->dependent_task_indices = NULL; - comp_fn->bcol_function = - 
bcol_module->filtered_fns_table[DATA_SRC_UNKNOWN][NON_BLOCKING][BCOL_BCAST][bcol_func_index][0][0]; - comp_fn->task_comp_fn = mca_coll_ml_task_comp_dynamic_root_small_message; - assert(NULL != comp_fn->bcol_function); - /* - comp_fn->bcol_function->progress_fn = - bcol_module->filtered_fns_table[BCOL_BCAST][1][0][0]; - */ - /* Constants */ - comp_fn->constant_group_data.bcol_module = bcol_module; - comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls = scratch_indx[i_hier]; - comp_fn->constant_group_data.n_of_this_type_in_a_row = scratch_num[i_hier]; - comp_fn->constant_group_data.n_of_this_type_in_collective = 0; - comp_fn->constant_group_data.index_of_this_type_in_collective = 0; - - ML_VERBOSE(10, ("Setting collective [bcast] fn_idx %d, index_in_consecutive_same_bcol_calls %d, n_of_this_type_in_a_row %d", - i_hier, - comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls, - comp_fn->constant_group_data.n_of_this_type_in_a_row)); - } - - /* Fill the rest of constant data */ - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - mca_bcol_base_module_t *current_bcol = - schedule->component_functions[i_hier]. - constant_group_data.bcol_module; - cnt = 0; - for (j_hier = 0; j_hier < n_hiers; j_hier++) { - if (current_bcol == - schedule->component_functions[j_hier]. - constant_group_data.bcol_module) { - schedule->component_functions[j_hier]. - constant_group_data.index_of_this_type_in_collective = cnt; - ML_VERBOSE(10, ("Pasha: Setting collective [bcast small][count %d], fn_idx %d, collective_alg->functions[i].index_of_this_type_in_collective %d", - cnt, i_hier, - schedule->component_functions[j_hier]. - constant_group_data.index_of_this_type_in_collective)); - cnt++; - } - } - - schedule->component_functions[i_hier]. - constant_group_data.n_of_this_type_in_collective = cnt; - } - - schedule->task_setup_fn[COLL_ML_ROOT_TASK_FN] = mca_coll_ml_zero_dep_bcast; - schedule->task_setup_fn[COLL_ML_GENERAL_TASK_FN] = mca_coll_ml_zero_dep_bcast; - - MCA_COLL_ML_SET_SCHEDULE_ORDER_INFO(schedule); - - free(scratch_num); - free(scratch_indx); - - return OMPI_SUCCESS; - -Bcast_Setup_Error: - - if (NULL != scratch_indx) { - free(scratch_indx); - } - - if (NULL != scratch_num) { - free(scratch_num); - } - - if (NULL != schedule->component_functions) { - free(schedule->component_functions); - } - - return ret; -} - -static int mca_coll_ml_build_bcast_sequential_schedule_no_attributes( - mca_coll_ml_topology_t *topo_info, - mca_coll_ml_collective_operation_description_t **coll_desc, int bcol_func_index) -{ - - int n_hiers = topo_info->n_levels; - int i_hier, j_hier; - int cnt, value_to_set = 0; - int ret; /* exit code in case of error */ - bool prev_is_zero; - int *scratch_indx = NULL, - *scratch_num = NULL; - - mca_coll_ml_collective_operation_description_t *schedule; - mca_coll_ml_compound_functions_t *comp_fn; - mca_coll_ml_compound_functions_t *comp_fns_temp; - mca_bcol_base_module_t *prev_bcol, - *bcol_module; - - *coll_desc = (mca_coll_ml_collective_operation_description_t *) - calloc(1, sizeof(mca_coll_ml_collective_operation_description_t)); - schedule = *coll_desc; - if (NULL == schedule) { - ML_ERROR(("Can't allocate memory.")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - scratch_indx = (int *) calloc(n_hiers, sizeof (int)); - if (NULL == scratch_indx) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Bcast_Setup_Error; - } - - scratch_num = (int *) malloc(sizeof(int) * (n_hiers)); - if (NULL == scratch_num) { - ML_ERROR(("Can't allocate 
memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Bcast_Setup_Error; - } - - prev_bcol = NULL; - - /* Calculate scratch numbers */ - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, i_hier))) { - scratch_indx[i_hier] = scratch_indx[i_hier - 1] + 1; - } else { - scratch_indx[i_hier] = 0; - prev_bcol = GET_BCOL(topo_info, i_hier); - } - } - - --i_hier; - prev_is_zero = true; - - do { - if (prev_is_zero) { - value_to_set = scratch_indx[i_hier] + 1; - prev_is_zero = false; - } - - if (0 == scratch_indx[i_hier]) { - prev_is_zero = true; - } - - scratch_num[i_hier] = value_to_set; - --i_hier; - } while(i_hier >= 0); - - /* Set dependencies equal to number of hierarchies */ - schedule->n_fns = n_hiers; - schedule->topo_info = topo_info; - schedule->progress_type = 0; /* Pasha: Not really defined, puting zero - * Josh: would be nice to define it as "sequential" - * or "concurrent" - */ - - /* Allocated the component function */ - schedule->component_functions = (struct mca_coll_ml_compound_functions_t *) - calloc(n_hiers, sizeof(struct mca_coll_ml_compound_functions_t)); - if (NULL == schedule->component_functions) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Bcast_Setup_Error; - } - /* Allocate the schedule list */ - schedule->comp_fn_arr = (struct mca_coll_ml_compound_functions_t **) - calloc(n_hiers,sizeof(struct mca_coll_ml_compound_functions_t *)); - if (NULL == schedule->comp_fn_arr) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Bcast_Setup_Error; - } - /* Each hierarchy has one function to be implemented */ - /* this is the basic setup required of the bcol function */ - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - /* We want to be generic, but on this stage we support only single - * bcol per hierarchy level - */ - comp_fn = &schedule->component_functions[i_hier]; - comp_fn->h_level = i_hier; /* hierarchy level */ - bcol_module = GET_BCOL(topo_info, i_hier); - /* Init component function */ - strcpy (comp_fn->fn_name, "BCAST_TEST_SMALL_SEQUENTIAL"); - - /* should be very simple, shouldn't require any kind of fancy dependencies set*/ - - comp_fn->bcol_function = - bcol_module->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING][BCOL_BCAST][bcol_func_index][0][0]; - - /* initialize the coll_fn_started flag to false */ - /*comp_fn->coll_fn_started = false;*/ - /* debug print */ - - /* - if(comp_fn->coll_fn_started){ - fprintf(stderr,"this statement is true\n"); - } else { - fprintf(stderr,"done setting to false \n"); - } - */ - - comp_fn->task_comp_fn = mca_coll_ml_task_comp_dynamic_root_small_message; - /* assert(NULL != comp_fn->bcol_function); */ - /* Constants */ - comp_fn->constant_group_data.bcol_module = bcol_module; - comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls = scratch_indx[i_hier]; - comp_fn->constant_group_data.n_of_this_type_in_a_row = scratch_num[i_hier]; - comp_fn->constant_group_data.n_of_this_type_in_collective = 0; - comp_fn->constant_group_data.index_of_this_type_in_collective = 0; - - ML_VERBOSE(10, ("Setting collective [bcast] fn_idx %d, index_in_consecutive_same_bcol_calls %d, n_of_this_type_in_a_row %d", - i_hier, - comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls, - comp_fn->constant_group_data.n_of_this_type_in_a_row)); - } - - /* Fill the rest of constant data */ - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - mca_bcol_base_module_t *current_bcol = - schedule->component_functions[i_hier]. 
- constant_group_data.bcol_module; - cnt = 0; - for (j_hier = 0; j_hier < n_hiers; j_hier++) { - if (current_bcol == - schedule->component_functions[j_hier]. - constant_group_data.bcol_module) { - schedule->component_functions[j_hier]. - constant_group_data.index_of_this_type_in_collective = cnt; - ML_VERBOSE(10, ("Pasha: Setting collective [bcast small][count %d], fn_idx %d, collective_alg->functions[i].index_of_this_type_in_collective %d", - cnt, i_hier, - schedule->component_functions[j_hier]. - constant_group_data.index_of_this_type_in_collective)); - cnt++; - } - } - schedule->component_functions[i_hier]. - constant_group_data.n_of_this_type_in_collective = cnt; - } - /* Now that the functions have been set-up properly, we can simple permute the ordering a bit */ - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - /* first one is trivial */ - comp_fns_temp = (struct mca_coll_ml_compound_functions_t *) - calloc(n_hiers, sizeof(struct mca_coll_ml_compound_functions_t)); - /* else we need to build the schedule */ - - for(j_hier = 0; j_hier < n_hiers; j_hier++) { - /* put the i_hier-th function first in the list */ - if( 0 == j_hier ) { - comp_fns_temp[j_hier] = schedule->component_functions[i_hier]; - } else if( j_hier <= i_hier ) { - comp_fns_temp[j_hier] = schedule->component_functions[j_hier-1]; - } else { - comp_fns_temp[j_hier] = schedule->component_functions[j_hier]; - } - } - /* now let's attach this list to our array of lists */ - schedule->comp_fn_arr[i_hier] = comp_fns_temp; - - } - - -#if 1 - /* I'm going to just loop over each schedule and - * set up the scratch indices, scratch numbers - * and other constant data - */ - for( i_hier = 1; i_hier < n_hiers; i_hier++) { - /* calculate the scratch indices and associated numbers */ - ret = mca_coll_ml_setup_scratch_vals(schedule->comp_fn_arr[i_hier], scratch_indx, - scratch_num, n_hiers); - if( OMPI_SUCCESS != ret ) { - ret = OMPI_ERROR; - goto Bcast_Setup_Error; - } - - } -#endif - - MCA_COLL_ML_SET_SCHEDULE_ORDER_INFO(schedule); - - free(scratch_num); - free(scratch_indx); - - return OMPI_SUCCESS; - -Bcast_Setup_Error: - - if (NULL != scratch_indx) { - free(scratch_indx); - } - - if (NULL != scratch_num) { - free(scratch_num); - } - - if (NULL != schedule->component_functions) { - free(schedule->component_functions); - } - - if (NULL != schedule->comp_fn_arr) { - free(schedule->comp_fn_arr); - } - free (schedule); - *coll_desc = NULL; - - return ret; -} - -static void mca_coll_ml_static_bcast_root(mca_coll_ml_task_status_t *task_status, int index, - mca_coll_ml_compound_functions_t *func) -{ - task_status->rt_num_dependencies = 0; - task_status->rt_num_dependent_tasks = 0; - task_status->rt_dependent_task_indices = 0; -} - -static void mca_coll_ml_static_bcast_non_root(mca_coll_ml_task_status_t *task_status, int index, - mca_coll_ml_compound_functions_t *func) -{ - /* Make active only the first level of hierarchy the gets the data, all the rest of levels - will be activated by dependency list */ - if (task_status->ml_coll_operation->variable_fn_params.root_route->level == index) { - task_status->rt_num_dependencies = 0; - task_status->rt_num_dependent_tasks = func->num_dependent_tasks; - task_status->rt_dependent_task_indices = func->dependent_task_indices; - task_status->ml_coll_operation->variable_fn_params.root = - task_status->ml_coll_operation->variable_fn_params.root_route->rank; - } else { - task_status->rt_num_dependencies = 1; /* wait for root */ - task_status->rt_num_dependent_tasks = 0; /* no depended task */ - 
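The per-root schedules that mca_coll_ml_build_bcast_sequential_schedule_no_attributes builds above are plain rotations: entry i_hier moves to slot 0 and the entries before it shift one slot down. A standalone sketch of that index mapping, with plain ints standing in for mca_coll_ml_compound_functions_t (names are illustrative, not from the original source):

#include <stdio.h>

/* Sketch of the comp_fn_arr[i] ordering: which original slot ends up
 * at position j when function i is promoted to run first. */
static int rotated_index(int i, int j)
{
    if (0 == j) {
        return i;       /* the i-th function runs first */
    }
    if (j <= i) {
        return j - 1;   /* entries before i shift one slot down */
    }
    return j;           /* entries after i keep their position */
}

int main(void)
{
    int n = 4, i = 2, j;

    /* expected output for i == 2: 2 0 1 3 */
    for (j = 0; j < n; j++) {
        printf("%d ", rotated_index(i, j));
    }
    printf("\n");
    return 0;
}

Keeping the mapping pure makes it easy to check that each per-root list is a permutation of the original function list.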
task_status->rt_dependent_task_indices = NULL; /* NULL */ - } -} - -static int mca_coll_ml_build_bcast_known_schedule_no_attributes( - mca_coll_ml_topology_t *topo_info, - mca_coll_ml_collective_operation_description_t **coll_desc, int bcol_func_index) -{ - - int n_hiers = topo_info->n_levels; - int i_hier, j_hier; - int cnt, value_to_set = 0; - int ret; /* exit code in case of error */ - bool prev_is_zero; - int *scratch_indx = NULL, - *scratch_num = NULL; - - mca_coll_ml_collective_operation_description_t *schedule; - mca_coll_ml_compound_functions_t *comp_fn; - mca_bcol_base_module_t *prev_bcol, - *bcol_module; - - *coll_desc = (mca_coll_ml_collective_operation_description_t *) - calloc(1, sizeof(mca_coll_ml_collective_operation_description_t)); - schedule = *coll_desc; - if (NULL == schedule) { - ML_ERROR(("Can't allocate memory.")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - scratch_indx = (int *) calloc(n_hiers, sizeof (int)); - if (NULL == scratch_indx) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Bcast_Setup_Error; - } - - scratch_num = (int *) malloc(sizeof(int) * (n_hiers)); - if (NULL == scratch_num) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Bcast_Setup_Error; - } - - prev_bcol = NULL; - - /* Calculate scratch numbers */ - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, i_hier))) { - scratch_indx[i_hier] = scratch_indx[i_hier - 1] + 1; - } else { - prev_bcol = GET_BCOL(topo_info, i_hier); - } - } - - --i_hier; - prev_is_zero = true; - - do { - if (prev_is_zero) { - value_to_set = scratch_indx[i_hier] + 1; - prev_is_zero = false; - } - - if (0 == scratch_indx[i_hier]) { - prev_is_zero = true; - } - - scratch_num[i_hier] = value_to_set; - --i_hier; - } while(i_hier >= 0); - - /* Set dependencies equal to number of hierarchies */ - schedule->n_fns = n_hiers; - schedule->topo_info = topo_info; - schedule->progress_type = 0; /* Pasha: Not really defined, puting zero */ - - /* Allocated the component function */ - schedule->component_functions = (struct mca_coll_ml_compound_functions_t *) - calloc(n_hiers, sizeof(struct mca_coll_ml_compound_functions_t)); - if (NULL == schedule->component_functions) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Bcast_Setup_Error; - } - - /* Each hierarchy has one function to be implemented */ - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - int j; - /* We want to be generic, but on this stage we support only single - * bcol per hierarchy level - */ - comp_fn = &schedule->component_functions[i_hier]; - comp_fn->h_level = i_hier; /* hierarchy level */ - bcol_module = GET_BCOL(topo_info, i_hier); - /* Init component function */ - strcpy (comp_fn->fn_name, "BCAST_TEST_SMALL_STATIC"); - /* Hack for single layer of hierarchy */ - if (1 == n_hiers) { - comp_fn->num_dependent_tasks = n_hiers - 1; - comp_fn->num_dependencies = 0; - } else { - comp_fn->num_dependent_tasks = n_hiers; /* root will have n_hier - 1 depended tasks, non root zero*/ - comp_fn->num_dependencies = 0; /* root will have zero dependencies */ - } - - if (0 != comp_fn->num_dependent_tasks) { - comp_fn->dependent_task_indices = (int *)calloc(n_hiers, sizeof(int)); - for (j = 0; j < n_hiers; j++) { - comp_fn->dependent_task_indices[j] = j; /* only root will use this one */ - } - } - - comp_fn->bcol_function = - bcol_module->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING][BCOL_BCAST][bcol_func_index][0][0]; - - 
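The bcol_function assignments in these builders index filtered_fns_table with six coordinates; they correspond, in order, to the fields set by MCA_COLL_ML_SET_QUERY later in this patch (src_type, blocking, coll_type, index, other0, other1). A small sketch of the lookup shape, with made-up enum values and table sizes rather than the real ones:

#include <stdio.h>

/* Illustrative model of the six-dimensional function table.
 * Dimensions: [src][blocking][coll type][func index][other0][other1]. */
enum { SRC_KNOWN = 0, SRC_UNKNOWN = 1 };
enum { BLOCKING_FN = 0, NON_BLOCKING_FN = 1 };
enum { COLL_BCAST = 0, COLL_REDUCE = 1 };

typedef int (*coll_fn_t)(void);
static int dummy_bcast(void) { return 0; }

static coll_fn_t fns_table[2][2][2][2][1][1];

int main(void)
{
    /* Register one function, then look it up the way the schedule
     * builders do: table[src][blocking][coll][index][0][0]. */
    fns_table[SRC_KNOWN][NON_BLOCKING_FN][COLL_BCAST][1][0][0] = dummy_bcast;

    coll_fn_t fn = fns_table[SRC_KNOWN][NON_BLOCKING_FN][COLL_BCAST][1][0][0];
    printf("lookup %s\n", fn ? "hit" : "miss");
    return fn ? fn() : 1;
}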
comp_fn->task_comp_fn = mca_coll_ml_task_comp_dynamic_root_small_message; - /* assert(NULL != comp_fn->bcol_function); */ - /* Constants */ - comp_fn->constant_group_data.bcol_module = bcol_module; - comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls = scratch_indx[i_hier]; - comp_fn->constant_group_data.n_of_this_type_in_a_row = scratch_num[i_hier]; - comp_fn->constant_group_data.n_of_this_type_in_collective = 0; - comp_fn->constant_group_data.index_of_this_type_in_collective = 0; - - ML_VERBOSE(10, ("Setting collective [bcast] fn_idx %d, index_in_consecutive_same_bcol_calls %d, n_of_this_type_in_a_row %d", - i_hier, - comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls, - comp_fn->constant_group_data.n_of_this_type_in_a_row)); - } - - /* Fill the rest of constant data */ - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - mca_bcol_base_module_t *current_bcol = - schedule->component_functions[i_hier]. - constant_group_data.bcol_module; - cnt = 0; - for (j_hier = 0; j_hier < n_hiers; j_hier++) { - if (current_bcol == - schedule->component_functions[j_hier]. - constant_group_data.bcol_module) { - schedule->component_functions[j_hier]. - constant_group_data.index_of_this_type_in_collective = cnt; - ML_VERBOSE(10, ("Pasha: Setting collective [bcast small][count %d], fn_idx %d, collective_alg->functions[i].index_of_this_type_in_collective %d", - cnt, i_hier, - schedule->component_functions[j_hier]. - constant_group_data.index_of_this_type_in_collective)); - cnt++; - } - } - schedule->component_functions[i_hier]. - constant_group_data.n_of_this_type_in_collective = cnt; - } - - schedule->task_setup_fn[COLL_ML_ROOT_TASK_FN] = mca_coll_ml_static_bcast_root; - schedule->task_setup_fn[COLL_ML_GENERAL_TASK_FN] = mca_coll_ml_static_bcast_non_root; - - MCA_COLL_ML_SET_SCHEDULE_ORDER_INFO(schedule); - - free(scratch_num); - free(scratch_indx); - - return OMPI_SUCCESS; - -Bcast_Setup_Error: - - if (NULL != scratch_indx) { - free(scratch_indx); - } - - if (NULL != scratch_num) { - free(scratch_num); - } - - if (NULL != schedule->component_functions) { - free(schedule->component_functions); - } - free (schedule); - *coll_desc = NULL; - - return ret; -} - - - -#define BCAST_SMALL 1 -#define BCAST_LARGE 5 - -int ml_coll_hier_bcast_setup(mca_coll_ml_module_t *ml_module) -{ - /* Hierarchy Setup */ - int ret, i , size_code, alg; - int topo_index = 0; - mca_coll_ml_topology_t *topo_info = ml_module->topo_list; - - for (i = 0; i < ML_NUM_MSG; i++) { - - switch (i) { - case ML_SMALL_MSG: - size_code = BCAST_SMALL; - break; - case ML_LARGE_MSG: - size_code = BCAST_LARGE; - break; - default: - topo_info->hierarchical_algorithms[ML_BCAST] = NULL; - return OMPI_ERROR; - } - - alg = mca_coll_ml_component.coll_config[ML_BCAST][i].algorithm_id; - topo_index = ml_module->collectives_topology_map[ML_BCAST][alg]; - if (ML_UNDEFINED == alg || ML_UNDEFINED == topo_index) { - ML_ERROR(("No topology index or algorithm was defined")); - topo_info->hierarchical_algorithms[ML_BCAST] = NULL; - return OMPI_ERROR; - } - - switch (alg) { - case ML_BCAST_SMALL_DATA_KNOWN: - case ML_BCAST_LARGE_DATA_KNOWN: - ret = mca_coll_ml_build_bcast_known_schedule_no_attributes(&topo_info[topo_index], - &ml_module->coll_ml_bcast_functions[alg], size_code); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("Failed to setup static bcast")); - topo_info->hierarchical_algorithms[ML_BCAST] = NULL; - return ret; - } - break; - case ML_BCAST_SMALL_DATA_UNKNOWN: - case ML_BCAST_LARGE_DATA_UNKNOWN: - ret = 
mca_coll_ml_build_bcast_dynamic_schedule_no_attributes(&topo_info[topo_index], - &ml_module->coll_ml_bcast_functions[alg], size_code); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("Failed to setup dynamic bcast")); - topo_info->hierarchical_algorithms[ML_BCAST] = NULL; - return ret; - } - break; - case ML_BCAST_SMALL_DATA_SEQUENTIAL: - case ML_BCAST_LARGE_DATA_SEQUENTIAL: - ret = mca_coll_ml_build_bcast_sequential_schedule_no_attributes(&topo_info[topo_index], - &ml_module->coll_ml_bcast_functions[alg], size_code); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("Failed to setup static bcast")); - topo_info->hierarchical_algorithms[ML_BCAST] = NULL; - return ret; - } - break; - default: - topo_info->hierarchical_algorithms[ML_BCAST] = NULL; - return OMPI_ERROR; - } - assert(NULL != ml_module->coll_ml_bcast_functions[alg] && - NULL != ml_module->coll_ml_bcast_functions[alg]); - } - - topo_info->hierarchical_algorithms[BCOL_BCAST] = NULL; - return ret; -} - -void ml_coll_hier_bcast_cleanup(mca_coll_ml_module_t *ml_module) -{ - /* Hierarchy Setup */ - int i, alg; - int topo_index = 0; - mca_coll_ml_topology_t *topo_info = ml_module->topo_list; - - assert (NULL != ml_module); - - for (i = 0; i < ML_NUM_MSG; i++) { - - switch (i) { - case ML_SMALL_MSG: - case ML_LARGE_MSG: - break; - default: - topo_info->hierarchical_algorithms[ML_BCAST] = NULL; - return; - } - - alg = mca_coll_ml_component.coll_config[ML_BCAST][i].algorithm_id; - topo_index = ml_module->collectives_topology_map[ML_BCAST][alg]; - if (ML_UNDEFINED == alg || ML_UNDEFINED == topo_index) { - ML_ERROR(("No topology index or algorithm was defined")); - topo_info->hierarchical_algorithms[ML_BCAST] = NULL; - return; - } - - if (NULL != ml_module->coll_ml_bcast_functions[alg]) { - if (ML_BCAST_SMALL_DATA_KNOWN <= alg && ML_BCAST_LARGE_DATA_SEQUENTIAL >= alg) { - if (ml_module->coll_ml_bcast_functions[alg]->component_functions) { - free(ml_module->coll_ml_bcast_functions[alg]->component_functions); - ml_module->coll_ml_bcast_functions[alg]->component_functions = NULL; - } - - free(ml_module->coll_ml_bcast_functions[alg]); - ml_module->coll_ml_bcast_functions[alg] = NULL; - } else { - topo_info->hierarchical_algorithms[ML_BCAST] = NULL; - } - } - } -} diff --git a/ompi/mca/coll/ml/coll_ml_hier_algorithms_common_setup.c b/ompi/mca/coll/ml/coll_ml_hier_algorithms_common_setup.c deleted file mode 100644 index 7167c7de79..0000000000 --- a/ompi/mca/coll/ml/coll_ml_hier_algorithms_common_setup.c +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
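ml_coll_hier_bcast_setup above walks the two message-size classes, turns each into a size code (the BCAST_SMALL and BCAST_LARGE defines), and hands that code to the selected schedule builder as bcol_func_index. A sketch of that mapping under the same constants, with a hypothetical table in place of the component's config structures:

#include <stdio.h>

#define SKETCH_BCAST_SMALL 1   /* mirrors BCAST_SMALL above */
#define SKETCH_BCAST_LARGE 5   /* mirrors BCAST_LARGE above */

enum { MSG_SMALL = 0, MSG_LARGE = 1, MSG_CLASSES = 2 };

/* Hypothetical stand-in for the per-size-class config lookup. */
struct size_class_map {
    const char *name;
    int bcol_func_index;   /* passed through to the schedule builder */
};

static const struct size_class_map classes[MSG_CLASSES] = {
    { "small", SKETCH_BCAST_SMALL },
    { "large", SKETCH_BCAST_LARGE },
};

int main(void)
{
    for (int i = 0; i < MSG_CLASSES; i++) {
        printf("%s messages -> bcol_func_index %d\n",
               classes[i].name, classes[i].bcol_func_index);
    }
    return 0;
}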
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/coll/ml/coll_ml.h" -#include "ompi/mca/coll/ml/coll_ml_inlines.h" -#include "ompi/mca/coll/ml/coll_ml_hier_algorithms_common_setup.h" - -int mca_coll_ml_schedule_init_scratch(mca_coll_ml_topology_t *topo_info, - mca_coll_ml_schedule_hier_info_t *h_info, - int **out_scratch_indx, int **out_scratch_num) -{ - bool prev_is_zero; - int i, cnt; - int n_hiers = h_info->n_hiers; - int value_to_set = 0; - mca_bcol_base_module_t *prev_bcol = NULL; - int *scratch_indx, *scratch_num; - - scratch_indx = *out_scratch_indx = - (int *) calloc(n_hiers * 2, sizeof(int)); - if (NULL == *out_scratch_indx) { - ML_ERROR(("Can't allocate memory.")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - scratch_num = *out_scratch_num = - (int *) calloc(n_hiers * 2, sizeof(int)); - if (NULL == *out_scratch_num) { - ML_ERROR(("Can't allocate memory.")); - free(out_scratch_indx); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - for (i = 0, cnt = 0; i < h_info->num_up_levels; ++i, ++cnt) { - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, i))) { - scratch_indx[cnt] = scratch_indx[cnt - 1] + 1; - } else { - scratch_indx[cnt] = 0; - prev_bcol = GET_BCOL(topo_info, i); - } - } - - /* top - only if the proc arrive to highest_level_is_global_highest_level */ - if (h_info->call_for_top_function) { - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, n_hiers - 1))) { - scratch_indx[cnt] = scratch_indx[cnt - 1] + 1; - } else { - scratch_indx[cnt] = 0; - prev_bcol = GET_BCOL(topo_info, n_hiers - 1); - } - ++cnt; - } - - /* going down */ - for (i = h_info->num_up_levels - 1; i >= 0; --i, ++cnt) { - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, i))) { - scratch_indx[cnt] = scratch_indx[cnt - 1] + 1; - } else { - scratch_indx[cnt] = 0; - prev_bcol = GET_BCOL(topo_info, i); - } - } - - i = cnt - 1; - prev_is_zero = true; - - do { - if (prev_is_zero) { - value_to_set = scratch_indx[i] + 1; - prev_is_zero = false; - } - - if (0 == scratch_indx[i]) { - prev_is_zero = true; - } - - scratch_num[i] = value_to_set; - --i; - } while(i >= 0); - - return OMPI_SUCCESS; -} - -mca_coll_ml_collective_operation_description_t * - mca_coll_ml_schedule_alloc(mca_coll_ml_schedule_hier_info_t *h_info) -{ - mca_coll_ml_collective_operation_description_t *schedule = NULL; - - schedule = (mca_coll_ml_collective_operation_description_t *) - malloc(sizeof(mca_coll_ml_collective_operation_description_t)); - if (NULL == schedule) { - ML_ERROR(("Can't allocate memory.")); - return NULL; - } - - /* Set dependencies equal to number of hierarchies */ - schedule->n_fns = h_info->nbcol_functions; - schedule->progress_type = 0; - /* Allocated the component function */ - schedule->component_functions = (struct mca_coll_ml_compound_functions_t *) - calloc(h_info->nbcol_functions, sizeof(struct mca_coll_ml_compound_functions_t)); - if (NULL == schedule->component_functions) { - ML_ERROR(("Can't allocate memory.")); - free(schedule); - return NULL; - } - return schedule; -} - -void mca_coll_ml_call_types(mca_coll_ml_schedule_hier_info_t *h_info, - mca_coll_ml_collective_operation_description_t *schedule) -{ - int i_hier, j_hier, cnt; - mca_bcol_base_module_t *current_bcol = NULL; - - for (i_hier = 0; i_hier < h_info->n_hiers; i_hier++) { - current_bcol = - schedule->component_functions[i_hier]. 
- constant_group_data.bcol_module; - cnt = 0; - for (j_hier = 0; j_hier < h_info->n_hiers; j_hier++) { - if (current_bcol == - schedule->component_functions[j_hier]. - constant_group_data.bcol_module) { - schedule->component_functions[j_hier]. - constant_group_data.index_of_this_type_in_collective = cnt; - cnt++; - } - } - schedule->component_functions[i_hier]. - constant_group_data.n_of_this_type_in_collective = cnt; - } -} diff --git a/ompi/mca/coll/ml/coll_ml_hier_algorithms_common_setup.h b/ompi/mca/coll/ml/coll_ml_hier_algorithms_common_setup.h deleted file mode 100644 index 03cb185ec8..0000000000 --- a/ompi/mca/coll/ml/coll_ml_hier_algorithms_common_setup.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_COLL_ML_COMMON_SETUP_H -#define MCA_COLL_ML_COMMON_SETUP_H - -#include "ompi_config.h" - -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/coll/ml/coll_ml.h" - -struct mca_coll_ml_schedule_hier_info_t { - int n_hiers; - int num_up_levels; - int nbcol_functions; - bool call_for_top_function; -}; -typedef struct mca_coll_ml_schedule_hier_info_t - mca_coll_ml_schedule_hier_info_t; - -#define MCA_COLL_ML_INIT_HIER_INFO(info, n_hr, g_hr, ml_module) \ -do { \ - info.n_hiers = n_hr; \ - if (g_hr == \ - ml_module->component_pairs[n_hr - 1].bcol_index) { \ - /* The process that is member of highest level subgroup \ - should call for top algorithms in addition to fan-in/out steps*/ \ - ML_VERBOSE(9, ("Setting top %d %d", n_hr, ml_module->component_pairs[g_hr - 1].bcol_index)); \ - info.call_for_top_function = true; \ - /* hier level run only top algorithm, so we deduct 1 */ \ - info.num_up_levels = n_hr - 1; \ - /* Top algorithm is called only once, so we deduct 1 */ \ - info.nbcol_functions = 2 * n_hr - 1; \ - } else { \ - ML_VERBOSE(9, ("not setting top %d %d", n_hr, ml_module->component_pairs[g_hr - 1].bcol_index)); \ - /* The process is not member of highest level subgroup, \ - as result it does not call for top algorithm, \ - but it calls for all fan-in/out steps */ \ - info.call_for_top_function = false; \ - info.num_up_levels = n_hr; \ - info.nbcol_functions = 2 * n_hr; \ - } \ -} while (0); - -#define MCA_COLL_ML_SET_COMP_FN(fn, level, module, s_level, \ - scratch_indx, scratch_num, qc, name) \ -do { \ - fn->h_level = level; /* hierarchy level */ \ - strcpy (fn->fn_name, "name"); \ - fn->num_dependent_tasks = 0; \ - fn->num_dependencies = 0; \ - fn->task_comp_fn = NULL; \ - fn->constant_group_data.bcol_module = GET_BCOL(module, level); \ - fn->constant_group_data.index_in_consecutive_same_bcol_calls = \ - scratch_indx[s_level];\ - fn->constant_group_data.n_of_this_type_in_a_row = scratch_num[s_level]; \ - fn->constant_group_data.n_of_this_type_in_collective = 0; \ - fn->constant_group_data.index_of_this_type_in_collective = 0; \ - fn->bcol_function = fn->constant_group_data.bcol_module-> \ - filtered_fns_table[qc[0]] \ - [qc[1]] \ - [qc[2]] \ - [qc[3]] \ - [qc[4]] \ - [qc[5]]; \ -} while (0); - -#define MCA_COLL_ML_QUERY_SIZE 6 - -#define MCA_COLL_ML_SET_QUERY(query, src_type, blocking, coll_type, index, other0, other1) \ -do { \ - query[0] = src_type; \ - query[1] = blocking; \ - query[2] = coll_type; \ - query[3] = index; \ - query[4] = other0; \ - query[5] = other1; \ -} while (0); - -int mca_coll_ml_schedule_init_scratch(mca_coll_ml_topology_t 
*topo_info, - mca_coll_ml_schedule_hier_info_t *h_info, - int **out_scratch_indx, int **out_scratch_num); - -mca_coll_ml_collective_operation_description_t* -mca_coll_ml_schedule_alloc(mca_coll_ml_schedule_hier_info_t *h_info); - -void mca_coll_ml_call_types(mca_coll_ml_schedule_hier_info_t *h_info, - mca_coll_ml_collective_operation_description_t *schedule); -#endif diff --git a/ompi/mca/coll/ml/coll_ml_hier_algorithms_reduce_setup.c b/ompi/mca/coll/ml/coll_ml_hier_algorithms_reduce_setup.c deleted file mode 100644 index 579f77d12b..0000000000 --- a/ompi/mca/coll/ml/coll_ml_hier_algorithms_reduce_setup.c +++ /dev/null @@ -1,371 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mca/coll/ml/coll_ml.h" -#include "ompi/include/ompi/constants.h" -#include "ompi/mca/coll/ml/coll_ml_functions.h" -static int mca_coll_ml_task_comp_static_reduce - (struct mca_coll_ml_task_status_t *task) { - - task->ml_coll_operation->variable_fn_params.root_flag = true; - - return OMPI_SUCCESS; -} - -static void mca_coll_ml_static_reduce_non_root(mca_coll_ml_task_status_t *task_status, int index, - mca_coll_ml_compound_functions_t *func) -{ - /* I am not a root rank, but someone in my group is a root*/ - if (task_status->ml_coll_operation->variable_fn_params.root_route->level == index) { - task_status->rt_num_dependencies = func->num_dependencies; - task_status->rt_num_dependent_tasks = 0; - task_status->rt_dependent_task_indices = NULL; - task_status->ml_coll_operation->variable_fn_params.root = - task_status->ml_coll_operation->variable_fn_params.root_route->rank; - } else { - task_status->rt_num_dependencies = 0; - task_status->rt_num_dependent_tasks = 1; - task_status->rt_dependent_task_indices = &task_status->ml_coll_operation->variable_fn_params.root_route->level; - } - -} - -static void mca_coll_ml_static_reduce_root(mca_coll_ml_task_status_t *task_status, int index, - mca_coll_ml_compound_functions_t *func) -{ - task_status->rt_num_dependencies = func->num_dependencies; - task_status->rt_num_dependent_tasks = 0; - task_status->rt_dependent_task_indices = NULL; -} - -/* - * Fill up the collective descriptor - * - */ -static int mca_coll_ml_build_static_reduce_schedule( - mca_coll_ml_topology_t *topo_info, - mca_coll_ml_collective_operation_description_t **coll_desc) -{ - int i_hier, j_hier, n_fcns, - n_hiers = topo_info->n_levels; - int *scratch_indx = NULL, - *scratch_num = NULL; - int cnt, value_to_set = 0; - int ret = OMPI_SUCCESS; - bool prev_is_zero; - mca_coll_ml_compound_functions_t *comp_fns_temp; - mca_bcol_base_module_t *prev_bcol, - *bcol_module; - mca_coll_ml_compound_functions_t *comp_fn; - mca_coll_ml_collective_operation_description_t *schedule = NULL; - - *coll_desc = (mca_coll_ml_collective_operation_description_t *) - calloc(1, sizeof(mca_coll_ml_collective_operation_description_t)); - - schedule = *coll_desc; - if (OPAL_UNLIKELY(NULL == schedule)) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Error; - } - - scratch_indx = (int *) calloc (n_hiers, sizeof 
(int)); - if (NULL == scratch_indx) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Error; - } - - scratch_num = (int *) malloc(sizeof(int) * (n_hiers)); - if (NULL == scratch_num) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Error; - } - - prev_bcol = NULL; - - /* Calculate scratch numbers */ - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, i_hier))) { - scratch_indx[i_hier] = scratch_indx[i_hier - 1] + 1; - } else { - scratch_indx[i_hier] = 0; - prev_bcol = GET_BCOL(topo_info, i_hier); - } - } - - --i_hier; - prev_is_zero = true; - - do { - if (prev_is_zero) { - value_to_set = scratch_indx[i_hier] + 1; - prev_is_zero = false; - } - - if (0 == scratch_indx[i_hier]) { - prev_is_zero = true; - } - - scratch_num[i_hier] = value_to_set; - --i_hier; - } while(i_hier >= 0); - - /* All hierarchies call one function, unlike other collectives */ - n_fcns = n_hiers; - - /* Set dependencies equal to number of hierarchies */ - schedule->n_fns = n_fcns; - schedule->topo_info = topo_info; - schedule->progress_type = 0; - /* Allocated the component function */ - schedule->component_functions = (struct mca_coll_ml_compound_functions_t *) - calloc(n_fcns, sizeof(struct mca_coll_ml_compound_functions_t)); - - if (OPAL_UNLIKELY(NULL == schedule->component_functions)) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Error; - } - - - for (i_hier = 0; i_hier < n_hiers; ++i_hier) { - comp_fn = &schedule->component_functions[i_hier]; - - /* The hierarchial level */ - comp_fn->h_level = i_hier; - bcol_module = GET_BCOL(topo_info, i_hier); - - comp_fn->bcol_function = - bcol_module->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING][BCOL_REDUCE][1][0][0]; - - strcpy(comp_fn->fn_name, "REDUCE"); - ML_VERBOSE(10, ("func indx %d set to %p", i_hier, comp_fn->bcol_function)); - - - ML_VERBOSE(1,("In ML_REDUCE_SETUP .. looks fine here")); - /* No need completion func for Barrier */ - comp_fn->task_comp_fn = mca_coll_ml_task_comp_static_reduce; - - /* Constants */ - comp_fn->constant_group_data.bcol_module = bcol_module; - comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls = scratch_indx[i_hier]; - comp_fn->constant_group_data.n_of_this_type_in_a_row = scratch_num[i_hier]; - comp_fn->constant_group_data.n_of_this_type_in_collective = 0; - comp_fn->constant_group_data.index_of_this_type_in_collective = 0; - - ML_VERBOSE(10, ("Setting collective [reduce] fn_idx %d, n_of_this_type_in_a_row %d, " - "index_in_consecutive_same_bcol_calls %d.", - i_hier, comp_fn->constant_group_data.n_of_this_type_in_a_row, - comp_fn->constant_group_data.index_in_consecutive_same_bcol_calls)); - } - - - /* Fill the rest of constant data */ - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - mca_bcol_base_module_t *current_bcol = - schedule->component_functions[i_hier]. - constant_group_data.bcol_module; - cnt = 0; - for (j_hier = 0; j_hier < n_hiers; j_hier++) { - if (current_bcol == - schedule->component_functions[j_hier]. - constant_group_data.bcol_module) { - schedule->component_functions[j_hier]. - constant_group_data.index_of_this_type_in_collective = cnt; - cnt++; - } - } - schedule->component_functions[i_hier]. - constant_group_data.n_of_this_type_in_collective = cnt; - } - - /* Manju: Reduction should always use the fixed schedule. 
- * The subgroups that this process is leader should be executed first, then - * it should execute the subgroups where this process is not a leader, and - * then execute the subgroup that includes the root. - */ - - /* Allocate the schedule list */ - schedule->comp_fn_arr = (struct mca_coll_ml_compound_functions_t **) - calloc(n_hiers,sizeof(struct mca_coll_ml_compound_functions_t *)); - if (NULL == schedule->comp_fn_arr) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Error; - } - - /* Now that the functions have been set-up properly, we can simple permute the ordering a bit */ - - for (i_hier = 0; i_hier < n_hiers; i_hier++) { - /* first one is trivial */ - int leader_hierarchy = 0; - int non_leader_hierarchy = 0; - int func_index; - - comp_fns_temp = (struct mca_coll_ml_compound_functions_t *) - calloc(n_hiers, sizeof(struct mca_coll_ml_compound_functions_t)); - - leader_hierarchy = 0; - non_leader_hierarchy = n_hiers - 2; - - for(j_hier = 0; j_hier < n_hiers - 1 ; j_hier++) { - - func_index = j_hier < i_hier ? j_hier : j_hier + 1; - /* I'm a leader for this group */ - if (0 == topo_info->component_pairs->subgroup_module->my_index) { - comp_fns_temp[leader_hierarchy++] = - schedule->component_functions[func_index]; - } - else { - comp_fns_temp[non_leader_hierarchy--] = - schedule->component_functions[func_index]; - } - } - - comp_fns_temp[j_hier] = schedule->component_functions[i_hier]; - /* now let's attach this list to our array of lists */ - schedule->comp_fn_arr[i_hier] = comp_fns_temp; - } - - /* Manju: Do we need this ? */ - - /* I'm going to just loop over each schedule and - * set up the scratch indices, scratch numbers - * and other constant data - */ - /* - for( i_hier = 1; i_hier < n_hiers; i_hier++) { - ret = mca_coll_ml_setup_scratch_vals(schedule->comp_fn_arr[i_hier], scratch_indx, - scratch_num, n_hiers); - if( OMPI_SUCCESS != ret ) { - ret = OMPI_ERROR; - goto Error; - } - - } - */ - - /* Do I need this ? */ - schedule->task_setup_fn[COLL_ML_ROOT_TASK_FN] = mca_coll_ml_static_reduce_root; - schedule->task_setup_fn[COLL_ML_GENERAL_TASK_FN] = mca_coll_ml_static_reduce_non_root; - - MCA_COLL_ML_SET_SCHEDULE_ORDER_INFO(schedule); - - /* reduce does not use the component functions so we no longer need this. 
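The ordering loop below this comment realizes its rule: subgroups this process leads are placed first, subgroups it does not lead fill in from the back, and the subgroup containing the root always lands in the last slot. A standalone sketch of the placement (the deleted code tests only the first component pair's my_index; this sketch generalizes to a per-level flag purely for illustration):

#include <stdio.h>

/* Order the levels for schedule i: leaders from the front, non-leaders
 * from the back, and level i (the subgroup containing the root) last. */
static void order_levels(int n, int i, const int *is_leader, int *out)
{
    int front = 0, back = n - 2;
    int j, level;

    for (j = 0; j < n - 1; j++) {
        level = (j < i) ? j : j + 1;   /* visit every level except i */
        if (is_leader[level]) {
            out[front++] = level;      /* leader subgroups run first */
        } else {
            out[back--] = level;       /* non-leader subgroups run later */
        }
    }
    out[n - 1] = i;                    /* the root's subgroup runs last */
}

int main(void)
{
    int is_leader[4] = { 1, 0, 1, 0 };
    int out[4];

    order_levels(4, 2, is_leader, out);   /* root's subgroup is level 2 */
    for (int j = 0; j < 4; j++) {
        printf("%d ", out[j]);            /* prints: 0 3 1 2 */
    }
    printf("\n");
    return 0;
}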
see - * coll_ml_reduce.c:442 */ - free (schedule->component_functions); - schedule->component_functions = NULL; - - free(scratch_num); - free(scratch_indx); - - return OMPI_SUCCESS; - -Error: - if (NULL != scratch_num) { - free (scratch_num); - } - - if (NULL != scratch_indx) { - free (scratch_indx); - } - - if (NULL != schedule) { - if (NULL != schedule->component_functions) { - free(schedule->component_functions); - schedule->component_functions = NULL; - } - free (schedule); - *coll_desc = NULL; - } - - return ret; -} - - -int ml_coll_hier_reduce_setup(mca_coll_ml_module_t *ml_module) -{ - int alg, ret, topo_index=0; - mca_coll_ml_topology_t *topo_info = - &ml_module->topo_list[ml_module->collectives_topology_map[ML_REDUCE][ML_SMALL_MSG]]; - - if ( ml_module->max_fn_calls < topo_info->n_levels ) { - ml_module->max_fn_calls = topo_info->n_levels; - } - - - alg = mca_coll_ml_component.coll_config[ML_REDUCE][ML_SMALL_MSG].algorithm_id; - topo_index = ml_module->collectives_topology_map[ML_REDUCE][alg]; - if (ML_UNDEFINED == alg || ML_UNDEFINED == topo_index) { - ML_ERROR(("No topology index or algorithm was defined")); - topo_info->hierarchical_algorithms[ML_REDUCE] = NULL; - return OMPI_ERROR; - } - - ret = mca_coll_ml_build_static_reduce_schedule(&ml_module->topo_list[topo_index], - &ml_module->coll_ml_reduce_functions[alg]); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("Failed to setup static reduce")); - return ret; - } - - - return OMPI_SUCCESS; -} - -void ml_coll_hier_reduce_cleanup(mca_coll_ml_module_t *ml_module) -{ - int alg, i, topo_index=0; - mca_coll_ml_topology_t *topo_info = - &ml_module->topo_list[ml_module->collectives_topology_map[ML_REDUCE][ML_SMALL_MSG]]; - - if ( ml_module->max_fn_calls < topo_info->n_levels ) { - ml_module->max_fn_calls = topo_info->n_levels; - } - - - alg = mca_coll_ml_component.coll_config[ML_REDUCE][ML_SMALL_MSG].algorithm_id; - topo_index = ml_module->collectives_topology_map[ML_REDUCE][alg]; - if (ML_UNDEFINED == alg || ML_UNDEFINED == topo_index) { - ML_ERROR(("No topology index or algorithm was defined")); - topo_info->hierarchical_algorithms[ML_REDUCE] = NULL; - return; - } - - if (NULL == ml_module->coll_ml_reduce_functions[alg]) { - return; - } - - if (ml_module->coll_ml_reduce_functions[alg]->comp_fn_arr) { - for (i=0; i < ml_module->topo_list[topo_index].n_levels; i++) { - if (ml_module->coll_ml_reduce_functions[alg]->comp_fn_arr[i]) { - free(ml_module->coll_ml_reduce_functions[alg]->comp_fn_arr[i]); - ml_module->coll_ml_reduce_functions[alg]->comp_fn_arr[i] = NULL; - } - } - - free(ml_module->coll_ml_reduce_functions[alg]->comp_fn_arr); - ml_module->coll_ml_reduce_functions[alg]->comp_fn_arr = NULL; - } - - ml_module->coll_ml_reduce_functions[alg]->component_functions = NULL; - - free(ml_module->coll_ml_reduce_functions[alg]); - ml_module->coll_ml_reduce_functions[alg] = NULL; -} diff --git a/ompi/mca/coll/ml/coll_ml_hier_algorithms_setup.c b/ompi/mca/coll/ml/coll_ml_hier_algorithms_setup.c deleted file mode 100644 index 181e229a11..0000000000 --- a/ompi/mca/coll/ml/coll_ml_hier_algorithms_setup.c +++ /dev/null @@ -1,521 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights - * reserved.
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mca/coll/ml/coll_ml.h" -#include "ompi/mca/coll/ml/coll_ml_inlines.h" -#include "ompi/include/ompi/constants.h" - -int ml_coll_up_and_down_hier_setup(mca_coll_ml_module_t *ml_module, - mca_coll_ml_topology_t *topo_info, - int up_function_idx, - int top_function_idx, - int down_function_idx, - int collective) -{ - /* local variables */ - int i, j, cnt, value_to_set = -1; - int ret = OMPI_SUCCESS, num_up_levels; - - int num_hierarchies = topo_info->n_levels; - int global_high_hierarchy_index = topo_info->global_highest_hier_group_index; - - bool call_for_top_function, prev_is_zero; - - int *scratch_indx = NULL, *scratch_num = NULL; - - coll_ml_collective_description_t *collective_alg = NULL; - mca_bcol_base_module_t *bcol_module = NULL, - *prev_bcol = NULL; - - /* RLG: one blocking barrier collective algorithm - this is really a hack, - * we need to figure out how to do this in a bit more extensible - * manner. - */ - collective_alg = (coll_ml_collective_description_t *) - malloc(sizeof(coll_ml_collective_description_t)); - if (NULL == collective_alg) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Error; - } - - /* am I a member of the highest level subgroup ? */ - if (global_high_hierarchy_index == - topo_info->component_pairs[num_hierarchies - 1].bcol_index) { - /* The process that is member of highest level subgroup - should call for top algorithms in addition to fan-in/out steps*/ - call_for_top_function = true; - /* hier level run only top algorithm, so we deduct 1 */ - num_up_levels = num_hierarchies - 1; - /* Top algorithm is called only once, so we deduct 1 */ - collective_alg->n_functions = 2 * num_hierarchies - 1; - } else { - /* The process is not member of highest level subgroup, - as result it does not call for top algorithm, - but it calls for all fan-in/out steps */ - call_for_top_function = false; - num_up_levels = num_hierarchies; - collective_alg->n_functions = 2 * num_hierarchies; - } - - ML_VERBOSE(10, ("high_index %d == bcol_index %d: Call top %d, num_up_levels %d, collective_alg->n_functions %d", - global_high_hierarchy_index, - topo_info->component_pairs[num_hierarchies - 1].bcol_index, - call_for_top_function, - num_up_levels, - collective_alg->n_functions )); - - /* allocate space for the functions */ - collective_alg->functions = (mca_bcol_base_function_t *) - calloc(collective_alg->n_functions, sizeof(mca_bcol_base_function_t)); - if( NULL == collective_alg->functions) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Error; - } - - /* Algorithm Description: - * ===================== - * The algorithm used here for an N level system - * - up to level N-2, inclusive : up algorithm (fan in in barrier, reduce in Allreduce) - * - level N-1: top algorithm (barrier or allreduce) - * - level N-2, to level 0: down algorithm (fanout) - */ - - - /* Starting scratch_num and scratch_index calculations */ - /* =================================================== */ - - /* Figure out how many of the same bcols are called in a row. 
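The scratch_indx/scratch_num computation described here recurs in every builder in this patch: a forward pass gives each step its index within a run of consecutive identical bcol types, and a backward pass gives every step in a run the run's total length. A standalone sketch of both passes, with plain ints standing in for the bcol module types (the original compares module types via IS_BCOL_TYPE_IDENTICAL; simple equality is used here for illustration):

#include <stdio.h>

static void compute_scratch(const int *type, int n, int *indx, int *num)
{
    int i, value_to_set = 0;
    int prev_is_zero;

    /* forward pass: index inside each run of identical types */
    for (i = 0; i < n; i++) {
        if (i > 0 && type[i] == type[i - 1]) {
            indx[i] = indx[i - 1] + 1;
        } else {
            indx[i] = 0;
        }
    }

    /* backward pass: run length, propagated to every member of the run */
    i = n - 1;
    prev_is_zero = 1;
    do {
        if (prev_is_zero) {
            value_to_set = indx[i] + 1;   /* last index + 1 == run length */
            prev_is_zero = 0;
        }
        if (0 == indx[i]) {
            prev_is_zero = 1;             /* run starts here; the next run ends at i-1 */
        }
        num[i] = value_to_set;
        --i;
    } while (i >= 0);
}

int main(void)
{
    /* e.g. a shared-memory bcol on two levels, a network bcol, then shm again */
    int type[4] = { 0, 0, 1, 0 };
    int indx[4], num[4], i;

    compute_scratch(type, 4, indx, num);
    for (i = 0; i < 4; i++) {
        printf("step %d: scratch_indx %d scratch_num %d\n", i, indx[i], num[i]);
    }
    /* prints indx = {0,1,0,0}, num = {2,2,1,1} */
    return 0;
}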
- * The index of the bcol in row we store in scratch_indx and - * the total number of bcols in the row we store in scratch_num */ - scratch_indx = (int *) calloc (2 * num_hierarchies, sizeof (int)); - if(NULL == scratch_indx) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Error; - } - - scratch_num = (int *) malloc(sizeof(int) * (2 * num_hierarchies)); - if(NULL == scratch_num) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Error; - } - - /* We go through all stages of algorithm (up, top, down) - * and calculate bcol index. If previous bcol is the same type as current - * one the counter index is increased, other way the index is zero */ - prev_bcol = NULL; - /* going up */ - for (i = 0, cnt = 0; i < num_up_levels; ++i, ++cnt) { - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, i))) { - scratch_indx[cnt] = scratch_indx[cnt - 1] + 1; - } else { - scratch_indx[cnt] = 0; - prev_bcol = GET_BCOL(topo_info, i); - } - } - - /* top - only if the proc arrive to highest_level_is_global_highest_level */ - if (call_for_top_function) { - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, num_hierarchies - 1))) { - scratch_indx[cnt] = scratch_indx[cnt - 1] + 1; - } else { - scratch_indx[cnt] = 0; - prev_bcol = GET_BCOL(topo_info, num_hierarchies - 1); - } - - ++cnt; - } - - /* going down */ - for (i = num_up_levels - 1; i >= 0; --i, ++cnt) { - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, i))) { - scratch_indx[cnt] = scratch_indx[cnt - 1] + 1; - } else { - scratch_indx[cnt] = 0; - prev_bcol = GET_BCOL(topo_info, i); - } - } - - /* - * Calculate the number of the same bcols in row. - * We parse the index array, if index is zero - * it means that the row is done and we start - * to calculate next bcols row. 
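The three loops above visit the levels in the up/top/down order given in the algorithm description: fan-in over the lower levels, the top algorithm once (only for members of the highest subgroup), then fan-out in reverse. A standalone sketch that prints the visit order, which is also the order in which scratch_indx and scratch_num are filled:

#include <stdio.h>

/* Build the up/top/down level sequence; returns the number of steps. */
static int build_walk(int num_up_levels, int top_level, int call_top, int *walk)
{
    int i, cnt = 0;

    for (i = 0; i < num_up_levels; i++) {
        walk[cnt++] = i;                 /* up phase (fan-in / reduce) */
    }
    if (call_top) {
        walk[cnt++] = top_level;         /* top algorithm, run once */
    }
    for (i = num_up_levels - 1; i >= 0; i--) {
        walk[cnt++] = i;                 /* down phase (fan-out / bcast) */
    }
    return cnt;
}

int main(void)
{
    int walk[6];
    /* three hierarchies, member of the highest subgroup:
     * num_up_levels = 2, top level = 2, so the walk is 0 1 2 1 0 */
    int cnt = build_walk(2, 2, 1, walk);

    for (int i = 0; i < cnt; i++) {
        printf("%d ", walk[i]);
    }
    printf("\n");
    return 0;
}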
The maximum number - * for the row is equal to maximal bcol index in the row + 1 - */ - i = cnt - 1; - prev_is_zero = true; - do { - if (prev_is_zero) { - value_to_set = scratch_indx[i] + 1; - prev_is_zero = false; - } - - if (0 == scratch_indx[i]) { - prev_is_zero = true; - } - - scratch_num[i] = value_to_set; - --i; - } while(i >= 0); - - /* =========================================================== */ - /* We are done with scratch_num and scratch_index calculations */ - - /* Setup function call for each algorithm step */ - cnt = 0; - /* up phase */ - for (i = 0; i < num_up_levels; i++) { - bcol_module = GET_BCOL(topo_info, i); - collective_alg->functions[cnt].fn_idx = up_function_idx; - collective_alg->functions[cnt].bcol_module = bcol_module; - collective_alg->functions[cnt].index_in_consecutive_same_bcol_calls = scratch_indx[cnt]; - collective_alg->functions[cnt].n_of_this_type_in_a_row = scratch_num[cnt]; - ML_VERBOSE(10, ("Setting collective [collective code %d][count %d], fn_idx %d, index_in_consecutive_same_bcol_calls %d, n_of_this_type_in_a_row %d", - collective, cnt, collective_alg->functions[cnt].fn_idx, - collective_alg->functions[cnt].index_in_consecutive_same_bcol_calls, - collective_alg->functions[cnt].n_of_this_type_in_a_row)); - ++cnt; - } - - /* top function */ - if (call_for_top_function) { - bcol_module = GET_BCOL(topo_info, num_hierarchies - 1); - collective_alg->functions[cnt].fn_idx = top_function_idx; - collective_alg->functions[cnt].bcol_module = bcol_module; - collective_alg->functions[cnt].index_in_consecutive_same_bcol_calls = scratch_indx[cnt]; - collective_alg->functions[cnt].n_of_this_type_in_a_row = scratch_num[cnt]; - ML_VERBOSE(10, ("Setting collective [collective code %d][count %d], fn_idx %d, index_in_consecutive_same_bcol_calls %d, n_of_this_type_in_a_row %d", - collective, cnt, collective_alg->functions[cnt].fn_idx, - collective_alg->functions[cnt].index_in_consecutive_same_bcol_calls, - collective_alg->functions[cnt].n_of_this_type_in_a_row)); - ++cnt; - } - - /* down phase*/ - for (i = num_up_levels - 1; i >= 0; i--) { - bcol_module = GET_BCOL(topo_info, i); - collective_alg->functions[cnt].fn_idx = down_function_idx; - collective_alg->functions[cnt].bcol_module = bcol_module; - collective_alg->functions[cnt].index_in_consecutive_same_bcol_calls = scratch_indx[cnt]; - collective_alg->functions[cnt].n_of_this_type_in_a_row = scratch_num[cnt]; - ML_VERBOSE(10, ("Setting collective [collective code %d][count %d], fn_idx %d, index_in_consecutive_same_bcol_calls %d, n_of_this_type_in_a_row %d", - collective, cnt, collective_alg->functions[cnt].fn_idx, - collective_alg->functions[cnt].index_in_consecutive_same_bcol_calls, - collective_alg->functions[cnt].n_of_this_type_in_a_row)); - ++cnt; - } - - /* figure out how many times this bcol is used in this collective call */ - for (i = 0; i < collective_alg->n_functions; i++) { - mca_bcol_base_module_t *current_bcol= - collective_alg->functions[i].bcol_module; - - cnt = 0; - for (j = 0; j < collective_alg->n_functions; ++j) { - if (current_bcol == - collective_alg->functions[j].bcol_module) { - collective_alg->functions[j].index_of_this_type_in_collective = cnt; - ML_VERBOSE(10, ("Pasha: Setting collective [collective code %d][count %d], fn_idx %d, collective_alg->functions[i].index_of_this_type_in_collective %d", - collective, cnt, i, - collective_alg->functions[j].index_of_this_type_in_collective)); - cnt++; - } - } - - collective_alg->functions[i].n_of_this_type_in_collective=cnt; - ML_VERBOSE(10, ("Pasha: 
Setting collective [collective code %d][count %d], fn_idx %d, collective_alg->functions[i].n_of_this_type_in_collective %d", - collective, cnt, i, - collective_alg->functions[i].n_of_this_type_in_collective)); - } - - /* set Barrier algorithm */ - topo_info->hierarchical_algorithms[collective] = collective_alg; - /* Setup maximum number function calls, it is used for resource allocation */ - ml_module->max_fn_calls = (collective_alg->n_functions > ml_module->max_fn_calls) ? - collective_alg->n_functions : ml_module->max_fn_calls; - /* Ishai: What is this n_buffers? I did not find where it is being used*/ - topo_info->hierarchical_algorithms[collective]->n_buffers = 1; - - /* Release temporary memories */ - free(scratch_indx); - free(scratch_num); - - return OMPI_SUCCESS; - -Error: - if (NULL != collective_alg) { - free(collective_alg->functions); - } - - free(collective_alg); - free(scratch_indx); - free(scratch_num); - - return ret; -} - -int ml_coll_hier_allreduce_setup(mca_coll_ml_module_t *ml_module) -{ - int topo_index = - ml_module->collectives_topology_map[ML_ALLREDUCE][ML_SMALL_DATA_ALLREDUCE]; - int ret = ml_coll_up_and_down_hier_setup(ml_module, - &ml_module->topo_list[topo_index], - BCOL_REDUCE, - BCOL_ALLREDUCE, - BCOL_BCAST, - BCOL_ALLREDUCE); - - if (OMPI_SUCCESS == ret) { - return ret; - } - - /* Make sure to reset the allreduce pointer to NULL */ - ml_module->topo_list[topo_index].hierarchical_algorithms[BCOL_ALLREDUCE] = NULL; - return ret; -} - -#if 0 -/* - * Manju: New setup function in coll_ml_hier_algorithms_reduce_setup.c - */ -/* Ishai: Reduce is not an hier algorithm (it is rooted) - it needs a different ML algorithm */ -/* Need to rewrite */ -int ml_coll_hier_reduce_setup(mca_coll_ml_module_t *ml_module) -{ - int topo_index = ml_module->collectives_topology_map[ML_ALLREDUCE][ML_SMALL_DATA_GATHER]; - /* Hierarchy Setup */ - int ret = ml_coll_up_and_down_hier_setup(ml_module, - &ml_module->topo_list[topo_index], - BCOL_REDUCE, /*NULL,*/ - BCOL_REDUCE, - BCOL_REDUCE, /*NULL,*/ - BCOL_REDUCE); - if (OMPI_SUCCESS == ret) { - return ret; - } - /* Make sure to reset the bcast pointer to NULL */ - ml_module->topo_list[topo_index].hierarchical_algorithms[BCOL_BCAST] = NULL; - return ret; -} -#endif - -int ml_coll_barrier_constant_group_data_setup( - mca_coll_ml_topology_t *topo_info, - mca_coll_ml_collective_operation_description_t *schedule) -{ - /* local variables */ - int i, j, cnt, value_to_set = -1, ret = OMPI_SUCCESS, num_up_levels, - num_hierarchies = topo_info->n_levels, n_functions = schedule->n_fns, - global_high_hierarchy_index = topo_info->global_highest_hier_group_index; - - bool call_for_top_function, prev_is_zero; - mca_coll_ml_utility_data_t *constant_group_data = NULL; - - int *scratch_indx = NULL, *scratch_num = NULL; - - mca_bcol_base_module_t *prev_bcol = NULL, - *bcol_module = NULL; - - /* Am I a member of the highest level subgroup ? 
*/ - if (global_high_hierarchy_index == - topo_info->component_pairs[num_hierarchies - 1].bcol_index) { - /* The process that is member of highest level subgroup - should call for top algorithms in addition to fan-in/out steps*/ - call_for_top_function = true; - /* hier level run only top algorithm, so we deduct 1 */ - num_up_levels = num_hierarchies - 1; - } else { - /* The process is not member of highest level subgroup, - as result it does not call for top algorithm, - but it calls for all fan-in/out steps */ - call_for_top_function = false; - num_up_levels = num_hierarchies; - } - - /* Algorithm Description: - * ===================== - * The algorithm used here for an N level system - * - up to level N-2, inclusive : up algorithm (Fan-In in Barrier) - * - level N-1: top algorithm (Barrier algth) - * - level N-2, to level 0: down algorithm (Fan-out) - */ - - - /* Starting scratch_num and scratch_index calculations */ - /* =================================================== */ - - /* Figure out how many of the same bcols are called in a row. - * The index of the bcol in row we store in scratch_indx and - * the total number of bcols in the row we store in scratch_num */ - scratch_indx = (int *) calloc (2 * num_hierarchies, sizeof (int)); - if(NULL == scratch_indx) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Const_Data_Setup_Error; - } - - scratch_num = (int *) malloc(sizeof(int) * (2 * num_hierarchies)); - if(NULL == scratch_num) { - ML_ERROR(("Can't allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto Const_Data_Setup_Error; - } - - /* We go through all stages of algorithm (up, top, down) - * and calculate bcol index. If previous bcol is the same type as current - * one the counter index is increased, other way the index is zero */ - prev_bcol = NULL; - - /* Going up */ - for (i = 0, cnt = 0; i < num_up_levels; ++i, ++cnt) { - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, i))) { - scratch_indx[cnt] = scratch_indx[cnt - 1] + 1; - } else { - scratch_indx[cnt] = 0; - prev_bcol = GET_BCOL(topo_info, i); - } - } - - /* Top - only if the proc arrive to highest_level_is_global_highest_level */ - if (call_for_top_function) { - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, num_hierarchies - 1))) { - scratch_indx[cnt] = scratch_indx[cnt - 1] + 1; - } else { - scratch_indx[cnt] = 0; - prev_bcol = GET_BCOL(topo_info, num_hierarchies - 1); - } - - ++cnt; - } - - /* Going down */ - for (i = num_up_levels - 1; i >= 0; --i, ++cnt) { - if (IS_BCOL_TYPE_IDENTICAL(prev_bcol, GET_BCOL(topo_info, i))) { - scratch_indx[cnt] = scratch_indx[cnt - 1] + 1; - } else { - scratch_indx[cnt] = 0; - prev_bcol = GET_BCOL(topo_info, i); - } - } - - /* - * Calculate the number of the same bcols in row. - * We parse the index array, if index is zero - * it means that the row is done and we start - * to calculate next bcols row. 
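The membership test above decides num_up_levels and, in ml_coll_up_and_down_hier_setup and MCA_COLL_ML_INIT_HIER_INFO, the total function count: a member of the globally highest subgroup runs n-1 fan-in steps, one top step and n-1 fan-out steps (2n-1 calls), while everyone else runs n fan-in and n fan-out steps (2n calls). A minimal sketch of that rule (struct and function names are illustrative):

#include <stdio.h>

struct hier_counts {
    int call_for_top_function;
    int num_up_levels;
    int nbcol_functions;
};

static struct hier_counts count_fns(int n_hiers, int in_top_subgroup)
{
    struct hier_counts c;

    c.call_for_top_function = in_top_subgroup;
    c.num_up_levels = in_top_subgroup ? n_hiers - 1 : n_hiers;
    c.nbcol_functions = in_top_subgroup ? 2 * n_hiers - 1 : 2 * n_hiers;
    return c;
}

int main(void)
{
    struct hier_counts a = count_fns(3, 1), b = count_fns(3, 0);

    printf("top member:  up %d, fns %d\n", a.num_up_levels, a.nbcol_functions);
    printf("non-member:  up %d, fns %d\n", b.num_up_levels, b.nbcol_functions);
    /* prints: up 2, fns 5 / up 3, fns 6 */
    return 0;
}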
The maximum number - * for the row is equal to maximal bcol index in the row + 1 - */ - i = cnt - 1; - prev_is_zero = true; - do { - if (prev_is_zero) { - value_to_set = scratch_indx[i] + 1; - prev_is_zero = false; - } - - if (0 == scratch_indx[i]) { - prev_is_zero = true; - } - - scratch_num[i] = value_to_set; - --i; - } while(i >= 0); - - /* =========================================================== */ - /* We are done with scratch_num and scratch_index calculations */ - - /* Setup function call for each algorithm step */ - cnt = 0; - - /* Up phase */ - for (i = 0; i < num_up_levels; ++i) { - bcol_module = GET_BCOL(topo_info, i); - constant_group_data = &schedule->component_functions[cnt].constant_group_data; - - constant_group_data->bcol_module = bcol_module; - constant_group_data->index_in_consecutive_same_bcol_calls = scratch_indx[cnt]; - constant_group_data->n_of_this_type_in_a_row = scratch_num[cnt]; - - ++cnt; - } - - /* Top function */ - if (call_for_top_function) { - bcol_module = GET_BCOL(topo_info, num_hierarchies - 1); - constant_group_data = &schedule->component_functions[cnt].constant_group_data; - - constant_group_data->bcol_module = bcol_module; - constant_group_data->index_in_consecutive_same_bcol_calls = scratch_indx[cnt]; - constant_group_data->n_of_this_type_in_a_row = scratch_num[cnt]; - - ++cnt; - } - - /* Down phase */ - for (i = num_up_levels - 1; i >= 0; --i) { - bcol_module = GET_BCOL(topo_info, i); - constant_group_data = &schedule->component_functions[cnt].constant_group_data; - - constant_group_data->bcol_module = bcol_module; - - /* All Fan-Outs will be done in parallel */ - constant_group_data->index_in_consecutive_same_bcol_calls = 0; - constant_group_data->n_of_this_type_in_a_row = 1; - - ++cnt; - } - - /* Figure out how many times this bcol is used in this collective call */ - for (i = 0; i < n_functions; ++i) { - struct mca_coll_ml_compound_functions_t *component_functions = - schedule->component_functions; - mca_bcol_base_module_t *current_bcol = - component_functions[i].constant_group_data.bcol_module; - - /* silence clang warning about possible NULL dereference of component_functions. - * this case is a developer error if it occurs */ - assert (NULL != component_functions && NULL != constant_group_data); - - cnt = 0; - for (j = 0; j < n_functions; ++j) { - if (current_bcol == - component_functions[j].constant_group_data.bcol_module) { - constant_group_data->index_of_this_type_in_collective = cnt; - - ++cnt; - } - } - - component_functions[i].constant_group_data.n_of_this_type_in_collective = cnt; - } - - MCA_COLL_ML_SET_SCHEDULE_ORDER_INFO(schedule); - - /* Release temporary memories */ - free(scratch_num); - free(scratch_indx); - - return OMPI_SUCCESS; - -Const_Data_Setup_Error: - free(scratch_indx); - free(scratch_num); - - return ret; -} diff --git a/ompi/mca/coll/ml/coll_ml_inlines.h b/ompi/mca/coll/ml/coll_ml_inlines.h deleted file mode 100644 index d54b3b37aa..0000000000 --- a/ompi/mca/coll/ml/coll_ml_inlines.h +++ /dev/null @@ -1,639 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
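Every builder in this patch closes with the same quadratic pass that fills index_of_this_type_in_collective and n_of_this_type_in_collective. A standalone sketch of that counting, following the pattern of the earlier schedule builders, with ints standing in for the bcol module pointers:

#include <stdio.h>

/* For each step, count how many steps share its bcol module and give
 * each such step its ordinal among them. */
static void count_types(const int *module, int n, int *index_in_coll, int *n_in_coll)
{
    for (int i = 0; i < n; i++) {
        int cnt = 0;
        for (int j = 0; j < n; j++) {
            if (module[j] == module[i]) {
                index_in_coll[j] = cnt;   /* ordinal of step j among its type */
                cnt++;
            }
        }
        n_in_coll[i] = cnt;               /* total steps using this module */
    }
}

int main(void)
{
    int module[5] = { 7, 7, 9, 7, 9 };    /* two distinct modules */
    int idx[5], tot[5];

    count_types(module, 5, idx, tot);
    for (int i = 0; i < 5; i++) {
        printf("step %d: index %d of %d\n", i, idx[i], tot[i]);
    }
    /* module 7 gets indices 0,1,2 of 3; module 9 gets indices 0,1 of 2 */
    return 0;
}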
- * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file */ - -#ifndef MCA_COLL_ML_INLINES_H -#define MCA_COLL_ML_INLINES_H - -#include "ompi_config.h" - -BEGIN_C_DECLS - -static inline __opal_attribute_always_inline__ int ml_fls(int num) -{ - int i = 1; - int j = 0; - - if (0 == num) { - return 0; - } - - while (i < num) { - i *= 2; - j++; - } - - if (i > num) { - j--; - } - - return j; -} - -static inline __opal_attribute_always_inline__ - int mca_coll_ml_buffer_recycling(mca_coll_ml_collective_operation_progress_t *ml_request) -{ - mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *)ml_request->coll_module; - mca_bcol_base_memory_block_desc_t *ml_memblock = ml_module->payload_block; - uint64_t bank_index = ml_request->fragment_data.buffer_desc->bank_index; - int rc; - - opal_atomic_add(&ml_memblock->bank_release_counters[bank_index], 1); - - /* Check if the bank is ready for recycling */ - if (ml_memblock->bank_release_counters[bank_index] == - ml_memblock->num_buffers_per_bank ) { - ml_memblock->ready_for_memsync[bank_index] = true; - - ML_VERBOSE(10, ("Sync count %d, bank %d", ml_memblock->memsync_counter, bank_index)); - assert(ml_memblock->bank_is_busy); - if (ml_memblock->memsync_counter == (int)bank_index) { - while(ml_memblock->ready_for_memsync[ml_memblock->memsync_counter]) { - ML_VERBOSE(10, ("Calling for service barrier: ml_buffer_index - %d %d %d == %d.", - ml_request->fragment_data.buffer_desc->buffer_index, - ml_memblock->memsync_counter, - ml_memblock->bank_release_counters[ml_memblock->memsync_counter], - ml_memblock->num_buffers_per_bank)); - /* Setting the ready flag to 0 - unready - done */ - ml_memblock->ready_for_memsync[ml_memblock->memsync_counter] = false; - - rc = mca_coll_ml_memsync_intra(ml_module, ml_memblock->memsync_counter); - if (OMPI_SUCCESS != rc) { - ML_ERROR(("Failed to start memory sync !!!")); - return rc; - } - - opal_atomic_add(&ml_memblock->memsync_counter, 1); - if (ml_memblock->memsync_counter == (int)ml_memblock->num_banks) { - ml_memblock->memsync_counter = 0; - } - ML_VERBOSE(10, ("After service barrier.")); - } - } else { - ML_VERBOSE(10, ("Out of order %d", ml_memblock->memsync_counter)); - } - } - - return OMPI_SUCCESS; -} - -static inline __opal_attribute_always_inline__ int coll_ml_fragment_completion_processing( - mca_coll_ml_collective_operation_progress_t *coll_op) -{ - /* local variables */ - int ret = OMPI_SUCCESS; - size_t bytes_in_this_frag; - struct full_message_t *full_msg_desc = coll_op->fragment_data.message_descriptor; - bool ready_to_release = true, out_of_resource = false; - - ML_VERBOSE(10, ("Coll_op %p processing completion", coll_op)); - /* Call unpack/pack function */ - if (OPAL_LIKELY(NULL != coll_op->process_fn)) { - ret = coll_op->process_fn(coll_op); - switch(ret) { - case OMPI_SUCCESS: - ML_VERBOSE(10, ("unpack done")); - ready_to_release = true; - break; - case ORTE_ERR_NO_MATCH_YET: - ML_VERBOSE(10, ("unexpected packet")); - ready_to_release = false; - break; - default: - ML_ERROR(("Error, unexpected error code %d", ret)); - return ret; - } - } - - bytes_in_this_frag = coll_op->fragment_data.fragment_size; - - ML_VERBOSE(10, ("Delivered %d bytes in frag %d total %d", - full_msg_desc->n_bytes_delivered, - bytes_in_this_frag, - full_msg_desc->n_bytes_total)); - - /* check for full message completion */ - if(full_msg_desc->n_bytes_delivered + bytes_in_this_frag 
== - full_msg_desc->n_bytes_total) { - /* message complete - don't update number of bytes delivered, just - * mark the message complete - */ - full_msg_desc->n_bytes_delivered += bytes_in_this_frag; - - /* decrement the number of fragments */ - full_msg_desc->n_active--; - - ML_VERBOSE(10, ("Signaling completion")); - - /* here we need to be sure that we point to the first fragment only */ - ompi_request_complete(&(coll_op->fragment_data.message_descriptor->super), true); - coll_op->fragment_data.message_descriptor->super.req_status.MPI_ERROR = OMPI_SUCCESS; - } else { - assert(NULL != coll_op->fragment_data.buffer_desc); - /* update the number of bytes delivered */ - full_msg_desc->n_bytes_delivered += bytes_in_this_frag; - /* decrement the number of fragments */ - full_msg_desc->n_active--; - /* here we need to start the next fragment */ - ML_VERBOSE(10, ("Launch frags for %p", coll_op)); - if (full_msg_desc->n_bytes_scheduled < full_msg_desc->n_bytes_total) { - ret = coll_op->fragment_data.message_descriptor->fragment_launcher(coll_op); - if (OPAL_UNLIKELY(OMPI_ERR_TEMP_OUT_OF_RESOURCE == ret)) { - out_of_resource = true; - } else if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("Failed to launch fragment")); - return ret; - } - } - } - - if (ready_to_release) { - /* Check if we have to recycle memory. - * Note: It is safe to recycle ML buffers since the ML buffer data - * already was unpacked to user buffer - */ - if (NULL != coll_op->fragment_data.buffer_desc) { - ret = mca_coll_ml_buffer_recycling(coll_op); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - return ret; - } - } - /* if this is not fragment 0, return fragment to the free list. - * fragment 0 will be returned in mca_ml_request_free() which - * is called from the MPI wait() and test() routines. - * We can recover the pointer to the fragement descriptor from - * the MPI level request object, wich is the first element - * in the fragment descriptor. - */ - /* I contend that this is a bug. This is not the right way to check - * for the first fragment as it assumes that the first fragment would always - * for any collective have zero as the first offset or that other subsequent - * fragments would not. It is not safe to assume this. The correct check is - * the following one - */ - - ML_VERBOSE(10, ("Master ? %p %d", coll_op, coll_op->fragment_data.offset_into_user_buffer)); - /* This check is in fact a bug. Not the correct definiton of first - * fragment. 
First fragment is the only fragment that satisfies the - * following criteria - */ - /*if (0 != coll_op->fragment_data.offset_into_user_buffer && - !out_of_resource) { - */ - if (((&coll_op->full_message != coll_op->fragment_data.message_descriptor) && - !out_of_resource) || IS_COLL_SYNCMEM(coll_op)) { - /* non-zero offset ==> this is not fragment 0 */ - CHECK_AND_RECYCLE(coll_op); - } - } - - /* return */ - return OMPI_SUCCESS; -} - -/* task completion */ -static inline __opal_attribute_always_inline__ int coll_ml_task_dependency_processing( - mca_coll_ml_task_status_t *task) -{ - /* update dependencies */ - mca_coll_ml_collective_operation_progress_t *my_schedule_instance = - task->ml_coll_operation; - int n_dependent_tasks = task->rt_num_dependent_tasks; - int dep_task; - - for (dep_task = 0; dep_task < n_dependent_tasks; dep_task++) - { - int task_index; - task_index = task->rt_dependent_task_indices[dep_task]; - my_schedule_instance->dag_description.status_array[task_index].n_dep_satisfied++; - } - - /* return */ - return OMPI_SUCCESS; -} - -/* collective task completion processing - - * "task" may be removed from list in this routine. - * Thread safety is assumed to be handled outside this routine. - */ -static inline __opal_attribute_always_inline__ int mca_coll_ml_task_completion_processing( - mca_coll_ml_task_status_t **task_status_g, opal_list_t *list) -{ - /* local variables */ - int ret = OMPI_SUCCESS; - mca_coll_ml_task_status_t *task_status = *task_status_g; - - mca_coll_ml_collective_operation_progress_t *coll_op = - task_status->ml_coll_operation; - - /* Pasha: Since all our collectives so far use the root - flag, I replacing the call for custom call back function - with setting root_flag. - If we will see that we need some custom functionality, - we will enable it later. 
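/* A toy model of the byte accounting used above to detect full-message
   completion: each finished fragment adds its size to n_bytes_delivered
   and decrements the outstanding-fragment count, and the message is done
   when the delivered total reaches n_bytes_total. Single-threaded sketch
   with made-up sizes; the struct and names are illustrative only. */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct msg_state {
    size_t n_bytes_total;
    size_t n_bytes_delivered;
    int    n_active;          /* outstanding fragments */
};

/* returns true when this fragment completed the whole message */
static bool frag_complete(struct msg_state *m, size_t frag_bytes)
{
    m->n_bytes_delivered += frag_bytes;
    m->n_active--;
    return m->n_bytes_delivered == m->n_bytes_total;
}

int main(void)
{
    struct msg_state m = { .n_bytes_total = 10000, .n_bytes_delivered = 0,
                           .n_active = 3 };
    printf("%d\n", frag_complete(&m, 4096));   /* 0: 4096/10000   */
    printf("%d\n", frag_complete(&m, 4096));   /* 0: 8192/10000   */
    printf("%d\n", frag_complete(&m, 1808));   /* 1: message done */
    return 0;
}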
- */ - - task_status->ml_coll_operation->variable_fn_params.root_flag = true; - -#if 0 - /* process task completion function, - if any was defined */ - if (OPAL_LIKELY(NULL != task_status->task_comp_fn)) { - ret = task_status->task_comp_fn(task_status); - if (ret != OMPI_SUCCESS) { - return ret; - } - } -#endif - - /* update dependencies */ - ret = coll_ml_task_dependency_processing(task_status); - if (ret != OMPI_SUCCESS) { - ML_VERBOSE(3,("coll_ml_task_dependency_processing failed")); - return ret; - } - - /* process task completion function, - if any was defined */ - if (OPAL_LIKELY(NULL != task_status->task_comp_fn)) { - ret = task_status->task_comp_fn(task_status); - if (ret != OMPI_SUCCESS) { - ML_VERBOSE(3,("task_comp_fn failed")); - return ret; - } - } - - /* remove the descriptor from the incomplete list - (Pasha: if the list was provided) */ - /* No need to put this on any new list - it is associated - * with the mca_coll_ml_collective_operation_progress_t - * descriptor already - */ - - if (NULL != list) { - (*task_status_g) = (mca_coll_ml_task_status_t *) - opal_list_remove_item(list, (opal_list_item_t *)(task_status)); - } - - /* update completion counter */ - coll_op->dag_description.num_tasks_completed++; - - if(coll_op->dag_description.num_tasks_completed == - coll_op->coll_schedule->n_fns) - { - /* the actual fragment descriptor is not on any list, as - * we can get at it from the task descriptors - */ - ret = coll_ml_fragment_completion_processing(coll_op); - if (OMPI_SUCCESS != ret) { - ML_VERBOSE(3,("coll_ml_fragment_completion_processing failed")); - return ret; - } - } - - /* return */ - return ret; -} - -static inline __opal_attribute_always_inline__ int mca_coll_ml_generic_collectives_append_to_queue( - mca_coll_ml_collective_operation_progress_t *op_prog, - mca_coll_ml_task_setup_fn_t task_setup) -{ - int fn_index; - mca_coll_ml_collective_operation_description_t *op_desc = - op_prog->coll_schedule; - mca_coll_ml_compound_functions_t *func = NULL; - mca_coll_ml_task_status_t *task_status = NULL; - mca_coll_ml_component_t *cm = &mca_coll_ml_component; - - ML_VERBOSE(9, ("Calling mca_coll_ml_generic_collectives_append_to_queue")); - - /* Init all tasks before we start them */ - for (fn_index = 0; fn_index < op_desc->n_fns; fn_index++) { - func = &op_desc->component_functions[fn_index]; - task_status = &op_prog->dag_description.status_array[fn_index]; - - ML_VERBOSE(9, ("Processing function index %d", fn_index)); - - assert(NULL != func); - - /* Init task status */ - task_status->n_dep_satisfied = 0; /* start from zero */ - task_status->bcol_fn = func->bcol_function; - /* set up run-time parameters */ - /* Pasha: do we need the if protection? */ - if (OPAL_LIKELY(NULL != task_setup)) { - task_setup(task_status, fn_index, func); - } - - /* the pointer to operation progress is supposed to be set during - construction time.
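/* A minimal sketch of the dependency bookkeeping performed by
   coll_ml_task_dependency_processing above: completing a task bumps a
   satisfied-dependency counter on each dependent task, and a task
   becomes startable once its counter reaches its dependency count.
   Plain-C toy scheduler; the struct and field names are illustrative. */
#include <stdio.h>

#define NTASK 3

struct task {
    int num_dependencies;     /* cf. rt_num_dependencies       */
    int n_dep_satisfied;
    int num_dependents;       /* cf. rt_num_dependent_tasks    */
    int dependents[NTASK];    /* cf. rt_dependent_task_indices */
};

static void on_task_complete(struct task *tasks, int done)
{
    for (int d = 0; d < tasks[done].num_dependents; ++d) {
        int t = tasks[done].dependents[d];
        if (++tasks[t].n_dep_satisfied == tasks[t].num_dependencies) {
            printf("task %d is now startable\n", t);
        }
    }
}

int main(void)
{
    /* 0 -> 2 and 1 -> 2: task 2 starts after both 0 and 1 complete */
    struct task tasks[NTASK] = {
        { .num_dependencies = 0, .num_dependents = 1, .dependents = { 2 } },
        { .num_dependencies = 0, .num_dependents = 1, .dependents = { 2 } },
        { .num_dependencies = 2, .num_dependents = 0 },
    };

    on_task_complete(tasks, 0);
    on_task_complete(tasks, 1);   /* prints: task 2 is now startable */
    return 0;
}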
Just want to make sure that it is ok */ - assert(task_status->ml_coll_operation == op_prog); - - /* We assume that all pointers to functions are defined and there - is no reason to check for null */ - assert(NULL != func->bcol_function->coll_fn); - - /* In order to preserve ordering on all ranks we have to add it to the tail */ - /* TBD: Need to review the way we launch fragments */ - ML_VERBOSE(9, ("The task %p dependency is %d, appending it on the pending list", - (void *)task_status, func->num_dependencies)); - OPAL_THREAD_LOCK(&(mca_coll_ml_component.pending_tasks_mutex)); - opal_list_append(&cm->pending_tasks, (opal_list_item_t *)task_status); - OPAL_THREAD_UNLOCK(&(mca_coll_ml_component.pending_tasks_mutex)); - } - - ML_VERBOSE(9, ("Collective was launched!")); - return OMPI_SUCCESS; -} - -static inline __opal_attribute_always_inline__ int mca_coll_ml_generic_collectives_launcher( - mca_coll_ml_collective_operation_progress_t *op_prog, - mca_coll_ml_task_setup_fn_t task_setup) -{ - int fn_index; - int rc, ret; - mca_coll_ml_collective_operation_description_t *op_desc = - op_prog->coll_schedule; - mca_coll_ml_compound_functions_t *func = NULL; - mca_coll_ml_task_status_t *task_status = NULL; - mca_coll_ml_component_t *cm = &mca_coll_ml_component; - - ML_VERBOSE(9, ("Calling mca_coll_ml_generic_collectives_launcher")); - - /* Init all tasks before we start them */ - for (fn_index = 0; fn_index < op_desc->n_fns; fn_index++) { - func = &op_desc->component_functions[fn_index]; - task_status = &op_prog->dag_description.status_array[fn_index]; - - ML_VERBOSE(9, ("Processing function index %d", fn_index)); - - assert(NULL != func); - - /* Init task status */ - task_status->n_dep_satisfied = 0; /* start from zero */ - /* task_status->my_index_in_coll_schedule = fn_index; - pasha: the value is set during init */ - task_status->bcol_fn = func->bcol_function; - /* Pasha: disabling support for custom completion functions - task_status->task_comp_fn = func->task_comp_fn; - */ - - /* set up run-time parameters */ - /* Pasha: do we need the if protection? */ - if (OPAL_LIKELY(NULL != task_setup)) { - task_setup(task_status, fn_index, func); - } - - /* the pointer to operation progress is supposed to be set during - construction time.
Just want to make sure that it is ok */ - assert(task_status->ml_coll_operation == op_prog); - /* Task status is done */ - - /* launch the task and put it on the corresponding list (if required) */ - - /* We assume that all pointers to functions are defined and there - is no reason to check for null */ - assert(NULL != func->bcol_function->coll_fn); - } - - /* try to start the startable */ - for (fn_index = 0; fn_index < op_desc->n_fns; fn_index++) { - func = &op_desc->component_functions[fn_index]; - task_status = &op_prog->dag_description.status_array[fn_index]; - /* fire the collective immediately if it has no dependencies */ - if (0 == task_status->rt_num_dependencies) { - rc = func->bcol_function->coll_fn(&op_prog->variable_fn_params, - /* Pasha: Need to update the prototype of the func, - right now it is an ugly hack for compilation */ - (struct mca_bcol_base_function_t *)&func->constant_group_data); - switch(rc) { - case BCOL_FN_NOT_STARTED: - /* put it on the pending list */ - ML_VERBOSE(9, ("Call to bcol collective returned BCOL_FN_NOT_STARTED, putting the task on the pending list")); - OPAL_THREAD_LOCK(&(mca_coll_ml_component.pending_tasks_mutex)); - opal_list_append(&cm->pending_tasks, (opal_list_item_t *)task_status); - OPAL_THREAD_UNLOCK(&(mca_coll_ml_component.pending_tasks_mutex)); - break; - case BCOL_FN_STARTED: - /* put it on the started list */ - ML_VERBOSE(9, ("Call to bcol collective returned BCOL_FN_STARTED, putting the task on the active list")); - OPAL_THREAD_LOCK(&(mca_coll_ml_component.active_tasks_mutex)); - opal_list_append(&cm->active_tasks, (opal_list_item_t *)task_status); - OPAL_THREAD_UNLOCK(&(mca_coll_ml_component.active_tasks_mutex)); - break; - case BCOL_FN_COMPLETE: - /* the task is done! let's start the relevant dependencies */ - ML_VERBOSE(9, ("Call to bcol collective returned BCOL_FN_COMPLETE")); - /* the task does not belong to any list yet, so pass NULL */ - ret = mca_coll_ml_task_completion_processing(&task_status, NULL); - if (OMPI_SUCCESS != ret) { - ML_VERBOSE(9, ("mca_coll_ml_task_completion_processing failed")); - return ret; - } - break; - default: - ML_ERROR(("Unknown exit status %d", rc)); - return OMPI_ERROR; - } - } else { - /* the task depends on others, so let's put it on the pending list */ - ML_VERBOSE(9, ("The task %p dependency is %d, putting it on the pending list", - (void *)task_status, func->num_dependencies)); - OPAL_THREAD_LOCK(&(mca_coll_ml_component.pending_tasks_mutex)); - opal_list_append(&cm->pending_tasks, (opal_list_item_t *)task_status); - OPAL_THREAD_UNLOCK(&(mca_coll_ml_component.pending_tasks_mutex)); - } - } - ML_VERBOSE(9, ("Collective was launched!")); - return OMPI_SUCCESS; -} - -static inline __opal_attribute_always_inline__ mca_coll_ml_collective_operation_progress_t * -mca_coll_ml_alloc_op_prog_single_frag_dag( - mca_coll_ml_module_t *ml_module, - mca_coll_ml_collective_operation_description_t *coll_schedule, - const void *src, void *dst, size_t total_bytes, - size_t offset_into_user_buffer - ) -{ - opal_free_list_item_t *item; - mca_coll_ml_collective_operation_progress_t *coll_op = NULL; - ompi_request_t *req; - - /* Blocking call on fragment allocation (maybe we want to make it non-blocking?)
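/* A sketch of the three-way launch protocol dispatched on above: a bcol
   entry point may finish inline (COMPLETE), stay in flight (STARTED), or
   refuse to start (NOT_STARTED), and the caller routes the task to the
   matching queue. Toy enum and print statements stand in for the real
   queues; names are illustrative only. */
#include <stdio.h>

enum bcol_rc { FN_NOT_STARTED, FN_STARTED, FN_COMPLETE };

static void dispatch(enum bcol_rc rc, int task_id)
{
    switch (rc) {
    case FN_NOT_STARTED:
        printf("task %d -> pending queue (retry later)\n", task_id);
        break;
    case FN_STARTED:
        printf("task %d -> active queue (poll for completion)\n", task_id);
        break;
    case FN_COMPLETE:
        printf("task %d done -> release its dependents now\n", task_id);
        break;
    }
}

int main(void)
{
    dispatch(FN_COMPLETE, 0);
    dispatch(FN_STARTED, 1);
    dispatch(FN_NOT_STARTED, 2);
    return 0;
}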
*/ - item = opal_free_list_wait (&(ml_module->coll_ml_collective_descriptors)); - - coll_op = (mca_coll_ml_collective_operation_progress_t *) item; - ML_VERBOSE(10, (">>> Allocating coll op %p", coll_op)); - assert(NULL != coll_op); - assert(coll_op->dag_description.status_array[0].item.opal_list_item_refcount == 0); - req = &(coll_op->full_message.super); - - OMPI_REQUEST_INIT(req, false); - /* Mark the request ACTIVE. It is critical for MPI_Test()*/ - req->req_state = OMPI_REQUEST_ACTIVE; - req->req_status._cancelled = 0; - req->req_status.MPI_ERROR = OMPI_SUCCESS; - - MCA_COLL_ML_OP_BASIC_SETUP(coll_op, total_bytes, - offset_into_user_buffer, src, dst, coll_schedule); - - /* We do not set sequential, since it is not sequential call */ - coll_op->dag_description.num_tasks_completed = 0; - - /* Release reference counter have to be zero */ - assert(0 == coll_op->pending); - - return coll_op; -} - -static inline __opal_attribute_always_inline__ mca_coll_ml_collective_operation_progress_t * -mca_coll_ml_duplicate_op_prog_single_frag_dag( - mca_coll_ml_module_t *ml_module, - mca_coll_ml_collective_operation_progress_t *old_op) -{ - mca_coll_ml_collective_operation_progress_t *new_op = NULL; - - new_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_bcast_functions[old_op->fragment_data.current_coll_op], - old_op->fragment_data.message_descriptor->dest_user_addr, - (void *)old_op->fragment_data.message_descriptor->src_user_addr, - old_op->fragment_data.message_descriptor->n_bytes_total, - old_op->fragment_data.message_descriptor->n_bytes_scheduled); - - new_op->fragment_data.current_coll_op = old_op->fragment_data.current_coll_op; - new_op->fragment_data.message_descriptor = old_op->fragment_data.message_descriptor; - - return new_op; -} - -static inline __opal_attribute_always_inline__ mca_coll_ml_collective_operation_progress_t * - mca_coll_ml_alloc_op_prog_single_frag_seq( - mca_coll_ml_module_t *ml_module, - mca_coll_ml_collective_operation_description_t *coll_schedule, - void *src, void *dst, - size_t total_bytes, - size_t offset_into_user_buffer - ) -{ - opal_free_list_item_t *item; - mca_coll_ml_collective_operation_progress_t *coll_op = NULL; - - /* Blocking call on fragment allocation (Maybe we want to make it non blocking ?) */ - item = opal_free_list_wait (&(ml_module->coll_ml_collective_descriptors)); - - coll_op = (mca_coll_ml_collective_operation_progress_t *) item; - - assert(NULL != coll_op); - - MCA_COLL_ML_OP_BASIC_SETUP(coll_op, total_bytes, - offset_into_user_buffer, src, dst, coll_schedule); - - /* set sequential data */ - /* pasha - do we have something to set ? 
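/* A sketch of the descriptor free-list pattern used by the allocation
   helpers above: descriptors are pre-created, a blocking "wait" hands
   one out (recycled, so per-use fields must be re-initialized), and
   completion returns it. Toy fixed-size pool instead of
   opal_free_list_t; names are illustrative. */
#include <stdio.h>

#define NDESC 2

struct desc { int in_use; int seq; };

static struct desc pool[NDESC];

static struct desc *desc_wait(void)
{
    /* a real free list would block and progress here instead of failing */
    for (int i = 0; i < NDESC; ++i) {
        if (!pool[i].in_use) {
            pool[i].in_use = 1;
            pool[i].seq = 0;      /* re-init recycled state */
            return &pool[i];
        }
    }
    return NULL;
}

static void desc_return(struct desc *d) { d->in_use = 0; }

int main(void)
{
    struct desc *a = desc_wait();
    struct desc *b = desc_wait();
    printf("%p %p\n", (void *)a, (void *)b);
    desc_return(a);
    struct desc *c = desc_wait();  /* recycles a's slot */
    printf("%p\n", (void *)c);
    desc_return(b); desc_return(c);
    return 0;
}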
*/ - - return coll_op; -} - -static inline __opal_attribute_always_inline__ - void mca_coll_ml_convertor_get_send_frag_size(mca_coll_ml_module_t *ml_module, - size_t *frag_size, struct full_message_t *message_descriptor) -{ - size_t fragment_size = *frag_size; - opal_convertor_t *dummy_convertor = &message_descriptor->dummy_convertor; - - /* The last frag needs special service */ - if (fragment_size > - (size_t) message_descriptor->send_converter_bytes_packed) { - *frag_size = message_descriptor->send_converter_bytes_packed; - message_descriptor->send_converter_bytes_packed = 0; - - return; - } - if( (message_descriptor->dummy_conv_position + fragment_size) > - message_descriptor->n_bytes_total ) { - message_descriptor->dummy_conv_position = (message_descriptor->dummy_conv_position + fragment_size) - - message_descriptor->n_bytes_total; - } else { - message_descriptor->dummy_conv_position += fragment_size; - } - - opal_convertor_generic_simple_position(dummy_convertor, &message_descriptor->dummy_conv_position); - *frag_size -= dummy_convertor->partial_length; - - message_descriptor->send_converter_bytes_packed -= (*frag_size); -} - -static inline __opal_attribute_always_inline__ int -mca_coll_ml_launch_sequential_collective (mca_coll_ml_collective_operation_progress_t *coll_op) -{ - mca_bcol_base_coll_fn_desc_t *bcol_func; - int ifunc, n_fn, ih, ret; - mca_coll_ml_collective_operation_description_t *sched = - coll_op->coll_schedule; - - n_fn = sched->n_fns; - ih = coll_op->sequential_routine.current_active_bcol_fn; - - /* if collectives are already pending just add this one to the list */ - if (opal_list_get_size (&mca_coll_ml_component.sequential_collectives)) { - opal_list_append(&mca_coll_ml_component.sequential_collectives, (opal_list_item_t *) coll_op); - - return OMPI_SUCCESS; - } - - for (ifunc = ih; ifunc < n_fn; ifunc++, coll_op->sequential_routine.current_active_bcol_fn++) { - ret = coll_op->sequential_routine.seq_task_setup(coll_op); - if (OMPI_SUCCESS != ret) { - return ret; - } - - bcol_func = (sched->component_functions[ifunc].bcol_function); - ret = bcol_func->coll_fn(&coll_op->variable_fn_params, - (struct mca_bcol_base_function_t *) &sched->component_functions[ifunc].constant_group_data); - - if (BCOL_FN_COMPLETE == ret) { - if (ifunc == n_fn - 1) { - ret = coll_ml_fragment_completion_processing(coll_op); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - mca_coll_ml_abort_ml("Failed to run coll_ml_fragment_completion_processing"); - } - - return OMPI_SUCCESS; - } - } else { - if (BCOL_FN_STARTED == ret) { - coll_op->sequential_routine.current_bcol_status = SEQ_TASK_IN_PROG; - } else { - coll_op->sequential_routine.current_bcol_status = SEQ_TASK_PENDING; - } - - ML_VERBOSE(10, ("Adding pending bcol to the progress list to access by ml_progress func-id %d", ifunc)); - opal_list_append(&mca_coll_ml_component.sequential_collectives, (opal_list_item_t *) coll_op); - - break; - } - } - - return OMPI_SUCCESS; -} - -END_C_DECLS - -#endif diff --git a/ompi/mca/coll/ml/coll_ml_lex.h b/ompi/mca/coll/ml/coll_ml_lex.h deleted file mode 100644 index d09fe45bf9..0000000000 --- a/ompi/mca/coll/ml/coll_ml_lex.h +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef COLL_ML_LEX_H_ -#define COLL_ML_LEX_H_ - -#include "opal_config.h" -#include - -BEGIN_C_DECLS - -int coll_ml_config_yylex(void); -int coll_ml_config_init_buffer(FILE *file); -int coll_ml_config_yylex_destroy(void); - -extern FILE *coll_ml_config_yyin; -extern bool coll_ml_config_parse_done; -extern char *coll_ml_config_yytext; -extern int 
coll_ml_config_yynewlines; - -/* - * Make lex-generated files not issue compiler warnings - */ -#define YY_STACK_USED 0 -#define YY_ALWAYS_INTERACTIVE 0 -#define YY_NEVER_INTERACTIVE 0 -#define YY_MAIN 0 -#define YY_NO_UNPUT 1 -#define YY_SKIP_YYWRAP 1 - -enum { - COLL_ML_CONFIG_PARSE_DONE, - COLL_ML_CONFIG_PARSE_ERROR, - COLL_ML_CONFIG_PARSE_NEWLINE, - COLL_ML_CONFIG_PARSE_SECTION, - COLL_ML_CONFIG_PARSE_COLLECTIVE, - COLL_ML_CONFIG_PARSE_EQUAL, - COLL_ML_CONFIG_PARSE_SINGLE_WORD, - COLL_ML_CONFIG_PARSE_VALUE, - COLL_ML_CONFIG_PARSE_MAX -}; -END_C_DECLS -#endif diff --git a/ompi/mca/coll/ml/coll_ml_lex.l b/ompi/mca/coll/ml/coll_ml_lex.l deleted file mode 100644 index 45c1e0aefb..0000000000 --- a/ompi/mca/coll/ml/coll_ml_lex.l +++ /dev/null @@ -1,141 +0,0 @@ -%option nounput -%option noinput - -%{ /* -*- C -*- */ -#include "opal_config.h" - -#include -#ifdef HAVE_UNISTD_H -#include -#endif - -#include "coll_ml_lex.h" - -BEGIN_C_DECLS - -/* - * local functions - */ -static int coll_ml_config_yywrap(void); - -END_C_DECLS - -/* - * global variables - */ -int coll_ml_config_yynewlines = 1; -bool coll_ml_config_parse_done = false; -char *coll_ml_config_string = NULL; - -%} - -WHITE [\f\t\v ] -CHAR [A-Za-z0-9_\-\.] -NAME_CHAR [A-Za-z0-9_\-\.\\\/] - -%x comment -%x section_name -%x collective_name -%x section_end -%x collective_end -%x value - -%% - -{WHITE}*\n { ++coll_ml_config_yynewlines; - return COLL_ML_CONFIG_PARSE_NEWLINE; } -#.*\n { ++coll_ml_config_yynewlines; - return COLL_ML_CONFIG_PARSE_NEWLINE; } -"//".*\n { ++coll_ml_config_yynewlines; - return COLL_ML_CONFIG_PARSE_NEWLINE; } - -"/*" { BEGIN(comment); - return COLL_ML_CONFIG_PARSE_NEWLINE; } -[^*\n]* ; /* Eat up non '*'s */ -"*"+[^*/\n]* ; /* Eat '*'s not followed by a '/' */ -\n { ++coll_ml_config_yynewlines; - return COLL_ML_CONFIG_PARSE_NEWLINE; } -"*"+"/" { BEGIN(INITIAL); /* Done with block comment */ - return COLL_ML_CONFIG_PARSE_NEWLINE; } - -{WHITE}*\[{WHITE}* { BEGIN(collective_name); } -({NAME_CHAR}|{WHITE})*{NAME_CHAR}/{WHITE}*\] { - BEGIN(collective_end); - return COLL_ML_CONFIG_PARSE_COLLECTIVE; } -\n { ++coll_ml_config_yynewlines; - return COLL_ML_CONFIG_PARSE_ERROR; } -. { return COLL_ML_CONFIG_PARSE_ERROR; } -{WHITE}*\]{WHITE}*\n { - BEGIN(INITIAL); - ++coll_ml_config_yynewlines; - return COLL_ML_CONFIG_PARSE_NEWLINE; } - -{WHITE}*\<{WHITE}* { BEGIN(section_name); } -({NAME_CHAR}|{WHITE})*{NAME_CHAR}/{WHITE}*\> { - BEGIN(section_end); - return COLL_ML_CONFIG_PARSE_SECTION; } -\n { ++coll_ml_config_yynewlines; - return COLL_ML_CONFIG_PARSE_ERROR; } -. { return COLL_ML_CONFIG_PARSE_ERROR; } -{WHITE}*\>{WHITE}*\n { - BEGIN(INITIAL); - ++coll_ml_config_yynewlines; - return COLL_ML_CONFIG_PARSE_NEWLINE; } - -{WHITE}*"="{WHITE}* { BEGIN(value); - return COLL_ML_CONFIG_PARSE_EQUAL; } -{WHITE}+ ; /* whitespace */ -{CHAR}+ { return COLL_ML_CONFIG_PARSE_SINGLE_WORD; } - -{WHITE}*\n { BEGIN(INITIAL); - ++coll_ml_config_yynewlines; - return COLL_ML_CONFIG_PARSE_NEWLINE; } -[^\n]*[^\t \n]/[\t ]* { - return COLL_ML_CONFIG_PARSE_VALUE; } - -. { return COLL_ML_CONFIG_PARSE_ERROR; } -%% - -/* Old flex (2.5.4a? 
and older) does not define a destroy function */ -#if !defined(YY_FLEX_SUBMINOR_VERSION) -#define YY_FLEX_SUBMINOR_VERSION 0 -#endif - -#if (YY_FLEX_MAJOR_VERSION < 2) || (YY_FLEX_MAJOR_VERSION == 2 && (YY_FLEX_MINOR_VERSION < 5 || (YY_FLEX_MINOR_VERSION == 5 && YY_FLEX_SUBMINOR_VERSION < 5))) -int coll_ml_config_yylex_destroy(void) -{ - if (NULL != YY_CURRENT_BUFFER) { - yy_delete_buffer(YY_CURRENT_BUFFER); -#if defined(YY_CURRENT_BUFFER_LVALUE) - YY_CURRENT_BUFFER_LVALUE = NULL; -#else - YY_CURRENT_BUFFER = NULL; -#endif /* YY_CURRENT_BUFFER_LVALUE */ - } - return YY_NULL; -} -#endif - -static int coll_ml_config_yywrap(void) -{ - coll_ml_config_parse_done = true; - return 1; -} - - -/* - * Ensure that we have a valid yybuffer to use. Specifically, if this - * scanner is invoked a second time, finish_parsing() (above) will - * have been executed, and the current buffer will have been freed. - * Flex doesn't recognize this fact because as far as it's concerned, - * its internal state was already initialized, so it thinks it should - * have a valid buffer. Hence, here we ensure to give it a valid - * buffer. - */ -int coll_ml_config_init_buffer(FILE *file) -{ - YY_BUFFER_STATE buf = yy_create_buffer(file, YY_BUF_SIZE); - yy_switch_to_buffer(buf); - - return 0; -} diff --git a/ompi/mca/coll/ml/coll_ml_lmngr.c b/ompi/mca/coll/ml/coll_ml_lmngr.c deleted file mode 100644 index 1be3f4afe6..0000000000 --- a/ompi/mca/coll/ml/coll_ml_lmngr.c +++ /dev/null @@ -1,330 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "opal/class/opal_list.h" -#include "opal/threads/mutex.h" -#include "coll_ml.h" -#include "coll_ml_inlines.h" -#include "coll_ml_mca.h" -#include "coll_ml_lmngr.h" -#ifndef HAVE_POSIX_MEMALIGN -#include "opal/align.h" -#include "opal_stdint.h" -#endif -#include "opal/util/sys_limits.h" - -/* Constructor for list memory manager */ -static void construct_lmngr(mca_coll_ml_lmngr_t *lmngr) -{ - mca_coll_ml_component_t *cm = &mca_coll_ml_component; - - ML_VERBOSE(7, ("Constructing new list manager %p", (void *)lmngr)); - - /* No real memory is allocated, only basic init. 
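/* A hypothetical configuration fragment in the shape this scanner
 * accepts: '[name]' opens a collective block, '<name>' opens a section,
 * 'key = value' pairs fill it, and '#', '//' and C-style comments are
 * all skipped. The key and value names below are invented for
 * illustration; the real keys are defined by the coll/ml config parser,
 * not by this lexer.
 *
 *     # hypothetical mca-coll-ml.config fragment
 *     [ALLREDUCE]
 *     <small_msg>          // a section inside the collective block
 *     algorithm = static
 *     threshold = 256
 */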
- The real memory will be allocated on demand, on first block allocation */ - - /* We cache the block size, alignment and list size here, - since in the future we may want to define different parameters - for lists */ - lmngr->list_block_size = cm->lmngr_block_size; - lmngr->list_alignment = cm->lmngr_alignment; - lmngr->list_size = cm->lmngr_size; - lmngr->n_resources = 0; - lmngr->base_addr = NULL; /* If the base addr is not null, the struct was initialized - and memory was allocated */ - /* Not sure that the lock is required */ - OBJ_CONSTRUCT(&lmngr->mem_lock, opal_mutex_t); - - /* Only construct the list, no memory initialization */ - OBJ_CONSTRUCT(&lmngr->blocks_list, opal_list_t); -} - -static void destruct_lmngr(mca_coll_ml_lmngr_t *lmngr) -{ - int max_nc = lmngr->n_resources; - int rc, i; - bcol_base_network_context_t *nc; - opal_list_item_t *item; - - ML_VERBOSE(6, ("Destructing list manager %p", (void *)lmngr)); - - while (NULL != (item = opal_list_remove_first(&lmngr->blocks_list))) { - OBJ_RELEASE(item); - } - - OBJ_DESTRUCT(&lmngr->blocks_list); - - if (NULL != lmngr->alloc_base) { - for( i = 0; i < max_nc; i++ ) { - nc = lmngr->net_context[i]; - rc = nc->deregister_memory_fn(nc->context_data, - lmngr->reg_desc[nc->context_id]); - if(rc != OMPI_SUCCESS) { - ML_ERROR(("Failed to unregister, lmngr %p", (void *)lmngr)); - } - } - - ML_VERBOSE(10, ("Release base addr %p", lmngr->alloc_base)); - - free(lmngr->alloc_base); - lmngr->alloc_base = NULL; - lmngr->base_addr = NULL; - } - - lmngr->list_block_size = 0; - lmngr->list_alignment = 0; - lmngr->list_size = 0; - lmngr->n_resources = 0; - - OBJ_DESTRUCT(&lmngr->mem_lock); -} - -OBJ_CLASS_INSTANCE(mca_coll_ml_lmngr_t, - opal_object_t, - construct_lmngr, - destruct_lmngr); - -int mca_coll_ml_lmngr_tune(mca_coll_ml_lmngr_t *lmngr, - size_t block_size, size_t list_size, size_t alignment) -{ - ML_VERBOSE(7, ("Tuning list manager")); - - /* base_addr is non-NULL once the pool has been allocated, so tuning - is only allowed before the first allocation */ - if (OPAL_UNLIKELY(NULL != lmngr->base_addr)) { - ML_VERBOSE(7, ("The list manager is already initialized, you can not tune it")); - return OMPI_ERROR; - } - - lmngr->list_block_size = block_size; - lmngr->list_alignment = alignment; - lmngr->list_size = list_size; - - return OMPI_SUCCESS; -} - -int mca_coll_ml_lmngr_reg(void) -{ - int tmp, ret = OMPI_SUCCESS; - - mca_coll_ml_component_t *cm = &mca_coll_ml_component; - -#define CHECK(expr) do {\ - tmp = (expr); \ - if (0 > tmp) ret = tmp; \ - } while (0) - - ML_VERBOSE(7, ("Setting parameters for list manager")); - - cm->lmngr_size = 8; - CHECK(mca_base_component_var_register(&mca_coll_ml_component.super.collm_version, - "memory_manager_list_size", "Memory manager list size", - MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &cm->lmngr_size)); - - /* The list size can't be less than the possible max of ML modules, - i.e. the max number of communicators supported by ML */ - if (cm->lmngr_size < cm->max_comm) { - cm->lmngr_size = cm->max_comm; - } - - mca_coll_ml_component.lmngr_block_size = cm->payload_buffer_size * - cm->n_payload_buffs_per_bank * - cm->n_payload_mem_banks * - cm->lmngr_size; - - CHECK(mca_base_component_var_register(&mca_coll_ml_component.super.collm_version, - "memory_manager_block_size", "Memory manager block size", - MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_coll_ml_component.lmngr_block_size)); - - cm->lmngr_alignment = opal_getpagesize(); - CHECK(mca_base_component_var_register(&mca_coll_ml_component.super.collm_version, - "memory_manager_alignment",
"Memory manager alignment", - MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_coll_ml_component.lmngr_block_size)); - - return ret; -} - -static int lmngr_register(mca_coll_ml_lmngr_t *lmngr, bcol_base_network_context_t *nc) -{ - int rc, j; - int max_nc = lmngr->n_resources; - - rc = nc->register_memory_fn(nc->context_data, - lmngr->base_addr, - lmngr->list_size * lmngr->list_block_size, - &lmngr->reg_desc[nc->context_id]); - - if(rc != OMPI_SUCCESS) { - int ret_val; - ML_VERBOSE(7, ("Failed to register [%d], unrolling the registration", rc)); - /* deregistser the successful registrations */ - for( j = 0; j < max_nc; j++ ) { - /* set the registration parameter to point to the current - * resource description */ - nc = lmngr->net_context[j]; - ret_val = nc->deregister_memory_fn(nc->context_data, - lmngr->reg_desc[nc->context_id]); - if(ret_val != OMPI_SUCCESS) { - return ret_val; - } - } - - return rc; - } - - return OMPI_SUCCESS; -} - -static int mca_coll_ml_lmngr_init(mca_coll_ml_lmngr_t *lmngr) -{ - int i, num_blocks; - int rc; - unsigned char *addr; - bcol_base_network_context_t *nc; - - ML_VERBOSE(7, ("List initialization")); - -#ifdef HAVE_POSIX_MEMALIGN - if((errno = posix_memalign(&lmngr->base_addr, - lmngr->list_alignment, - lmngr->list_size * lmngr->list_block_size)) != 0) { - ML_ERROR(("Failed to allocate memory: %d [%s]", errno, strerror(errno))); - return OMPI_ERROR; - } - lmngr->alloc_base = lmngr->base_addr; -#else - lmngr->alloc_base = - malloc(lmngr->list_size * lmngr->list_block_size + lmngr->list_alignment); - if(NULL == lmngr->alloc_base) { - ML_ERROR(("Failed to allocate memory: %d [%s]", errno, strerror(errno))); - return OMPI_ERROR; - } - - lmngr->base_addr = (void*)OPAL_ALIGN((uintptr_t)lmngr->alloc_base, - lmngr->list_alignment, uintptr_t); -#endif - - assert(lmngr->n_resources < MCA_COLL_ML_MAX_REG_INFO); - - for(i= 0 ;i < lmngr->n_resources ;i++) { - nc = lmngr->net_context[i]; - ML_VERBOSE(7, ("Call registration for resource index %d", i)); - rc = lmngr_register(lmngr, nc); - if (OMPI_SUCCESS != rc) { - ML_ERROR(("Failed to lmngr register: %d [%s]", errno, strerror(errno))); - return rc; - } - } - - /* slice the memory to blocks */ - addr = (unsigned char *) lmngr->base_addr; - for(num_blocks = 0; num_blocks < (int)lmngr->list_size; num_blocks++) { - mca_bcol_base_lmngr_block_t *item = OBJ_NEW(mca_bcol_base_lmngr_block_t); - item->base_addr = (void *)addr; - item->lmngr = lmngr; - /* ML_VERBOSE(10, ("Appending block # %d %p", num_blocks, (void *)addr)); */ - opal_list_append(&lmngr->blocks_list, (opal_list_item_t *)item); - /* advance the address */ - addr += lmngr->list_block_size; - } - - ML_VERBOSE(7, ("List initialization done %d", - opal_list_get_size(&lmngr->blocks_list))); - return OMPI_SUCCESS; -} - -mca_bcol_base_lmngr_block_t* mca_coll_ml_lmngr_alloc ( - mca_coll_ml_lmngr_t *lmngr) -{ - int rc; - opal_list_t *list = &lmngr->blocks_list; - - /* Check if the list manager was initialized */ - if(OPAL_UNLIKELY(NULL == lmngr->base_addr)) { - ML_VERBOSE(7 ,("Starting memory initialization")); - rc = mca_coll_ml_lmngr_init(lmngr); - if (OMPI_SUCCESS != rc) { - ML_ERROR(("Failed to init memory")); - return NULL; - } - } - - if(OPAL_UNLIKELY(opal_list_is_empty(list))) { - /* Upper layer need to handle the NULL */ - ML_VERBOSE(1, ("List manager is empty.")); - return NULL; - } - - return (mca_bcol_base_lmngr_block_t *)opal_list_remove_first(list); -} - -void mca_coll_ml_lmngr_free(mca_bcol_base_lmngr_block_t *block) 
-{ - opal_list_append(&block->lmngr->blocks_list, (opal_list_item_t *)block); -} - -int mca_coll_ml_lmngr_append_nc(mca_coll_ml_lmngr_t *lmngr, bcol_base_network_context_t *nc) -{ - int i, rc; - - ML_VERBOSE(7, ("Append new network context %p to list manager %p", - nc, lmngr)); - - if (NULL == nc) { - return OMPI_ERROR; - } - - /* check if we already have the context on the list. - if we do have - do not do anything, just return success - */ - if (OPAL_UNLIKELY(MCA_COLL_ML_MAX_REG_INFO == lmngr->n_resources)) { - ML_ERROR(("MPI overflows maximum supported network contexts is %d", MCA_COLL_ML_MAX_REG_INFO)); - return OMPI_ERROR; - } - - for (i = 0; i < lmngr->n_resources; i++) { - if (lmngr->net_context[i] == nc) { - ML_VERBOSE(7, ("It is not new ")); - return OMPI_SUCCESS; - } - } - - ML_VERBOSE(7, ("Adding new context")); - - /* Setting context id */ - nc->context_id = lmngr->n_resources; - lmngr->net_context[lmngr->n_resources] = nc; - - lmngr->n_resources++; - - /* Register the memory with new context */ - if (NULL != lmngr->base_addr) { - rc = lmngr_register(lmngr, nc); - if (OMPI_SUCCESS == rc) { - return rc; - } - } - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/coll/ml/coll_ml_lmngr.h b/ompi/mca/coll/ml/coll_ml_lmngr.h deleted file mode 100644 index c07b3802b5..0000000000 --- a/ompi/mca/coll/ml/coll_ml_lmngr.h +++ /dev/null @@ -1,81 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_ML_LMNGR_H -#define MCA_ML_LMNGR_H - -#include "ompi_config.h" -#include "opal/class/opal_list.h" -#include "ompi/mca/bcol/bcol.h" - -#define MCA_COLL_ML_MAX_REG_INFO 32 - -/* LMNGR - List manager for registred memory */ -struct mca_coll_ml_lmngr_t { - opal_object_t super; - /* lock to control list access */ - opal_mutex_t mem_lock; - - /* list of memory chunks */ - opal_list_t blocks_list; - - /* base (allocated) address of the memory pool */ - void* base_addr; - void *alloc_base; - - /* size of memory chunks */ - size_t list_block_size; - - /* memory chunk alignment */ - size_t list_alignment; - - /* init list size */ - size_t list_size; - - /* number network context of resources - In other words, number of different registration - functions that will be used. For example in case - of iboffload for each device (PD) we will have - different entry - */ - int n_resources; - - /* registration descriptor */ - void * reg_desc[MCA_COLL_ML_MAX_REG_INFO]; - - /* bcol network context array */ - struct bcol_base_network_context_t * net_context[MCA_COLL_ML_MAX_REG_INFO]; -}; -typedef struct mca_coll_ml_lmngr_t mca_coll_ml_lmngr_t; -OBJ_CLASS_DECLARATION(mca_coll_ml_lmngr_t); - -/* read user defined parametres for list manager */ -int mca_coll_ml_lmngr_reg(void); -/* If programmer want to user other than default mca -parametres, he can use the tune function. 
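/* A sketch of the pool carve-up done by mca_coll_ml_lmngr_init above:
   allocate one region of list_size * block_size bytes, then slice it
   into equal blocks whose addresses would go on a free list. It shows
   the manual over-allocate-and-round-up trick used on the
   non-posix_memalign path. Plain C; the sizes are illustrative. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    const size_t block_size = 4096, nblocks = 8, align = 4096;

    /* over-allocate, then round the base up to the alignment */
    unsigned char *alloc_base = malloc(nblocks * block_size + align);
    if (NULL == alloc_base) return 1;

    unsigned char *base = (unsigned char *)
        (((uintptr_t)alloc_base + align - 1) & ~(uintptr_t)(align - 1));

    /* slice the region into blocks; a real pool would queue these */
    for (size_t i = 0; i < nblocks; ++i) {
        unsigned char *blk = base + i * block_size;
        printf("block %zu at %p\n", i, (void *)blk);
    }

    free(alloc_base);   /* free the original pointer, not 'base' */
    return 0;
}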
The tune -function must be run before list initialization, -otherway error will be returned */ -int mca_coll_ml_lmngr_tune(mca_coll_ml_lmngr_t *lmngr, - size_t block_size, size_t list_size, size_t alignment); - -/* Append new network context to the existing list memory manager */ -int mca_coll_ml_lmngr_append_nc(mca_coll_ml_lmngr_t *lmngr, bcol_base_network_context_t *nc); - -/* Allocate a block from memory list manager */ -mca_bcol_base_lmngr_block_t* mca_coll_ml_lmngr_alloc ( - mca_coll_ml_lmngr_t *lmngr); - -/* Return block to list memory manager */ -void mca_coll_ml_lmngr_free (mca_bcol_base_lmngr_block_t *block); - -#endif diff --git a/ompi/mca/coll/ml/coll_ml_mca.c b/ompi/mca/coll/ml/coll_ml_mca.c deleted file mode 100644 index dd1e0ba49c..0000000000 --- a/ompi/mca/coll/ml/coll_ml_mca.c +++ /dev/null @@ -1,300 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include -#include -#include -#include - -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/bcol/base/base.h" -#include "coll_ml.h" -#include "coll_ml_inlines.h" -#include "coll_ml_mca.h" -#include "coll_ml_lmngr.h" -#include "ompi/patterns/net/netpatterns.h" -#include "opal/mca/installdirs/installdirs.h" - -/* - * Local flags - */ -enum { - REGINT_NEG_ONE_OK = 0x01, - REGINT_GE_ZERO = 0x02, - REGINT_GE_ONE = 0x04, - REGINT_NONZERO = 0x08, - REGINT_MAX = 0x88 -}; - -enum { - REGSTR_EMPTY_OK = 0x01, - REGSTR_MAX = 0x88 -}; - -/* - * Enumerators - */ -mca_base_var_enum_value_t fragmentation_enable_enum[] = { - {0, "disable"}, - {1, "enable"}, - {2, "auto"}, - {-1, NULL} -}; - -mca_base_var_enum_value_t bcast_algorithms[] = { - {COLL_ML_STATIC_BCAST, "static"}, - {COLL_ML_SEQ_BCAST, "sequential"}, - {COLL_ML_UNKNOWN_BCAST, "unknown-root"}, - {-1, NULL} -}; - -/* - * utility routine for string parameter registration - */ -static int reg_string(const char* param_name, - const char* deprecated_param_name, - const char* param_desc, - const char* default_value, char **storage, - int flags) -{ - int index; - - *storage = (char *) default_value; - index = mca_base_component_var_register(&mca_coll_ml_component.super.collm_version, - param_name, param_desc, MCA_BASE_VAR_TYPE_STRING, - NULL, 0, 0, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); - if (NULL != deprecated_param_name) { - (void) mca_base_var_register_synonym(index, "ompi", "coll", "ml", deprecated_param_name, - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - } - - if (0 != (flags & REGSTR_EMPTY_OK) && (NULL == *storage || 0 == strlen(*storage))) { - opal_output(0, "Bad parameter value for parameter \"%s\"", - param_name); - return OMPI_ERR_BAD_PARAM; - } - - return OMPI_SUCCESS; -} - -/* - * utility routine for integer parameter registration - */ -static int reg_int(const char* param_name, - const char* deprecated_param_name, - const char* param_desc, - int default_value, int *storage, int flags) -{ - int index; - - *storage = default_value; - index = mca_base_component_var_register(&mca_coll_ml_component.super.collm_version, - param_name, param_desc, MCA_BASE_VAR_TYPE_INT, - NULL, 0, 0,OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); 
- if (NULL != deprecated_param_name) { - (void) mca_base_var_register_synonym(index, "ompi", "coll", "ml", deprecated_param_name, - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - } - - if (0 != (flags & REGINT_NEG_ONE_OK) && -1 == *storage) { - return OMPI_SUCCESS; - } - - if ((0 != (flags & REGINT_GE_ZERO) && *storage < 0) || - (0 != (flags & REGINT_GE_ONE) && *storage < 1) || - (0 != (flags & REGINT_NONZERO) && 0 == *storage)) { - opal_output(0, "Bad parameter value for parameter \"%s\"", - param_name); - return OMPI_ERR_BAD_PARAM; - } - - return OMPI_SUCCESS; -} - -static int reg_bool(const char* param_name, - const char* deprecated_param_name, - const char* param_desc, - bool default_value, bool *storage) -{ - int index; - - *storage = default_value; - index = mca_base_component_var_register(&mca_coll_ml_component.super.collm_version, - param_name, param_desc, MCA_BASE_VAR_TYPE_BOOL, - NULL, 0, 0,OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); - if (NULL != deprecated_param_name) { - (void) mca_base_var_register_synonym(index, "ompi", "coll", "ml", deprecated_param_name, - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - } - - return OMPI_SUCCESS; -} - -static int reg_ullint(const char* param_name, - const char* deprecated_param_name, - const char* param_desc, - unsigned long long default_value, unsigned long long *storage, int flags) -{ - int index; - - *storage = default_value; - index = mca_base_component_var_register(&mca_coll_ml_component.super.collm_version, - param_name, param_desc, MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG, - NULL, 0, 0,OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); - if (NULL != deprecated_param_name) { - (void) mca_base_var_register_synonym(index, "ompi", "coll", "ml", deprecated_param_name, - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - } - - if ((0 != (flags & REGINT_GE_ONE) && *storage < 1) || - (0 != (flags & REGINT_NONZERO) && 0 == *storage)) { - opal_output(0, "Bad parameter value for parameter \"%s\"", - param_name); - return OMPI_ERR_BAD_PARAM; - } - - return OMPI_SUCCESS; -} - -static int mca_coll_ml_verify_params(void) -{ - int dummy; - - /* Make sure that the the number of memory banks is a power of 2 */ - mca_coll_ml_component.n_payload_mem_banks = - ompi_roundup_to_power_radix(2, mca_coll_ml_component.n_payload_mem_banks, - &dummy); - - /* Make sure that the the number of buffers is a power of 2 */ - mca_coll_ml_component.n_payload_buffs_per_bank = - ompi_roundup_to_power_radix(2, mca_coll_ml_component.n_payload_buffs_per_bank, - &dummy); - - return OMPI_SUCCESS; -} - -int mca_coll_ml_register_params(void) -{ - mca_base_var_enum_t *new_enum; - int ret, tmp; - char *str = NULL; - - ret = OMPI_SUCCESS; -#define CHECK(expr) do { \ - tmp = (expr); \ - if (OMPI_SUCCESS != tmp) ret = tmp; \ - } while (0) - - /* register openib component parameters */ - - CHECK(reg_int("priority", NULL, "ML component priority" - "(from 0(low) to 90 (high))", 0, &mca_coll_ml_component.ml_priority, 0)); - - CHECK(reg_int("verbose", NULL, "Output some verbose ML information " - "(0 = no output, nonzero = output)", 0, &mca_coll_ml_component.verbose, 0)); - - CHECK(reg_int("max_comm", NULL, "Maximum number of communicators that can use coll/ml", 24, - (int *) &mca_coll_ml_component.max_comm, 0)); - - CHECK(reg_int("min_comm_size", NULL, "Minimum size of communicator to use coll/ml", 0, - &mca_coll_ml_component.min_comm_size, 0)); - - CHECK(reg_int("n_payload_mem_banks", NULL, "Number of payload memory banks", 2, - &mca_coll_ml_component.n_payload_mem_banks, 0)); - - 
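/* A standalone sketch of the REGINT_* flag checks used by reg_int
   above: after a parameter is registered, bit flags select which values
   count as valid. Same flag semantics as the local enum; the toy
   check_int helper and its return codes are illustrative. */
#include <stdio.h>

enum {
    REGINT_NEG_ONE_OK = 0x01,
    REGINT_GE_ZERO    = 0x02,
    REGINT_GE_ONE     = 0x04,
    REGINT_NONZERO    = 0x08,
};

static int check_int(int value, int flags)
{
    if ((flags & REGINT_NEG_ONE_OK) && -1 == value) return 0; /* ok  */
    if (((flags & REGINT_GE_ZERO) && value < 0) ||
        ((flags & REGINT_GE_ONE)  && value < 1) ||
        ((flags & REGINT_NONZERO) && 0 == value)) {
        return -1;                                            /* bad */
    }
    return 0;
}

int main(void)
{
    printf("%d\n", check_int(-1, REGINT_NEG_ONE_OK | REGINT_GE_ONE)); /* 0  */
    printf("%d\n", check_int(0,  REGINT_GE_ONE));                     /* -1 */
    printf("%d\n", check_int(5,  REGINT_GE_ZERO | REGINT_NONZERO));   /* 0  */
    return 0;
}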
CHECK(reg_int("n_payload_buffs_per_bank", NULL, "Number of payload buffers per bank", 16, - &mca_coll_ml_component.n_payload_buffs_per_bank, 0)); - - /* RLG: need to handle alignment and size */ - CHECK(reg_ullint("payload_buffer_size", NULL, "Size of payload buffers", 4*1024, - &mca_coll_ml_component.payload_buffer_size, 0)); - - /* get the pipeline depth, default is 2 */ - CHECK(reg_int("pipeline_depth", NULL, "Size of fragmentation pipeline", 2, - &mca_coll_ml_component.pipeline_depth, 0)); - - CHECK(reg_int("free_list_init_size", NULL, "Initial size of free lists in coll/ml", 128, - &mca_coll_ml_component.free_list_init_size, 0)); - - CHECK(reg_int("free_list_grow_size", NULL, "Initial size of free lists in coll/ml", 64, - &mca_coll_ml_component.free_list_grow_size, 0)); - - CHECK(reg_int("free_list_max_size", NULL, "Initial size of free lists in coll/ml", -1, - &mca_coll_ml_component.free_list_max_size, 0)); - - mca_coll_ml_component.use_knomial_allreduce = 1; - - tmp = mca_base_var_enum_create ("coll_ml_bcast_algorithm", bcast_algorithms, &new_enum); - if (OPAL_SUCCESS != tmp) { - return tmp; - } - - mca_coll_ml_component.bcast_algorithm = COLL_ML_STATIC_BCAST; - tmp = mca_base_component_var_register (&mca_coll_ml_component.super.collm_version, "bcast_algorithm", - "Algorithm to use for broadcast", MCA_BASE_VAR_TYPE_INT, - new_enum, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, - &mca_coll_ml_component.bcast_algorithm); - OBJ_RELEASE(new_enum); - if (0 > tmp) { - ret = tmp; - } - - CHECK(reg_bool("disable_allgather", NULL, "Disable Allgather", false, - &mca_coll_ml_component.disable_allgather)); - - CHECK(reg_bool("disable_reduce", NULL, "Disable Reduce", false, - &mca_coll_ml_component.disable_reduce)); - - tmp = mca_base_var_enum_create ("coll_ml_enable_fragmentation_enum", fragmentation_enable_enum, &new_enum); - if (OPAL_SUCCESS != tmp) { - return tmp; - } - - /* default to auto-enable fragmentation */ - mca_coll_ml_component.enable_fragmentation = 2; - tmp = mca_base_component_var_register (&mca_coll_ml_component.super.collm_version, "enable_fragmentation", - "Disable/Enable fragmentation for large messages", MCA_BASE_VAR_TYPE_INT, - new_enum, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, - &mca_coll_ml_component.enable_fragmentation); - if (0 > tmp) { - ret = tmp; - } - OBJ_RELEASE(new_enum); - - asprintf(&str, "%s/mca-coll-ml.config", - opal_install_dirs.opaldatadir); - if (NULL == str) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - CHECK(reg_string("config_file", NULL, - "ML collectives configuration file", - str, &mca_coll_ml_component.config_file_name, - 0)); - free(str); - - /* Reading parameters for list manager */ - CHECK(mca_coll_ml_lmngr_reg()); - - /* Verify the parameters */ - CHECK(mca_coll_ml_verify_params()); - - return ret; -} diff --git a/ompi/mca/coll/ml/coll_ml_mca.h b/ompi/mca/coll/ml/coll_ml_mca.h deleted file mode 100644 index 7730bd284d..0000000000 --- a/ompi/mca/coll/ml/coll_ml_mca.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
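/* A sketch of the power-of-two rounding that mca_coll_ml_verify_params
   above relies on for the bank and buffer counts, assuming round-up
   semantics; ompi_roundup_to_power_radix itself is not reproduced here,
   and this standalone helper is illustrative only. */
#include <stdio.h>

static unsigned roundup_pow2(unsigned v)
{
    unsigned p = 1;
    while (p < v) p <<= 1;   /* smallest power of two >= v */
    return p;
}

int main(void)
{
    printf("%u\n", roundup_pow2(2));   /* 2  */
    printf("%u\n", roundup_pow2(3));   /* 4  */
    printf("%u\n", roundup_pow2(16));  /* 16 */
    printf("%u\n", roundup_pow2(17));  /* 32 */
    return 0;
}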
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - /** @file */ - -#ifndef MCA_COLL_ML_MCA_H -#define MCA_COLL_ML_MCA_H - -#include -#include "ompi_config.h" - -int mca_coll_ml_register_params(void); - -#endif diff --git a/ompi/mca/coll/ml/coll_ml_memsync.c b/ompi/mca/coll/ml/coll_ml_memsync.c deleted file mode 100644 index de0c322cfb..0000000000 --- a/ompi/mca/coll/ml/coll_ml_memsync.c +++ /dev/null @@ -1,175 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file */ - -#include "ompi_config.h" - -#include "ompi/constants.h" -#include "opal/threads/mutex.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/coll/coll.h" -#include "opal/sys/atomic.h" -#include "ompi/mca/coll/ml/coll_ml.h" -#include "ompi/mca/coll/ml/coll_ml_inlines.h" -#include "ompi/mca/coll/ml/coll_ml_allocation.h" - -static int mca_coll_ml_memsync_recycle_memory(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *)coll_op->coll_module; - mca_bcol_base_memory_block_desc_t *ml_memblock = ml_module->payload_block; - mca_coll_ml_collective_operation_progress_t *pending_op = NULL; - int bank = coll_op->full_message.bank_index_to_recycle; - int rc; - bool have_resources = true; - - assert(bank >= 0 || - bank < (int)ml_memblock->num_banks || - ML_MEMSYNC == coll_op->fragment_data.current_coll_op); - - ML_VERBOSE(10,("MEMSYNC: bank %d was recycled coll_op %p", bank, coll_op)); - - /* set the bank as free */ - - ml_memblock->bank_is_busy[bank] = false; - ml_memblock->bank_release_counters[bank] = 0; - - /* Check if we have any requests that are waiting for memory */ - while(opal_list_get_size(&ml_module->waiting_for_memory_list) && have_resources) { - pending_op = (mca_coll_ml_collective_operation_progress_t *) - opal_list_get_first(&ml_module->waiting_for_memory_list); - - ML_VERBOSE(10, ("Trying to start pending %p", pending_op)); - assert(pending_op->pending & REQ_OUT_OF_MEMORY); - rc = pending_op->fragment_data.message_descriptor->fragment_launcher(pending_op); - switch (rc) { - case OMPI_SUCCESS: - ML_VERBOSE(10, ("Pending fragment was started %p", pending_op)); - pending_op->pending ^= REQ_OUT_OF_MEMORY; - opal_list_remove_item(&ml_module->waiting_for_memory_list, - (opal_list_item_t *)pending_op); - if (0 != pending_op->fragment_data.offset_into_user_buffer) { - /* non-zero offset ==> this is not fragment 0 */ - CHECK_AND_RECYCLE(pending_op); - } - break; - case OMPI_ERR_TEMP_OUT_OF_RESOURCE: - ML_VERBOSE(10, ("Already on the list %p", pending_op)); - have_resources = false; - break; - default: - ML_ERROR(("Error happened %d", rc)); - return rc; - } - } - - ML_VERBOSE(10, ("Memsync done %p", coll_op)); - return OMPI_SUCCESS; -} - -static void mca_coll_ml_barrier_task_setup( - mca_coll_ml_task_status_t *task_status, - int index, mca_coll_ml_compound_functions_t *func) -{ - task_status->rt_num_dependencies = func->num_dependencies; - task_status->rt_num_dependent_tasks = func->num_dependent_tasks; - task_status->rt_dependent_task_indices = func->dependent_task_indices; -} - -static inline __opal_attribute_always_inline__ int 
mca_coll_ml_memsync_launch(mca_coll_ml_module_t *ml_module, - ompi_request_t **req, int bank_index) -{ - mca_coll_ml_collective_operation_progress_t *coll_op; - - coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_memsync_function, - NULL, NULL, 0, 0); - - assert(NULL != coll_op); - - ML_VERBOSE(10, ("Get coll request %p", coll_op)); - - coll_op->fragment_data.buffer_desc = NULL; - - /* Caching bank index for future memory recycling callback */ - coll_op->full_message.bank_index_to_recycle = bank_index; - - coll_op->fragment_data.current_coll_op = ML_MEMSYNC; - /* I don't want to define one more parameter, so under root - * we pass buffer index */ - coll_op->variable_fn_params.root = bank_index; - /* As well it's little bit ugly, since it is no wait for this request, - * in order to recycle it we have to set offset to some value > 1 */ - coll_op->fragment_data.offset_into_user_buffer = 1; - coll_op->variable_fn_params.buffer_index = MCA_COLL_ML_NO_BUFFER; - coll_op->variable_fn_params.sequence_num = -1; /* It should be safe to use -1 */ - /* Pointer to a coll finalize function */ - if (OPAL_LIKELY(ml_module->initialized)) { - coll_op->process_fn = mca_coll_ml_memsync_recycle_memory; - } else { - /* No post work on first call */ - coll_op->process_fn = NULL; - } - - ML_VERBOSE(10,("Memsync start %p", &coll_op)); - - return mca_coll_ml_generic_collectives_append_to_queue(coll_op, mca_coll_ml_barrier_task_setup); -} - -/** - * Non blocking memory syncronization - */ -int mca_coll_ml_memsync_intra(mca_coll_ml_module_t *ml_module, int bank_index) -{ - int rc; - ompi_request_t *req; - - ML_VERBOSE(8, ("MEMSYNC start")); - - if (OPAL_UNLIKELY(0 == opal_list_get_size(&ml_module->active_bcols_list))) { - /* Josh's change: In the case where only p2p is active, we have no way - * to reset the bank release counters to zero, I am doing that here since it - * would actually be "correct" to do it outside of this conditional, however - * I suspect that reseting the value to zero elsewhere would result in corrupted - * flow for non-contiguous data types - */ - - /* nasty hack to ensure that resources are released in the single level - * ptp case. - */ - mca_coll_ml_collective_operation_progress_t dummy_coll; - - dummy_coll.coll_module = (mca_coll_base_module_t *) ml_module; - dummy_coll.fragment_data.current_coll_op = ML_MEMSYNC; - dummy_coll.full_message.bank_index_to_recycle = bank_index; - - /* Handling special case when memory syncronization is not required */ - rc = mca_coll_ml_memsync_recycle_memory(&dummy_coll); - if(OPAL_UNLIKELY(rc != OMPI_SUCCESS)){ - ML_ERROR(("Failed to flush the list.")); - return rc; - } - } else { - /* retain the communicator until the operation is finished. the communicator - * will be released by CHECK_AND_RECYCLE */ - OBJ_RETAIN(ml_module->comm); - - rc = mca_coll_ml_memsync_launch(ml_module, &req, bank_index); - if (OPAL_UNLIKELY(rc != OMPI_SUCCESS)) { - ML_ERROR(("Failed to launch a barrier.")); - return rc; - } - } - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/coll/ml/coll_ml_module.c b/ompi/mca/coll/ml/coll_ml_module.c deleted file mode 100644 index 05651a6c07..0000000000 --- a/ompi/mca/coll/ml/coll_ml_module.c +++ /dev/null @@ -1,3122 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2012-2016 Los Alamos National Security, LLC. 
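/* A toy model of the in-order bank recycling driven by the
   buffer-recycling/memsync logic above: banks may fill in any order,
   but the sync (and reuse) must proceed bank 0, 1, 2, ... in sequence,
   so the ready flags defer out-of-order completions until the cursor
   reaches them. Globals and names are illustrative only. */
#include <stdbool.h>
#include <stdio.h>

#define NBANKS 4

static bool ready[NBANKS];
static int  cursor;                 /* cf. memsync_counter */

static void bank_full(int bank)
{
    ready[bank] = true;
    if (bank != cursor) {
        printf("bank %d full out of order, deferred\n", bank);
        return;
    }
    while (ready[cursor]) {         /* drain consecutive ready banks */
        ready[cursor] = false;
        printf("memsync bank %d\n", cursor);
        cursor = (cursor + 1) % NBANKS;
    }
}

int main(void)
{
    bank_full(1);   /* deferred         */
    bank_full(0);   /* syncs banks 0, 1 */
    bank_full(2);   /* syncs bank 2     */
    return 0;
}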
All rights - * reserved. - * Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - * - * Most of the description of the data layout is in the - * coll_ml_module.c file. - */ - -#include "ompi_config.h" - -#include -#include -#include -#include -#include - -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/coll/coll.h" -#include "ompi/mca/coll/base/base.h" -#include "ompi/mca/sbgp/base/base.h" -#include "ompi/mca/bcol/base/base.h" -#include "ompi/mca/sbgp/sbgp.h" -#include "ompi/patterns/comm/coll_ops.h" -#include "ompi/mca/coll/ml/coll_ml.h" - -#include "opal/util/argv.h" -#include "opal/datatype/opal_datatype.h" -#include "opal/util/output.h" -#include "opal/util/arch.h" -#include "opal/align.h" - -#include "coll_ml.h" -#include "coll_ml_inlines.h" -#include "coll_ml_select.h" -#include "coll_ml_custom_utils.h" -#include "coll_ml_allocation.h" - -static int coll_ml_parse_topology (sub_group_params_t *sub_group_meta_data, size_t sub_group_count, - int *list_of_ranks_in_all_subgroups, int level_one_size); - -/* #define NEW_LEADER_SELECTION */ - -struct ranks_proxy_t { - /* number of subgroups for which the rank is a proxy */ - int number_subgroups; - /* subgrou indecies */ - int *subgroup_index; -}; -typedef struct rank_proxy_t rank_proxy_t; - -#define PROVIDE_SUFFICIENT_MEMORY(ptr, dummy_ptr, ptr_size, unit_type, in_use, \ - n_to_add,n_to_grow) \ - do { \ - if ((in_use) + (n_to_add) > (ptr_size)) { \ - (dummy_ptr) = (unit_type *) \ - realloc(ptr, sizeof(unit_type) * ((ptr_size) + (n_to_grow))); \ - if (NULL != (dummy_ptr)) { \ - (ptr) = (dummy_ptr); \ - (ptr_size) += (n_to_grow); \ - } \ - } \ - } while (0) - -/* - * Local functions - */ - -static int ml_module_enable(mca_coll_base_module_t *module, - struct ompi_communicator_t *comm); - -static int mca_coll_ml_fill_in_route_tab(mca_coll_ml_topology_t *topo, - ompi_communicator_t *comm); - -static void -mca_coll_ml_module_construct(mca_coll_ml_module_t *module) -{ - int index_topo, coll_i, st_i; - mca_coll_ml_topology_t *topo; - - memset ((char *) module + sizeof (module->super), 0, sizeof (*module) - sizeof (module->super)); - - /* It's critical to reset data_offset to zero */ - module->data_offset = -1; - - /* If the topology support zero level and no fragmentation was requested */ - for (index_topo = 0; index_topo < COLL_ML_TOPO_MAX; index_topo++) { - topo = &module->topo_list[index_topo]; - topo->global_lowest_hier_group_index = -1; - topo->global_highest_hier_group_index = -1; - topo->number_of_all_subgroups = -1; - topo->n_levels = -1; - topo->all_bcols_mode = ~(0); /* set to all bits */ - topo->status = COLL_ML_TOPO_DISABLED; /* all topologies are not used by default */ - } - - for (coll_i = 0; coll_i < ML_NUM_OF_FUNCTIONS; coll_i++) { - for (st_i = 0; st_i < MCA_COLL_MAX_NUM_SUBTYPES; st_i++) { - module->collectives_topology_map[coll_i][st_i] = ML_UNDEFINED; - } - } - - for (coll_i = 0; coll_i < BCOL_NUM_OF_FUNCTIONS; ++coll_i) { - module->small_message_thresholds[coll_i] = BCOL_THRESHOLD_UNLIMITED; - } - - OBJ_CONSTRUCT(&module->active_bcols_list, opal_list_t); - OBJ_CONSTRUCT(&module->waiting_for_memory_list, opal_list_t); - OBJ_CONSTRUCT(&module->fragment_descriptors, opal_free_list_t); - OBJ_CONSTRUCT(&module->message_descriptors, opal_free_list_t); - 
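/* A minimal sketch of the grow-on-demand pattern behind the
   PROVIDE_SUFFICIENT_MEMORY macro above: when an append would overflow
   the current capacity, realloc through a scratch pointer so the
   original array survives a failed realloc. The growth step and data
   are illustrative. */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    int *arr = NULL, *tmp;
    size_t cap = 0, used = 0;
    const size_t grow = 4;

    for (int v = 0; v < 10; ++v) {
        if (used + 1 > cap) {                 /* need more room */
            tmp = realloc(arr, sizeof(*arr) * (cap + grow));
            if (NULL == tmp) { free(arr); return 1; }
            arr = tmp;                        /* commit only on success */
            cap += grow;
        }
        arr[used++] = v;
    }

    printf("stored %zu ints, capacity %zu\n", used, cap);
    free(arr);
    return 0;
}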
OBJ_CONSTRUCT(&module->coll_ml_collective_descriptors, opal_free_list_t); - - memset (&module->fallback, 0, sizeof (module->fallback)); -} - -#define ML_RELEASE_FALLBACK(_coll_ml, _coll) \ - do { \ - if (_coll_ml->fallback.coll_ ## _coll ## _module) { \ - OBJ_RELEASE(_coll_ml->fallback.coll_ ## _coll ## _module); \ - _coll_ml->fallback.coll_ ## _coll ## _module = NULL; \ - } \ - } while (0); - -static void -mca_coll_ml_module_destruct(mca_coll_ml_module_t *module) -{ - int i, j, k,fnc, index_topo; - mca_coll_ml_topology_t *topo; - - ML_VERBOSE(4, ("ML module destruct")); - - for (index_topo = 0; index_topo < COLL_ML_TOPO_MAX; index_topo++) { - topo = &module->topo_list[index_topo]; - if (COLL_ML_TOPO_DISABLED == topo->status) { - /* skip the topology */ - continue; - } - - if (NULL != topo->component_pairs) { - for(i = 0; i < topo->n_levels; ++i) { - for(j = 0; j < topo->component_pairs[i].num_bcol_modules; ++j) { - OBJ_RELEASE(topo->component_pairs[i].bcol_modules[j]); - } - /* free the array of bcol module */ - free(topo->component_pairs[i].bcol_modules); - - OBJ_RELEASE(topo->component_pairs[i].subgroup_module); - } - - free(topo->component_pairs); - } - - /* gvm Leak FIX Free collective algorithms structure */ - for (fnc = 0; fnc < BCOL_NUM_OF_FUNCTIONS; fnc++) { - if (NULL != topo->hierarchical_algorithms[fnc]){ - free(topo->hierarchical_algorithms[fnc]); - } - } - - /* free up the route vector memory */ - if (NULL != topo->route_vector) { - free(topo->route_vector); - } - /* free resrouce description */ - if(NULL != topo->array_of_all_subgroups) { - for( k=0 ; k < topo->number_of_all_subgroups ; k++ ) { - if(0 < topo->array_of_all_subgroups[k].n_ranks) { - free(topo->array_of_all_subgroups[k].rank_data); - topo->array_of_all_subgroups[k].rank_data = NULL; - } - } - free(topo->array_of_all_subgroups); - topo->array_of_all_subgroups = NULL; - } - if (NULL != topo->hier_layout_info) { - free(topo->hier_layout_info); - topo->hier_layout_info = NULL; - } - } - - OPAL_LIST_DESTRUCT(&(module->active_bcols_list)); - OBJ_DESTRUCT(&(module->waiting_for_memory_list)); - - /* gvm Leak FIX Remove fragment free list */ - OBJ_DESTRUCT(&(module->fragment_descriptors)); - OBJ_DESTRUCT(&(module->message_descriptors)); - /* push mca_bcol_base_memory_block_desc_t back on list manager */ - mca_coll_ml_free_block(module->payload_block); - /* release the cinvertor if it was allocated */ - if (NULL != module->reference_convertor) { - OBJ_RELEASE(module->reference_convertor); - } - - OBJ_DESTRUCT(&(module->coll_ml_collective_descriptors)); - - if (NULL != module->coll_ml_barrier_function) { - if (NULL != module->coll_ml_barrier_function->component_functions) { - free(module->coll_ml_barrier_function->component_functions); - module->coll_ml_barrier_function->component_functions = NULL; - } - free(module->coll_ml_barrier_function); - module->coll_ml_barrier_function = NULL; - } - - if (module->coll_ml_memsync_function) { - if (module->coll_ml_memsync_function->component_functions) { - free(module->coll_ml_memsync_function->component_functions); - module->coll_ml_memsync_function->component_functions = NULL; - } - free(module->coll_ml_memsync_function); - module->coll_ml_memsync_function = NULL; - } - - ml_coll_hier_allreduce_cleanup_new(module); - ml_coll_hier_allgather_cleanup(module); - ml_coll_hier_bcast_cleanup(module); - ml_coll_hier_reduce_cleanup(module); - - /* release saved collectives */ - ML_RELEASE_FALLBACK(module, allreduce); - ML_RELEASE_FALLBACK(module, allgather); - ML_RELEASE_FALLBACK(module, 
reduce); - ML_RELEASE_FALLBACK(module, bcast); - ML_RELEASE_FALLBACK(module, iallreduce); - ML_RELEASE_FALLBACK(module, iallgather); - ML_RELEASE_FALLBACK(module, ireduce); - ML_RELEASE_FALLBACK(module, ibcast); -} - - -static int mca_coll_ml_request_free(ompi_request_t** request) -{ - /* local variables */ - mca_coll_ml_collective_operation_progress_t *ml_request= - (mca_coll_ml_collective_operation_progress_t *)(*request); - mca_coll_ml_module_t *ml_module = OP_ML_MODULE(ml_request); - - /* The ML memory bank recycling check is done, now we may - * return the request and signal completion */ - - /* this fragment does not hold the message data, so it is ok to return */ - assert(0 == ml_request->pending); - //assert(0 == ml_request->fragment_data.offset_into_user_buffer); - assert(&ml_request->full_message == ml_request->fragment_data.message_descriptor); - assert(ml_request->dag_description.status_array[0].item.opal_list_item_refcount == 0); - ML_VERBOSE(10, ("Releasing Master %p", ml_request)); - /* Mark the request as invalid */ - OMPI_REQUEST_FINI(&ml_request->full_message.super); - opal_free_list_return (&(ml_module->coll_ml_collective_descriptors), - (opal_free_list_item_t *)ml_request); - - /* MPI needs to return with the request object set to MPI_REQUEST_NULL - */ - *request = MPI_REQUEST_NULL; - - return OMPI_SUCCESS; -} - -/* constructor for collective management descriptor */ -static void mca_coll_ml_collective_operation_progress_construct -(mca_coll_ml_collective_operation_progress_t *desc) { - - /* initialize pointer */ - desc->dag_description.status_array = NULL; - - OBJ_CONSTRUCT(&desc->full_message.send_convertor, opal_convertor_t); - OBJ_CONSTRUCT(&desc->full_message.recv_convertor, opal_convertor_t); - - OBJ_CONSTRUCT(&desc->full_message.dummy_convertor, opal_convertor_t); - - /* initialize the request free pointer */ - desc->full_message.super.req_free = mca_coll_ml_request_free; - - /* no cancel function */ - desc->full_message.super.req_cancel = NULL; - /* Collective request type */ - desc->full_message.super.req_type = OMPI_REQUEST_COLL; - /* RLG: Do we need to set req_mpi_object ?
*/ - - /* If not null , we have to release next fragment */ - desc->next_to_process_frag = NULL; - - /* pointer to previous fragment */ - desc->prev_frag = NULL; - - /* Pasha: moreinit */ - desc->pending = 0; -} - -/* destructor for collective managment descriptor */ -static void mca_coll_ml_collective_operation_progress_destruct -(mca_coll_ml_collective_operation_progress_t *desc) { - mca_coll_ml_module_t *ml_module = - (mca_coll_ml_module_t *) desc->coll_module; - - int i, max_dag_size = ml_module->max_dag_size; - - if (NULL != desc->dag_description.status_array) { - for (i = 0; i < max_dag_size; ++i) { - OBJ_DESTRUCT(&desc->dag_description.status_array[i].item); - } - - free(desc->dag_description.status_array); - desc->dag_description.status_array = NULL; - } - - OBJ_DESTRUCT(&desc->full_message.send_convertor); - OBJ_DESTRUCT(&desc->full_message.recv_convertor); - - OBJ_DESTRUCT(&desc->full_message.dummy_convertor); -} -/* initialize the full message descriptor - can pass in module specific - * initialization data - */ -static int init_ml_fragment_desc(opal_free_list_item_t *desc , void* ctx); -static int init_ml_message_desc(opal_free_list_item_t *desc , void* ctx) -{ - mca_coll_ml_module_t *module= (mca_coll_ml_module_t *) ctx; - mca_coll_ml_descriptor_t *msg_desc = (mca_coll_ml_descriptor_t *) desc; - - /* finish setting up the fragment descriptor */ - init_ml_fragment_desc((opal_free_list_item_t*)&(msg_desc->fragment),module); - - return OPAL_SUCCESS; -} - -/* initialize the fragment descriptor - can pass in module specific - * initialization data - */ -static int init_ml_fragment_desc(opal_free_list_item_t *desc , void* ctx) -{ - mca_coll_ml_module_t *module= (mca_coll_ml_module_t *) ctx; - mca_coll_ml_fragment_t *frag_desc = (mca_coll_ml_fragment_t *) desc; - - /* allocated array of function arguments */ - /* RLG - we have a problem if we don't get the memory */ - /* malloc-debug does not like zero allocations */ - if (module->max_fn_calls > 0) { - frag_desc->fn_args = (bcol_function_args_t *) - malloc(sizeof(bcol_function_args_t) * module->max_fn_calls); - } - - return OPAL_SUCCESS; -} -static void mca_coll_ml_bcol_list_item_construct(mca_coll_ml_bcol_list_item_t *item) -{ - item->bcol_module = NULL; -} -OBJ_CLASS_INSTANCE(mca_coll_ml_bcol_list_item_t, - opal_list_item_t, - mca_coll_ml_bcol_list_item_construct, - NULL); - -static void generate_active_bcols_list(mca_coll_ml_module_t *ml_module) -{ - int i, j, index_topo; - mca_coll_ml_topology_t *topo; - bool bcol_was_found; - mca_coll_ml_bcol_list_item_t *bcol_item = NULL; - mca_bcol_base_module_t *bcol_module = NULL; - - ML_VERBOSE(10, ("Generating active bcol list ")); - - for (index_topo = 0; index_topo < COLL_ML_TOPO_MAX; index_topo++) { - topo = &ml_module->topo_list[index_topo]; - if (COLL_ML_TOPO_DISABLED == topo->status) { - /* skip the topology */ - continue; - } - for( i = 0; i < topo->n_levels; i++) { - - for( j = 0; j < topo->component_pairs[i].num_bcol_modules; j++) { - bcol_module = topo->component_pairs[i].bcol_modules[j]; - - /* Check if the bcol provides synchronization function, if the - * function is not provided we skip this bcol, since it isn't used - * for memory synchronization (for instance - ptpcoll )*/ - if (NULL == GET_BCOL_SYNC_FN(bcol_module)) { - ML_VERBOSE(10,(" No sync function was provided by bcol %s", - bcol_module->bcol_component->bcol_version.mca_component_name)); - continue; - } - - bcol_was_found = false; - for(bcol_item = (mca_coll_ml_bcol_list_item_t 
*)opal_list_get_first(&ml_module->active_bcols_list); - !bcol_was_found && - bcol_item != (mca_coll_ml_bcol_list_item_t *)opal_list_get_end(&ml_module->active_bcols_list); - bcol_item = (mca_coll_ml_bcol_list_item_t *)opal_list_get_next((opal_list_item_t *)bcol_item)) { - if (bcol_module == bcol_item->bcol_module) { - bcol_was_found = true; - } - } - - /* append the item to the list if it was not found */ - if (!bcol_was_found) { - bcol_item = OBJ_NEW(mca_coll_ml_bcol_list_item_t); - bcol_item->bcol_module = bcol_module; - opal_list_append(&ml_module->active_bcols_list, (opal_list_item_t *)bcol_item); - } - - } - } - } -} - -static int calculate_buffer_header_size(mca_coll_ml_module_t *ml_module) -{ - mca_coll_ml_topology_t *topo; - mca_bcol_base_module_t *bcol_module; - - uint32_t offset = 0; - int i, j, *ranks_in_comm, kount = 0, - rc, data_offset = 0, index_topo, - comm_size = ompi_comm_size(ml_module->comm); - - ML_VERBOSE(10, ("Calculating offset for the ML")); - - /* probably a stupid thing to do, but we have to loop over twice */ - - for (index_topo = 0; index_topo < COLL_ML_TOPO_MAX; index_topo++) { - topo = &ml_module->topo_list[index_topo]; - if (COLL_ML_TOPO_DISABLED == topo->status) { - /* skip the topology */ - continue; - } - - for (i = 0; i < topo->n_levels; i++) { - for (j = 0; j < topo->component_pairs[i].num_bcol_modules; j++) { - bcol_module = topo->component_pairs[i].bcol_modules[j]; - if (0 < bcol_module->header_size) { - /* bump the kounter */ - kount++; - /* find the largest header request */ - if (offset < bcol_module->header_size) { - offset = bcol_module->header_size; - } - } - - /* Set bcol mode bits */ - topo->all_bcols_mode &= bcol_module->supported_mode; - } - } - - offset = OPAL_ALIGN(offset, BCOL_HEAD_ALIGN, uint32_t); - /* select largest offset between multiple topologies */ - if (data_offset < (int) offset) { - data_offset = (int) offset; - } - } - - ranks_in_comm = (int *) malloc(comm_size * sizeof(int)); - if (OPAL_UNLIKELY(NULL == ranks_in_comm)) { - ML_ERROR(("Memory allocation failed.")); - return OMPI_ERROR; - } - - for (i = 0; i < comm_size; ++i) { - ranks_in_comm[i] = i; - } - - rc = comm_allreduce_pml(&data_offset, &data_offset, 1, - MPI_INT, ompi_comm_rank(ml_module->comm), - MPI_MAX, comm_size, - ranks_in_comm, ml_module->comm); - free(ranks_in_comm); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - ML_ERROR(("comm_allreduce_pml failed.")); - return OMPI_ERROR; - } - - ml_module->data_offset = (uint32_t) data_offset; - - ML_VERBOSE(10, ("The offset is %d", ml_module->data_offset)); - - return OMPI_SUCCESS; -} - -static int mca_coll_ml_register_bcols(mca_coll_ml_module_t *ml_module) -{ - /* local variables */ - int i, j, index_topo; - int ret = OMPI_SUCCESS; - mca_bcol_base_module_t *bcol_module; - mca_coll_ml_topology_t *topo; - - /* loop over all bcols and register the ml memory block which each */ - for (index_topo = 0; index_topo < COLL_ML_TOPO_MAX; index_topo++) { - topo = &ml_module->topo_list[index_topo]; - if (COLL_ML_TOPO_DISABLED == topo->status) { - /* skip the topology */ - continue; - } - - for (i = 0; i < topo->n_levels; i++) { - for (j = 0; j < topo->component_pairs[i].num_bcol_modules; j++) { - bcol_module = topo->component_pairs[i].bcol_modules[j]; - if (NULL != bcol_module->bcol_memory_init) { - ret = bcol_module->bcol_memory_init(ml_module->payload_block, - ml_module->data_offset, - bcol_module, - (NULL != bcol_module->network_context) ? 
- bcol_module->network_context->context_data: NULL); - if (OMPI_SUCCESS != ret) { - ML_ERROR(("Bcol registration failed on ml level!!")); - return ret; - } - } - } - } - } - - return OMPI_SUCCESS; -} - -static int ml_module_memory_initialization(mca_coll_ml_module_t *ml_module) -{ - int ret; - int nbanks, nbuffers, buf_size; - mca_coll_ml_component_t *cs = &mca_coll_ml_component; - - ml_module->payload_block = mca_coll_ml_allocate_block(cs,ml_module->payload_block); - - if (NULL == ml_module->payload_block) { - ML_VERBOSE(1, ("mca_coll_ml_allocate_block exited with error.")); - return OMPI_ERROR; - } - - /* get memory block parameters */ - nbanks = cs->n_payload_mem_banks; - nbuffers = cs->n_payload_buffs_per_bank; - buf_size = cs->payload_buffer_size; - - ML_VERBOSE(10, ("Call for initialize block.")); - - ret = mca_coll_ml_initialize_block(ml_module->payload_block, - nbuffers, nbanks, buf_size, ml_module->data_offset, - NULL); - if (OMPI_SUCCESS != ret) { - return ret; - } - - ML_VERBOSE(10, ("Call for register bcols.")); - - /* inititialize the memory with all of the bcols: - loop through the bcol modules and invoke the memory init */ - ret = mca_coll_ml_register_bcols(ml_module); - if (OMPI_SUCCESS != ret) { - ML_ERROR(("mca_coll_ml_register_bcols returned an error.")); - /* goto CLEANUP; */ - return ret; - } - - return OMPI_SUCCESS; -} - -/* do some sanity checks */ -static int check_global_view_of_subgroups( int n_procs_selected, - int n_procs_in, int ll_p1, int* all_selected, - mca_sbgp_base_module_t *module ) -{ - /* local variables */ - int ret=OMPI_SUCCESS; - int i, sum; - - bool local_leader_found=false; - - /* is there a single local-leader */ - for (i = 0; i < n_procs_selected; i++) { - if( ll_p1 == -all_selected[module->group_list[i]]) { - /* found the local leader */ - if( local_leader_found ) { - /* more than one local leader - don't know how to - * handle this, so bail - */ - ML_VERBOSE(1, ("More than a single leader for a group.")); - ret=OMPI_ERROR; - goto exit_ERROR; - } else { - local_leader_found=true; - } - } - } - - /* check to make sure that all agree on the same size of - * the group - */ - sum=0; - for (i = 0; i < n_procs_in; i++) { - if(ll_p1==all_selected[i]) { - sum++; - } else if( ll_p1 == -all_selected[i]) { - sum++; - } - } - if( sum != n_procs_selected ) { - ML_VERBOSE(1, ("number of procs in the group unexpected. Expected %d Got %d",n_procs_selected,sum)); - ret=OMPI_ERROR; - goto exit_ERROR; - } - /* check to make sure that all have the same list of ranks. 
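These checks decode the +/-(root+1) encoding established during subgroup discovery: every rank publishes the root of its subgroup plus one, negated if it is the local leader, so a leader of the group rooted at rank 0 stays distinguishable from an unselected rank. A small sketch of the decode step (hypothetical helper, not OMPI API):

#include <stdio.h>
#include <stdlib.h>

static void decode_selection(int v, int *root, int *is_leader)
{
    *is_leader = (v < 0);        /* negative sign marks the local leader */
    *root = abs(v) - 1;          /* undo the +1 offset */
}

int main(void)
{
    int root, leader;
    decode_selection(-1, &root, &leader);   /* leader of the group rooted at 0 */
    printf("root=%d leader=%d\n", root, leader);
    decode_selection(4, &root, &leader);    /* plain member, root is rank 3 */
    printf("root=%d leader=%d\n", root, leader);
    return 0;
}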
- */ - for (i = 0; i < n_procs_selected; i++) { - if(ll_p1!=all_selected[module->group_list[i]] && - ll_p1!=-all_selected[module->group_list[i]] ) { - ret=OMPI_ERROR; - ML_VERBOSE(1, ("Mismatch in rank list - element #%d - %d ",i,all_selected[module->group_list[i]])); - goto exit_ERROR; - } - } - - /* return */ - return ret; - - exit_ERROR: - /* return */ - return ret; -} - -static int ml_init_k_nomial_trees(mca_coll_ml_topology_t *topo, int *list_of_ranks_in_all_subgroups, int my_rank_in_list) -{ - int *list_n_connected; - int group_size, rank, i, j, knt, offset, k, my_sbgp = 0; - int my_root, level_one_knt; - sub_group_params_t *array_of_all_subgroup_ranks = topo-> - array_of_all_subgroups; - int num_total_subgroups = topo->number_of_all_subgroups; - int n_hier = topo->n_levels; - - hierarchy_pairs *pair = NULL; - mca_coll_ml_leader_offset_info_t *loc_leader = (mca_coll_ml_leader_offset_info_t *) - malloc(sizeof(mca_coll_ml_leader_offset_info_t)*(n_hier+1)); - - if (NULL == loc_leader) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* first thing I want to know is where does the first level end */ - level_one_knt = 0; - - while (level_one_knt < num_total_subgroups && 0 == array_of_all_subgroup_ranks[level_one_knt].level_in_hierarchy) { - level_one_knt++; - } - - /* fprintf(stderr,"PPP %d %d %d ", level_one_knt, array_of_all_subgroup_ranks[0].level_in_hierarchy, num_total_subgroups); */ - - /* I want to cache this number for unpack*/ - array_of_all_subgroup_ranks->level_one_index = level_one_knt; - - /* determine whether or not ranks are contiguous */ - topo->ranks_contiguous = true; - for (i = 0, knt = 0 ; i < level_one_knt && topo->ranks_contiguous ; ++i) { - for (j = 0 ; j < array_of_all_subgroup_ranks[i].n_ranks ; ++j, ++knt) { - if (knt != list_of_ranks_in_all_subgroups[knt]) { - topo->ranks_contiguous = false; - break; - } - } - } - - loc_leader[0].offset = 0; - - /* now find my first level offset, and my index in level one */ - for (i = 0, loc_leader[0].level_one_index = -1 ; i < level_one_knt ; ++i) { - offset = array_of_all_subgroup_ranks[i].index_of_first_element; - for (k = 0 ; k < array_of_all_subgroup_ranks[i].n_ranks ; ++k) { - rank = list_of_ranks_in_all_subgroups[k + offset]; - if (rank == my_rank_in_list) { - loc_leader[0].offset = offset; - loc_leader[0].level_one_index = k; - i = level_one_knt; - break; - } - } - } - - /* every rank MUST appear at level 0 */ - assert (loc_leader[0].level_one_index > -1); - - for (i = 0 ; i < n_hier ; ++i) { - pair = &topo->component_pairs[i]; - /* find the size of the group */ - group_size = pair->subgroup_module->group_size; - /* malloc some memory for the new list to cache - on the bcol module - */ - list_n_connected = (int *) calloc(group_size, sizeof (int)); - if (NULL == list_n_connected) { - free (loc_leader); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* next thing to do is to find out which subgroup I'm in - * at this particular level - */ - for (j = 0, knt = 0, my_sbgp = -1 ; j < num_total_subgroups && 0 > my_sbgp ; ++j) { - offset = array_of_all_subgroup_ranks[j].index_of_first_element; - - /* in the 1-level case just skip any group of size 1 and move on - * to the real group. 
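The ranks_contiguous test earlier in this function boils down to checking that the k-th entry of the flattened level-zero rank list equals k. The same test in isolation, with toy data:

#include <stdbool.h>
#include <stdio.h>

static bool ranks_contiguous(const int *flat, int n)
{
    for (int k = 0; k < n; ++k) {
        if (flat[k] != k) {
            return false;        /* rank out of place: not contiguous */
        }
    }
    return true;
}

int main(void)
{
    int a[] = { 0, 1, 2, 3 };
    int b[] = { 0, 2, 1, 3 };
    printf("a: %d, b: %d\n", ranks_contiguous(a, 4), ranks_contiguous(b, 4));
    return 0;
}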
*/ - if (1 == n_hier && 1 == array_of_all_subgroup_ranks[j].n_ranks) { - continue; - } - - for (k = 0; k < array_of_all_subgroup_ranks[j].n_ranks; k++) { - rank = list_of_ranks_in_all_subgroups[k+offset]; - /* we can not use the level_in_topology flag to determine the - * level since not all levels may be represented so keep a count - * of the number of times this ranks shows up. when it has been - * seen the correct number of times we are done. */ - if (rank == my_rank_in_list && ++knt == (i+1)){ - my_sbgp = j; - /* tag whether I am a local leader or not at this level */ - loc_leader[i].leader = (my_rank_in_list == array_of_all_subgroup_ranks[j].root_rank_in_comm); - break; - } - } - } - - /* should have found a subgroup */ - assert (my_sbgp > -1); - - for (j = 0 ; j < group_size ; ++j) { - list_n_connected[j] = array_of_all_subgroup_ranks[my_sbgp]. - rank_data[j].num_of_ranks_represented; - } - - /* now find all sbgps that the root of this sbgp belongs to - * previous to this "my_sbgp" */ - my_root = array_of_all_subgroup_ranks[my_sbgp].root_rank_in_comm; - - for (j = 0, knt = 0 ; j < my_sbgp ; ++j) { - if (array_of_all_subgroup_ranks[j].root_rank_in_comm == my_root) { - for (k = 1; k < array_of_all_subgroup_ranks[j].n_ranks; ++k) { - knt += array_of_all_subgroup_ranks[j].rank_data[k]. - num_of_ranks_represented; - } - - } - } - - /* and then I add one for the root itself */ - list_n_connected[0] = knt + 1; - - /* now cache this on the bcol module */ - pair->bcol_modules[0]->list_n_connected = list_n_connected; - - /* I should do one more round here and figure out my offset at this level - * the calculation is simple: Am I a local leader in this level? If so, then I keep the offset - * from the previous level. Else, I find out how "far away" the local leader is from me and set - * this as the new offset. - */ - /* do this after first level */ - if (i > 0) { - /* if I'm not the local leader */ - if( !loc_leader[i].leader) { - /* then I am not a local leader at this level */ - offset = array_of_all_subgroup_ranks[my_sbgp].index_of_first_element; - for (k = 0, knt = 0 ; k < array_of_all_subgroup_ranks[my_sbgp].n_ranks ; ++k) { - rank = list_of_ranks_in_all_subgroups[k+offset]; - if (rank == my_rank_in_list) { - break; - } - - knt += list_n_connected[k]; - } - loc_leader[i].offset = loc_leader[i-1].offset - knt; - } else { - /* if I am the local leader, then keep the same offset */ - loc_leader[i].offset = loc_leader[i-1].offset; - } - } - - pair->bcol_modules[0]->hier_scather_offset = loc_leader[i].offset; - - /*setup the tree */ - pair->bcol_modules[0]->k_nomial_tree(pair->bcol_modules[0]); - } - - /* see if I am in the last subgroup, if I am, - * then I am a root for the bcast operation - */ - offset = array_of_all_subgroup_ranks[n_hier - 1].index_of_first_element; - for( i = 0; i < array_of_all_subgroup_ranks[n_hier - 1].n_ranks; i++){ - rank = list_of_ranks_in_all_subgroups[i + offset]; - if( rank == my_rank_in_list ){ - loc_leader[n_hier - 1].offset = 0; - loc_leader[n_hier - 1].leader = true; - } - } - - /* set the last offset to 0 and set the leader according to your top level position */ - loc_leader[n_hier].offset = 0; - if(loc_leader[n_hier - 1].leader){ - loc_leader[n_hier].leader = true; - } else { - loc_leader[n_hier].leader = false; - } - - /* what other goodies do I want to cache on the ml-module? 
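The offset bookkeeping above is the subtle part of this routine: a rank that is not the local leader inherits the previous level's offset minus the number of ranks represented by the subgroup members that precede it. A standalone sketch of that arithmetic, with hypothetical data:

#include <stdio.h>

static int non_leader_offset(int prev_offset, const int *list_n_connected,
                             const int *group_ranks, int group_size, int my_rank)
{
    int knt = 0;
    for (int k = 0; k < group_size; ++k) {
        if (group_ranks[k] == my_rank) {
            break;                       /* found myself, stop counting */
        }
        knt += list_n_connected[k];      /* ranks represented ahead of me */
    }
    return prev_offset - knt;
}

int main(void)
{
    int ranks[] = { 4, 0, 2 };   /* subgroup members, as communicator ranks */
    int conn[]  = { 3, 1, 1 };   /* ranks each member represents */
    /* rank 2 sits behind members representing 3 + 1 ranks */
    printf("offset=%d\n", non_leader_offset(8, conn, ranks, 3, 2));   /* 4 */
    return 0;
}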
*/ - topo->hier_layout_info = loc_leader; - - return OMPI_SUCCESS; -} - -static int ml_setup_full_tree_data(mca_coll_ml_topology_t *topo, - ompi_communicator_t *comm, - int my_highest_group_index, int *map_to_comm_ranks, - int *num_total_subgroups, sub_group_params_t **array_of_all_subgroup_ranks, - int **list_of_ranks_in_all_subgroups) -{ - - int ret = OMPI_SUCCESS; - int i, in_buf, root, my_rank,sum; - int in_num_total_subgroups = *num_total_subgroups; - int *scratch_space = NULL; - - /* figure out who holds all the sub-group information - only those - * ranks in the top level know this data at this point */ - my_rank = ompi_comm_rank(comm); - if( (my_highest_group_index == topo->global_highest_hier_group_index ) - && - ( my_rank == - topo->component_pairs[topo->n_levels-1].subgroup_module->group_list[0]) - ) { - in_buf=my_rank; - } else { - /* since this will be a sum allreduce - contributing 0 will not - * change the value */ - in_buf=0; - } - ret = comm_allreduce_pml(&in_buf, &root, 1, MPI_INT, - my_rank, MPI_SUM, - ompi_comm_size(comm), map_to_comm_ranks, - comm); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("comm_allreduce_pml failed. root reduction")); - goto exit_ERROR; - } - - /* broadcast the number of groups */ - ret=comm_bcast_pml(num_total_subgroups, root, 1, - MPI_INT, my_rank, ompi_comm_size(comm), - map_to_comm_ranks,comm); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("comm_bcast_pml failed. num_total_subgroups bcast")); - goto exit_ERROR; - } - - scratch_space=(int *)malloc(4*sizeof(int)*(*num_total_subgroups)); - if (OPAL_UNLIKELY(NULL == scratch_space)) { - ML_VERBOSE(10, ("Cannot allocate memory scratch_space.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - - if( my_rank == root ) { - for(i=0 ; i < (*num_total_subgroups) ; i++ ) { - scratch_space[4*i]=(*array_of_all_subgroup_ranks)[i].root_rank_in_comm; - scratch_space[4*i+1]=(*array_of_all_subgroup_ranks)[i].n_ranks; - scratch_space[4*i+2]=(*array_of_all_subgroup_ranks)[i].index_of_first_element; - scratch_space[4*i+3]=(*array_of_all_subgroup_ranks)[i].level_in_hierarchy; - } - } - ret=comm_bcast_pml(scratch_space, root, 4*(*num_total_subgroups), - MPI_INT, my_rank, ompi_comm_size(comm), - map_to_comm_ranks, comm); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("comm_allreduce_pml failed. 
scratch_space bcast")); - goto exit_ERROR; - } - if( my_rank != root ) { - if( in_num_total_subgroups != (*num_total_subgroups) ) { - /* free old array_of_all_subgroup_ranks array - need to fill it - * with the global data - assume that if the array size is the - * same, all data is correct, and in the same order */ - free((*array_of_all_subgroup_ranks)); - (*array_of_all_subgroup_ranks)=(sub_group_params_t *) - malloc(sizeof(sub_group_params_t)*(*num_total_subgroups)); - if (OPAL_UNLIKELY(NULL == (*array_of_all_subgroup_ranks))) { - ML_VERBOSE(10, ("Cannot allocate memory array_of_all_subgroup_ranks.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - for(i=0 ; i < (*num_total_subgroups) ; i++ ) { - (*array_of_all_subgroup_ranks)[i].root_rank_in_comm=scratch_space[4*i]; - (*array_of_all_subgroup_ranks)[i].n_ranks=scratch_space[4*i+1]; - (*array_of_all_subgroup_ranks)[i].index_of_first_element=scratch_space[4*i+2]; - (*array_of_all_subgroup_ranks)[i].level_in_hierarchy=scratch_space[4*i+3]; - } - } - } - /* figure out how many entries in all the subgroups - ranks that apear - * in k subgroups appear k times in the list */ - sum=0; - for(i=0 ; i < (*num_total_subgroups) ; i++ ) { - sum+=(*array_of_all_subgroup_ranks)[i].n_ranks; - } - if( in_num_total_subgroups != (*num_total_subgroups) && sum > 0 ) { - (*list_of_ranks_in_all_subgroups)=(int *) - realloc((*list_of_ranks_in_all_subgroups),sizeof(int)*sum); - if (OPAL_UNLIKELY(NULL == (*list_of_ranks_in_all_subgroups))) { - ML_VERBOSE(10, ("Cannot allocate memory *list_of_ranks_in_all_subgroups.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - } - ret = comm_bcast_pml(*list_of_ranks_in_all_subgroups, root, sum, - MPI_INT, my_rank, ompi_comm_size(comm), - map_to_comm_ranks, comm); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("Bcast failed for list_of_ranks_in_all_subgroups ")); - goto exit_ERROR; - } - - /* - * The data that is needed for a given rooted operation is: - * - subgroup,rank information for the source of the data. - * That is, which rank in the subgroup will recieve the - * data and distribute to the rest of the ranks. - * - the ranks that this data will be sent to. This is - * described by the ranks in the current subgroups, and - * the subroups for which each rank is a proxy for, - * recursively in the communication tree. - * - * The assumption is that data will be delived to each subgroup - * in an order, that is, all the data destined to subgroup rank 0 - * will appear 1st, then that for rank 1, etc. This implies that - * the data destined to rank 0, for example, will include the - * data for rank 0, as well as all the ranks that appear following - * it in the tree - in order. - * - * Proxies: A rank may be a proxy for more than a single subgroup. - * When a rank is proxy for more than a single subgroup, we - * maintain a fixed order of subgroups for which this is a - * proxy, with an assumption that the data for the first subgroup - * appears first in the list, then that for the second, etc. - * Since the data for the proxy (which is a member of this subgroup) - * appears only once in the data list, the assumption is that the - * proxy will be the root for this operation, and it is the first - * set of data in the data list. This means, that the data offset - * for the second ranks in each subgroup will include all the data - * for the previous subgroups, recursively. This lets us maintain - * the simple addressing scheme of contigous data per rank in - * the subcommunicator. 
- * - * The information needed for each rank in the subgroup are the - * group indices for which it is a proxy. - */ - /* - * fill in the vertecies in the hierarchichal communications graph - */ - - /* figure out how detailed connection information, so that we can - * can figure out how the data needs to be ordered for sending it - * though the tree in various collective algorithms that have per-rank - * data associated with them. - */ - - /* this function does a depth first traversal of the tree data and - * builds rank data and ensures that hierarchy level 0 is in the - * correct order for collective algorithms with per-rank data. - */ - coll_ml_parse_topology (*array_of_all_subgroup_ranks, *num_total_subgroups, - *list_of_ranks_in_all_subgroups, ompi_comm_size (comm)); - - /* The list of ranks in all subgroups is the same as the old sort list. This is the same - * order needed for both scatter and gather. */ - topo->sort_list = (*list_of_ranks_in_all_subgroups); - - /* return */ - exit_ERROR: - if (scratch_space) { - free(scratch_space); - } - - return ret; -} - -static int get_new_subgroup_data (int32_t *all_selected, int size_of_all_selected, - sub_group_params_t **sub_group_meta_data, - int *size_of_sub_group_meta_data, - int **list_of_ranks_in_all_subgroups, - int *size_of_list_of_ranks_in_all_subgroups, - int *num_ranks_in_list_of_ranks_in_all_subgroups, - int *num_total_subgroups, - int *map_to_comm_ranks, int level_in_hierarchy - ) { - - /* local data */ - int rc=OMPI_SUCCESS; - int rank_in_list,old_sg_size=(*num_total_subgroups); - int sg_index, array_id, offset, sg_id; - sub_group_params_t *dummy1 = NULL; - int32_t **dummy2 = NULL; - int32_t *dummy3 = NULL; - int32_t **temp = NULL; - int knt1 = 0, - knt2 = 0, - knt3 = 0; - - /* loop over all elements in the array of ranks selected, looking for - * newly selected ranks - these form the new subgroups */ - for(rank_in_list = 0 ; rank_in_list < size_of_all_selected ; rank_in_list++ ) { - int sg_root, current_rank_in_comm; - /* get root's rank in the communicator */ - sg_root=all_selected[rank_in_list]; - - if( 0 == sg_root ) { - /* this rank not selected - go to the next rank */ - continue; - } - - if( sg_root < 0 ) { - sg_root=-sg_root-1; - } else { - sg_root-=1; - } - - current_rank_in_comm=map_to_comm_ranks[rank_in_list]; - - /* loop over existing groups, and see if this is a member of a new group - * or if this group has already been found. 
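get_new_subgroup_data below leans heavily on the PROVIDE_SUFFICIENT_MEMORY macro defined near the top of this file. The essential pattern is a grow-on-demand realloc through a temporary pointer, so a failed allocation neither leaks nor clobbers the still-valid old buffer. A minimal sketch of the same pattern as a function:

#include <stdio.h>
#include <stdlib.h>

static int ensure_capacity(int **arr, int *capacity, int in_use,
                           int n_to_add, int n_to_grow)
{
    if (in_use + n_to_add > *capacity) {
        int *tmp = realloc(*arr, sizeof(int) * (*capacity + n_to_grow));
        if (NULL == tmp) {
            return -1;              /* old buffer is still valid and owned */
        }
        *arr = tmp;
        *capacity += n_to_grow;
    }
    return 0;
}

int main(void)
{
    int *a = NULL, cap = 0;
    for (int used = 0; used < 12; ++used) {
        if (0 != ensure_capacity(&a, &cap, used, 1, 5)) {
            return 1;
        }
        a[used] = used;
    }
    printf("capacity=%d last=%d\n", cap, a[11]);    /* capacity=15 last=11 */
    free(a);
    return 0;
}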
- */ - for (sg_index = old_sg_size, sg_id = -1 ; sg_index < (*num_total_subgroups) ; sg_index++) { - if ((*sub_group_meta_data)[sg_index].root_rank_in_comm == sg_root) { - /* add rank to the list */ - (*sub_group_meta_data)[sg_index].n_ranks++; - sg_id = sg_index; - break; - } - } - - if (-1 == sg_id) { - /* did not find existing sub-group, create new one */ - /* intialize new subgroup */ - PROVIDE_SUFFICIENT_MEMORY((*sub_group_meta_data), dummy1, - (*size_of_sub_group_meta_data), - sub_group_params_t, (*num_total_subgroups), 1, 5); - if (OPAL_UNLIKELY(NULL == (*sub_group_meta_data))) { - ML_VERBOSE(10, ("Cannot allocate memory for sub_group_meta_data.")); - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - /* do this for the temporary memory slots */ - PROVIDE_SUFFICIENT_MEMORY(temp, dummy2, - knt1, int32_t *, knt2, 1, 5); - if (OPAL_UNLIKELY(NULL == temp)) { - ML_VERBOSE(10, ("Cannot allocate memory for temporary storage")); - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - (*sub_group_meta_data)[(*num_total_subgroups)].root_rank_in_comm = sg_root; - (*sub_group_meta_data)[(*num_total_subgroups)].n_ranks = 1; - - /* no need for this here - use a temporary ptr */ - temp[knt2]= - (int *)calloc(size_of_all_selected, sizeof(int)); - if (OPAL_UNLIKELY(NULL == temp[knt2] ) ){ - ML_VERBOSE(10, ("Cannot allocate memory for sub_group_meta_data.")); - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - sg_id = (*num_total_subgroups)++; - knt3 = ++knt2; - } else { - knt3 = sg_id - old_sg_size + 1; - } - - array_id = (*sub_group_meta_data)[sg_id].n_ranks-1; - temp[knt3-1][array_id] = current_rank_in_comm; - } - - /* linearize the data - one rank will ship this to all the other - * ranks the communicator - */ - /* make sure there is enough memory to hold the list */ - PROVIDE_SUFFICIENT_MEMORY((*list_of_ranks_in_all_subgroups),dummy3, - (*size_of_list_of_ranks_in_all_subgroups), - int, (*num_ranks_in_list_of_ranks_in_all_subgroups), - size_of_all_selected,size_of_all_selected); - if (OPAL_UNLIKELY(NULL == (*list_of_ranks_in_all_subgroups))) { - ML_VERBOSE(10, ("Cannot allocate memory for list_of_ranks_in_all_subgroups.")); - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - - /* loop over new subgroups */ - for( sg_id=old_sg_size ; sg_id < (*num_total_subgroups) ; sg_id++ ) { - offset=(*num_ranks_in_list_of_ranks_in_all_subgroups); - - (*sub_group_meta_data)[sg_id].index_of_first_element=offset; - - if ((*sub_group_meta_data)[sg_id].n_ranks && NULL == temp) { - return OMPI_ERROR; - } - - for( array_id=0 ; array_id < (*sub_group_meta_data)[sg_id].n_ranks ; - array_id++ ) { - (*list_of_ranks_in_all_subgroups)[offset+array_id]= - temp[sg_id-old_sg_size][array_id]; - } - (*num_ranks_in_list_of_ranks_in_all_subgroups)+= - (*sub_group_meta_data)[sg_id].n_ranks; - (*sub_group_meta_data)[sg_id].level_in_hierarchy=level_in_hierarchy; - /* this causes problems on XT5 starting at 6144 cores */ - free(temp[sg_id-old_sg_size]); - } - - /* clean up temporary storage */ - exit_ERROR: - if (NULL != temp) { - free(temp); - } - - /* return */ - return rc; -} - -static int topo_parse (sub_group_params_t *sub_group_meta_data, int index, int *dst, int *src, int *dst_offset) -{ - int src_offset = sub_group_meta_data[index].index_of_first_element; - int total_ranks_represented = 0, ranks_represented; - - if (0 == sub_group_meta_data[index].level_in_hierarchy) { - ML_VERBOSE(10, ("Copying data for index %d to %d. 
Ranks at this level: %d", index, *dst_offset, - sub_group_meta_data[index].n_ranks)); - - /* move level one subgroup data */ - memmove (dst + *dst_offset, src + src_offset, sizeof (int) * sub_group_meta_data[index].n_ranks); - - /* update the offset of this subgroup since it may have been moved */ - sub_group_meta_data[index].index_of_first_element = *dst_offset; - *dst_offset += sub_group_meta_data[index].n_ranks; - } - - ML_VERBOSE(10, ("Subgroup %d has %d ranks. level = %d", index, sub_group_meta_data[index].n_ranks, - sub_group_meta_data[index].level_in_hierarchy)); - - /* fill in subgroup ranks */ - sub_group_meta_data[index].rank_data=(rank_properties_t *) - malloc(sizeof(rank_properties_t) * sub_group_meta_data[index].n_ranks); - if (OPAL_UNLIKELY(NULL == sub_group_meta_data[index].rank_data)) { - ML_VERBOSE(10, ("Cannot allocate memory for rank_data ")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* recurse on all subgroups */ - for (int j = 0 ; j < sub_group_meta_data[index].n_ranks ; ++j) { - int rank = src[j + src_offset]; - int next_level; - - /* determine if this rank is the root of the subgroup */ - if (rank == sub_group_meta_data[index].root_rank_in_comm) { - sub_group_meta_data[index].root_index = j; - } - - sub_group_meta_data[index].rank_data[j].leaf = true; - sub_group_meta_data[index].rank_data[j].rank = rank; - - if (sub_group_meta_data[index].level_in_hierarchy) { - ML_VERBOSE(10, ("Looking for subgroup containing %d as root", rank)); - - for (next_level = index - 1 ; next_level >= 0 ; --next_level) { - if (rank == sub_group_meta_data[next_level].root_rank_in_comm) { - ML_VERBOSE(10, ("Subgroup %d has root %d", next_level, rank)); - break; - } - } - - /* all ranks are represented in the lowest level. this subgroup is not at the lowest level - * so it must be a root at a lower level */ - assert (next_level >= 0); - - /* not a leaf node */ - sub_group_meta_data[index].rank_data[j].leaf = false; - ranks_represented = topo_parse (sub_group_meta_data, next_level, dst, src, dst_offset); - if (0 > ranks_represented) { - return ranks_represented; - } - sub_group_meta_data[index].rank_data[j].num_of_ranks_represented = ranks_represented; - - total_ranks_represented += ranks_represented; - } else { - /* leaf node */ - sub_group_meta_data[index].rank_data[j].leaf = true; - sub_group_meta_data[index].rank_data[j].num_of_ranks_represented = 1; - - total_ranks_represented++; - } - - ML_VERBOSE(10, ("Group %d, level %d, index %d, rank %d represents %d ranks", index, - sub_group_meta_data[index].level_in_hierarchy, j, rank, - sub_group_meta_data[index].rank_data[j].num_of_ranks_represented)); - } - - return total_ranks_represented; -} - -/* put level one in leaf order */ -static int coll_ml_parse_topology (sub_group_params_t *sub_group_meta_data, size_t sub_group_count, - int *list_of_ranks_in_all_subgroups, int level_one_size) -{ - int *tmp_data; - int offset, rc; - - tmp_data = calloc (level_one_size, sizeof (int)); - if (NULL == tmp_data) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* do a DFS parse of the topology and ensure that level 1 is in the correct scatter/gather order */ - offset = 0; - rc = topo_parse (sub_group_meta_data, sub_group_count - 1, tmp_data, list_of_ranks_in_all_subgroups, &offset); - if (0 > rc) { - free (tmp_data); - return rc; - } - - /* all ranks in level one should be represented in the re-order buffer */ - assert (offset == level_one_size); - - /* copy re-ordered level 1 (0) */ - if (0 != offset) { - /* copy new level one data back into the list of all 
subgroups */ - memmove (list_of_ranks_in_all_subgroups, tmp_data, sizeof (int) * offset); - } - - free (tmp_data); - - return OMPI_SUCCESS; -} - -static int append_new_network_context(hierarchy_pairs *pair) -{ - int i; - int rc; - mca_coll_ml_lmngr_t *memory_manager = &mca_coll_ml_component.memory_manager; - bcol_base_network_context_t *nc = NULL; - - for (i = 0; i < pair->num_bcol_modules; i++) { - nc = pair->bcol_modules[i]->network_context; - if (NULL != nc) { - rc = mca_coll_ml_lmngr_append_nc(memory_manager, nc); - if (OMPI_SUCCESS != rc) { - return OMPI_ERROR; - } - /* caching the network context id on bcol */ - pair->bcol_modules[i]->context_index = nc->context_id; - } - } - - return OMPI_SUCCESS; -} - -static int ml_module_set_small_msg_thresholds(mca_coll_ml_module_t *ml_module) -{ - const mca_coll_ml_topology_t *topo_info; - mca_bcol_base_module_t *bcol_module; - hierarchy_pairs *pair; - - int i, j, rc, hier, *ranks_in_comm, n_hier, tp, - comm_size = ompi_comm_size(ml_module->comm); - - for (tp = 0; tp < COLL_ML_TOPO_MAX; ++tp) { - topo_info = &ml_module->topo_list[tp]; - if (COLL_ML_TOPO_DISABLED == topo_info->status) { - /* Skip the topology */ - continue; - } - - n_hier = topo_info->n_levels; - for (hier = 0; hier < n_hier; ++hier) { - pair = &topo_info->component_pairs[hier]; - - for (i = 0; i < pair->num_bcol_modules; ++i) { - bcol_module = pair->bcol_modules[i]; - - if (NULL != bcol_module->set_small_msg_thresholds) { - bcol_module->set_small_msg_thresholds(bcol_module); - } - - for (j = 0; j < BCOL_NUM_OF_FUNCTIONS; ++j) { - if (ml_module->small_message_thresholds[j] > - bcol_module->small_message_thresholds[j]) { - ml_module->small_message_thresholds[j] = - bcol_module->small_message_thresholds[j]; - } - } - } - - } - } - - ranks_in_comm = (int *) malloc(comm_size * sizeof(int)); - if (OPAL_UNLIKELY(NULL == ranks_in_comm)) { - ML_ERROR(("Memory allocation failed.")); - return OMPI_ERROR; - } - - for (i = 0; i < comm_size; ++i) { - ranks_in_comm[i] = i; - } - - rc = comm_allreduce_pml(ml_module->small_message_thresholds, - ml_module->small_message_thresholds, - BCOL_NUM_OF_FUNCTIONS, MPI_INT, - ompi_comm_rank(ml_module->comm), MPI_MIN, - comm_size, ranks_in_comm, ml_module->comm); - free(ranks_in_comm); - if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) { - ML_ERROR(("comm_allreduce_pml failed.")); - return OMPI_ERROR; - } - - return OMPI_SUCCESS; -} - -static int mca_coll_ml_read_allbcols_settings(mca_coll_ml_module_t *ml_module, - int n_hierarchies) -{ - int i, j, - ret = OMPI_SUCCESS; - int *ranks_map = NULL, - *bcols_in_use = NULL, - *bcols_in_use_all_ranks = NULL; - bool use_user_bufs, limit_size_user_bufs; - ssize_t length_ml_payload; - int64_t frag_size; - const mca_bcol_base_component_2_0_0_t *bcol_component = NULL; - mca_base_component_list_item_t *bcol_cli = NULL; - int bcol_index; - - /* If this assert fails, it means that you changed initialization - * order and the date offset , that is critical for this section of code, - * have not been initilized. - * DO NOT REMOVE THIS ASSERT !!! - */ - assert(ml_module->data_offset >= 0); - - /* need to figure out which bcol's are participating - * in the hierarchy across the communicator, so that we can set - * appropriate segmentation parameters. 
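The exchange that follows is effectively a bitwise OR across the communicator: each rank marks the bcols it instantiated with a 1, and a MAX reduction yields the set in use anywhere. A plain-C stand-in for what the comm_allreduce_pml call computes:

#include <stdio.h>

static void reduce_max(const int *mine, const int *theirs, int *out, int n)
{
    for (int i = 0; i < n; ++i) {
        out[i] = mine[i] > theirs[i] ? mine[i] : theirs[i];   /* OR on 0/1 flags */
    }
}

int main(void)
{
    int rank0[] = { 1, 0, 0 };   /* this rank instantiated bcol 0 only */
    int rank1[] = { 1, 0, 1 };   /* another rank also uses bcol 2      */
    int all[3];
    reduce_max(rank0, rank1, all, 3);
    printf("%d %d %d\n", all[0], all[1], all[2]);             /* 1 0 1 */
    return 0;
}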
- */ - bcols_in_use = (int *) calloc(2 * n_hierarchies, sizeof(int)); - if (OPAL_UNLIKELY(NULL == bcols_in_use)) { - ML_VERBOSE(10, ("Cannot allocate memory for bcols_in_use.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - /* setup pointers to arrays that will hold bcol parameters. Since - * given bcols are not instantiated in all processes, need to get this - * information from those ranks that have instantiated these - * parameters - */ - bcols_in_use_all_ranks = bcols_in_use+n_hierarchies; - - /* get list of bcols that I am using */ - for (j = 0; j < COLL_ML_TOPO_MAX; j++) { - mca_coll_ml_topology_t *topo_info = &ml_module->topo_list[j]; - if (COLL_ML_TOPO_DISABLED == topo_info->status) { - /* skip the topology */ - continue; - } - - for(i = 0; i < topo_info->n_levels; i++ ) { - int ind; - ind = topo_info->component_pairs[i].bcol_index; - bcols_in_use[ind] = 1; - } - } - - /* set one to one mapping */ - ranks_map = (int *) malloc(sizeof(int) * ompi_comm_size(ml_module->comm)); - if (NULL == ranks_map) { - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - for (i = 0; i < ompi_comm_size(ml_module->comm); i++) { - ranks_map[i] = i; - } - - /* reduce over all the ranks to figure out which bcols are - * participating at this level - */ - ret = comm_allreduce_pml(bcols_in_use, bcols_in_use_all_ranks, - n_hierarchies, MPI_INT, ompi_comm_rank(ml_module->comm), - MPI_MAX, ompi_comm_size(ml_module->comm), - ranks_map, ml_module->comm); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("comm_allreduce_pml failed. bcols_in_use reduction")); - goto exit_ERROR; - } - - /* - * figure out fragmentation parameters - */ - - /* size of ml buffer */ - length_ml_payload = mca_coll_ml_component.payload_buffer_size - ml_module->data_offset; - - /* figure out if data will be segmented for pipelining - - * for non-contiguous data will just use a fragment the size - * of the ml payload buffer */ - - /* check to see if any bcols impose a limit */ - limit_size_user_bufs = false; - use_user_bufs = true; - frag_size = length_ml_payload; - bcol_index = 0; - - OPAL_LIST_FOREACH(bcol_cli, &mca_bcol_base_components_in_use, mca_base_component_list_item_t) { - /* check to see if this bcol is being used */ - if (!bcols_in_use_all_ranks[bcol_index++]) { - /* not in use */ - continue; - } - - bcol_component = (mca_bcol_base_component_2_0_0_t *) bcol_cli->cli_component; - - /* check to see if user buffers can be used */ - if (!bcol_component->can_use_user_buffers) { - /* need to use library buffers, so all will do this */ - use_user_bufs = false; - } - - /* figure out fragment size */ - if (bcol_component->max_frag_size != FRAG_SIZE_NO_LIMIT ){ - /* user buffers need to be limited in size */ - limit_size_user_bufs = true; - - if (0 == frag_size) { - /* nothing set yet */ - frag_size = bcol_component->max_frag_size; - } else if (frag_size < bcol_component->max_frag_size) { - /* stricter constraint on fragment size */ - frag_size = bcol_component->max_frag_size; - } - } - } - - if (!use_user_bufs || limit_size_user_bufs) { - /* we need to limit the user buffer size or use library buffers */ - ml_module->fragment_size = frag_size; - } else { - /* entire message may be processed in single chunk */ - ml_module->fragment_size = FRAG_SIZE_NO_LIMIT; - } - - /* for non-contiguous data - just use the ML buffers */ - ml_module->ml_fragment_size = length_ml_payload; - - /* set whether we can use user buffers */ - ml_module->use_user_buffers = use_user_bufs; - - ML_VERBOSE(10, ("Setting payload size to %d %d
[%d %d]", - ml_module->ml_fragment_size, length_ml_payload, - mca_coll_ml_component.payload_buffer_size, - ml_module->data_offset)); - - exit_ERROR: - if (NULL != ranks_map) { - free(ranks_map); - } - if (NULL != bcols_in_use) { - free(bcols_in_use); - } - - return ret; -} - -static int ml_discover_hierarchy(mca_coll_ml_module_t *ml_module) -{ - ompi_proc_t *my_proc = NULL; - - int n_hierarchies = 0, - i = 0, ret = OMPI_SUCCESS; - - int size_bcol_list, size_sbgp_list; - - size_bcol_list = opal_list_get_size(&mca_bcol_base_components_in_use); - size_sbgp_list = opal_list_get_size(&mca_sbgp_base_components_in_use); - - if ((size_bcol_list != size_sbgp_list) || size_sbgp_list < 1 || size_bcol_list < 1) { - ML_ERROR(("Error: (size of mca_bcol_base_components_in_use = %d)" - " != (size of mca_sbgp_base_components_in_use = %d) or zero.", - size_bcol_list, size_sbgp_list)); - return OMPI_ERROR; - } - - n_hierarchies = size_sbgp_list; - - my_proc = ompi_proc_local(); - /* create the converter, for current implementation we - support homogenius comunicators only */ - ml_module->reference_convertor = - opal_convertor_create(my_proc->super.proc_arch, 0); - - if (OPAL_UNLIKELY(NULL == ml_module->reference_convertor)) { - return OMPI_ERROR; - } - - /* Do loop over all supported hiearchies. - To Do. We would like to have mca parameter that will allow control list - of topolgies that user would like use. Right now we will run - */ - for (i = 0; i < COLL_ML_TOPO_MAX; i++) { - if (COLL_ML_TOPO_ENABLED == ml_module->topo_list[i].status) { - ret = mca_coll_ml_component.topo_discovery_fn[i](ml_module, n_hierarchies); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - return ret; - } - } - } - - /* Local query for bcol header size */ - ret = calculate_buffer_header_size(ml_module); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - return ret; - } - - /* Get BCOL tuning, like support for zero copy, fragment size, and etc. 
- * This query involves global synchronization over all processes */ - ret = mca_coll_ml_read_allbcols_settings(ml_module, n_hierarchies); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - return ret; - } - /* Here is the safe point to call ml_module_memory_initialization; please - be very careful if you decide to move this around. */ - ret = ml_module_memory_initialization(ml_module); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - /* make sure to release just allocated memory */ - mca_coll_ml_free_block(ml_module->payload_block); - return ret; - } - - ret = ml_module_set_small_msg_thresholds(ml_module); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - /* make sure to release just allocated memory */ - mca_coll_ml_free_block(ml_module->payload_block); - return ret; - } - - { - /* Synchronization barrier to make sure that all sides finished - * registering the memory */ - int ret, i; - int *comm_ranks = NULL; - - comm_ranks = (int *)calloc(ompi_comm_size(ml_module->comm), sizeof(int)); - if (OPAL_UNLIKELY(NULL == comm_ranks)) { - ML_VERBOSE(10, ("Cannot allocate memory.")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - for (i = 0; i < ompi_comm_size(ml_module->comm); i++) { - comm_ranks[i] = i; - } - - ret = comm_allreduce_pml(&ret, &i, - 1, MPI_INT, ompi_comm_rank(ml_module->comm), - MPI_MIN, ompi_comm_size(ml_module->comm), comm_ranks, - ml_module->comm); - - free(comm_ranks); - - if (OMPI_SUCCESS != ret) { - ML_ERROR(("comm_allreduce - failed to collect max_comm data")); - return ret; - } - /* Barrier done */ - } - - return ret; -} - -static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module, - mca_coll_ml_topology_t *topo, int n_hierarchies, - const char *exclude_sbgp_name, const char *include_sbgp_name) -{ - /* local variables */ - char *ptr_output = NULL; - sbgp_base_component_keyval_t *sbgp_cli = NULL; - mca_base_component_list_item_t *bcol_cli = NULL; - hierarchy_pairs *pair = NULL; - - mca_sbgp_base_module_t *module = NULL; - ompi_proc_t **copy_procs = NULL, - *my_proc = NULL; - - const mca_sbgp_base_component_2_0_0_t *sbgp_component = NULL; - - - int i_hier = 0, n_hier = 0, ll_p1, bcol_index = 0, - n_procs_in = 0, group_index = 0, n_remain = 0, - i, j, ret = OMPI_SUCCESS, my_rank_in_list = 0, - n_procs_selected = 0, original_group_size = 0, i_am_done = 0, - local_leader, my_rank_in_subgroup, my_rank_in_remaining_list = 0, - my_rank_in_comm; - - int32_t my_lowest_group_index = -1, my_highest_group_index = -1; - - int *map_to_comm_ranks = NULL, *bcols_in_use = NULL; - - int32_t *all_selected = NULL, - *index_proc_selected = NULL; - - short all_reduce_buffer2_in[2]; - short all_reduce_buffer2_out[2]; - sub_group_params_t *array_of_all_subgroup_ranks=NULL; - /* this pointer should probably be an int32_t and not an int type */ - int32_t *list_of_ranks_in_all_subgroups=NULL; - int num_ranks_in_all_subgroups=0,num_total_subgroups=0; - int size_of_array_of_all_subgroup_ranks=0; - int size_of_list_of_ranks_in_all_subgroups=0; - int32_t in_allgather_value; - - if (NULL != exclude_sbgp_name && NULL != include_sbgp_name) { - ret = OMPI_ERROR; - goto exit_ERROR; - } - - ML_VERBOSE(10,("include %s exclude %s size %d", include_sbgp_name, exclude_sbgp_name, n_hierarchies)); - - /* allocate scratch space */ - all_selected = (int32_t *) calloc(ompi_comm_size(ml_module->comm), sizeof(int32_t)); - if (OPAL_UNLIKELY(NULL == all_selected)) { - ML_VERBOSE(10, ("Cannot allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - - map_to_comm_ranks = (int *)
calloc(ompi_comm_size(ml_module->comm), sizeof(int)); - if (OPAL_UNLIKELY(NULL == map_to_comm_ranks)) { - ML_VERBOSE(10, ("Cannot allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - - /* - ** obtain list of procs - */ - /* create private copy for manipulation */ - copy_procs = (ompi_proc_t **) calloc(ompi_comm_size(ml_module->comm), - sizeof(ompi_proc_t *)); - if (OPAL_UNLIKELY(NULL == copy_procs)) { - ML_VERBOSE(10, ("Cannot allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - - for (i = 0; i < ompi_comm_size(ml_module->comm); i++) { - copy_procs[i] = ompi_comm_peer_lookup (ml_module->comm, i); - map_to_comm_ranks[i] = i; - } - - my_rank_in_comm = ompi_comm_rank (ml_module->comm); - n_procs_in = ompi_comm_size(ml_module->comm); - original_group_size = n_procs_in; - - /* setup information for all-reduce over out of band */ - index_proc_selected = (int32_t *) malloc(sizeof(int32_t) * n_procs_in); - if (OPAL_UNLIKELY(NULL == index_proc_selected)) { - ML_VERBOSE(10, ("Cannot allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - - /* get my proc pointer - used to identify myself in the list */ - my_proc = ompi_proc_local(); - my_rank_in_list = ompi_comm_rank(ml_module->comm); - - topo->component_pairs = (hierarchy_pairs *) calloc(n_hierarchies, sizeof(hierarchy_pairs)); - if (OPAL_UNLIKELY(NULL == topo->component_pairs)) { - ML_VERBOSE(10, ("Cannot allocate memory.")); - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - - n_hier = 0; - /* - * Algorithm for subgrouping: - * 1) Start with all the ranks in the communicator - * 2) iterate over all (exclusive) hierarchy selection rules - * A) Apply subgrouping function to the remaining set of ranks - * - After the call to subgrouping subgroup_module->group_list - * has the index of ranks selected, from the list or ranks - * passed in. - * - map_to_comm_ranks maintains the mapping of the remaining - * ranks, to their rank in the communicator - * B) Each rank initializes a scratch array the size of the - * remaining ranks to 0, and then fills in the entry that - * corresponds to itself only with the value -/+R. If the - * rank is the local leader for the subgroup, the value of -R - * is entered, other wise R is entered. R is the root of the - * selected subgroup plus 1, so that for rank 0, +R has a - * different value than -R. - * C) The vector is then reduced, with the results going to all - * ranks, over the list of remaining ranks. As a result, - * the ranks of a given subgroup will show up with the value R, - * for all but the local-leader, which will have the value of -R. - * This is also used for error checking. - * D) subgroup_module->group_list is changed to contain the ranks - * of each member of the group within the communicator. - * E) Local rank with the group is determined. - * F) the list or remaining ranks is compacted, removing all selected - * ranks that are not the local-leader of the group. - * map_to_comm_ranks is also compacted. - * 3) This is terminated once all ranks are selected. 
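Step 2F, the compaction of the remaining-ranks list, is what makes this iteration converge: selected non-leaders drop out, leaders and unselected ranks stay for the next level. A standalone sketch of that step using the encoding described above (leaders carry -R, members +R, unselected ranks 0):

#include <stdio.h>

static int compact(const int *all_selected, int *map_to_comm_ranks, int n,
                   int my_old_index, int *my_new_index)
{
    int n_remain = 0;
    *my_new_index = -1;
    for (int i = 0; i < n; ++i) {
        if (all_selected[i] > 0) {
            continue;                    /* selected non-leader: drop */
        }
        map_to_comm_ranks[n_remain] = map_to_comm_ranks[i];
        if (i == my_old_index) {
            *my_new_index = n_remain;    /* track my own new position */
        }
        n_remain++;
    }
    return n_remain;
}

int main(void)
{
    int sel[] = { -1, 1, 1, -4, 4 };     /* two groups, roots 0 and 3 */
    int map[] = { 0, 1, 2, 3, 4 };
    int me;
    int n = compact(sel, map, 5, 3, &me);
    printf("n_remain=%d my_index=%d keep: %d %d\n", n, me, map[0], map[1]);
    return 0;
}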
- */ - - /* loop over hierarchies */ - sbgp_cli = (sbgp_base_component_keyval_t *) opal_list_get_first(&mca_sbgp_base_components_in_use); - bcol_cli = (mca_base_component_list_item_t *) opal_list_get_first(&mca_bcol_base_components_in_use); - - ML_VERBOSE(10, ("Loop over hierarchies.")); - - i_hier = 0; - while ((opal_list_item_t *) sbgp_cli != opal_list_get_end(&mca_sbgp_base_components_in_use)){ - /* number of processes selected with this sbgp on all ranks */ - int global_n_procs_selected; - - /* silence clang warnings */ - assert (NULL != bcol_cli && NULL != sbgp_cli); - - /* - ** obtain the list of ranks in the current level - */ - - sbgp_component = (mca_sbgp_base_component_2_0_0_t *) sbgp_cli->component.cli_component; - - /* Skip excluded levels */ - if (NULL != exclude_sbgp_name) { - - ML_VERBOSE(10,("EXCLUDE compare %s to %s", include_sbgp_name, - sbgp_component->sbgp_version.mca_component_name)); - if(0 == strcmp(exclude_sbgp_name, - sbgp_component->sbgp_version.mca_component_name)) { - /* take the next element */ - sbgp_cli = (sbgp_base_component_keyval_t *) opal_list_get_next((opal_list_item_t *) sbgp_cli); - bcol_cli = (mca_base_component_list_item_t *) opal_list_get_next((opal_list_item_t *) bcol_cli); - continue; - } - } - - if (NULL != include_sbgp_name) { - ML_VERBOSE(10,("INCLUDE compare %s to %s", include_sbgp_name, - sbgp_component->sbgp_version.mca_component_name)); - if(0 != strcmp(include_sbgp_name, - sbgp_component->sbgp_version.mca_component_name)) { - /* take the next element */ - sbgp_cli = (sbgp_base_component_keyval_t *) opal_list_get_next((opal_list_item_t *) sbgp_cli); - bcol_cli = (mca_base_component_list_item_t *) opal_list_get_next((opal_list_item_t *) bcol_cli); - continue; - } - } - - ML_VERBOSE(10,("Passed include %s exclude %s", include_sbgp_name, exclude_sbgp_name)); - - /* discover subgroup */ - ML_VERBOSE(10, ("Discover subgroup: hier level - %d.", i_hier)); - module = sbgp_component->select_procs(copy_procs, n_procs_in, - ml_module->comm, - sbgp_cli->key_value, &ptr_output); - if (NULL == module) { - /* no module created */ - n_procs_selected = 0; - /* We must continue and participate in the allgather. - * It's not clear that one can enter this conditional - * during "normal" execution. We need to review - * all modules. - */ - - /* THE CODE SNIPPET COMMENTED OUT BELOW IS DANGEROUS CODE THAT - * COULD RESULT IN A HANG - THE "CONTINUE" STATEMENT MAY RESULT IN - * RANKS BYPASSING THE ALLGATHER IN NON-SYMMETRIC CASES - */ - - /* - sbgp_cli = (sbgp_base_component_keyval_t *) opal_list_get_next((opal_list_item_t *) sbgp_cli); - bcol_cli = (mca_base_component_list_item_t *) opal_list_get_next((opal_list_item_t *) bcol_cli); - continue; - */ - - /* Skipping subgroups of size one will cause these processes to be missed in list of level one - * indices. 
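The capitalized warning above deserves a concrete illustration: if any rank takes a path that bypasses a collective the other ranks enter, the job deadlocks. A deliberately broken MPI miniature of exactly that hazard; run on two or more ranks it will hang (the setup here is illustrative, not the ML code path):

#include <mpi.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    int rank, size, one = 1, *all;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    all = (int *) malloc(size * sizeof(int));
    if (1 != rank) {
        /* BUG: rank 1 skips the collective, everyone else blocks forever */
        MPI_Allgather(&one, 1, MPI_INT, all, 1, MPI_INT, MPI_COMM_WORLD);
    }
    free(all);
    MPI_Finalize();
    return 0;
}

This is why the selection loop keeps unselected ranks in the allgather and has them contribute the neutral value 0 rather than skipping the call.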
*/ - } else if (NULL == module->group_list || (1 == module->group_size && i_hier)) { - /* bypass modules that have no group_list */ - n_procs_selected = 0; - OBJ_RELEASE(module); - module=NULL; - } else { - n_procs_selected = module->group_size; - } - - ML_VERBOSE(10, ("Hier level - %d; group size - %d", i_hier, n_procs_selected)); - - /* setup array indicating all procs that were selected */ - for (i = 0; i < n_procs_in; i++) { - index_proc_selected[i] = 0; - } - - /* figure out my rank in the subgroup */ - my_rank_in_subgroup=-1; - ll_p1=-1; - in_allgather_value = 0; - if (n_procs_selected) { - /* I need to contribute to the vector */ - for (group_index = 0; group_index < n_procs_selected; group_index++) { - /* set my rank within the group */ - if (map_to_comm_ranks[module->group_list[group_index]] == my_rank_in_comm) { - my_rank_in_subgroup=group_index; - module->my_index = group_index; - /* currently the indecies are still given in terms of - * the rank in the list of remaining ranks */ - my_rank_in_remaining_list=module->group_list[group_index]; - } - } - - if( -1 != my_rank_in_subgroup ) { - /* I am contributing to this subgroup */ - -#ifdef NEW_LEADER_SELECTION -#if 0 - int lleader_index; - /* Select the local leader */ - lleader_index = coll_ml_select_leader(ml_module,module, map_to_comm_ranks, - copy_procs,n_procs_selected); - - local_leader = map_to_comm_ranks[module->group_list[lleader_index]]; -#endif -#else - - /* local leader is rank within list or remaining ranks */ - local_leader = map_to_comm_ranks[module->group_list[0]]; - -#endif - ML_VERBOSE(10,("The local leader selected for hierarchy %d is rank %d ", - i_hier, local_leader)); - - ll_p1 = local_leader + 1; - if (local_leader == my_rank_in_comm) { - in_allgather_value = - index_proc_selected[my_rank_in_remaining_list] = -ll_p1; - } else { - in_allgather_value = - index_proc_selected[my_rank_in_remaining_list] = ll_p1; - } - } - } - - /* gather the information from all the other remaining ranks */ - ML_VERBOSE(10, ("Call for comm_allreduce_pml.")); - ret = comm_allgather_pml(&in_allgather_value, - all_selected, 1, MPI_INT, my_rank_in_list, - n_procs_in, map_to_comm_ranks ,ml_module->comm); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("comm_allreduce_pml failed.")); - goto exit_ERROR; - } - - /* do some sanity checks */ - if( -1 != my_rank_in_subgroup ) { - ret = check_global_view_of_subgroups(n_procs_selected, - n_procs_in, ll_p1, all_selected, module ); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("check_global_view_of_subgroups failed.")); - goto exit_ERROR; - } - } - - /* - ** change the list of procs stored on the module to ranks within - ** the communicator. 
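The loop below rewrites group_list in place, translating indices into the remaining-ranks list into communicator ranks through map_to_comm_ranks. The translation in isolation, with toy data:

#include <stdio.h>

int main(void)
{
    int map_to_comm_ranks[] = { 0, 3, 5, 9 };   /* remaining index -> comm rank */
    int group_list[]        = { 1, 3 };         /* selected remaining indices   */

    for (int i = 0; i < 2; ++i) {
        group_list[i] = map_to_comm_ranks[group_list[i]];   /* now comm ranks */
    }
    printf("%d %d\n", group_list[0], group_list[1]);        /* 3 9 */
    return 0;
}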
- */ - - ML_VERBOSE(10, ("Change the list of procs; hier level - %d.", i_hier)); - for (group_index = 0; group_index < n_procs_selected; group_index++) { - module->group_list[group_index] = map_to_comm_ranks[module->group_list[group_index]]; - /* set my rank within the group */ - if (module->group_list[group_index] == ompi_comm_rank(ml_module->comm)) { - module->my_index = group_index; - } - } - - /* - * accumulate data on the new subgroups created - */ - /*XXX*/ - global_n_procs_selected = num_ranks_in_all_subgroups; - ret = get_new_subgroup_data(all_selected, n_procs_in, - &array_of_all_subgroup_ranks, - &size_of_array_of_all_subgroup_ranks, - &list_of_ranks_in_all_subgroups, - &size_of_list_of_ranks_in_all_subgroups, - &num_ranks_in_all_subgroups, - &num_total_subgroups, map_to_comm_ranks,i_hier); - - if( OMPI_SUCCESS != ret ) { - ML_VERBOSE(10, (" Error: get_new_subgroup_data returned %d ",ret)); - goto exit_ERROR; - } - - /* the global number of processes selected at this level is the difference - * in the number of procs in all subgroups between this level and the - * last */ - global_n_procs_selected = num_ranks_in_all_subgroups - global_n_procs_selected; - - /* am I done ? */ - i_am_done=0; - if ( (all_selected[my_rank_in_list] == ll_p1) && - /* if I was not a member of any group, still need to continue */ - n_procs_selected ){ - i_am_done = 1; - } - /* get my rank in the list */ - n_remain = 0; - my_rank_in_list = -1; - for (i = 0; i < n_procs_in; i++) { - if (all_selected[i] > 0 ) { - /* this proc will not be used in the next hierarchy */ - continue; - } - /* reset my_rank_in_list, n_procs_in */ - copy_procs[n_remain] = copy_procs[i]; - map_to_comm_ranks[n_remain] = map_to_comm_ranks[i]; - - if (my_proc == copy_procs[n_remain]){ - my_rank_in_list = n_remain; - } - - n_remain++; - } - - /* check to make sure we did not get a size 1 group if more than - * one rank are still remaning to be grouped */ - if ((1 == n_procs_selected) && n_remain > 1) { - OBJ_RELEASE(module); - n_procs_selected = 0; - } - - if( 0 < n_procs_selected ) { - /* increment the level counter */ - pair = &topo->component_pairs[n_hier]; - - /* add this to the list of sub-group/bcol pairs in use */ - pair->subgroup_module = module; - pair->bcol_component = (mca_bcol_base_component_t *) - ((mca_base_component_list_item_t *) bcol_cli)->cli_component; - - pair->bcol_index = bcol_index; - - /* create bcol modules */ - ML_VERBOSE(10, ("Create bcol modules.")); - pair->bcol_modules = pair->bcol_component->collm_comm_query(module, &pair->num_bcol_modules); - /* failed to create a new module */ - if (OPAL_UNLIKELY(NULL == pair->bcol_modules)) { - ML_VERBOSE(10, ("Failed to create new modules.")); - ret = OMPI_ERROR; - goto exit_ERROR; - } - - if (pair->bcol_component->need_ordering) { - topo->topo_ordering_info.num_bcols_need_ordering += pair->num_bcol_modules; - } - - /* Append new network contexts to our memory managment */ - ML_VERBOSE(10, ("Append new network contexts to our memory managment.")); - if (OPAL_UNLIKELY(OMPI_SUCCESS != append_new_network_context(pair))) { - ML_VERBOSE(10, ("Exit with error. 
- append new network context")); - ret = OMPI_ERROR; - goto exit_ERROR; - } - - for (i = 0; i < pair->num_bcol_modules; ++i) { - /* set the starting sequence number */ - pair->bcol_modules[i]->squence_number_offset = - mca_coll_ml_component.base_sequence_number; - - /* cache the sub-group size */ - pair->bcol_modules[i]->size_of_subgroup= - module->group_size; - - /* set the bcol id */ - pair->bcol_modules[i]->bcol_id = (int16_t) bcol_index; - - /* Set bcol mode bits */ - topo->all_bcols_mode &= (( mca_bcol_base_module_t *) pair->bcol_modules[i])->supported_mode; - } - - /* - * set largest power of 2 for this group - */ - module->n_levels_pow2 = ml_fls(module->group_size); - /* silence a clang warning */ - assert (module->n_levels_pow2 > 0 && module->n_levels_pow2 < 32); - module->pow_2 = 1 << module->n_levels_pow2; - - n_hier++; - - if (-1 == my_lowest_group_index) { - my_lowest_group_index = bcol_index; - } - - my_highest_group_index = bcol_index; - } - - /* if n_remain is 1, and the communicator size is not 1, and module - ** is not NULL, I am done - */ - if ((1 == n_remain) && (1 < original_group_size) && - (NULL != module)) { - i_am_done = 1; - } - - /* am I done ? */ - if (1 == i_am_done) { - /* nothing more to do */ - goto SelectionDone; - } - - /* take the next element */ - sbgp_cli = (sbgp_base_component_keyval_t *) opal_list_get_next((opal_list_item_t *) sbgp_cli); - bcol_cli = (mca_base_component_list_item_t *) opal_list_get_next((opal_list_item_t *) bcol_cli); - - /* if no processes were selected anywhere with this sbgp module don't bother - * incrementing the hierarchy index. this resolves issues where (for example) - * process binding is not enabled or supported. */ - if (global_n_procs_selected) { - /* The way initialization is currently written *all* ranks MUST appear - * in the first level (0) of the hierarchy. If any rank is not in the first - * level then the calculation of gather/scatter offsets will be wrong. - * NTH: DO NOT REMOVE this assert until this changes! */ - assert (i_hier || global_n_procs_selected == n_procs_in); - i_hier++; - } - - ++bcol_index; - - n_procs_in = n_remain; - } - - SelectionDone: - - if (topo->topo_ordering_info.num_bcols_need_ordering > 0) { - for (j = 0; j < n_hier; ++j) { - pair = &topo->component_pairs[j]; - if (pair->bcol_component->need_ordering) { - for (i = 0; i < pair->num_bcol_modules; ++i) { - pair->bcol_modules[i]->next_inorder = &topo->topo_ordering_info.next_inorder; - } - } - } - } - - /* If I was not done, it means that we skipped all subgroups and no hierarchy was build */ - if (0 == i_am_done) { - - if (NULL != include_sbgp_name || NULL != exclude_sbgp_name) { - /* User explicitly asked for specific type of topology, which generates empty group */ - opal_show_help("help-mpi-coll-ml.txt", - "empty-sub-group", true, - NULL != include_sbgp_name ? include_sbgp_name : exclude_sbgp_name); - ret = OMPI_ERROR; - goto exit_ERROR; - } - - ML_VERBOSE(10, ("Constructing empty hierarchy")); - ret = OMPI_SUCCESS; - goto exit_ERROR; - } - - topo->n_levels = n_hier; - - /* Find lowest and highest index of the groups in this communicator. - ** This will be needed in deciding where in the hierarchical collective - ** sequence of calls these particular groups belong. - ** It is done with one allreduce call to save allreduce overhead. 
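The single-allreduce trick referenced in this comment folds a MIN and a MAX reduction into one MPI_MIN operation by negating the quantity whose maximum is wanted, exactly as the buffers below are loaded. Stated with the plain MPI API for clarity (the code itself goes through comm_allreduce_pml):

    /* One allreduce, two answers: out[0] is the global minimum and
     * -out[1] is the global maximum, since min(-x_i) == -max(x_i). */
    short in[2], out[2];
    in[0] = (short) my_lowest_group_index;    /* minimized directly   */
    in[1] = (short) -my_highest_group_index;  /* negated to get a max */
    MPI_Allreduce(in, out, 2, MPI_SHORT, MPI_MIN, comm);
    int global_lowest  = (int) out[0];
    int global_highest = (int) -out[1];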
- */ - all_reduce_buffer2_in[0] = (short)my_lowest_group_index; - all_reduce_buffer2_in[1] = (short)-my_highest_group_index; - /* restore map to ranks for the original communicator */ - for (i = 0; i < ompi_comm_size(ml_module->comm); i++) { - map_to_comm_ranks[i] = i; - } - - ret = comm_allreduce_pml(all_reduce_buffer2_in, all_reduce_buffer2_out, - 2, MPI_SHORT, ompi_comm_rank(ml_module->comm), - MPI_MIN, original_group_size, - map_to_comm_ranks, ml_module->comm); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("comm_allreduce_pml failed. all_reduce_buffer2_in reduction")); - goto exit_ERROR; - } - - topo->global_lowest_hier_group_index = all_reduce_buffer2_out[0]; - topo->global_highest_hier_group_index = -all_reduce_buffer2_out[1]; - - ML_VERBOSE(10, ("The lowest index and highest index was successfully found.")); - - ML_VERBOSE(10, ("ml_discover_hierarchy done, n_levels %d lowest_group_index %d highest_group_index %d," - " original_group_size %d my_lowest_group_index %d my_highest_group_index %d", - topo->n_levels, topo->global_lowest_hier_group_index, - topo->global_highest_hier_group_index, - original_group_size, - my_lowest_group_index, - my_highest_group_index)); - - /* - * setup detailed subgroup information - */ - ret = ml_setup_full_tree_data(topo, ml_module->comm, my_highest_group_index, - map_to_comm_ranks,&num_total_subgroups,&array_of_all_subgroup_ranks, - &list_of_ranks_in_all_subgroups); - - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_VERBOSE(10, ("comm_allreduce_pml failed: bcols_in_use reduction %d ",ret)); - goto exit_ERROR; - } - - /* cache the ML hierarchical description on the tree */ - topo->number_of_all_subgroups = num_total_subgroups; - topo->array_of_all_subgroups = array_of_all_subgroup_ranks; - - ret = ml_init_k_nomial_trees(topo, list_of_ranks_in_all_subgroups, ompi_comm_rank(ml_module->comm)); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - goto exit_ERROR; - } - - /* Set the route table if know-root type of algorithms is used */ - if (COLL_ML_STATIC_BCAST == mca_coll_ml_component.bcast_algorithm) { - ret = mca_coll_ml_fill_in_route_tab(topo, ml_module->comm); - if (OMPI_SUCCESS != ret) { - ML_ERROR(("mca_coll_ml_fill_in_route_tab returned an error.")); - goto exit_ERROR; - } - } - - /* - ** If all ranks are selected, there will be a single rank that remains - - ** the root of the last group. Check to make sure that all ranks are - ** selected, and if not, return an error. We can't handle the collectives - ** correctly with this module. 
- */ - - exit_ERROR: - - ML_VERBOSE(10, ("Discovery done")); - - /* free temp resources */ - if (NULL != all_selected) { - free(all_selected); - all_selected = NULL; - } - - if (NULL != copy_procs) { - free(copy_procs); - copy_procs = NULL; - } - - if (NULL != map_to_comm_ranks) { - free(map_to_comm_ranks); - map_to_comm_ranks = NULL; - } - - if (NULL != index_proc_selected) { - free(index_proc_selected); - index_proc_selected = NULL; - } - - if (NULL != bcols_in_use) { - free(bcols_in_use); - bcols_in_use = NULL; - } - - if (NULL != list_of_ranks_in_all_subgroups) { - free(list_of_ranks_in_all_subgroups); - list_of_ranks_in_all_subgroups = NULL; - } - - return ret; -} - -void mca_coll_ml_allreduce_matrix_init(mca_coll_ml_module_t *ml_module, - const mca_bcol_base_component_2_0_0_t *bcol_component) -{ - int op, dt, et; - - for (op = 0; op < OMPI_OP_NUM_OF_TYPES; ++op) { - for (dt = 0; dt < OMPI_DATATYPE_MAX_PREDEFINED; ++dt) { - for (et = 0; et < BCOL_NUM_OF_ELEM_TYPES; ++et) { - ml_module->allreduce_matrix[op][dt][et] = - bcol_component->coll_support(op, dt, et); - } - } - } -} - -int mca_coll_ml_fulltree_hierarchy_discovery(mca_coll_ml_module_t *ml_module, - int n_hierarchies) -{ - return mca_coll_ml_tree_hierarchy_discovery(ml_module, - &ml_module->topo_list[COLL_ML_HR_FULL], - n_hierarchies, NULL, NULL); -} - -int mca_coll_ml_allreduce_hierarchy_discovery(mca_coll_ml_module_t *ml_module, - int n_hierarchies) -{ - mca_base_component_list_item_t *bcol_cli; - const mca_bcol_base_component_2_0_0_t *bcol_component; - - sbgp_base_component_keyval_t *sbgp_cli; - const mca_sbgp_base_component_2_0_0_t *sbgp_component; - - sbgp_cli = (sbgp_base_component_keyval_t *) - opal_list_get_first(&mca_sbgp_base_components_in_use); - - OPAL_LIST_FOREACH(bcol_cli, &mca_bcol_base_components_in_use, mca_base_component_list_item_t) { - bcol_component = (mca_bcol_base_component_2_0_0_t *) bcol_cli->cli_component; - - /* silence false-positive clang warning */ - assert (NULL != sbgp_cli); - - if (NULL != bcol_component->coll_support_all_types && - !bcol_component->coll_support_all_types(BCOL_ALLREDUCE)) { - mca_base_component_list_item_t *bcol_cli_next; - const mca_bcol_base_component_2_0_0_t *bcol_component_next; - - bcol_cli_next = (mca_base_component_list_item_t *) - opal_list_get_next((opal_list_item_t *) bcol_cli); - - mca_coll_ml_component.need_allreduce_support = true; - mca_coll_ml_allreduce_matrix_init(ml_module, bcol_component); - - sbgp_component = (mca_sbgp_base_component_2_0_0_t *) - sbgp_cli->component.cli_component; - - ML_VERBOSE(10, ("Topology build: sbgp %s will be excluded.", - sbgp_component->sbgp_version.mca_component_name)); - - - /* If there isn't additional component supports all types => print warning */ - if (1 == opal_list_get_size(&mca_bcol_base_components_in_use) || - (opal_list_item_t *) bcol_cli_next == - opal_list_get_end(&mca_bcol_base_components_in_use)) { - opal_show_help("help-mpi-coll-ml.txt", - "allreduce-not-supported", true, - bcol_component->bcol_version.mca_component_name); - - } else { - bcol_component_next = (mca_bcol_base_component_2_0_0_t *) - bcol_cli_next->cli_component; - - if (NULL != bcol_component_next->coll_support_all_types && - !bcol_component_next->coll_support_all_types(BCOL_ALLREDUCE)) { - - opal_show_help("help-mpi-coll-ml.txt", - "allreduce-alt-nosupport", true, - bcol_component->bcol_version.mca_component_name); - - } - } - - return mca_coll_ml_tree_hierarchy_discovery(ml_module, - &ml_module->topo_list[COLL_ML_HR_ALLREDUCE], - n_hierarchies, 
sbgp_component->sbgp_version.mca_component_name, NULL); - } - - sbgp_cli = (sbgp_base_component_keyval_t *) opal_list_get_next((opal_list_item_t *) sbgp_cli); - } - - return OMPI_SUCCESS; -} - -int mca_coll_ml_fulltree_exclude_basesmsocket_hierarchy_discovery(mca_coll_ml_module_t *ml_module, - int n_hierarchies) -{ - return mca_coll_ml_tree_hierarchy_discovery(ml_module, - &ml_module->topo_list[COLL_ML_HR_NBS], - n_hierarchies, "basesmsocket", NULL); -} - -int mca_coll_ml_fulltree_ptp_only_hierarchy_discovery(mca_coll_ml_module_t *ml_module, - int n_hierarchies) -{ - return mca_coll_ml_tree_hierarchy_discovery(ml_module, - &ml_module->topo_list[COLL_ML_HR_SINGLE_PTP], - n_hierarchies, NULL, "p2p"); -} - -int mca_coll_ml_fulltree_iboffload_only_hierarchy_discovery(mca_coll_ml_module_t *ml_module, - int n_hierarchies) -{ - return mca_coll_ml_tree_hierarchy_discovery(ml_module, - &ml_module->topo_list[COLL_ML_HR_SINGLE_IBOFFLOAD], - n_hierarchies, NULL, "ibnet"); -} - -#define IS_REACHABLE 1 -#define IS_NOT_REACHABLE -1 - -static int mca_coll_ml_fill_in_route_tab(mca_coll_ml_topology_t *topo, ompi_communicator_t *comm) -{ - int i, rc, level, comm_size = 0, - my_rank = ompi_comm_rank(comm); - - int32_t **route_table = NULL; - int32_t *all_reachable_ranks = NULL; - mca_sbgp_base_module_t *sbgp_group = NULL; - comm_size = ompi_comm_size(comm); - - all_reachable_ranks = (int32_t *) malloc(comm_size * sizeof(int32_t)); - if (NULL == all_reachable_ranks) { - ML_VERBOSE(10, ("Cannot allocate memory.")); - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - - for (i = 0; i < comm_size; ++i) { - all_reachable_ranks[i] = IS_NOT_REACHABLE; - } - - route_table = (int32_t **) calloc(topo->n_levels, sizeof(int32_t *)); - if (NULL == route_table) { - ML_VERBOSE(10, ("Cannot allocate memory.")); - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - - topo->route_vector = (mca_bcol_base_route_info_t *) - calloc(comm_size, sizeof(mca_bcol_base_route_info_t)); - if (NULL == topo->route_vector) { - ML_VERBOSE(10, ("Cannot allocate memory.")); - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - - all_reachable_ranks[my_rank] = IS_REACHABLE; - - for (level = 0; level < topo->n_levels; ++level) { - sbgp_group = topo->component_pairs[level].subgroup_module; - - route_table[level] = (int32_t *) malloc(comm_size * sizeof(int32_t)); - if (NULL == route_table[level]) { - ML_VERBOSE(10, ("Cannot allocate memory.")); - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit_ERROR; - } - - for (i = 0; i < comm_size; ++i) { - if (IS_NOT_REACHABLE != all_reachable_ranks[i]) { - all_reachable_ranks[i] = sbgp_group->my_index; - } - } - - rc = comm_allreduce_pml(all_reachable_ranks, - route_table[level], - comm_size, - MPI_INT, sbgp_group->my_index, - MPI_MAX, sbgp_group->group_size, - sbgp_group->group_list, - comm); - if (OMPI_SUCCESS != rc) { - ML_VERBOSE(10, ("comm_allreduce failed.")); - goto exit_ERROR; - } - - for (i = 0; i < comm_size; ++i) { - if (IS_NOT_REACHABLE != - route_table[level][i]) { - all_reachable_ranks[i] = IS_REACHABLE; - } - } - } - - assert(0 < level); - - /* If there are unreachable ranks => - reach them through leader of my upper layer */ - for (i = 0; i < comm_size; ++i) { - if (IS_NOT_REACHABLE == - route_table[level - 1][i]) { - route_table[level - 1][i] = 0; - } - } - - free(all_reachable_ranks); - - for (i = 0; i < comm_size; ++i) { - for (level = 0; level < topo->n_levels; ++level) { - if (IS_NOT_REACHABLE != route_table[level][i]) { - topo->route_vector[i].level = level; - 
topo->route_vector[i].rank = route_table[level][i]; - break; - } - } - } - -#if OPAL_ENABLE_DEBUG -#define COLL_ML_ROUTE_BUFF_SIZE (1024*1024) - /* Only bother creating the string if we're actually going to - print it out (i.e., if the verbose level is >= 10) */ - if (mca_coll_ml_component.verbose >= 10) { - int ii, jj; - char *buff, *output; - - output = buff = calloc(1, COLL_ML_ROUTE_BUFF_SIZE); - assert(NULL != output); - - sprintf(output, "ranks: "); - - output = buff + strlen(buff); - assert(COLL_ML_ROUTE_BUFF_SIZE + buff > output); - - for(ii = 0; ii < comm_size; ++ii) { - sprintf(output, " %2d", ii); - - output = buff + strlen(buff); - assert(COLL_ML_ROUTE_BUFF_SIZE + buff > output); - } - - for (ii = 0; ii < topo->n_levels; ++ii) { - sprintf(output, "\nlevel: %d ", ii); - - output = buff + strlen(buff); - assert(COLL_ML_ROUTE_BUFF_SIZE + buff > output); - for(jj = 0; jj < comm_size; ++jj) { - sprintf(output, " %2d", route_table[ii][jj]); - - output = buff + strlen(buff); - assert(COLL_ML_ROUTE_BUFF_SIZE + buff > output); - } - } - - sprintf(output, "\n\nThe vector is:\n============\nranks: "); - - output = buff + strlen(buff); - assert(COLL_ML_ROUTE_BUFF_SIZE + buff > output); - - for(ii = 0; ii < comm_size; ++ii) { - sprintf(output, " %6d", ii); - - output = buff + strlen(buff); - assert(COLL_ML_ROUTE_BUFF_SIZE + buff > output); - } - - sprintf(output, "\nlevel x rank: "); - - output = buff + strlen(buff); - assert(COLL_ML_ROUTE_BUFF_SIZE + buff > output); - - for(ii = 0; ii < comm_size; ++ii) { - sprintf(output, " (%d, %d)", - topo->route_vector[ii].level, - topo->route_vector[ii].rank); - - output = buff + strlen(buff); - assert(COLL_ML_ROUTE_BUFF_SIZE + buff > output); - } - - ML_VERBOSE(10, ("\nThe table is:\n============%s", buff)); - free(buff); - } -#endif - - for (level = 0; level < topo->n_levels; ++level) { - free(route_table[level]); - } - - free(route_table); - - return OMPI_SUCCESS; - - exit_ERROR: - - ML_VERBOSE(10, ("Exit with error status - %d.", rc)); - if (NULL != route_table) { - for (level = 0; level < topo->n_levels; ++level) { - if (NULL != route_table[level]) { - free(route_table[level]); - } - } - - free(route_table); - } - - free(all_reachable_ranks); - - return rc; -} - -static void init_coll_func_pointers(mca_coll_ml_module_t *ml_module) -{ - mca_coll_base_module_2_1_0_t *coll_base = &ml_module->super; - - int iboffload_used = - mca_coll_ml_check_if_bcol_is_used("iboffload", ml_module, COLL_ML_TOPO_MAX); - - /* initialize coll component function pointers */ - coll_base->coll_module_enable = ml_module_enable; - coll_base->ft_event = NULL; - - if (mca_coll_ml_component.disable_allgather) { - coll_base->coll_allgather = NULL; - coll_base->coll_iallgather = NULL; - } else { - coll_base->coll_allgather = mca_coll_ml_allgather; - coll_base->coll_iallgather = mca_coll_ml_allgather_nb; - } - - coll_base->coll_allgatherv = NULL; - - if (mca_coll_ml_component.use_knomial_allreduce) { - if (true == mca_coll_ml_component.need_allreduce_support) { - coll_base->coll_allreduce = mca_coll_ml_allreduce_dispatch; - coll_base->coll_iallreduce = mca_coll_ml_allreduce_dispatch_nb; - } else { - coll_base->coll_allreduce = mca_coll_ml_allreduce; - coll_base->coll_iallreduce = mca_coll_ml_allreduce_nb; - } - } else { - coll_base->coll_allreduce = NULL; - } - - coll_base->coll_alltoall = NULL; - coll_base->coll_ialltoall = NULL; - - coll_base->coll_alltoallv = NULL; - coll_base->coll_alltoallw = NULL; - - coll_base->coll_barrier = mca_coll_ml_barrier_intra; - - /* Use the 
sequential broadcast */ - if (COLL_ML_SEQ_BCAST == mca_coll_ml_component.bcast_algorithm) { - coll_base->coll_bcast = mca_coll_ml_bcast_sequential_root; - } else { - coll_base->coll_bcast = mca_coll_ml_parallel_bcast; - } - - coll_base->coll_exscan = NULL; - coll_base->coll_gather = NULL; - /* - coll_base->coll_gather = mca_coll_ml_gather; - */ - /* Current iboffload/ptpcoll version have no support for gather */ - if (iboffload_used || - mca_coll_ml_check_if_bcol_is_used("ptpcoll", ml_module, COLL_ML_TOPO_MAX)) { - coll_base->coll_gather = NULL; - } - - - coll_base->coll_gatherv = NULL; - if (mca_coll_ml_component.disable_reduce) { - coll_base->coll_reduce = NULL; - } else { - coll_base->coll_reduce = mca_coll_ml_reduce; - } - coll_base->coll_reduce_scatter = NULL; - coll_base->coll_scan = NULL; - coll_base->coll_scatter = NULL; -#if 0 - coll_base->coll_scatter = mca_coll_ml_scatter_sequential; -#endif - coll_base->coll_scatterv = NULL; - - coll_base->coll_iallgatherv = NULL; - coll_base->coll_ialltoallv = NULL; - coll_base->coll_ialltoallw = NULL; - coll_base->coll_ibarrier = mca_coll_ml_ibarrier_intra; - - coll_base->coll_ibcast = mca_coll_ml_parallel_bcast_nb; - coll_base->coll_iexscan = NULL; - coll_base->coll_igather = NULL; - coll_base->coll_igatherv = NULL; - coll_base->coll_ireduce = mca_coll_ml_reduce_nb; - coll_base->coll_ireduce_scatter = NULL; - coll_base->coll_iscan = NULL; - coll_base->coll_iscatter = NULL; - coll_base->coll_iscatterv = NULL; -} - -static int init_lists(mca_coll_ml_module_t *ml_module) -{ - mca_coll_ml_component_t *cs = &mca_coll_ml_component; - int num_elements = cs->free_list_init_size; - int max_elements = cs->free_list_max_size; - int elements_per_alloc = cs->free_list_grow_size; - size_t length_payload = 0; - size_t length; - int ret; - - /* initialize full message descriptors - moving this to the - * module, as the fragment has resrouce requirements that - * are communicator dependent */ - /* no data associated with the message descriptor */ - - length = sizeof(mca_coll_ml_descriptor_t); - ret = opal_free_list_init(&(ml_module->message_descriptors), length, - opal_cache_line_size, OBJ_CLASS(mca_coll_ml_descriptor_t), - length_payload, 0, - num_elements, max_elements, elements_per_alloc, - NULL, 0, NULL, - init_ml_message_desc, ml_module); - if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { - ML_ERROR(("opal_free_list_init exit with error")); - return ret; - } - - /* initialize fragement descriptors - always associate one fragment - * descriptr with full message descriptor, so that we can minimize - * small message latency */ - - /* create a free list of fragment descriptors */ - /*length_payload=sizeof(something);*/ - length = sizeof(mca_coll_ml_fragment_t); - ret = opal_free_list_init (&(ml_module->fragment_descriptors), length, - opal_cache_line_size, OBJ_CLASS(mca_coll_ml_fragment_t), - length_payload, 0, - num_elements, max_elements, elements_per_alloc, - NULL, 0, NULL, - init_ml_fragment_desc, ml_module); - if (OMPI_SUCCESS != ret) { - ML_ERROR(("opal_free_list_init exit with error")); - return ret; - } - - return OMPI_SUCCESS; -} - -static int check_for_max_supported_ml_modules(struct ompi_communicator_t *comm) -{ - int i, ret; - mca_coll_ml_component_t *cs = &mca_coll_ml_component; - int *comm_ranks = NULL; - - comm_ranks = (int *)calloc(ompi_comm_size(comm), sizeof(int)); - if (OPAL_UNLIKELY(NULL == comm_ranks)) { - ML_VERBOSE(10, ("Cannot allocate memory.")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - for (i = 0; i < ompi_comm_size(comm); i++) { - 
comm_ranks[i] = i; - } - - ret = comm_allreduce_pml(&cs->max_comm, &cs->max_comm, - 1 , MPI_INT, ompi_comm_rank(comm), - MPI_MIN, ompi_comm_size(comm), comm_ranks, - comm); - free(comm_ranks); - if (OMPI_SUCCESS != ret) { - ML_ERROR(("comm_allreduce - failed to collect max_comm data")); - return ret; - } - - if (0 >= cs->max_comm || - ompi_comm_size(comm) < cs->min_comm_size) { - return OMPI_ERROR; - } else { - --cs->max_comm; - } - - return OMPI_SUCCESS; -} - -#if OPAL_ENABLE_DEBUG -#define DEBUG_ML_COMM_QUERY() \ - do { \ - static int verbosity_level = 5; \ - static int module_num = 0; \ - ML_VERBOSE(10, ("ML module - %p num %d for comm - %p, " \ - "comm size - %d, ML component prio - %d.", \ - ml_module, ++module_num, comm, ompi_comm_size(comm), *priority)); \ - /* For now I want to always print that we enter ML - \ - at the past there was an issue that we did not enter ML and actually run with tuned. \ - Still I do not want to print it for each module - only for the first. */ \ - ML_VERBOSE(verbosity_level, ("ML module - %p was successfully created", ml_module)); \ - verbosity_level = 10; \ - } while(0) - -#else -#define DEBUG_ML_COMM_QUERY() -#endif - -static int mca_coll_ml_need_multi_topo(int bcol_collective) -{ - mca_base_component_list_item_t *bcol_cli; - const mca_bcol_base_component_2_0_0_t *bcol_component; - - for (bcol_cli = (mca_base_component_list_item_t *) - opal_list_get_first(&mca_bcol_base_components_in_use); - (opal_list_item_t *) bcol_cli != - opal_list_get_end(&mca_bcol_base_components_in_use); - bcol_cli = (mca_base_component_list_item_t *) - opal_list_get_next((opal_list_item_t *) bcol_cli)) { - bcol_component = (mca_bcol_base_component_2_0_0_t *) bcol_cli->cli_component; - if (NULL != bcol_component->coll_support_all_types && - !bcol_component->coll_support_all_types(bcol_collective)) { - return true; - } - } - - return false; -} - -/* We may call this function ONLY AFTER algorithm initialization */ -static int setup_bcast_table(mca_coll_ml_module_t *module) -{ - mca_coll_ml_component_t *cm = &mca_coll_ml_component; - bool has_zero_copy; - - /* setup bcast index table */ - if (COLL_ML_STATIC_BCAST == cm->bcast_algorithm) { - module->bcast_fn_index_table[0] = ML_BCAST_SMALL_DATA_KNOWN; - - has_zero_copy = !!(MCA_BCOL_BASE_ZERO_COPY & - module->coll_ml_bcast_functions[ML_BCAST_LARGE_DATA_KNOWN]->topo_info->all_bcols_mode); - - if (1 == cm->enable_fragmentation || (2 == cm->enable_fragmentation && !has_zero_copy)) { - module->bcast_fn_index_table[1] = ML_BCAST_SMALL_DATA_KNOWN; - } else if (!has_zero_copy) { - - opal_show_help("help-mpi-coll-ml.txt", - "fragmentation-disabled", true); - return OMPI_ERROR; - - } else { - module->bcast_fn_index_table[1] = ML_BCAST_LARGE_DATA_KNOWN; - } - } else { - module->bcast_fn_index_table[0] = ML_BCAST_SMALL_DATA_UNKNOWN; - - if (NULL == module->coll_ml_bcast_functions[ML_BCAST_LARGE_DATA_UNKNOWN]) { - - opal_show_help("help-mpi-coll-ml.txt", - "static-bcast-disabled", true); - - return OMPI_ERROR; - } - - has_zero_copy = !!(MCA_BCOL_BASE_ZERO_COPY & - module->coll_ml_bcast_functions[ML_BCAST_LARGE_DATA_UNKNOWN]->topo_info->all_bcols_mode); - - if (1 == cm->enable_fragmentation || (2 == cm->enable_fragmentation && !has_zero_copy)) { - module->bcast_fn_index_table[1] = ML_BCAST_SMALL_DATA_UNKNOWN; - } else if (!has_zero_copy) { - - opal_show_help("help-mpi-coll-ml.txt", - "fragmentation-disabled", true); - - return OMPI_ERROR; - } else { - /* If the topology support zero level and no fragmentation was requested */ - 
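Taken together, the branches in setup_bcast_table() treat cm->enable_fragmentation as a tri-state knob: 0 never fragments, 1 always fragments, and 2 fragments only when the selected topology lacks zero-copy support. A compact restatement, as a hypothetical helper that is not in the original source:

    #include <stdbool.h>

    /* Truth table for the bcast fragmentation decision above. */
    static bool use_fragmentation(int enable_fragmentation, bool has_zero_copy)
    {
        return 1 == enable_fragmentation ||
               (2 == enable_fragmentation && !has_zero_copy);
    }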
module->bcast_fn_index_table[1] = ML_BCAST_LARGE_DATA_UNKNOWN; - } - } - - return OMPI_SUCCESS; -} - -static void ml_check_for_enabled_topologies (int map[][MCA_COLL_MAX_NUM_SUBTYPES], mca_coll_ml_topology_t *topo_list) -{ - int coll_i, st_i; - for (coll_i = 0; coll_i < MCA_COLL_MAX_NUM_COLLECTIVES; coll_i++) { - for (st_i = 0; st_i < MCA_COLL_MAX_NUM_SUBTYPES; st_i++) { - if (map[coll_i][st_i] > -1) { - /* The topology is used, so set it to enabled */ - assert(map[coll_i][st_i] <= COLL_ML_TOPO_MAX); - topo_list[map[coll_i][st_i]].status = COLL_ML_TOPO_ENABLED; - } - } - } -} - -static void setup_default_topology_map(mca_coll_ml_module_t *ml_module) -{ - int i, j; - for (i = 0; i < MCA_COLL_MAX_NUM_COLLECTIVES; i++) { - for (j = 0; j < MCA_COLL_MAX_NUM_SUBTYPES; j++) { - ml_module->collectives_topology_map[i][j] = -1; - } - } - - ml_module->collectives_topology_map[ML_BARRIER][ML_BARRIER_DEFAULT] = COLL_ML_HR_FULL; - - ml_module->collectives_topology_map[ML_BCAST][ML_BCAST_SMALL_DATA_KNOWN] = COLL_ML_HR_FULL; - ml_module->collectives_topology_map[ML_BCAST][ML_BCAST_SMALL_DATA_UNKNOWN] = COLL_ML_HR_FULL; - ml_module->collectives_topology_map[ML_BCAST][ML_BCAST_SMALL_DATA_SEQUENTIAL] = COLL_ML_HR_FULL; - ml_module->collectives_topology_map[ML_BCAST][ML_BCAST_LARGE_DATA_KNOWN] = COLL_ML_HR_FULL; - ml_module->collectives_topology_map[ML_BCAST][ML_BCAST_LARGE_DATA_UNKNOWN] = COLL_ML_HR_FULL; - ml_module->collectives_topology_map[ML_BCAST][ML_BCAST_LARGE_DATA_UNKNOWN] = COLL_ML_HR_FULL; - - ml_module->collectives_topology_map[ML_ALLGATHER][ML_SMALL_DATA_ALLGATHER] = COLL_ML_HR_FULL; - ml_module->collectives_topology_map[ML_ALLGATHER][ML_LARGE_DATA_ALLGATHER] = COLL_ML_HR_FULL; - - ml_module->collectives_topology_map[ML_GATHER][ML_SMALL_DATA_GATHER] = COLL_ML_HR_FULL; - ml_module->collectives_topology_map[ML_GATHER][ML_LARGE_DATA_GATHER] = COLL_ML_HR_FULL; - - ml_module->collectives_topology_map[ML_ALLTOALL][ML_SMALL_DATA_ALLTOALL] = COLL_ML_HR_SINGLE_IBOFFLOAD; - ml_module->collectives_topology_map[ML_ALLTOALL][ML_LARGE_DATA_ALLTOALL] = COLL_ML_HR_SINGLE_IBOFFLOAD; - - ml_module->collectives_topology_map[ML_ALLREDUCE][ML_SMALL_DATA_ALLREDUCE] = COLL_ML_HR_FULL; - ml_module->collectives_topology_map[ML_ALLREDUCE][ML_LARGE_DATA_ALLREDUCE] = COLL_ML_HR_FULL; - - if (mca_coll_ml_need_multi_topo(BCOL_ALLREDUCE)) { - ml_module->collectives_topology_map[ML_ALLREDUCE][ML_SMALL_DATA_EXTRA_TOPO_ALLREDUCE] = COLL_ML_HR_ALLREDUCE; - ml_module->collectives_topology_map[ML_ALLREDUCE][ML_LARGE_DATA_EXTRA_TOPO_ALLREDUCE] = COLL_ML_HR_ALLREDUCE; - } - - ml_module->collectives_topology_map[ML_REDUCE][ML_SMALL_DATA_REDUCE] = COLL_ML_HR_FULL; - ml_module->collectives_topology_map[ML_REDUCE][ML_LARGE_DATA_REDUCE] = COLL_ML_HR_FULL; - - - ml_module->collectives_topology_map[ML_SCATTER][ML_SCATTER_SMALL_DATA_KNOWN] = COLL_ML_HR_FULL; - ml_module->collectives_topology_map[ML_SCATTER][ML_SCATTER_N_DATASIZE_BINS] = COLL_ML_HR_FULL; - ml_module->collectives_topology_map[ML_SCATTER][ML_SCATTER_SMALL_DATA_UNKNOWN] = COLL_ML_HR_FULL; - ml_module->collectives_topology_map[ML_SCATTER][ML_SCATTER_SMALL_DATA_SEQUENTIAL] = COLL_ML_HR_FULL; -} - -#define GET_CF(I, J) (&mca_coll_ml_component.coll_config[I][J]); - -static void load_cached_config(mca_coll_ml_module_t *ml_module) -{ - int c_idx, m_idx, alg; - per_collective_configuration_t *cf = NULL; - - for (c_idx = 0; c_idx < ML_NUM_OF_FUNCTIONS; c_idx++) { - for (m_idx = 0; m_idx < ML_NUM_MSG; m_idx++) { - cf = GET_CF(c_idx, m_idx); - /* load topology tunings */ - if (ML_UNDEFINED 
!= cf->topology_id && - ML_UNDEFINED != cf->algorithm_id) { - alg = - cf->algorithm_id; - ml_module->collectives_topology_map[c_idx][alg] = - cf->topology_id; - } - } - } -} - -/* Pasha: In future I would suggest to convert this configuration to some sophisticated mca parameter or - even configuration file. On this stage of project I will set it statically and later we will change it - to run time parameter */ -static void setup_topology_coll_map(mca_coll_ml_module_t *ml_module) -{ - /* Load default topology setup */ - setup_default_topology_map(ml_module); - - /* Load configuration file */ - load_cached_config(ml_module); - - ml_check_for_enabled_topologies(ml_module->collectives_topology_map, ml_module->topo_list); -} - -/* query to see if the module is available for use on the given - * communicator, and if so, what it's priority is. This is where - * the backing shared-memory file is created. - */ -mca_coll_base_module_t * -mca_coll_ml_comm_query(struct ompi_communicator_t *comm, int *priority) -{ - /* local variables */ - int ret = OMPI_SUCCESS; - - mca_coll_ml_module_t *ml_module = NULL; - mca_coll_ml_component_t *cs = &mca_coll_ml_component; - bool iboffload_was_requested = mca_coll_ml_check_if_bcol_is_requested("iboffload"); - - ML_VERBOSE(10, ("ML comm query start.")); - - /** - * No support for inter-communicator yet. - */ - if (OMPI_COMM_IS_INTER(comm)) { - *priority = -1; - return NULL; - } - - if (MPI_THREAD_MULTIPLE == ompi_mpi_thread_provided) { - ML_VERBOSE(10, ("coll:ml: MPI_THREAD_MULTIPLE not suppported; skipping this component")); - *priority = -1; - return NULL; - } - - - /* NTH: Disabled this check until we have a better one. */ -#if 0 - if (!ompi_rte_proc_is_bound) { - /* do not enable coll/ml unless this process is bound (for now) */ - *priority = -1; - return NULL; - } -#endif - - /** - * If it is inter-communicator and size is less than 2 we have specialized modules - * to handle the intra collective communications. - */ - if (OMPI_COMM_IS_INTRA(comm) && ompi_comm_size(comm) < 2) { - ML_VERBOSE(10, ("It is inter-communicator and size is less than 2.")); - *priority = -1; - return NULL; - } - - /** - * In current implementation we limit number of supported ML modules in cases when - * iboffload companent was requested - */ - if (iboffload_was_requested) { - ret = check_for_max_supported_ml_modules(comm); - if (OMPI_SUCCESS != ret) { - /* We have nothing to cleanup yet, so just return NULL */ - ML_VERBOSE(10, ("check_for_max_supported_ml_modules returns ERROR, return NULL")); - *priority = -1; - return NULL; - } - } - - ML_VERBOSE(10, ("Create ML module start.")); - - /* allocate and initialize an ml module */ - ml_module = OBJ_NEW(mca_coll_ml_module_t); - if (NULL == ml_module) { - return NULL; - } - - /* Get our priority */ - *priority = cs->ml_priority; - - /** Set initial ML values **/ - ml_module->comm = comm; - /* set the starting sequence number */ - ml_module->collective_sequence_num = cs->base_sequence_number; - ml_module->no_data_collective_sequence_num = cs->base_sequence_number; - /* initialize the size of the largest collective communication description */ - ml_module->max_fn_calls = 0; - -#ifdef NEW_LEADER_SELECTION - coll_ml_construct_resource_graphs(ml_module); -#endif - - /* Set topology - function map */ - setup_topology_coll_map(ml_module); - - /** - * This is the core of the function: - * setup communicator hierarchy - the ml component is available for - * caching information about the sbgp modules selected. 
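Later in mca_coll_ml_comm_query(), the Bruck's-algorithm temporary-buffer constant is derived from ceil(log2(comm_size)) computed with a doubling loop, then multiplied by ceil(comm_size / 2). In isolation, the logarithm computation looks like this (illustrative helper, not part of the original file):

    /* ceil(log2(n)) for n >= 1, via doubling. */
    static int ceil_log2(int n)
    {
        int count = 1, log = 0;
        while (count < n) {
            count <<= 1;
            ++log;
        }
        return log;
    }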
- */ - ret = ml_discover_hierarchy(ml_module); - if (OMPI_SUCCESS != ret) { - ML_VERBOSE(1, ("ml_discover_hierarchy exited with error.")); - goto CLEANUP; - } - - /* gvm Disabled for debuggin */ - ret = mca_coll_ml_build_filtered_fn_table(ml_module); - if (OMPI_SUCCESS != ret) { - ML_VERBOSE(1, ("mca_coll_ml_build_filtered_fn_table returned an error.")); - goto CLEANUP; - } - - /* Generate active bcols list */ - generate_active_bcols_list(ml_module); - - /* setup collective schedules - note that a given bcol may have more than - one module instantiated. We may want to use the same collective cap - capabilities over more than one set of procs. Each module will store - the relevant information for a given set of procs */ - ML_VERBOSE(10, ("Call for setup schedule.")); - ret = ml_coll_schedule_setup(ml_module); - if (OMPI_SUCCESS != ret) { - ML_VERBOSE(1, ("ml_coll_schedule_setup exit with error")); - goto CLEANUP; - } - - /* Setup bcast table */ - ML_VERBOSE(10, ("Setup bcast table")); - ret = setup_bcast_table(ml_module); - if (OMPI_SUCCESS != ret) { - ML_VERBOSE(1, ("setup_bcast_table exit with error")); - goto CLEANUP; - } - - ML_VERBOSE(10, ("Setup pointer to collectives calls.")); - init_coll_func_pointers(ml_module); - - ML_VERBOSE(10, ("Setup free lists")); - ret = init_lists(ml_module); - if (OMPI_SUCCESS != ret) { - goto CLEANUP; - } - - DEBUG_ML_COMM_QUERY(); - - /* Compute the bruck's buffer constant -- temp buffer requirements */ - { - int comm_size =ompi_comm_size(comm); - int count = 1, log_comm_size = 0; - - /* compute log of comm_size */ - while (count < comm_size) { - count = count << 1; - log_comm_size++; - } - - ml_module->brucks_buffer_threshold_const = - (comm_size / 2 + comm_size % 2) * (log_comm_size) ; - - - ml_module->log_comm_size = log_comm_size; - } - - if (iboffload_was_requested) { - /* HACK: Calling memory sync barrier first time to make sure - * that iboffload create qps for service barrier in right order, - * otherwise we may have deadlock and really nasty data corruptions. - * If you plan to remove this one - please talk to me first. - * Pasha. - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - Work around for deadlock caused by connection setup - for asyc service barrier. Asyc service barrier use own set of - MQ and QP _BUT_ the exchange operation uses the MQ that is used for - primary set of collectives operations like Allgahter, Barrier,etc. - As result exchange wait operation could be pushed to primary MQ and - cause dead-lock. - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
- Create connection for service barrier and memory address exchange - for ml buffers and asyc service barrier - */ - ret = mca_coll_ml_memsync_intra(ml_module, 0); - if (OMPI_SUCCESS != ret) { - goto CLEANUP; - } - opal_progress(); - } - - /* The module is ready */ - ml_module->initialized = true; - - return &(ml_module->super); - - CLEANUP: - /* Vasily: RLG: Need to cleanup free lists */ - if (NULL != ml_module) { - OBJ_RELEASE(ml_module); - } - - return NULL; -} - -/* copied slightly modified from coll/hcoll */ -#define ML_SAVE_FALLBACK(_coll_ml, _coll) \ - do { \ - _coll_ml->fallback.coll_ ## _coll = comm->c_coll.coll_ ## _coll; \ - _coll_ml->fallback.coll_ ## _coll ## _module = comm->c_coll.coll_ ## _coll ## _module; \ - if (comm->c_coll.coll_ ## _coll && comm->c_coll.coll_ ## _coll ## _module) { \ - OBJ_RETAIN(_coll_ml->fallback.coll_ ## _coll ## _module); \ - } \ - } while(0) - -static void ml_save_fallback_colls (mca_coll_ml_module_t *coll_ml, - struct ompi_communicator_t *comm) -{ - memset (&coll_ml->fallback, 0, sizeof (coll_ml->fallback)); - /* save lower-priority collectives to handle cases not yet handled - * by coll/ml */ - ML_SAVE_FALLBACK(coll_ml, allreduce); - ML_SAVE_FALLBACK(coll_ml, allgather); - ML_SAVE_FALLBACK(coll_ml, reduce); - ML_SAVE_FALLBACK(coll_ml, bcast); - ML_SAVE_FALLBACK(coll_ml, iallreduce); - ML_SAVE_FALLBACK(coll_ml, iallgather); - ML_SAVE_FALLBACK(coll_ml, ireduce); - ML_SAVE_FALLBACK(coll_ml, ibcast); -} - -/* - * Init module on the communicator - */ -static int -ml_module_enable(mca_coll_base_module_t *module, - struct ompi_communicator_t *comm) -{ - /* local variables */ - char output_buffer[2 * MPI_MAX_OBJECT_NAME]; - - ml_save_fallback_colls ((mca_coll_ml_module_t *) module, comm); - - memset(&output_buffer[0], 0, sizeof(output_buffer)); - snprintf(output_buffer, sizeof(output_buffer), "%s (cid %d)", comm->c_name, - comm->c_contextid); - - ML_VERBOSE(10, ("coll:ml:enable: new communicator: %s.", output_buffer)); - - /* All done */ - return OMPI_SUCCESS; -} - -OBJ_CLASS_INSTANCE(mca_coll_ml_module_t, - mca_coll_base_module_t, - mca_coll_ml_module_construct, - mca_coll_ml_module_destruct); - -OBJ_CLASS_INSTANCE(mca_coll_ml_collective_operation_progress_t, - ompi_request_t, - mca_coll_ml_collective_operation_progress_construct, - mca_coll_ml_collective_operation_progress_destruct); diff --git a/ompi/mca/coll/ml/coll_ml_payload_buffers.h b/ompi/mca/coll/ml/coll_ml_payload_buffers.h deleted file mode 100644 index d4ac765342..0000000000 --- a/ompi/mca/coll/ml/coll_ml_payload_buffers.h +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_ML_PAYLOAD_BUFFERS_H -#define MCA_ML_PAYLOAD_BUFFERS_H - -#include "ompi/include/ompi/constants.h" -#include "opal/threads/mutex.h" - -struct buffer_t { - /* payload */ - void *payload; - - /* next payload buffer - need this because of wrap around, and - * because we want to allocate several buffers at once, but only - * manipulate one entry - */ - struct buffer_t *next_buffer; -}; -typedef struct buffer_t buffer_t; - -struct ml_buffers_t { - /* fifo size */ - int fifo_size; - - /* write index - next to allocate */ - int head_index; - opal_mutex_t head_lock; - - /* read index - next to free */ - int tail_index; - - /* number available - used to detect full queue */ - int n_segments_available; - - /* mask - assumes that fifo link is a power of 2 */ - int mask; - - /* fifo */ - buffer_t *fifo; -}; - -typedef struct ml_buffers_t ml_buffers_t; - -/* Initialization function */ - -static inline int ml_fifo_init( - int fifo_size, void *memory_chunk, size_t size_of_memory_chunk, - size_t segment_alignment, - size_t segment_size, ml_buffers_t *buffer_fifo) -{ - /* local variable */ - ptrdiff_t allocation_base, memory_chunk_ptr; - size_t memory_to_allocate, allocated_fifo_size, - allocated_segment_size, seg; - - /* make sure fifo size is power of 2, and round up if not - want - * efficient addressing */ - if( 0 >= fifo_size ) { - return OMPI_ERROR; - } - allocated_fifo_size=1; - while ( allocated_fifo_size < (size_t)fifo_size ) { - allocated_fifo_size*=2; - } - - /* set buffer size to match its alignment - round size up */ - allocated_segment_size=segment_size; - if( 0 >= segment_alignment ) { - /* multiples of alignmnet */ - allocated_segment_size=( (allocated_segment_size-1)/segment_alignment)+1; - allocated_segment_size=allocated_segment_size*segment_alignment; - } - - /* adjust base pointer to segment alignment */ - memory_chunk_ptr = (ptrdiff_t )memory_chunk; - allocation_base=( ( memory_chunk_ptr-1)/segment_alignment)+1; - allocation_base=allocated_segment_size*segment_alignment; - - /* check for input consistency */ - memory_to_allocate=size_of_memory_chunk-(allocation_base-memory_chunk_ptr); - if( (allocated_segment_size * allocated_fifo_size) < memory_to_allocate ) { - return OMPI_ERROR; - } - - /* allocate the fifo array */ - buffer_fifo->fifo=(buffer_t *)malloc(sizeof(buffer_t)*allocated_fifo_size); - if( NULL == buffer_fifo->fifo) { - return OMPI_ERROR; - } - - /* Initialize structure */ - for( seg=0 ; seg < allocated_fifo_size ; seg++ ) { - buffer_fifo->fifo[seg].payload= - (void *)(allocation_base+seg*allocated_segment_size); - } - for( seg=0 ; seg < allocated_fifo_size-1 ; seg++ ) { - buffer_fifo->fifo[seg].next_buffer= - &(buffer_fifo->fifo[seg+1]); - } - buffer_fifo->fifo[allocated_fifo_size-1].next_buffer= - &(buffer_fifo->fifo[0]); - - buffer_fifo->head_index=0; - buffer_fifo->tail_index=0; - buffer_fifo->n_segments_available=allocated_fifo_size; - buffer_fifo->fifo_size=allocated_fifo_size; - buffer_fifo->mask=buffer_fifo->fifo_size-1; - OBJ_CONSTRUCT(&(buffer_fifo->head_lock), opal_mutex_t); - - /* return */ - return OMPI_SUCCESS; -} - -/* - * Allocate several buffers. Either all requested buffers are allocated, - * or none are allocated. 
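Since this header defines the whole ring-buffer API inline, a usage sketch may help; the sizes and the backing chunk are illustrative, and in the real component the chunk would be registered, pre-aligned payload memory:

    ml_buffers_t fifo;
    static char chunk[32 * 1024];   /* stand-in for a registered region */

    if (OMPI_SUCCESS == ml_fifo_init(64 /* slots */, chunk, sizeof(chunk),
                                     64 /* alignment */, 512 /* segment */,
                                     &fifo)) {
        /* all-or-nothing allocation of 4 contiguous slots */
        buffer_t *bufs = ml_fifo_alloc_n_buffers(4, &fifo);
        if (NULL != bufs) {
            /* ... stage payload via bufs->payload, walk next_buffer ... */
            ml_fifo_return_n_buffers(4, &fifo);
        }
    }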
- */
-static inline buffer_t *ml_fifo_alloc_n_buffers(int n_to_allocate,
-                                                ml_buffers_t *buffer_fifo)
-{
-    /* local variables */
-    buffer_t *ret = NULL;
-
-    /* RLG - probably want to try a few times before giving up */
-    if (!OPAL_THREAD_TRYLOCK(&(buffer_fifo->head_lock))) {
-        if (buffer_fifo->n_segments_available >= n_to_allocate) {
-            ret = &(buffer_fifo->fifo[buffer_fifo->head_index]);
-            buffer_fifo->head_index = (buffer_fifo->head_index + n_to_allocate);
-            /* wrap around */
-            buffer_fifo->head_index &= buffer_fifo->mask;
-
-            buffer_fifo->n_segments_available -= n_to_allocate;
-        }
-        OPAL_THREAD_UNLOCK(&(buffer_fifo->head_lock));
-    } /* end of allocation */
-
-    return ret;
-}
-
-/* return buffers */
-static inline void ml_fifo_return_n_buffers(int n_to_return,
-                                            ml_buffers_t *buffer_fifo)
-{
-    OPAL_THREAD_LOCK(&(buffer_fifo->head_lock));
-
-    /* move tail pointer - RLG: Do we really need the tail pointer? */
-    buffer_fifo->tail_index = (buffer_fifo->tail_index + n_to_return);
-    /* wrap around */
-    buffer_fifo->tail_index &= buffer_fifo->mask;
-
-    /* adjust number of available buffers */
-    buffer_fifo->n_segments_available += n_to_return;
-
-    OPAL_THREAD_UNLOCK(&(buffer_fifo->head_lock));
-}
-
-#endif
-
diff --git a/ompi/mca/coll/ml/coll_ml_progress.c b/ompi/mca/coll/ml/coll_ml_progress.c
deleted file mode 100644
index 602331f785..0000000000
--- a/ompi/mca/coll/ml/coll_ml_progress.c
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
- * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
- * $COPYRIGHT$
- *
- * Additional copyrights may follow
- *
- * $HEADER$
- */
-
-#include "ompi/mca/coll/ml/coll_ml.h"
-
-/*
- * This routine is used to progress a series of communication
- * primitives.
- *
- * Assumptions:
- * - A message is described by a message descriptor
- * - Each message has a setup function associated with it, which is
- *   algorithm specific. When a fragment is being prepared, this
- *   setup is used to prepare the arguments that will be passed into
- *   each routine called to complete a given function. The idea here
- *   is that when the progress routine is called, the full communication
- *   pattern has already been described by the setup function, with the
- *   progress function being generic.
- * - Each fragment is described by a fragment descriptor
- * - Each message descriptor has a fragment descriptor permanently
- *   associated with it.
- * - The message will be progressed as long as the individual
- *   functions complete. When an individual function does not
- *   complete, the current state is saved for a future
- *   restart.
- * - return status
- *   OMPI_COMPLETE: function completed
- *   OMPI_INCOMPLETE: need to continue progressing the function
- *   any other return value - error condition
- */
-
-int coll_ml_progress_individual_message(mca_coll_ml_fragment_t *frag_descriptor)
-{
-    /* local variables */
-    int fn_index, ret = OMPI_SUCCESS;
-    uint32_t n_frags_complete;
-    int starting_fn_index = frag_descriptor->current_fn_index;
-    coll_ml_collective_description_t *local_comm_description =
-        frag_descriptor->full_msg_descriptor->local_comm_description;
-
-    /* loop over functions */
-    for (fn_index = starting_fn_index; fn_index < local_comm_description->n_functions;
-         fn_index++) {
-        mca_bcol_base_module_t *bcol_module =
-            local_comm_description->functions[fn_index].bcol_module;
-        ret = (bcol_module->bcol_function_table[local_comm_description->functions[fn_index].fn_idx])
-            (&(frag_descriptor->fn_args[fn_index]), &local_comm_description->functions[fn_index]);
-        if (ML_OMPI_COMPLETE != ret) {
-            /* the function is incomplete, so decide what to do */
-            if (ML_OMPI_INCOMPLETE == ret) {
-                /* need to return to this later - mark where to continue */
-                frag_descriptor->current_fn_index = fn_index;
-                /* RLG - is this really best? The only advantage is that
-                 * if we exit the loop, we can assume the message is
-                 * complete.
-                 */
-                return OMPI_SUCCESS;
-            } else {
-                /* some sort of error condition */
-                frag_descriptor->current_fn_index = fn_index;
-                return ret;
-            }
-        }
-    }
-
-    /* looks like we are done */
-    /* increment counter for number of completed fragments */
-    n_frags_complete = OPAL_THREAD_ADD_SIZE_T(
-        &(frag_descriptor->full_msg_descriptor->frags_complete), 1);
-
-    /*
-     * release resources
-     */
-
-    /* fragment resources */
-
-    /* full message resources */
-    if (n_frags_complete == frag_descriptor->full_msg_descriptor->n_fragments)
-    {
-        /* free any fragments that still need to be freed.
-         * NOTE: at this level we do not handle any resources
-         * aside from the pre-registered buffers; all of these
-         * are handled at the bcol level */
-
-        /* return the buffers to the ml free list */
-
-        /* mark as complete - so MPI can complete.
-         * The message descriptor will be freed by a call
-         * to mpi_test/mpi_wait/... as the message descriptor
-         * also holds the mpi request object */
-    }
-
-    return OMPI_SUCCESS;
-}
diff --git a/ompi/mca/coll/ml/coll_ml_reduce.c b/ompi/mca/coll/ml/coll_ml_reduce.c
deleted file mode 100644
index cfec0743a7..0000000000
--- a/ompi/mca/coll/ml/coll_ml_reduce.c
+++ /dev/null
@@ -1,528 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
-/*
- * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
- * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2015 Research Organization for Information Science
- *                    and Technology (RIST). All rights reserved.
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file */ - -#include "ompi_config.h" - -#include "ompi/constants.h" -#include "opal/threads/mutex.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/coll/coll.h" -#include "ompi/mca/bcol/bcol.h" -#include "opal/sys/atomic.h" -#include "ompi/mca/coll/ml/coll_ml.h" -#include "ompi/mca/coll/ml/coll_ml_allocation.h" -#include "ompi/mca/coll/ml/coll_ml_inlines.h" -#define REDUCE_SMALL_MESSAGE_THRESHOLD 2048 - -static int mca_coll_ml_reduce_unpack(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - int ret; - /* need to put in more */ - int count = coll_op->variable_fn_params.count; - ompi_datatype_t *dtype = coll_op->variable_fn_params.dtype; - - void *dest = (void *)((uintptr_t)coll_op->full_message.dest_user_addr + - (uintptr_t)coll_op->fragment_data.offset_into_user_buffer); - void *src = (void *)((uintptr_t)coll_op->fragment_data.buffer_desc->data_addr + - (size_t)coll_op->variable_fn_params.rbuf_offset); - - ret = ompi_datatype_copy_content_same_ddt(dtype, (int32_t) count, (char *) dest, - (char *) src); - if (ret < 0) { - return OMPI_ERROR; - } - - if (coll_op->variable_fn_params.root_flag) { - ML_VERBOSE(1,("In reduce unpack %d", - *(int *)((unsigned char*) src))); - } - - ML_VERBOSE(10, ("sbuf addr %p, sbuf offset %d, sbuf val %lf, rbuf addr %p, rbuf offset %d, rbuf val %lf.", - coll_op->variable_fn_params.sbuf, coll_op->variable_fn_params.sbuf_offset, - *(double *) ((unsigned char *) coll_op->variable_fn_params.sbuf + - (size_t) coll_op->variable_fn_params.sbuf_offset), - coll_op->variable_fn_params.rbuf, coll_op->variable_fn_params.rbuf_offset, - *(double *) ((unsigned char *) coll_op->variable_fn_params.rbuf + - (size_t) coll_op->variable_fn_params.rbuf_offset))); - - return OMPI_SUCCESS; -} - - -static int -mca_coll_ml_reduce_task_setup (mca_coll_ml_collective_operation_progress_t *coll_op) -{ - int fn_idx, h_level, next_h_level, my_index; - mca_sbgp_base_module_t *sbgp; - mca_coll_ml_topology_t *topo = coll_op->coll_schedule->topo_info; - - fn_idx = coll_op->sequential_routine.current_active_bcol_fn; - h_level = coll_op->coll_schedule->component_functions[fn_idx].h_level; - next_h_level = (fn_idx < coll_op->coll_schedule->n_fns - 1) ? - coll_op->coll_schedule->component_functions[fn_idx+1].h_level : -1; - sbgp = topo->component_pairs[h_level].subgroup_module; - my_index = sbgp->my_index; - - if (coll_op->variable_fn_params.root_flag) { - ML_VERBOSE(1,("In task completion Data in receiver buffer %d ", - *(int *)((unsigned char*) coll_op->variable_fn_params.rbuf + - coll_op->variable_fn_params.rbuf_offset))); - } - - /* determine the root for this level of the hierarchy */ - if (coll_op->coll_schedule->topo_info->route_vector[coll_op->global_root].level == next_h_level || - coll_op->global_root == sbgp->group_list[my_index]) { - /* I am the global root or I will be talking to the global root in the next round. 
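The route vector consulted below is the one built by mca_coll_ml_fill_in_route_tab() earlier in this commit: for each communicator rank it records the lowest hierarchy level at which that rank becomes reachable, plus the subgroup rank to address at that level. With that in mind, the root choice made next reduces to three cases (shorthand names here are illustrative only):

    /* Hedged restatement of the per-level root selection below. */
    if (route[global_root].level == next_h_level || i_am_global_root) {
        root = my_index;                 /* I relay toward the root next round */
    } else if (route[global_root].level == h_level) {
        root = route[global_root].rank;  /* root is addressable at this level */
    } else {
        root = 0;                        /* funnel through the subgroup leader */
    }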
*/ - coll_op->variable_fn_params.root = my_index; - } else if (coll_op->coll_schedule->topo_info->route_vector[coll_op->global_root].level == h_level) { - /* the root is in this level of my hierarchy */ - coll_op->variable_fn_params.root = coll_op->coll_schedule->topo_info->route_vector[coll_op->global_root].rank; - } else { - coll_op->variable_fn_params.root = 0; - } - - /* Set the route vector for this root */ - coll_op->variable_fn_params.root_route = - &coll_op->coll_schedule->topo_info->route_vector[sbgp->group_list[coll_op->variable_fn_params.root]]; - - /* Am I the root of this hierarchy? */ - coll_op->variable_fn_params.root_flag = (my_index == coll_op->variable_fn_params.root); - - /* For hierarchy switch btw source and destination buffer - * No need to make this switch for the first call .. - * */ - if (0 < fn_idx) { - int tmp_offset = coll_op->variable_fn_params.sbuf_offset; - coll_op->variable_fn_params.sbuf_offset = - coll_op->variable_fn_params.rbuf_offset; - coll_op->variable_fn_params.rbuf_offset = tmp_offset; - } - - return OMPI_SUCCESS; -} - -static int mca_coll_ml_reduce_frag_progress(mca_coll_ml_collective_operation_progress_t *coll_op) -{ - /* local variables */ - void *buf; - - size_t dt_size; - int ret, frag_len, count; - - ptrdiff_t lb, extent; - - mca_bcol_base_payload_buffer_desc_t *src_buffer_desc; - mca_coll_ml_collective_operation_progress_t *new_op; - - mca_coll_ml_module_t *ml_module = OP_ML_MODULE(coll_op); - - ret = ompi_datatype_get_extent(coll_op->variable_fn_params.dtype, &lb, &extent); - if (ret < 0) { - return OMPI_ERROR; - } - - dt_size = (size_t) extent; - - /* Keep the pipeline filled with fragments */ - while (coll_op->fragment_data.message_descriptor->n_active < - coll_op->fragment_data.message_descriptor->pipeline_depth) { - /* If an active fragment happens to have completed the collective during - * a hop into the progress engine, then don't launch a new fragment, - * instead break and return. - */ - if (coll_op->fragment_data.message_descriptor->n_bytes_scheduled - == coll_op->fragment_data.message_descriptor->n_bytes_total) { - break; - } - - /* Get an ml buffer */ - src_buffer_desc = mca_coll_ml_alloc_buffer(OP_ML_MODULE(coll_op)); - if (NULL == src_buffer_desc) { - /* If there exist outstanding fragments, then break out - * and let an active fragment deal with this later, - * there are no buffers available. - */ - if (0 < coll_op->fragment_data.message_descriptor->n_active) { - return OMPI_SUCCESS; - } else { - /* It is useless to call progress from here, since - * ml progress can't be executed as result ml memsync - * call will not be completed and no memory will be - * recycled. 
So we put the element on the list, and we will - * progress it later when memsync will recycle some memory*/ - - /* The fragment is already on list and - * the we still have no ml resources - * Return busy */ - if (coll_op->pending & REQ_OUT_OF_MEMORY) { - ML_VERBOSE(10,("Out of resources %p", coll_op)); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - coll_op->pending |= REQ_OUT_OF_MEMORY; - opal_list_append(&((OP_ML_MODULE(coll_op))->waiting_for_memory_list), - (opal_list_item_t *)coll_op); - ML_VERBOSE(10,("Out of resources %p adding to pending queue", coll_op)); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - } - - /* Get a new collective descriptor and initialize it */ - new_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_reduce_functions[ML_SMALL_DATA_REDUCE], - coll_op->fragment_data.message_descriptor->src_user_addr, - coll_op->fragment_data.message_descriptor->dest_user_addr, - coll_op->fragment_data.message_descriptor->n_bytes_total, - coll_op->fragment_data.message_descriptor->n_bytes_scheduled); - - ML_VERBOSE(1,(" In Reduce fragment progress %d %d ", - coll_op->fragment_data.message_descriptor->n_bytes_total, - coll_op->fragment_data.message_descriptor->n_bytes_scheduled)); - MCA_COLL_IBOFFLOAD_SET_ML_BUFFER_INFO(new_op, - src_buffer_desc->buffer_index, src_buffer_desc); - - new_op->fragment_data.current_coll_op = coll_op->fragment_data.current_coll_op; - new_op->fragment_data.message_descriptor = coll_op->fragment_data.message_descriptor; - - /* set the task setup callback */ - new_op->sequential_routine.seq_task_setup = mca_coll_ml_reduce_task_setup; - /* We need this address for pointer arithmetic in memcpy */ - buf = (void*)coll_op->fragment_data.message_descriptor->src_user_addr; - /* calculate the number of data types in this packet */ - count = (coll_op->fragment_data.message_descriptor->n_bytes_total - - coll_op->fragment_data.message_descriptor->n_bytes_scheduled < - ((size_t) OP_ML_MODULE(coll_op)->small_message_thresholds[BCOL_REDUCE]/4 )? 
- (coll_op->fragment_data.message_descriptor->n_bytes_total - - coll_op->fragment_data.message_descriptor->n_bytes_scheduled) / dt_size : - (size_t) coll_op->variable_fn_params.count); - - /* calculate the fragment length */ - frag_len = count * dt_size; - - ret = ompi_datatype_copy_content_same_ddt(coll_op->variable_fn_params.dtype, count, - (char *) src_buffer_desc->data_addr, (char *) ((uintptr_t) buf + (uintptr_t) - coll_op->fragment_data.message_descriptor->n_bytes_scheduled)); - if (ret < 0) { - return OMPI_ERROR; - } - - /* if root unpack the data */ - if (ompi_comm_rank(ml_module->comm) == coll_op->global_root ) { - new_op->process_fn = mca_coll_ml_reduce_unpack; - new_op->variable_fn_params.root_flag = true; - } else { - new_op->process_fn = NULL; - new_op->variable_fn_params.root_flag = false; - } - - new_op->variable_fn_params.root_route = coll_op->variable_fn_params.root_route; - - /* Setup fragment specific data */ - new_op->fragment_data.message_descriptor->n_bytes_scheduled += frag_len; - new_op->fragment_data.buffer_desc = src_buffer_desc; - new_op->fragment_data.fragment_size = frag_len; - (new_op->fragment_data.message_descriptor->n_active)++; - - /* Set in Reduce Buffer arguments */ - ML_SET_VARIABLE_PARAMS_BCAST(new_op, OP_ML_MODULE(new_op), count, - coll_op->variable_fn_params.dtype, src_buffer_desc, - 0, (ml_module->payload_block->size_buffer - - ml_module->data_offset)/2, frag_len, - src_buffer_desc->data_addr); - - new_op->variable_fn_params.buffer_size = frag_len; - new_op->variable_fn_params.sbuf = src_buffer_desc->data_addr; - new_op->variable_fn_params.rbuf = src_buffer_desc->data_addr; - new_op->variable_fn_params.root = coll_op->variable_fn_params.root; - new_op->global_root = coll_op->global_root; - new_op->variable_fn_params.op = coll_op->variable_fn_params.op; - new_op->variable_fn_params.hier_factor = coll_op->variable_fn_params.hier_factor; - new_op->sequential_routine.current_bcol_status = SEQ_TASK_PENDING; - MCA_COLL_ML_SET_NEW_FRAG_ORDER_INFO(new_op); - - ML_VERBOSE(10,("FFFF Contig + fragmentation [0-sk, 1-lk, 3-su, 4-lu] %d %d %d", - new_op->variable_fn_params.buffer_size, - new_op->fragment_data.fragment_size, - new_op->fragment_data.message_descriptor->n_bytes_scheduled)); - /* initialize first coll */ - new_op->sequential_routine.seq_task_setup(new_op); - - /* append this collective !! 
*/ - OPAL_THREAD_LOCK(&(mca_coll_ml_component.sequential_collectives_mutex)); - opal_list_append(&mca_coll_ml_component.sequential_collectives, - (opal_list_item_t *)new_op); - OPAL_THREAD_UNLOCK(&(mca_coll_ml_component.sequential_collectives_mutex)); - - } - - return OMPI_SUCCESS; -} - -static inline __opal_attribute_always_inline__ -int parallel_reduce_start (const void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, struct ompi_op_t *op, - int root, - struct ompi_communicator_t *comm, - mca_coll_ml_module_t *ml_module, - ompi_request_t **req, - int small_data_reduce, - int large_data_reduce) { - ptrdiff_t lb, extent; - size_t pack_len, dt_size; - mca_bcol_base_payload_buffer_desc_t *src_buffer_desc = NULL; - mca_coll_ml_collective_operation_progress_t * coll_op = NULL; - bool contiguous = ompi_datatype_is_contiguous_memory_layout(dtype, count); - mca_coll_ml_component_t *cm = &mca_coll_ml_component; - int ret, n_fragments = 1, frag_len, - pipeline_depth, n_dts_per_frag, rank; - - if (MPI_IN_PLACE == sbuf) { - sbuf = rbuf; - } - - ret = ompi_datatype_get_extent(dtype, &lb, &extent); - if (ret < 0) { - return OMPI_ERROR; - } - - rank = ompi_comm_rank (comm); - - dt_size = (size_t) extent; - pack_len = count * dt_size; - - /* We use separate receive and send buffers, so only half the buffer is usable. */ - if (pack_len < (size_t) ml_module->small_message_thresholds[BCOL_REDUCE] / 4) { - /* The length of the message cannot be larger than the ML buffer size */ - assert(pack_len <= ml_module->payload_block->size_buffer); - - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - - ML_VERBOSE(10,("Using small data reduce (threshold = %d)", - REDUCE_SMALL_MESSAGE_THRESHOLD)); - while (NULL == src_buffer_desc) { - opal_progress(); - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - } - - coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_reduce_functions[small_data_reduce], - sbuf, rbuf, pack_len, 0); - - MCA_COLL_IBOFFLOAD_SET_ML_BUFFER_INFO(coll_op, - src_buffer_desc->buffer_index, src_buffer_desc); - - coll_op->variable_fn_params.rbuf = src_buffer_desc->data_addr; - coll_op->variable_fn_params.sbuf = src_buffer_desc->data_addr; - coll_op->variable_fn_params.buffer_index = src_buffer_desc->buffer_index; - coll_op->variable_fn_params.src_desc = src_buffer_desc; - coll_op->variable_fn_params.count = count; - - ret = ompi_datatype_copy_content_same_ddt(dtype, count, - (void *) (uintptr_t) src_buffer_desc->data_addr, (char *) sbuf); - if (ret < 0){ - return OMPI_ERROR; - } - - } else if (cm->enable_fragmentation || !contiguous) { - ML_VERBOSE(1,("Using Fragmented Reduce ")); - - /* fragment the data */ - /* reject datatypes whose single element cannot fit in one ML fragment */ - if (dt_size > (size_t) ml_module->small_message_thresholds[BCOL_REDUCE] / 4) { - ML_ERROR(("Sorry, but we don't support datatypes that large")); - return OMPI_ERROR; - } - - /* calculate the number of data types that can fit per ml-buffer */ - n_dts_per_frag = ml_module->small_message_thresholds[BCOL_REDUCE] / (4 * dt_size); - - /* calculate the number of fragments */ - n_fragments = (count + n_dts_per_frag - 1) / n_dts_per_frag; /* round up */ - - /* calculate the actual pipeline depth */ - pipeline_depth = n_fragments < cm->pipeline_depth ?
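/* A sketch with assumed numbers (none of them from the source): reducing count = 1000 elements of an 8-byte datatype with a 4096-byte BCOL_REDUCE threshold gives n_dts_per_frag = 4096 / (4 * 8) = 128, n_fragments = (1000 + 127) / 128 = 8, and, with cm->pipeline_depth = 4, the ternary below caps the depth at min(8, 4) = 4 fragments in flight. */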
n_fragments : cm->pipeline_depth; - - /* calculate the fragment size */ - frag_len = n_dts_per_frag * dt_size; - - /* allocate an ml buffer */ - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - while (NULL == src_buffer_desc) { - opal_progress(); - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - } - - coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_reduce_functions[small_data_reduce], - sbuf,rbuf, - pack_len, - 0 /* offset for first pack */); - - MCA_COLL_IBOFFLOAD_SET_ML_BUFFER_INFO(coll_op, - src_buffer_desc->buffer_index, src_buffer_desc); - - - coll_op->variable_fn_params.sbuf = (void *) src_buffer_desc->data_addr; - coll_op->variable_fn_params.rbuf = (void *) src_buffer_desc->data_addr; - - coll_op->fragment_data.message_descriptor->n_active = 1; - coll_op->full_message.n_bytes_scheduled = frag_len; - coll_op->full_message.fragment_launcher = mca_coll_ml_reduce_frag_progress; - coll_op->full_message.pipeline_depth = pipeline_depth; - coll_op->fragment_data.current_coll_op = small_data_reduce; - coll_op->fragment_data.fragment_size = frag_len; - - coll_op->variable_fn_params.count = n_dts_per_frag; /* seems fishy */ - coll_op->variable_fn_params.buffer_size = frag_len; - coll_op->variable_fn_params.src_desc = src_buffer_desc; - /* copy into the ml-buffer */ - ret = ompi_datatype_copy_content_same_ddt(dtype, n_dts_per_frag, - (char *) src_buffer_desc->data_addr, (char *) sbuf); - if (ret < 0) { - return OMPI_ERROR; - } - } else { - ML_VERBOSE(1,("Using zero-copy ptp reduce")); - coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, - ml_module->coll_ml_reduce_functions[large_data_reduce], - sbuf, rbuf, pack_len, 0); - - coll_op->variable_fn_params.userbuf = - coll_op->variable_fn_params.sbuf = sbuf; - - coll_op->variable_fn_params.rbuf = rbuf; - - /* The ML buffer is used for testing. Later, when we - * switch to use knem/mmap/portals this should be replaced - * appropriately - */ - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - while (NULL == src_buffer_desc) { - opal_progress(); - src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module); - } - - coll_op->variable_fn_params.buffer_index = src_buffer_desc->buffer_index; - coll_op->variable_fn_params.src_desc = src_buffer_desc; - coll_op->variable_fn_params.count = count; - } - - coll_op->process_fn = (rank != root) ? NULL : mca_coll_ml_reduce_unpack; - - /* Set common parts */ - coll_op->fragment_data.buffer_desc = src_buffer_desc; - coll_op->variable_fn_params.dtype = dtype; - coll_op->variable_fn_params.op = op; - - /* NTH: the root, root route, and root flag are set in the task setup */ - - /* Fill in the function arguments */ - coll_op->variable_fn_params.sbuf_offset = 0; - coll_op->variable_fn_params.rbuf_offset = (ml_module->payload_block->size_buffer - - ml_module->data_offset)/2; - - /* Keep track of the global root of this operation */ - coll_op->global_root = root; - - coll_op->variable_fn_params.sequence_num = - OPAL_THREAD_ADD32(&(ml_module->collective_sequence_num), 1); - coll_op->sequential_routine.current_active_bcol_fn = 0; - /* set the task setup callback */ - coll_op->sequential_routine.seq_task_setup = mca_coll_ml_reduce_task_setup; - - /* Reduce requires the schedule to be fixed. If we use other (changing) schedule, - the operation might result in different result. 
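That is, floating-point reduction is not associative, so a combining order that changes from run to run could produce a different rounded result.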
*/ - coll_op->coll_schedule->component_functions = coll_op->coll_schedule-> - comp_fn_arr[coll_op->coll_schedule->topo_info->route_vector[root].level]; - - /* Launch the collective */ - ret = mca_coll_ml_launch_sequential_collective (coll_op); - if (OMPI_SUCCESS != ret) { - ML_VERBOSE(10, ("Failed to launch reduce collective")); - return ret; - } - - *req = &coll_op->full_message.super; - - return OMPI_SUCCESS; -} - - -int mca_coll_ml_reduce(const void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, struct ompi_op_t *op, - int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) { - - mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t*)module; - int ret = OMPI_SUCCESS; - ompi_request_t *req; - - if (OPAL_UNLIKELY(!ompi_op_is_commute(op) || !opal_datatype_is_contiguous_memory_layout(&dtype->super, count))) { - /* coll/ml does not handle non-commutative operations at this time. Fall back - * on another collective module */ - return ml_module->fallback.coll_reduce (sbuf, rbuf, count, dtype, op, root, comm, - ml_module->fallback.coll_reduce_module); - } - - ML_VERBOSE(10,("Calling ML Reduce")); - ret = parallel_reduce_start(sbuf, rbuf, count, dtype, op, - root, comm, (mca_coll_ml_module_t *)module, - &req, ML_SMALL_DATA_REDUCE, - ML_LARGE_DATA_REDUCE); - if (OPAL_UNLIKELY(ret != OMPI_SUCCESS)) { - ML_VERBOSE(10, ("Failed to launch")); - return ret; - } - - /* Blocking reduce */ - ret = ompi_request_wait(&req, MPI_STATUS_IGNORE); - - ML_VERBOSE(10, ("Blocking Reduce is done")); - - return ret; -} - - -int mca_coll_ml_reduce_nb(const void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, struct ompi_op_t *op, - int root, struct ompi_communicator_t *comm, - ompi_request_t **req, - mca_coll_base_module_t *module) { - - int ret = OMPI_SUCCESS; - mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t*)module; - - if (OPAL_UNLIKELY(!ompi_op_is_commute(op) || !opal_datatype_is_contiguous_memory_layout(&dtype->super, count))) { - /* coll/ml does not handle non-commutative operations at this time.
fallback - * on another collective module */ - return ml_module->fallback.coll_ireduce (sbuf, rbuf, count, dtype, op, root, comm, req, - ml_module->fallback.coll_ireduce_module); - } - - ML_VERBOSE(10,("Calling Ml Reduce ")); - ret = parallel_reduce_start(sbuf, rbuf, count, dtype, op, - root, comm, ml_module, - req, ML_SMALL_DATA_REDUCE, - ML_LARGE_DATA_REDUCE); - if (OPAL_UNLIKELY(ret != OMPI_SUCCESS)) { - ML_VERBOSE(10, ("Failed to launch")); - return ret; - } - - - ML_VERBOSE(10, ("Non-blocking Reduce is done")); - - return OMPI_SUCCESS; - -} diff --git a/ompi/mca/coll/ml/coll_ml_resource_affinity.c b/ompi/mca/coll/ml/coll_ml_resource_affinity.c deleted file mode 100644 index 23d9a0fc71..0000000000 --- a/ompi/mca/coll/ml/coll_ml_resource_affinity.c +++ /dev/null @@ -1,147 +0,0 @@ -#include "opal/mca/carto/carto.h" -#include "opal/mca/carto/base/base.h" -#include "opal/util/output.h" -#include "opal/class/opal_graph.h" -#include "opal/mca/paffinity/base/base.h" -#include "ompi/constants.h" - -#include "orte/mca/ess/ess.h" -#include "coll_ml_resource_affinity.h" - -int get_dev_distance_for_all_procs(opal_carto_graph_t *graph, const char *device) -{ - opal_paffinity_base_cpu_set_t cpus; - opal_carto_base_node_t *device_node; - int min_distance = -1, i, num_processors; - - if(opal_paffinity_base_get_processor_info(&num_processors) != OMPI_SUCCESS) { - num_processors = 100; /* Choose something big enough */ - } - - device_node = opal_carto_base_find_node(graph, device); - - /* no topology info for device found. Assume that it is close */ - if(NULL == device_node) - return 0; - - OPAL_PAFFINITY_CPU_ZERO(cpus); - opal_paffinity_base_get(&cpus); - - for (i = 0; i < num_processors; i++) { - opal_carto_base_node_t *slot_node; - int distance, socket, core; - char *slot; - - if(!OPAL_PAFFINITY_CPU_ISSET(i, cpus)) - continue; - - opal_paffinity_base_get_map_to_socket_core(i, &socket, &core); - asprintf(&slot, "socket%d", socket); - - slot_node = opal_carto_base_find_node(graph, slot); - - free(slot); - - if(NULL == slot_node) - return 0; - - distance = opal_carto_base_spf(graph, slot_node, device_node); - - if(distance < 0) - return 0; - - if(min_distance < 0 || min_distance > distance) - min_distance = distance; - } - - return min_distance; -} - -int get_dev_distance_proc(opal_carto_graph_t *graph, - const char *device,int rank, struct ompi_proc_t *proc){ - opal_paffinity_base_cpu_set_t cpus; - opal_carto_base_node_t *device_node; - opal_carto_base_node_t *slot_node; - int distance, socket, core; - char *slot; - int process_id; - int nrank; - - nrank = orte_ess.get_node_rank(&(proc->proc_name)); - - opal_paffinity_base_get_physical_processor_id(nrank, &process_id); - - device_node = opal_carto_base_find_node(graph, device); - - /* no topology info for device found. 
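(opal_carto_base_find_node() returned NULL for this device.)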
Assume that it is close */ - if(NULL == device_node) - return 0; - - OPAL_PAFFINITY_CPU_ZERO(cpus); - opal_paffinity_base_get(&cpus); - - - - opal_paffinity_base_get_map_to_socket_core(process_id, &socket, &core); - asprintf(&slot, "socket%d", socket); - ML_VERBOSE(10,("The socket address is %d",socket)); - - slot_node = opal_carto_base_find_node(graph, slot); - - free(slot); - - if(NULL == slot_node) - return -1; - - distance = opal_carto_base_spf(graph, slot_node, device_node); - - if(distance < 0) - return -1; - - return distance; - -} - -int coll_ml_select_leader(mca_coll_ml_module_t *ml_module, - mca_sbgp_base_module_t *sbgp_module, - int *rank_in_comm, - struct ompi_proc_t ** procs, - int nprocs){ - - int rank, dist1, dist2,dist; - int min_dist = 10000; - int i,leader = 10000; - struct ompi_proc_t *proc = NULL; - - for (i=0; i<nprocs; i++){ - rank = rank_in_comm[sbgp_module->group_list[i]]; - proc = procs[sbgp_module->group_list[i]]; - dist1 = get_dev_distance_proc(ml_module->sm_graph,"mem0",rank,proc); - dist2 = get_dev_distance_proc(ml_module->ib_graph,"mthca0",rank,proc); - - dist = dist1 + dist2; - - ML_VERBOSE(10,("The distance for proc %d dist1 %d, dist2 %d",i,dist1,dist2)); - if ((dist < min_dist) || ((dist == min_dist) && (i < leader))) { - leader = i; - min_dist = dist; - } - } - - return leader; -} - - -int coll_ml_construct_resource_graphs(mca_coll_ml_module_t *ml_module){ - - opal_carto_base_get_host_graph(&ml_module->sm_graph,"Memory"); - opal_carto_base_get_host_graph(&ml_module->ib_graph,"Infiniband"); - - /* debug - opal_graph_print(ml_module->sm_graph); - */ - return 0; - -} diff --git a/ompi/mca/coll/ml/coll_ml_resource_affinity.h b/ompi/mca/coll/ml/coll_ml_resource_affinity.h deleted file mode 100644 index c64c214ee0..0000000000 --- a/ompi/mca/coll/ml/coll_ml_resource_affinity.h +++ /dev/null @@ -1,19 +0,0 @@ -#include "opal/mca/carto/carto.h" -#include "opal/mca/carto/base/base.h" -#include "opal/util/output.h" -#include "opal/class/opal_graph.h" -#include "coll_ml.h" - - -/* Get the host graph for SM and Infiniband */ -int discover_on_node_resources(const char device); -int get_dev_distance_for_all_procs(opal_carto_graph_t *graph, - const char *device); -int get_dev_distance_proc(opal_carto_graph_t *graph, - const char *device,int rank,struct ompi_proc_t *proc); -int coll_ml_select_leader(mca_coll_ml_module_t *ml_module, - mca_sbgp_base_module_t *sbgp_module, - int *rank_in_comm, - struct ompi_proc_t ** procs, - int nprocs); -int coll_ml_construct_resource_graphs(mca_coll_ml_module_t *ml_module); diff --git a/ompi/mca/coll/ml/coll_ml_select.c b/ompi/mca/coll/ml/coll_ml_select.c deleted file mode 100644 index a46197b869..0000000000 --- a/ompi/mca/coll/ml/coll_ml_select.c +++ /dev/null @@ -1,358 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/* - * Code for selecting a collective function. The selection is based on - * comm-time attributes and invoke-time attributes.
- * - * comm-time attributes: Attributes that can be used to filter the available - * collective functions at communicator init time. Example attributes include - * the comm size and the msg size supported by bcols. - * - * invoke-time attributes: Attributes that can be used to select the function - * for a given collective when the collective is invoked. - * - */ - -#include "coll_ml_select.h" - -static int msg_to_range(size_t msg_len) -{ - int range; - - if (msg_len < MSG_RANGE_INITIAL) { - return 1; - } - - range = (int) log10((double)((msg_len / MSG_RANGE_INITIAL))); - - if (range > NUM_MSG_RANGES) - return NUM_MSG_RANGES; - - return range; -} - -static int cmp_comm_attribs(struct mca_bcol_base_coll_fn_comm_attributes_t *attrib_var, - struct mca_bcol_base_coll_fn_comm_attributes_t *attrib_bcol){ - - - if (!(attrib_var->comm_size_max <= attrib_bcol->comm_size_max)) { - return -1 ; - } - -#if 0 /* Manju: please fix it*/ - if (attrib_var->data_src != attrib_bcol->data_src) { - return -1; - } - - if (attrib_var->waiting_semantics != - attrib_bcol->waiting_semantics) { - return -1; - } -#endif - - return 0; -} - -/* - * Table that holds function names - */ -static int init_invoke_table(mca_coll_ml_module_t *ml_module) -{ - int i=0,j=0,k=0, index_topo; - int bcoll_type; - struct mca_bcol_base_module_t *bcol_module = NULL; - int j_bcol_module=0; - int i_hier=0; - mca_coll_ml_topology_t *topo; - - for (index_topo = 0; index_topo < COLL_ML_TOPO_MAX; index_topo++) { - topo = &ml_module->topo_list[index_topo]; - if (COLL_ML_TOPO_DISABLED == topo->status) { - /* skip the topology */ - continue; - } - for (i_hier = 0; i_hier < topo->n_levels; i_hier++) { - - for (j_bcol_module = 0; - j_bcol_module < topo->component_pairs[i_hier].num_bcol_modules; - ++j_bcol_module) { - - bcol_module = topo->component_pairs[i_hier].bcol_modules[j_bcol_module]; - - for (bcoll_type = 0; bcoll_type < BCOL_NUM_OF_FUNCTIONS ; bcoll_type++){ - for (i=0; i<NUM_MSG_RANGES; i++) { - for (j=0; j<OMPI_OP_NUM_OF_TYPES; j++) { - for (k=0; k<OMPI_DATATYPE_MAX_PREDEFINED; k++) { - bcol_module->filtered_fns_table[DATA_SRC_UNKNOWN][BLOCKING][bcoll_type][i][j][k] - = NULL; - - bcol_module->filtered_fns_table[DATA_SRC_KNOWN][BLOCKING][bcoll_type][i][j][k] - = NULL; - - bcol_module->filtered_fns_table[DATA_SRC_UNKNOWN][NON_BLOCKING][bcoll_type][i][j][k] - = NULL; - - bcol_module->filtered_fns_table[DATA_SRC_KNOWN][NON_BLOCKING][bcoll_type][i][j][k] - = NULL; - - } - } - } - } - } - - } - } - - return 0; -} - -static int add_to_invoke_table(mca_bcol_base_module_t *bcol_module, - mca_bcol_base_coll_fn_desc_t *fn_filtered, - mca_coll_ml_module_t *ml_module) -{ - struct mca_bcol_base_coll_fn_invoke_attributes_t *inv_attribs = NULL; - int bcoll_type, data_src_type, waiting_semantic; - int range_min,range_max; - int i=0,j=0,k=0; - - - if((NULL == fn_filtered->inv_attr)||(NULL == fn_filtered->comm_attr)) { - return OMPI_ERROR; - } - - ML_VERBOSE(10, ("Calling add_to_invoke_table %p",fn_filtered->coll_fn)); - - inv_attribs = fn_filtered->inv_attr; - bcoll_type = fn_filtered->comm_attr->bcoll_type; - data_src_type = fn_filtered->comm_attr->data_src; - waiting_semantic = fn_filtered->comm_attr->waiting_semantics; - - range_min = msg_to_range(inv_attribs->bcol_msg_min); - range_max = msg_to_range(inv_attribs->bcol_msg_max); - - for (j=0; j<OMPI_OP_NUM_OF_TYPES; j++) { - for (k=0; k<OMPI_DATATYPE_MAX_PREDEFINED; k++) { - if ((inv_attribs->datatype_bitmap & (1ul << k)) && (inv_attribs->op_types_bitmap & (1ul << j))){ - - for (i=range_min; i<=range_max; i++) { - bcol_module->filtered_fns_table[data_src_type][waiting_semantic][bcoll_type][i][j][k] - = fn_filtered; - ML_VERBOSE(21, ("Putting functions %d %d %d %d %p", bcoll_type, i, j, k, fn_filtered)); - } - } - } - } - - return 0; - -}
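The two range helpers in this file (msg_to_range() above and count_to_msg_range() below) implement the same decade bucketing. A minimal standalone sketch of the behavior, with invented values for the two tuning macros (the real definitions live elsewhere in coll/ml):

#include <math.h>

#define MSG_RANGE_INITIAL 10240 /* assumed bucket base, for illustration only */
#define NUM_MSG_RANGES    5     /* assumed bucket count, for illustration only */

/* Bucket 1 holds everything below MSG_RANGE_INITIAL; above that, one
 * bucket per factor-of-10 growth, clamped to NUM_MSG_RANGES. */
static int example_msg_to_range(size_t msg_len)
{
    int range;

    if (msg_len < MSG_RANGE_INITIAL) {
        return 1;
    }

    range = (int) log10((double)(msg_len / MSG_RANGE_INITIAL));

    return (range > NUM_MSG_RANGES) ? NUM_MSG_RANGES : range;
}

/* example_msg_to_range(4096)    -> 1 (below the base)
 * example_msg_to_range(102400)  -> 1 (10x the base: log10(10) = 1)
 * example_msg_to_range(1048576) -> 2 (~102x the base: log10(102) ~ 2) */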
/* - * Maps count to the msg range that is used for the - * function table - * RANGE 0 is for small messages (say small msg = 10K) - * MSG RANGE 1 - 10K - 100K - * RANGE 2 - 100K - 1M - * RANGE 3 - 1M - 10M - * - * This is valid only when MSG_RANGE_INC is 10. - * For other values the function should replace log10 with a log of - * base MSG_RANGE_INC - */ -static int count_to_msg_range(int count,struct ompi_datatype_t *dtype) -{ - size_t msg_len =0,dt_size; - int range = 0 ; - - ompi_datatype_type_size(dtype, &dt_size); - msg_len = count*dt_size; - - if (msg_len < MSG_RANGE_INITIAL) { - return 1; - } - - range = (int) log10((double)((msg_len/MSG_RANGE_INITIAL))); - - if (range > NUM_MSG_RANGES) - return NUM_MSG_RANGES; - - return range; - -} - -/* Based on the attributes filled in by comm_select_attributes, - select functions for invoke-time filtering */ - - -static int build_algorithms_table(mca_coll_ml_module_t *ml_module,struct - mca_bcol_base_coll_fn_comm_attributes_t *my_comm_attrib) -{ - int i_hier, j_bcol_module, k_bcol_fn, index_topo; - struct mca_bcol_base_module_t *bcol_module = NULL; - opal_list_t *fn_filtered_list; - opal_list_item_t *item; - mca_coll_ml_topology_t *topo; - - /* - * Go through each hierarchy and for each - * bcol module in the hierarchy, select the algorithms. - */ - for (index_topo = 0; index_topo < COLL_ML_TOPO_MAX; index_topo++) { - topo = &ml_module->topo_list[index_topo]; - for (i_hier = 0; i_hier < topo->n_levels; i_hier++) { - my_comm_attrib->comm_size_max = - topo->component_pairs[i_hier].subgroup_module->group_size; - - for (j_bcol_module = 0; - j_bcol_module < topo->component_pairs[i_hier].num_bcol_modules; - ++j_bcol_module) { - - bcol_module = topo->component_pairs[i_hier].bcol_modules[j_bcol_module]; - - /* Go through all bcols and available bcol functions */ - for (k_bcol_fn = 0; k_bcol_fn < BCOL_NUM_OF_FUNCTIONS; k_bcol_fn++) { - struct mca_bcol_base_coll_fn_desc_t *fn_filtered = NULL; - - /* Query the function attributes */ - fn_filtered_list = - &(bcol_module->bcol_fns_table[k_bcol_fn]); - - - if (0 == opal_list_get_size(fn_filtered_list)) { - continue; - } - /* All definitions of a collective type are stored in the list. - * Each item in the list is checked for compatibility of its - * attributes and stored in the filtered table */ - for (item = opal_list_get_first(fn_filtered_list); - item != opal_list_get_end(fn_filtered_list); - item = opal_list_get_next(item)){ - - fn_filtered = (struct mca_bcol_base_coll_fn_desc_t *)item; - if (cmp_comm_attribs(my_comm_attrib, fn_filtered->comm_attr) < 0) { - /* Criteria not satisfied; continue to the next bcol function */ - continue; - } - - /* - * Add bcol function to be available for invoke-time selection - */ - add_to_invoke_table(bcol_module, fn_filtered, ml_module); - } - - } - } - } - } - - return 0; - -} - -int mca_coll_ml_build_filtered_fn_table(mca_coll_ml_module_t *ml_module) -{ - - struct mca_bcol_base_coll_fn_comm_attributes_t *my_comm_attrib = NULL; - - - /* Init table storing all filtered functions */ - init_invoke_table(ml_module); - - my_comm_attrib = malloc(sizeof(struct mca_bcol_base_coll_fn_comm_attributes_t)); - - if (!my_comm_attrib) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - my_comm_attrib->comm_size_min = 0; - - /* - * These values should (maybe) be passed using MCA parameters - */ -#if 0 /* Manju: please fix it*/ - my_comm_attrib->data_src = DATA_SRC_KNOWN; - my_comm_attrib->waiting_semantics = BLOCKING; -#endif - - if (build_algorithms_table(ml_module,my_comm_attrib)) { - return OMPI_ERROR; - } - - 
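/* Once the table is built, invoke-time selection (mca_select_bcol_function() later in this file) is a straight array lookup; schematically, with illustrative index values only: fn = bcol_module->filtered_fns_table[DATA_SRC_KNOWN][BLOCKING][BCOL_ALLREDUCE][range][dtype_id][op_type]. A NULL entry means no bcol function survived the comm-time filter for that combination, and the lookup path returns OMPI_ERROR. */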
free(my_comm_attrib); - - return OMPI_SUCCESS; - -} - -#if 0 -static struct mca_bcol_base_coll_fn_invoke_attributes_t *mca_construct_invoke_attributes( - struct ompi_datatype_t *dtype, int count, - struct ompi_op_t op_type) -{ - size_t dt_size, msg_size; - struct mca_bcol_base_coll_fn_invoke_attributes_t *inv_attribs = NULL; - - ompi_datatype_type_size(dtype, &dt_size); - msg_size = count*dt_size; - - - inv_attribs = malloc(sizeof(struct mca_bcol_base_coll_fn_invoke_attributes_t)); - - /* Fix : We might need to have range for msg size - For now selection will - * be based on maximum value - */ - inv_attribs->bcol_msg_min = 0; - inv_attribs->bcol_msg_max = msg_size; - - return inv_attribs; -} -#endif - -int mca_select_bcol_function(mca_bcol_base_module_t *bcol_module, - int bcoll_type, - bcol_function_args_t *bcol_fn_arguments, - mca_bcol_base_function_t *ml_fn_arguments ) -{ - - struct mca_bcol_base_coll_fn_desc_t *fn_filtered = NULL; - int msg_range=0; - int ret; - int data_src_type = DATA_SRC_KNOWN, waiting_type = BLOCKING; - - msg_range = - count_to_msg_range(bcol_fn_arguments->count, - bcol_fn_arguments->dtype); - if ((BCOL_ALLREDUCE == bcoll_type) || (BCOL_REDUCE == bcoll_type)) { - /* needs to be resolved, the op structure has changed, there is no field called "op_type" */ - fn_filtered = - bcol_module->filtered_fns_table[data_src_type][waiting_type][bcoll_type][msg_range][bcol_fn_arguments->dtype->id][bcol_fn_arguments->op->op_type]; - } - else { - fn_filtered = - bcol_module->filtered_fns_table[data_src_type][waiting_type][bcoll_type][msg_range][bcol_fn_arguments->dtype->id][0]; - - } - - if (NULL == fn_filtered) { - return OMPI_ERROR; - } - - ret = (fn_filtered->coll_fn)(bcol_fn_arguments,ml_fn_arguments); - return ret; -} - diff --git a/ompi/mca/coll/ml/coll_ml_select.h b/ompi/mca/coll/ml/coll_ml_select.h deleted file mode 100644 index 32e3706d7a..0000000000 --- a/ompi/mca/coll/ml/coll_ml_select.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef MCA_COLL_ML_SELECT_H -#define MCA_COLL_ML_SELECT_H - - -#include "ompi_config.h" - -#include -#include "ompi/datatype/ompi_datatype.h" -#include "ompi/op/op.h" -#include "ompi/mca/bcol/bcol.h" -#include "coll_ml.h" -#include "coll_ml_inlines.h" - - - -/* Forward declaration */ -struct mca_coll_ml_module_t; - -int mca_select_bcol_function(mca_bcol_base_module_t *bcol_module, - int bcoll_type, - bcol_function_args_t *bcol_fn_arguments, - mca_bcol_base_function_t *ml_fn_arguments ); -/* - * Goes through the function table and filters the collectives functions - * based on comm-time attributes. - */ -int mca_coll_ml_build_filtered_fn_table(struct mca_coll_ml_module_t *ml_module); - -#endif /* MCA_COLL_ML_SELECT_H */ diff --git a/ompi/mca/coll/ml/common_sym_whitelist.txt b/ompi/mca/coll/ml/common_sym_whitelist.txt deleted file mode 100644 index 6a99e2b40c..0000000000 --- a/ompi/mca/coll/ml/common_sym_whitelist.txt +++ /dev/null @@ -1,4 +0,0 @@ -# Ignore symbols in this component that are auto-generated and we -# can't do anything about them (e.g., flex/bison symbols). -coll_ml_config_yyleng -coll_ml_config_yytext diff --git a/ompi/mca/coll/ml/help-mpi-coll-ml.txt b/ompi/mca/coll/ml/help-mpi-coll-ml.txt deleted file mode 100644 index 60ca60dfa1..0000000000 --- a/ompi/mca/coll/ml/help-mpi-coll-ml.txt +++ /dev/null @@ -1,64 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2009-2014 Oak Ridge National Laboratory. All rights reserved. -# Copyright (c) 2014 Research Organization for Information Science -# and Technology (RIST). All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English help file for Open MPI's Hierarchical Collective -# Component (coll/ml). -# -[empty-sub-group] -ML topology configuration explicitly requested for this subgroup: - - %s - -This configuration results in the creation of empty groups. As a result, the ML -framework cannot configure the requested collective operations and will be -disabled. One configuration that might enable the ML component is ---mca bcol_base_string basesmuma,ptpcoll ---mca sbgp_base_subgroups_string basesmuma,p2p - -[allreduce-not-supported] -This BCOL is configured in one of the hierarchy levels: - - %s - -The BCOL does not support Allreduce for all -operation and datatype combinations. In addition, you did not suggest -alternate topology building configurations. - -[allreduce-alt-nosupport] -The hierarchy is configured with an alternate BCOL: - - %s - -Neither the original topology nor the alternate topology supports Allreduce for all -operation and datatype combinations. In addition, you did not suggest -alternate topology building configurations. - -[fragmentation-disabled] - -ML could not be used because the mca param coll_ml_enable_fragmentation -was set to zero and there is a bcol that does not support the -zero-copy method. - -[static-bcast-disabled] - -ML could not be used because the mca param coll_ml_bcast_algorithm -was not set to static and no other broadcast implementation was available. - -[coll-ml-check-error] - -ML detected an error on communicator %s - -This communicator cannot be used any more - -[coll-ml-check-fatal-error] - -ML detected an unrecoverable error on intrinsic communicator %s - -The program will now abort diff --git a/ompi/mca/coll/ml/mca-coll-ml.config b/ompi/mca/coll/ml/mca-coll-ml.config deleted file mode 100644 index 6410b11923..0000000000 --- a/ompi/mca/coll/ml/mca-coll-ml.config +++ /dev/null @@ -1,170 +0,0 @@ -################################## -# ML collective configuration file -################################## -# NOTE (by Pasha): -# Since the ML configuration infrastructure is limited at this stage, we do not support some tunings; the parser -# understands these values and keys, but we have no place to load all of them. -# threshold - ML infrastructure does not handle multiple thresholds. -# fragmentation - ML infrastructure does not support fragmentation tuning per collective. -################################## - -# Defining collective section -[BARRIER] -# Defining message size section. We will support small/large. In the future we may add more options. Barrier is a special case, because it is the only collective that does not transfer any data, so for this specific case we use small - -# Since ML does not define any algorithm for BARRIER, we just use the default. Later we will have to introduce an algorithm name for Barrier -algorithm = ML_BARRIER_DEFAULT - -# Hierarchy setup: -# -# full_hr - means all possible levels of hierarchy (the list of possible levels is defined on the user command line) -# full_hr_no_basesocket - means all possible levels of hierarchy (the list of possible levels is defined on the user command line) -# except the basesocket subgroup.
-# ptp_only - only ptp hierarchy -# iboffload_only - only iboffload hierarchy -hierarchy = full_hr - -[IBARRIER] - -algorithm = ML_BARRIER_DEFAULT -hierarchy = full_hr - -[BCAST] - -# bcast supports: ML_BCAST_SMALL_DATA_KNOWN, ML_BCAST_SMALL_DATA_UNKNOWN, ML_BCAST_SMALL_DATA_SEQUENTIAL -algorithm = ML_BCAST_SMALL_DATA_KNOWN -hierarchy = full_hr - -# bcast supports: ML_BCAST_LARGE_DATA_KNOWN, ML_BCAST_LARGE_DATA_UNKNOWN, ML_BCAST_LARGE_DATA_SEQUENTIAL -algorithm = ML_BCAST_LARGE_DATA_KNOWN -hierarchy = full_hr - -[IBCAST] - -algorithm = ML_BCAST_SMALL_DATA_KNOWN -hierarchy = full_hr - -algorithm = ML_BCAST_LARGE_DATA_KNOWN -hierarchy = full_hr - -[GATHER] - -# gather supports: ML_SMALL_DATA_GATHER -algorithm = ML_SMALL_DATA_GATHER -hierarchy = full_hr - -# gather supports: ML_LARGE_DATA_GATHER -algorithm = ML_LARGE_DATA_GATHER -hierarchy = full_hr - -[IGATHER] - -# gather supports: ML_SMALL_DATA_GATHER -algorithm = ML_SMALL_DATA_GATHER -hierarchy = full_hr - -# gather supports: ML_LARGE_DATA_GATHER -algorithm = ML_LARGE_DATA_GATHER -hierarchy = full_hr - -[ALLGATHER] - -# allgather supports: ML_SMALL_DATA_ALLGATHER -algorithm = ML_SMALL_DATA_ALLGATHER -hierarchy = full_hr - -# allgather supports: ML_LARGE_DATA_ALLGATHER -algorithm = ML_LARGE_DATA_ALLGATHER -hierarchy = full_hr - -[IALLGATHER] - -# allgather supports: ML_SMALL_DATA_ALLGATHER -algorithm = ML_SMALL_DATA_ALLGATHER -hierarchy = full_hr - -# allgather supports: ML_LARGE_DATA_ALLGATHER -algorithm = ML_LARGE_DATA_ALLGATHER -hierarchy = full_hr - -[ALLTOALL] - -# alltoall supports: ML_SMALL_DATA_ALLTOALL -algorithm = ML_SMALL_DATA_ALLTOALL -hierarchy = ptp_only - -# alltoall supports: ML_LARGE_DATA_ALLTOALL -algorithm = ML_LARGE_DATA_ALLTOALL -hierarchy = ptp_only - -[IALLTOALL] - -# alltoall supports: ML_SMALL_DATA_ALLTOALL -algorithm = ML_SMALL_DATA_ALLTOALL -hierarchy = ptp_only - -# alltoall supports: ML_LARGE_DATA_ALLTOALL -algorithm = ML_LARGE_DATA_ALLTOALL -hierarchy = ptp_only - -[ALLREDUCE] - -# allreduce supports: ML_SMALL_DATA_ALLREDUCE -algorithm = ML_SMALL_DATA_ALLREDUCE -hierarchy = full_hr - -# allreduce supports: ML_LARGE_DATA_ALLREDUCE -algorithm = ML_LARGE_DATA_ALLREDUCE -hierarchy = full_hr - -[IALLREDUCE] - -# allreduce supports: ML_SMALL_DATA_ALLREDUCE -algorithm = ML_SMALL_DATA_ALLREDUCE -hierarchy = full_hr - -# allreduce supports: ML_LARGE_DATA_ALLREDUCE -algorithm = ML_LARGE_DATA_ALLREDUCE -hierarchy = full_hr - -[REDUCE] - -# reduce supports: ML_SMALL_DATA_REDUCE -algorithm = ML_SMALL_DATA_REDUCE -hierarchy = full_hr - -# reduce supports: ML_LARGE_DATA_REDUCE -algorithm = ML_LARGE_DATA_REDUCE -hierarchy = full_hr - -[IREDUCE] - -# reduce supports: ML_SMALL_DATA_REDUCE -algorithm = ML_SMALL_DATA_REDUCE -hierarchy = full_hr - -# reduce supports: ML_LARGE_DATA_REDUCE -algorithm = ML_LARGE_DATA_REDUCE -hierarchy = full_hr - - - -[SCATTER] - -# scatter supports: ML_SCATTER_SMALL_DATA_SEQUENTIAL -algorithm = ML_SCATTER_SMALL_DATA_SEQUENTIAL -hierarchy = full_hr - -# scatter supports: ML_SCATTER_SMALL_DATA_SEQUENTIAL -algorithm = ML_SCATTER_SMALL_DATA_SEQUENTIAL -hierarchy = full_hr - -[ISCATTER] - -# scatter supports: ML_SCATTER_SMALL_DATA_SEQUENTIAL -algorithm = ML_SCATTER_SMALL_DATA_SEQUENTIAL -hierarchy = full_hr - -# scatter supports: ML_SCATTER_SMALL_DATA_SEQUENTIAL -algorithm = ML_SCATTER_SMALL_DATA_SEQUENTIAL -hierarchy = full_hr diff --git a/ompi/mca/coll/ml/owner.txt b/ompi/mca/coll/ml/owner.txt deleted file mode 100644 index 51ea04a517..0000000000 --- 
a/ompi/mca/coll/ml/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: ORNL? -status: unmaintained diff --git a/ompi/mca/sbgp/Makefile.am b/ompi/mca/sbgp/Makefile.am deleted file mode 100644 index d07ea3306b..0000000000 --- a/ompi/mca/sbgp/Makefile.am +++ /dev/null @@ -1,36 +0,0 @@ -# -# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. -# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - - -# main library setup -noinst_LTLIBRARIES = libmca_sbgp.la -libmca_sbgp_la_SOURCES = - -# header setup -nobase_ompi_HEADERS = -nobase_nodist_ompi_HEADERS = - -# local files -headers = sbgp.h -libmca_sbgp_la_SOURCES += $(headers) $(nodist_headers) - -# Conditionally install the header files -if WANT_INSTALL_HEADERS -nobase_ompi_HEADERS += $(headers) -nobase_nodist_ompi_HEADERS += $(nodist_headers) -ompidir = $(ompiincludedir)/ompi/mca/sbgp -else -ompidir = $(includedir) -endif - -include base/Makefile.am - -distclean-local: - rm -f base/static-components.h diff --git a/ompi/mca/sbgp/base/Makefile.am b/ompi/mca/sbgp/base/Makefile.am deleted file mode 100644 index c520ef7bb7..0000000000 --- a/ompi/mca/sbgp/base/Makefile.am +++ /dev/null @@ -1,17 +0,0 @@ -# -# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. -# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. -# Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - - -headers += \ - base/base.h -libmca_sbgp_la_SOURCES += \ - base/sbgp_base_frame.c \ - base/sbgp_base_init.c diff --git a/ompi/mca/sbgp/base/base.h b/ompi/mca/sbgp/base/base.h deleted file mode 100644 index f421aac1dd..0000000000 --- a/ompi/mca/sbgp/base/base.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_SBGP_BASE_H -#define MCA_SBGP_BASE_H - -#include "ompi_config.h" - -#include "ompi/mca/mca.h" -#include "opal/mca/base/mca_base_framework.h" -/* - * Global functions for SBGP - */ - -/* components in use */ -OMPI_MODULE_DECLSPEC extern opal_list_t mca_sbgp_base_components_in_use; -OMPI_MODULE_DECLSPEC extern int mca_sbgp_base_components_in_use_inited; -OMPI_DECLSPEC extern char *ompi_sbgp_subgroups_string; - -BEGIN_C_DECLS - -/* - * MCA Framework - */ -OMPI_DECLSPEC extern mca_base_framework_t ompi_sbgp_base_framework; - -/* select a component */ -OMPI_DECLSPEC int mca_sbgp_base_init(bool, bool); - -/* subgrouping component and key value */ -struct sbgp_base_component_keyval_t { - mca_base_component_list_item_t component; - char *key_value; -}; -typedef struct sbgp_base_component_keyval_t sbgp_base_component_keyval_t; -OBJ_CLASS_DECLARATION(sbgp_base_component_keyval_t); - -END_C_DECLS - -#endif /* MCA_SBGP_BASE_H */ diff --git a/ompi/mca/sbgp/base/owner.txt b/ompi/mca/sbgp/base/owner.txt deleted file mode 100644 index 1c86df367b..0000000000 --- a/ompi/mca/sbgp/base/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: ORNL -status: unmaintained diff --git a/ompi/mca/sbgp/base/sbgp_base_close.c b/ompi/mca/sbgp/base/sbgp_base_close.c deleted file mode 100644 index cc7dd26c4e..0000000000 --- a/ompi/mca/sbgp/base/sbgp_base_close.c +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "ompi_config.h" - -#include - -#include "ompi/constants.h" -#include "ompi/mca/mca.h" -#include "opal/mca/base/base.h" -#include "ompi/mca/sbgp/sbgp.h" -#include "ompi/mca/sbgp/base/base.h" -#include "ompi/include/ompi/constants.h" - - -int mca_sbgp_base_close(void) -{ - - /* Close all remaining available modules */ - - mca_base_components_close(ompi_sbgp_base_framework.framework_output, - &mca_sbgp_base_components_opened, NULL); - - /* Close the framework output */ - opal_output_close (ompi_sbgp_base_framework.framework_output); - ompi_sbgp_base_framework.framework_output = -1; - - /* All done */ - - return OMPI_SUCCESS; -} - diff --git a/ompi/mca/sbgp/base/sbgp_base_frame.c b/ompi/mca/sbgp/base/sbgp_base_frame.c deleted file mode 100644 index a0091e3532..0000000000 --- a/ompi/mca/sbgp/base/sbgp_base_frame.c +++ /dev/null @@ -1,205 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2012-2014 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include <stdio.h> - -#ifdef HAVE_UNISTD_H -#include <unistd.h> -#endif /* HAVE_UNISTD_H */ -#include "ompi/mca/mca.h" -#include "opal/mca/base/base.h" - -#include "ompi/mca/sbgp/sbgp.h" -#include "ompi/mca/sbgp/base/base.h" -#include "ompi/include/ompi/constants.h" -#include "opal/util/argv.h" - -/* - * The following file was created by configure. It contains extern - * statements and the definition of an array of pointers to each - * component's public mca_base_component_t struct. - */ - -#include "ompi/mca/sbgp/base/static-components.h" - -/* -** * Global variables -** */ -opal_list_t mca_sbgp_base_components_in_use = {{0}}; -int mca_sbgp_base_components_in_use_inited=0; -OMPI_DECLSPEC char *ompi_sbgp_subgroups_string = NULL; - -static void mca_sbgp_base_destruct (mca_sbgp_base_module_t *module) -{ - /* free the list of ranks */ - if(module->group_list ) { - free(module->group_list); - module->group_list=NULL; - } -} - -OBJ_CLASS_INSTANCE(mca_sbgp_base_module_t, - opal_object_t, - NULL, - mca_sbgp_base_destruct); - -OBJ_CLASS_INSTANCE(sbgp_base_component_keyval_t, - mca_base_component_list_item_t, - NULL, - NULL); - -/* get list of subgrouping components to use */ -static int ompi_sbgp_set_components_to_use(opal_list_t *sbgp_components_avail, - opal_list_t *sbgp_components_in_use) -{ - /* local variables */ - const mca_base_component_t *component; - mca_base_component_list_item_t *cli; - sbgp_base_component_keyval_t *clj; - char **subgroups_requested = NULL, **sbgp_string = NULL; - char *sbgp_component, *sbgp_key; - const char *component_name; - int i, sbgp_size = 0, - sbgp_string_size = 0, - rc = OMPI_SUCCESS; - - /* split the list of requested subgroups */ - subgroups_requested = opal_argv_split(ompi_sbgp_subgroups_string, ','); - if(NULL == subgroups_requested) { - return OMPI_ERROR; - } - sbgp_size = opal_argv_count (subgroups_requested); - - /* Initialize list */ - OBJ_CONSTRUCT(sbgp_components_in_use, opal_list_t); - - /* loop over list of components requested */ - for (i = 0; i < sbgp_size; i++) { - /* get key-value */ - sbgp_string = opal_argv_split(subgroups_requested[i], ':'); - if (NULL == sbgp_string) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - break; - } - - sbgp_string_size = opal_argv_count (sbgp_string); - if (sbgp_string_size < 1 || sbgp_string_size > 2) { - opal_output(ompi_sbgp_base_framework.framework_output, - "Requested SBGP configuration is illegal %s", - subgroups_requested[i]); - opal_argv_free (sbgp_string); - rc = OMPI_ERROR; - break; - } - - /* it is guaranteed that sbgp_string[1] will either be NULL (count = 1) or a string */ - sbgp_key = sbgp_string[1]; - sbgp_component = sbgp_string[0]; - - /* loop over discovered components */ - OPAL_LIST_FOREACH(cli, sbgp_components_avail, mca_base_component_list_item_t) { - component = cli->cli_component; - component_name = component->mca_component_name; - - /* key_value[0] has the component name, and key_value[1], if - ** it is not NULL, has the key_value associated with this - ** instance of the component - */ - - if (0 == strcmp (component_name, sbgp_component)) { - /* found selected component */ - clj = OBJ_NEW(sbgp_base_component_keyval_t); - if (NULL == clj) { - rc = OPAL_ERR_OUT_OF_RESOURCE; - opal_argv_free (sbgp_string); - goto exit_ERROR; - } - /* fprintf(stderr,"sbgp selecting %s %s\n", sbgp_component, component_name); */ - - clj->component.cli_component = component; - if (NULL != sbgp_key) { - clj->key_value = strdup(sbgp_key); - }
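/* Illustration with a made-up key: if ompi_sbgp_subgroups_string were "basesmsocket,ibnet:mlx4_0", the outer split on ',' yields two entries; splitting "ibnet:mlx4_0" on ':' gives sbgp_component = "ibnet" with sbgp_key = "mlx4_0" (the strdup() branch above), while plain "basesmsocket" has no key and takes the else branch below with key_value = NULL. */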
else { - clj->key_value = NULL; - } - opal_list_append(sbgp_components_in_use, (opal_list_item_t *)clj); - break; - } - } - - opal_argv_free (sbgp_string); - } - - /* Note: Need to add error checking to make sure all requested functions - ** were found */ - - /* - ** release resources - ** */ - exit_ERROR: - opal_argv_free (subgroups_requested); - - return rc; -} - -static int mca_sbgp_base_register(mca_base_register_flag_t flags) -{ - /* get list of sub-grouping functions to use */ - ompi_sbgp_subgroups_string = "basesmsocket,basesmuma,ibnet,p2p"; - (void) mca_base_var_register("ompi", "sbgp", "base", "subgroups_string", - "Default set of subgroup operations to apply ", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_LOCAL, - &ompi_sbgp_subgroups_string); - - return OMPI_SUCCESS; -} - -static int mca_sbgp_base_close(void) -{ - opal_list_item_t *item; - - while (NULL != (item = opal_list_remove_first (&mca_sbgp_base_components_in_use))) { - OBJ_RELEASE(item); - } - - OBJ_DESTRUCT(&mca_sbgp_base_components_in_use); - - return mca_base_framework_components_close(&ompi_sbgp_base_framework, NULL); -} - -/** - * Function for finding and opening either all MCA components, or the one - * that was specifically requested via a MCA parameter. - */ -static int mca_sbgp_base_open(mca_base_open_flag_t flags) -{ - int ret; - - if (OMPI_SUCCESS != (ret = mca_base_framework_components_open(&ompi_sbgp_base_framework, flags))) { - return ret; - } - - ret = ompi_sbgp_set_components_to_use(&ompi_sbgp_base_framework.framework_components, - &mca_sbgp_base_components_in_use); - - return ret; -} - -MCA_BASE_FRAMEWORK_DECLARE(ompi, sbgp, "OMPI Subgroup Subsystem", mca_sbgp_base_register, - mca_sbgp_base_open, mca_sbgp_base_close, - mca_sbgp_base_static_components, 0); - diff --git a/ompi/mca/sbgp/base/sbgp_base_init.c b/ompi/mca/sbgp/base/sbgp_base_init.c deleted file mode 100644 index d1f66da9b5..0000000000 --- a/ompi/mca/sbgp/base/sbgp_base_init.c +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mca/mca.h" -#include "opal/mca/base/base.h" -#include "ompi/mca/sbgp/sbgp.h" -#include "ompi/mca/sbgp/base/base.h" -#include "ompi/include/ompi/constants.h" - -int mca_sbgp_base_init(bool enable_progress_threads, bool enable_mpi_threads) -{ - mca_sbgp_base_component *sbgp_component = NULL; - mca_base_component_list_item_t *cli; - opal_list_item_t *item; - int ret; - - /* loop over component initialization functions */ - for (item = opal_list_get_first((opal_list_t *) &mca_sbgp_base_components_in_use); - opal_list_get_end((opal_list_t *) &mca_sbgp_base_components_in_use) != item; - item = opal_list_get_next(item)) { - - cli = (mca_base_component_list_item_t *) item; - sbgp_component = (mca_sbgp_base_component *)cli->cli_component; - - ret = sbgp_component->sbgp_init_query(true, true); - if( OMPI_SUCCESS != ret) { - return ret; - } - } - - return OMPI_SUCCESS; -} - diff --git a/ompi/mca/sbgp/basesmsocket/Makefile.am b/ompi/mca/sbgp/basesmsocket/Makefile.am deleted file mode 100644 index e255546573..0000000000 --- a/ompi/mca/sbgp/basesmsocket/Makefile.am +++ /dev/null @@ -1,41 +0,0 @@ -# -# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. -# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. 
-# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - sbgp_basesmsocket.h \ - sbgp_basesmsocket_component.c \ - sbgp_basesmsocket_module.c - - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -component_noinst = -component_install = -if MCA_BUILD_ompi_sbgp_basesmsocket_DSO -component_install += mca_sbgp_basesmsocket.la -else -component_noinst += libmca_sbgp_basesmsocket.la -endif - -# See ompi/mca/btl/sm/Makefile.am for an explanation of -# libmca_common_sm.la. - -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_sbgp_basesmsocket_la_SOURCES = $(sources) -mca_sbgp_basesmsocket_la_LDFLAGS = -module -avoid-version -mca_sbgp_basesmsocket_la_LIBADD = - -noinst_LTLIBRARIES = $(component_noinst) -libmca_sbgp_basesmsocket_la_SOURCES =$(sources) -libmca_sbgp_basesmsocket_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/sbgp/basesmsocket/owner.txt b/ompi/mca/sbgp/basesmsocket/owner.txt deleted file mode 100644 index 1c86df367b..0000000000 --- a/ompi/mca/sbgp/basesmsocket/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: ORNL -status: unmaintained diff --git a/ompi/mca/sbgp/basesmsocket/sbgp_basesmsocket.h b/ompi/mca/sbgp/basesmsocket/sbgp_basesmsocket.h deleted file mode 100644 index 739f913335..0000000000 --- a/ompi/mca/sbgp/basesmsocket/sbgp_basesmsocket.h +++ /dev/null @@ -1,81 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BCOL_basesmsocket_EXPORT_H -#define MCA_BCOL_basesmsocket_EXPORT_H - -#include "ompi_config.h" - -#include "mpi.h" -#include "ompi/mca/mca.h" -#include "ompi/mca/sbgp/sbgp.h" -#include "ompi/mca/sbgp/base/base.h" -#include "opal/mca/mpool/mpool.h" -#include "ompi/request/request.h" -#include "ompi/proc/proc.h" -#include "opal/util/output.h" - -BEGIN_C_DECLS - -#ifdef HAVE_SCHED_YIELD -# include -# define SPIN sched_yield() -#else /* no switch available */ -# define SPIN -#endif - -#define BASESMSOCKET_VERBOSE(level, ...) \ - do { \ - OPAL_OUTPUT_VERBOSE((ompi_sbgp_base_framework.framework_output, level, \ - __VA_ARGS__)); \ - } while(0); - -/** - * Structure to hold the basic shared memory coll component. First it holds the - * base coll component, and then holds a bunch of - * sm-coll-component-specific stuff (e.g., current MCA param - * values). 
- */ -struct mca_sbgp_basesmsocket_component_t { - /** Base coll component */ - mca_sbgp_base_component_2_0_0_t super; -}; - -/** - * Convenience typedef - */ -typedef struct mca_sbgp_basesmsocket_component_t - mca_sbgp_basesmsocket_component_t; - - -/* -** Base sub-group module -**/ - -struct mca_sbgp_basesmsocket_module_t { - /** Collective modules all inherit from opal_object */ - mca_sbgp_base_module_t super; - -}; -typedef struct mca_sbgp_basesmsocket_module_t mca_sbgp_basesmsocket_module_t; -OBJ_CLASS_DECLARATION(mca_sbgp_basesmsocket_module_t); - -/** -* Global component instance -*/ -OMPI_MODULE_DECLSPEC extern mca_sbgp_basesmsocket_component_t mca_sbgp_basesmsocket_component; - - -END_C_DECLS - -#endif /* MCA_BCOL_basesmsocket_EXPORT_H */ diff --git a/ompi/mca/sbgp/basesmsocket/sbgp_basesmsocket_component.c b/ompi/mca/sbgp/basesmsocket/sbgp_basesmsocket_component.c deleted file mode 100644 index d2cf31d416..0000000000 --- a/ompi/mca/sbgp/basesmsocket/sbgp_basesmsocket_component.c +++ /dev/null @@ -1,305 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#include "ompi_config.h" - -#ifdef HAVE_UNISTD_H -#include -#endif -#include -#ifdef HAVE_SYS_MMAN_H -#include -#endif -#ifdef HAVE_FCNTL_H -#include -#endif - -#include "opal/mca/hwloc/hwloc.h" -#include "opal/mca/hwloc/base/base.h" -#include "opal/dss/dss_internal.h" -#include "opal/class/opal_object.h" - -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "sbgp_basesmsocket.h" - -#include "ompi/patterns/comm/coll_ops.h" - - -/* - * Public string showing the coll ompi_sm V2 component version number - */ -const char *mca_sbgp_basesmsocket_component_version_string = - "Open MPI sbgp - basesmsocket collective MCA component version " OMPI_VERSION; - - -/* - * Local functions - */ - -static int basesmsocket_register(void); -static int basesmsocket_open(void); -static int basesmsocket_close(void); -static mca_sbgp_base_module_t *mca_sbgp_basesmsocket_select_procs(struct ompi_proc_t ** procs, - int n_procs_in, - struct ompi_communicator_t *comm, - char *key, - void *output_data - ); -static int mca_sbgp_basesmsocket_init_query(bool enable_progress_threads, - bool enable_mpi_threads); -/*----end local functions ----*/ - -/* - * Instantiate the public struct with all of our public information - * and pointers to our public functions in it - */ - -mca_sbgp_basesmsocket_component_t mca_sbgp_basesmsocket_component = { - - /* First, fill in the super */ - - { - /* First, the mca_component_t struct containing meta - information about the component itself */ - - .sbgp_version = { - MCA_SBGP_BASE_VERSION_2_0_0, - - /* Component name and version */ - - .mca_component_name = "basesmsocket", - MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION), - - /* Component open and close functions */ - - .mca_open_component = basesmsocket_open, - .mca_close_component = basesmsocket_close, - .mca_register_component_params = basesmsocket_register, - }, - - .sbgp_init_query = mca_sbgp_basesmsocket_init_query, - .select_procs = 
mca_sbgp_basesmsocket_select_procs, - .priority = 0, - } -}; - -/* - * Register the component - */ -static int basesmsocket_register(void) -{ - mca_sbgp_basesmsocket_component_t *cs = &mca_sbgp_basesmsocket_component; - - cs->super.priority = 90; - (void) mca_base_component_var_register(&mca_sbgp_basesmsocket_component.super.sbgp_version, - "priority", "Priority for the sbgp basesmsocket component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, &cs->super.priority); - - return OMPI_SUCCESS; -} - -/* - * Open the component - */ -static int basesmsocket_open(void) -{ - return OMPI_SUCCESS; -} - -/* - * Close the component - */ -static int basesmsocket_close(void) -{ - return OMPI_SUCCESS; -} - -/* query to see if the component is available for use, and can - * satisfy the thread and progress requirements - */ -int mca_sbgp_basesmsocket_init_query(bool enable_progress_threads, - bool enable_mpi_threads) -{ - /* at this stage there is no reason to disqualify this component */ - - /* done */ - return OMPI_SUCCESS; -} - -#if 0 -/* NTH: this is no longer used but may be used if we can determine the binding policy*/ -static int mca_sbgp_map_to_logical_socket_id(int *socket) -{ - int ret = OMPI_SUCCESS; - hwloc_obj_t obj; - hwloc_obj_t first_pu_object; - hwloc_bitmap_t good; - int pu_os_index = -1, my_logical_socket_id = -1; - int this_pus_logical_socket_id = -1; - - *socket = my_logical_socket_id; - - /* bozo check */ - if (NULL == opal_hwloc_topology) { - return OPAL_ERR_NOT_INITIALIZED; - } - - good = hwloc_bitmap_alloc(); - if (NULL == good) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - - /* get this process' CPU binding */ - if( 0 != hwloc_get_cpubind(opal_hwloc_topology,good, 0)){ - /* report some error */ - BASESMSOCKET_VERBOSE(10, "The global variable opal_hwloc_topology appears not to have been initialized\n"); - hwloc_bitmap_free(good); - return OMPI_ERROR; - } - - /* find the first logical PU object in the hwloc tree */ - first_pu_object = hwloc_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_PU, 0); - - - /* get the next bit in the bitmap (note: if pu_os_index == -1, then the - * first bit is returned - */ - /* traverse the hwloc tree */ - while( -1 != (pu_os_index = hwloc_bitmap_next(good, pu_os_index) ) ) { - /* Traverse all PUs in the machine in logical order, in the simple case - * there should only be a single PU that this process is bound to, right? - * - */ - for( obj = first_pu_object; obj != NULL; obj = obj->next_cousin ) {/* next_cousin walks objects of the same type in logical order */ - /* is this PU the same as the bit I pulled off the mask? */ - if( obj->os_index == (unsigned int) pu_os_index) { - /* Then I found it, break out of for loop */ - break; - } - } - - if( NULL != obj) { - /* if we found the PU, then go upward in the tree - * looking for the enclosing socket - */ - while( (NULL != obj) && ( HWLOC_OBJ_SOCKET != obj->type) ){ - obj = obj->parent; - } - - if( NULL == obj ) { - /* then we couldn't find an enclosing socket, report this */ - } else { - /* We found the enclosing socket */ - if( -1 == my_logical_socket_id ){ - /* this is the first PU that I'm bound to */ - this_pus_logical_socket_id = obj->logical_index; - my_logical_socket_id = this_pus_logical_socket_id; - } else { - /* this is not the first PU that I'm bound to. - * Seems I'm bound to more than a single PU. Question - * is, am I bound to the same socket??
- */ - /* in order to get rid of the compiler warning, I had to cast - * "this_pus_logical_socket_id", at a glance this seems ok, - * but if subgrouping problems arise, maybe look here. I shall - * tag this line with the "mark of the beast" for grepability - * 666 - */ - if( (unsigned int) this_pus_logical_socket_id != obj->logical_index ){ - /* 666 */ - /* Then we're bound to more than one socket...fail */ - this_pus_logical_socket_id = -1; - my_logical_socket_id = -1; - break; - } - } - } - - } - - /* end while */ - } - *socket = my_logical_socket_id; - hwloc_bitmap_free(good); - - return ret; - -} -#endif - -/* This routine is used to find the list of procs that run on the -** same host as the calling process. -*/ - -static mca_sbgp_base_module_t *mca_sbgp_basesmsocket_select_procs(struct ompi_proc_t ** procs, - int n_procs_in, - struct ompi_communicator_t *comm, - char *key, - void *output_data - ) -{ - /* local variables */ - mca_sbgp_basesmsocket_module_t *module; - int proc, cnt, n_local_peers; - - /* initialize data */ - for (proc = 0, n_local_peers = 0 ; proc < n_procs_in ; ++proc) { - if (OPAL_PROC_ON_LOCAL_SOCKET(procs[proc]->super.proc_flags)) { - n_local_peers++; - } - } - - /* we need to return a module even if there is only one local peer. this - * covers the case where there may be a basesmsocket module on one rank - * but not another */ - if (0 == n_local_peers) { - return NULL; - } - - /* create a new module */ - module = OBJ_NEW(mca_sbgp_basesmsocket_module_t); - if (!module) { - return NULL; - } - - module->super.group_size = n_local_peers; - module->super.group_comm = comm; - module->super.group_list = NULL; - module->super.group_net = OMPI_SBGP_SOCKET; - - /* allocate memory and fill in the group_list */ - module->super.group_list = (int *) calloc (n_local_peers, sizeof(int)); - if (NULL == module->super.group_list) { - OBJ_RELEASE(module); - return NULL; - } - - for (proc = 0, cnt = 0 ; proc < n_procs_in ; ++proc) { - if (OPAL_PROC_ON_LOCAL_SOCKET(procs[proc]->super.proc_flags)) { - module->super.group_list[cnt++] = proc; - } - } - - /* Return the module */ - return (mca_sbgp_base_module_t *) module; -} diff --git a/ompi/mca/sbgp/basesmsocket/sbgp_basesmsocket_module.c b/ompi/mca/sbgp/basesmsocket/sbgp_basesmsocket_module.c deleted file mode 100644 index 7f075eecdd..0000000000 --- a/ompi/mca/sbgp/basesmsocket/sbgp_basesmsocket_module.c +++ /dev/null @@ -1,35 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2014 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#include "ompi_config.h" -#ifdef HAVE_UNISTD_H -#include -#endif -#include -#ifdef HAVE_SYS_MMAN_H -#include -#endif -#include -#include - -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/sbgp/basesmsocket/sbgp_basesmsocket.h" - -OBJ_CLASS_INSTANCE(mca_sbgp_basesmsocket_module_t, - mca_sbgp_base_module_t, NULL, NULL); diff --git a/ompi/mca/sbgp/basesmuma/Makefile.am b/ompi/mca/sbgp/basesmuma/Makefile.am deleted file mode 100644 index 03470b69ae..0000000000 --- a/ompi/mca/sbgp/basesmuma/Makefile.am +++ /dev/null @@ -1,41 +0,0 @@ -# -# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. -# Copyright (c) 2009-2012 Mellanox Technologies. 
All rights reserved. -# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - sbgp_basesmuma.h \ - sbgp_basesmuma_component.c \ - sbgp_basesmuma_module.c - - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -component_noinst = -component_install = -if MCA_BUILD_ompi_sbgp_basesmuma_DSO -component_install += mca_sbgp_basesmuma.la -else -component_noinst += libmca_sbgp_basesmuma.la -endif - -# See ompi/mca/btl/sm/Makefile.am for an explanation of -# libmca_common_sm.la. - -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_sbgp_basesmuma_la_SOURCES = $(sources) -mca_sbgp_basesmuma_la_LDFLAGS = -module -avoid-version -mca_sbgp_basesmuma_la_LIBADD = - -noinst_LTLIBRARIES = $(component_noinst) -libmca_sbgp_basesmuma_la_SOURCES =$(sources) -libmca_sbgp_basesmuma_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/sbgp/basesmuma/owner.txt b/ompi/mca/sbgp/basesmuma/owner.txt deleted file mode 100644 index 1c86df367b..0000000000 --- a/ompi/mca/sbgp/basesmuma/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: ORNL -status: unmaintained diff --git a/ompi/mca/sbgp/basesmuma/sbgp_basesmuma.h b/ompi/mca/sbgp/basesmuma/sbgp_basesmuma.h deleted file mode 100644 index efe501e046..0000000000 --- a/ompi/mca/sbgp/basesmuma/sbgp_basesmuma.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#ifndef MCA_BCOL_basesmuma_EXPORT_H -#define MCA_BCOL_basesmuma_EXPORT_H - -#include "ompi_config.h" - -#include "mpi.h" -#include "ompi/mca/mca.h" -#include "ompi/mca/sbgp/sbgp.h" -#include "opal/mca/mpool/mpool.h" -#include "ompi/request/request.h" -#include "ompi/proc/proc.h" - -BEGIN_C_DECLS - -#ifdef HAVE_SCHED_YIELD -# include -# define SPIN sched_yield() -#else /* no switch available */ -# define SPIN -#endif - - - /** - * Structure to hold the basic shared memory coll component. First it holds the - * base coll component, and then holds a bunch of - * sm-coll-component-specific stuff (e.g., current MCA param - * values). 
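 *
 * As a rough illustration (a sketch, not part of the original header):
 * other code reaches this component through its single global instance,
 * e.g.
 *
 *   mca_sbgp_basesmuma_component_t *cs = &mca_sbgp_basesmuma_component;
 *   cs->super.priority = 90;  // selection priority, set in basesmuma_register()
 *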
- */ - struct mca_sbgp_basesmuma_component_t { - /** Base coll component */ - mca_sbgp_base_component_2_0_0_t super; - - }; - - /** - * Convenience typedef - */ - typedef struct mca_sbgp_basesmuma_component_t - mca_sbgp_basesmuma_component_t; - - - /* - ** Base sub-group module - **/ - - struct mca_sbgp_basesmuma_module_t { - /** Collective modules all inherit from opal_object */ - mca_sbgp_base_module_t super; - - }; - typedef struct mca_sbgp_basesmuma_module_t mca_sbgp_basesmuma_module_t; - OBJ_CLASS_DECLARATION(mca_sbgp_basesmuma_module_t); - - /** - * Global component instance - */ - OMPI_MODULE_DECLSPEC extern mca_sbgp_basesmuma_component_t mca_sbgp_basesmuma_component; - - -END_C_DECLS - -#endif /* MCA_BCOL_basesmuma_EXPORT_H */ diff --git a/ompi/mca/sbgp/basesmuma/sbgp_basesmuma_component.c b/ompi/mca/sbgp/basesmuma/sbgp_basesmuma_component.c deleted file mode 100644 index 4c6e232860..0000000000 --- a/ompi/mca/sbgp/basesmuma/sbgp_basesmuma_component.c +++ /dev/null @@ -1,208 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#include "ompi_config.h" -#ifdef HAVE_UNISTD_H -#include -#endif -#include -#ifdef HAVE_SYS_MMAN_H -#include -#endif -#include - -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "sbgp_basesmuma.h" - - -/* - * Public string showing the coll ompi_sm V2 component version number - */ -const char *mca_sbgp_basesmuma_component_version_string = - "Open MPI sbgp - basesmuma collective MCA component version " OMPI_VERSION; - - -/* - * Local functions - */ - -static int basesmuma_register(void); -static int basesmuma_open(void); -static int basesmuma_close(void); -static mca_sbgp_base_module_t *mca_sbgp_basesmuma_select_procs(struct ompi_proc_t ** procs, - int n_procs_in, struct ompi_communicator_t *comm, char *key, void *output_data); - -static int mca_sbgp_basesmuma_init_query(bool enable_progress_threads, - bool enable_mpi_threads); - -/* - * Instantiate the public struct with all of our public information - * and pointers to our public functions in it - */ - -mca_sbgp_basesmuma_component_t mca_sbgp_basesmuma_component = { - - /* First, fill in the super */ - - { - /* First, the mca_component_t struct containing meta - information about the component itself */ - - .sbgp_version = { - MCA_SBGP_BASE_VERSION_2_0_0, - - /* Component name and version */ - - .mca_component_name = "basesmuma", - MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION), - - /* Component open, close, and register functions */ - - .mca_open_component = basesmuma_open, - .mca_close_component = basesmuma_close, - .mca_register_component_params = basesmuma_register, - }, - .sbgp_init_query = mca_sbgp_basesmuma_init_query, - .select_procs = mca_sbgp_basesmuma_select_procs, - .priority = 0, - } -}; - -/* - * Register the component - */ -static int basesmuma_register(void) -{ - mca_sbgp_basesmuma_component_t *cs = &mca_sbgp_basesmuma_component; - - /* set component priority */ - cs->super.priority = 90; - (void) mca_base_component_var_register(&cs->super.sbgp_version, - "priority", "Priority of the sbgp basesmuma", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - 
OPAL_INFO_LVL_9,
- MCA_BASE_VAR_SCOPE_READONLY,
- &cs->super.priority);
- return OMPI_SUCCESS;
-}
-
-/*
- * Open the component
- */
-static int basesmuma_open(void)
-{
- return OMPI_SUCCESS;
-}
-
-
-/*
- * Close the component
- */
-static int basesmuma_close(void)
-{
- return OMPI_SUCCESS;
-}
-
-/* query to see if the component is available for use, and can
- * satisfy the thread and progress requirements
- */
-int mca_sbgp_basesmuma_init_query(bool enable_progress_threads,
- bool enable_mpi_threads)
-{
- /* at this stage there is no reason to disqualify this component */
-
- /* done */
- return OMPI_SUCCESS;
-}
-
-/* This routine is used to find the list of procs that run on the
-** same host as the calling process.
-*/
-static mca_sbgp_base_module_t *mca_sbgp_basesmuma_select_procs(struct ompi_proc_t ** procs,
- int n_procs_in,
- struct ompi_communicator_t *comm,
- char *key,
- void *output_data
- )
-{
- /* local variables */
- int cnt, proc, local, last_local_proc;
- mca_sbgp_basesmuma_module_t *module;
-
- module = OBJ_NEW(mca_sbgp_basesmuma_module_t);
- if (!module) {
- return NULL;
- }
- module->super.group_size = 0;
- module->super.group_comm = comm;
- module->super.group_list = NULL;
- module->super.group_net = OMPI_SBGP_MUMA;
- for (proc = 0, cnt = 0, last_local_proc = 0 ; proc < n_procs_in ; ++proc) {
- local = OPAL_PROC_ON_LOCAL_NODE(procs[proc]->super.proc_flags);
- if (local) {
- last_local_proc = proc;
- cnt++;
- }
- }
- /* if no other local procs were found, skip to the end */
-
- if( 2 > cnt ) {
- /* There's always at least one - namely myself */
- assert(1 == cnt);
- module->super.group_size = 1;
- module->super.group_list = (int *) malloc(sizeof(int));
- if (NULL == module->super.group_list) {
- goto Error;
- }
- module->super.group_list[0] = last_local_proc;
- /* let ml handle this case */
- goto OneLocalPeer;
- }
-
- /* generate list of local ranks */
- module->super.group_size = cnt;
- if( cnt > 0 ) {
- module->super.group_list = (int *) malloc(sizeof(int) * cnt);
- if (NULL == module->super.group_list) {
- goto Error;
- }
- }
-
- for (proc = 0, cnt = 0 ; proc < n_procs_in ; ++proc) {
- local = OPAL_PROC_ON_LOCAL_NODE(procs[proc]->super.proc_flags);
- if( local ) {
- module->super.group_list[cnt++] = proc;
- }
- }
-OneLocalPeer:
- /* successful completion */
- return (mca_sbgp_base_module_t *) module;
-
- /* return with error */
-
-Error:
-
- /* clean up */
- if( NULL != module->super.group_list ) {
- free(module->super.group_list);
- module->super.group_list = NULL;
- }
-
- OBJ_RELEASE(module);
-
- return NULL;
-} diff --git a/ompi/mca/sbgp/basesmuma/sbgp_basesmuma_module.c b/ompi/mca/sbgp/basesmuma/sbgp_basesmuma_module.c deleted file mode 100644 index 79028c4e25..0000000000 --- a/ompi/mca/sbgp/basesmuma/sbgp_basesmuma_module.c +++ /dev/null @@ -1,48 +0,0 @@ -/*
- * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
- * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#include "ompi_config.h" -#ifdef HAVE_UNISTD_H -#include -#endif -#include -#ifdef HAVE_SYS_MMAN_H -#include -#endif -#include -#include - -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/sbgp/basesmuma/sbgp_basesmuma.h" - -/* - * Local functions - */ -static void -mca_sbgp_basesmuma_module_construct(mca_sbgp_basesmuma_module_t *module) -{ -} - -static void -mca_sbgp_basesmuma_module_destruct(mca_sbgp_basesmuma_module_t *module) -{ - /* done */ -} - -OBJ_CLASS_INSTANCE(mca_sbgp_basesmuma_module_t, - mca_sbgp_base_module_t, - mca_sbgp_basesmuma_module_construct, - mca_sbgp_basesmuma_module_destruct); diff --git a/ompi/mca/sbgp/ibnet/.opal_ignore b/ompi/mca/sbgp/ibnet/.opal_ignore deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ompi/mca/sbgp/ibnet/Makefile.am b/ompi/mca/sbgp/ibnet/Makefile.am deleted file mode 100644 index 28c3161eee..0000000000 --- a/ompi/mca/sbgp/ibnet/Makefile.am +++ /dev/null @@ -1,55 +0,0 @@ -# -# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. -# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. -# Copyright (c) 2012-2015 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -AM_CPPFLAGS = $(sbgp_ibnet_CPPFLAGS) $(btl_openib_CPPFLAGS) - -sources = \ - sbgp_ibnet.h \ - sbgp_ibnet_mca.h \ - sbgp_ibnet_mca.c \ - sbgp_ibnet_component.c \ - sbgp_ibnet_module.c - - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -component_noinst = -component_install = -if MCA_BUILD_ompi_sbgp_ibnet_DSO -component_install += mca_sbgp_ibnet.la -else -component_noinst += libmca_sbgp_ibnet.la -endif - -# See ompi/mca/btl/sm/Makefile.am for an explanation of -# libmca_common_sm.la. - -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_sbgp_ibnet_la_SOURCES = $(sources) -mca_sbgp_ibnet_la_LDFLAGS = -module -avoid-version $(sbgp_ibnet_LDFLAGS) $(btl_openib_LDFLAGS) -mca_sbgp_ibnet_la_LIBADD = $(sbgp_ibnet_LIBS) $(btl_openib_LIBS) \ - $(OMPI_TOP_BUILDDIR)/ompi/mca/common/verbs/libmca_common_verbs.la \ - $(OMPI_TOP_BUILDDIR)/ompi/mca/common/ofacm/libmca_common_ofacm.la - -noinst_LTLIBRARIES = $(component_noinst) -libmca_sbgp_ibnet_la_SOURCES =$(sources) -libmca_sbgp_ibnet_la_LDFLAGS = -module -avoid-version - -$(OMPI_TOP_BUILDDIR)/ompi/mca/common/ofacm/libmca_common_ofacm.la: foo.c - cd $(OMPI_TOP_BUILDDIR)/ompi/mca/common/ofacm && $(MAKE) - -$(OMPI_TOP_BUILDDIR)/ompi/mca/common/ofautils/libmca_common_ofautils.la: foo.c - cd $(OMPI_TOP_BUILDDIR)/ompi/mca/common/ofautils && $(MAKE) - -foo.c: diff --git a/ompi/mca/sbgp/ibnet/configure.m4 b/ompi/mca/sbgp/ibnet/configure.m4 deleted file mode 100644 index 6fdb24fa40..0000000000 --- a/ompi/mca/sbgp/ibnet/configure.m4 +++ /dev/null @@ -1,40 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. -# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. -# Copyright (c) 2015 Research Organization for Information Science -# and Technology (RIST). All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_ompi_sbgp_ibnet_CONFIG([should_build]) -# ------------------------------------------ -# AC_DEFUN([MCA_ompi_sbgp_ibnet_POST_CONFIG], [ -# ]) - - -# MCA_ompi_sbgp_ibnet_CONFIG([action-if-can-compile], -# [action-if-cant-compile]) -# ------------------------------------------------ -AC_DEFUN([MCA_ompi_sbgp_ibnet_CONFIG],[ - AC_CONFIG_FILES([ompi/mca/sbgp/ibnet/Makefile]) - sbgp_ofa_happy="no" - sbgp_mlnx_ofed_happy="no" - - OPAL_CHECK_OPENFABRICS([sbgp_ibnet], [sbgp_ofa_happy="yes"]) - OPAL_CHECK_MLNX_OPENFABRICS([sbgp_ibnet], [sbgp_mlnx_ofed_happy="yes"]) - - AS_IF([test "$sbgp_ofa_happy" = "yes" && test "$sbgp_mlnx_ofed_happy" = "yes"], - [$1], - [$2]) - - # substitute in the things needed to build iboffload - AC_SUBST([sbgp_ibnet_CFLAGS]) - AC_SUBST([sbgp_ibnet_CPPFLAGS]) - AC_SUBST([sbgp_ibnet_LDFLAGS]) - AC_SUBST([sbgp_ibnet_LIBS]) -])dnl diff --git a/ompi/mca/sbgp/ibnet/owner.txt b/ompi/mca/sbgp/ibnet/owner.txt deleted file mode 100644 index 1c86df367b..0000000000 --- a/ompi/mca/sbgp/ibnet/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: ORNL -status: unmaintained diff --git a/ompi/mca/sbgp/ibnet/sbgp_ibnet.h b/ompi/mca/sbgp/ibnet/sbgp_ibnet.h deleted file mode 100644 index f29ffc33db..0000000000 --- a/ompi/mca/sbgp/ibnet/sbgp_ibnet.h +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#ifndef MCA_BCOL_ibnet_EXPORT_H -#define MCA_BCOL_ibnet_EXPORT_H - -#include "ompi_config.h" - -#include "mpi.h" -#include "infiniband/verbs.h" -#include "ompi/mca/mca.h" -#include "ompi/mca/sbgp/sbgp.h" -#include "opal/mca/mpool/mpool.h" -#include "ompi/request/request.h" -#include "ompi/proc/proc.h" -#include "ompi/mca/common/ofacm/connect.h" - -BEGIN_C_DECLS - -#ifdef HAVE_SCHED_YIELD -# include -# define SPIN sched_yield() -#else /* no switch available */ -# define SPIN -#endif - -typedef enum { - OFFLOAD_CONNECTX_B0, - OFFLOAD_DISABLE -} coll_offload_support; - -/** - * Structure to hold the basic shared memory coll component. First it holds the - * base coll component, and then holds a bunch of - * sm-coll-component-specific stuff (e.g., current MCA param - * values). 
- */
-struct mca_sbgp_ibnet_component_t {
- /** Base coll component */
- mca_sbgp_base_component_2_0_0_t super;
-
- /** Enable/disable verbose mode */
- int verbose;
-
- /* Maximum allowed number of subgroups */
- int max_sbgps;
- /* Enable/disable the default subnet id warning */
- bool warn_default_gid_prefix;
- bool warn_nonexistent_if;
- /* IB MTU requested by user */
- int mtu;
- /** IB partition definition */
- int pkey_val;
- /* HCA data */
- char *if_include;
- char **if_include_list;
- char *if_exclude;
- char **if_exclude_list;
- /** Dummy argv-style list; a copy of names from the
- if_[in|ex]clude list that we use for error checking (to ensure
- that they all exist) */
- char **if_list;
- /** List of iboffload devices that have at least one active port */
- opal_list_t devices;
- int curr_max_group_id;
- uint32_t total_active_ports;
-};
-
-/**
- * Convenience typedef
- */
-typedef struct mca_sbgp_ibnet_component_t
-mca_sbgp_ibnet_component_t;
-
-/* IB port object */
-struct mca_sbgp_ibnet_port_t {
- uint16_t id; /** Port number */
- int stat; /** Port status - Active, Init, etc. */
- enum ibv_mtu mtu; /** MTU on this port */
- coll_offload_support coll_offload; /** Collectives offload mode */
- uint64_t subnet_id; /** Subnet id for the port */
- /* uint8_t src_path_bits; */
- uint16_t lid;
- uint16_t lmc;
- /** Array of the peer's CPCs available on this port */
- uint32_t num_cpcs;
- bool used;
- ompi_common_ofacm_base_module_data_t *pm_cpc_data;
- ompi_common_ofacm_base_module_t *local_cpc; /* selected cpc */
- ompi_common_ofacm_base_module_data_t *remote_cpc_data; /* data for remote cpc */
-};
-
-typedef struct mca_sbgp_ibnet_port_t mca_sbgp_ibnet_port_t;
-
-typedef enum {
- MCA_SBGP_IBNET_NONE = 0,
- MCA_SBGP_IBNET_NODE_LEADER = 1<<0,
- MCA_SBGP_IBNET_SOCKET_LEADER = 1<<1,
- MCA_SBGP_IBNET_SWITCH_LEADER = 1<<2
-} mca_sbgp_ibnet_duty_t;
-
-typedef enum {
- MCA_SBGP_IBNET_ALL_NET,
- MCA_SBGP_IBNET_NODE_NET,
- MCA_SBGP_IBNET_NONE_NET
-} mca_sbgp_ibnet_mode_t;
-
-struct mca_sbgp_ibnet_proc_t {
- opal_list_item_t super;
- ompi_proc_t *ompi_proc; /* Ompi proc pointer */
- int ompi_proc_index; /* Index of the proc in array */
- uint32_t rank; /* vpid, remote proc rank */
- uint32_t num_ports; /* number of remote ports */
- int *use_port; /* the size of this array equals the number of cgroups that point to this proc.
- Each cgroup has its own index "I". The array keeps the remote port number that we need to use
- for cgroup "I" - use_port[I]. We need it for the iboffload module */
- mca_sbgp_ibnet_port_t *remote_ports_info; /* the array keeps remote port information */
- mca_sbgp_ibnet_duty_t duty; /* Socket leader, Node leader, switch leader, etc.
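   Roles are bit flags from mca_sbgp_ibnet_duty_t above, so one process
   may hold several at once. A hypothetical sketch (not original code;
   do_node_leader_work() is a made-up helper):
     proc->duty = MCA_SBGP_IBNET_NODE_LEADER | MCA_SBGP_IBNET_SOCKET_LEADER;
     if (proc->duty & MCA_SBGP_IBNET_NODE_LEADER) do_node_leader_work();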
*/
-};
-
-typedef struct mca_sbgp_ibnet_proc_t mca_sbgp_ibnet_proc_t;
-OBJ_CLASS_DECLARATION(mca_sbgp_ibnet_proc_t);
-
-/* Device object */
-struct mca_sbgp_ibnet_device_t {
- opal_list_item_t super;
- struct ibv_device* ib_dev; /* pointer to device, from device list */
- int device_index; /* device index in device list */
- struct ibv_device_attr ib_dev_attr; /* attributes of the device */
- int num_act_ports;
- int num_allowed_ports;
- struct mca_sbgp_ibnet_port_t *ports;
- /* CPC stuff */
- ompi_common_ofacm_base_module_t **cpcs; /* Array of CPCs */
- uint8_t num_cpcs; /* Number of elements in cpc array */
-};
-
-typedef struct mca_sbgp_ibnet_device_t mca_sbgp_ibnet_device_t;
-OBJ_CLASS_DECLARATION(mca_sbgp_ibnet_device_t);
-
-struct mca_sbgp_ibnet_connection_group_info_t {
- int device_index; /* device index in device list */
- uint32_t port; /* port number */
- /* Used to find the number of the port used to communicate with a remote proc;
- it is the index into the use_port array in the mca_sbgp_ibnet_proc_t structure */
- uint32_t index;
- /* array of procs connected with this group */
- uint32_t num_procs;
- opal_pointer_array_t *ibnet_procs;
-};
-typedef struct mca_sbgp_ibnet_connection_group_info_t
- mca_sbgp_ibnet_connection_group_info_t;
-
-/*
- ** Base sub-group module
- **/
-struct mca_sbgp_ibnet_module_t {
- /** Collective modules all inherit from opal_object */
- mca_sbgp_base_module_t super;
- int group_id;
- /* opal_pointer_array_t *ibnet_procs; */
- /* number of connection groups */
- int num_cgroups;
- /*
- * Array of connection groups. The same procs appear in each of these groups,
- * but the groups were created over different ports (and possibly different devices).
- */
- mca_sbgp_ibnet_connection_group_info_t *cgroups;
- mca_sbgp_ibnet_mode_t mode; /* working mode of the module; ALL by default */
-};
-typedef struct mca_sbgp_ibnet_module_t mca_sbgp_ibnet_module_t;
-OBJ_CLASS_DECLARATION(mca_sbgp_ibnet_module_t);
-
-/* Error and verbose prints */
-
-static inline int mca_sbgp_ibnet_err(const char* fmt, ...)
-{
- va_list list;
- int ret;
-
- va_start(list, fmt);
- ret = vfprintf(stderr, fmt, list);
- va_end(list);
- return ret;
-}
-
-#define IBNET_ERROR(args) \
- do { \
- mca_sbgp_ibnet_err("[%s]%s[%s:%d:%s] IBNET ", \
- ompi_process_info.nodename, \
- OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \
- __FILE__, __LINE__, __func__); \
- mca_sbgp_ibnet_err args; \
- mca_sbgp_ibnet_err("\n"); \
- } while(0);
-
-#if OPAL_ENABLE_DEBUG
-#define IBNET_VERBOSE(level, args) \
- do { \
- if(mca_sbgp_ibnet_component.verbose >= level) { \
- mca_sbgp_ibnet_err("[%s]%s[%s:%d:%s] IBNET ", \
- ompi_process_info.nodename, \
- OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \
- __FILE__, __LINE__, __func__); \
- mca_sbgp_ibnet_err args; \
- mca_sbgp_ibnet_err("\n"); \
- } \
- } while(0);
-#else
-#define IBNET_VERBOSE(level, args)
-#endif
-
-#define MCA_SBGP_IBNET_PKEY_MASK 0x7fff
-
-/* Error and verbose prints - end */
-
-/* This routine is used to find the list of procs that run on the
- ** same host as the calling process.
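 ** (A side note on the logging macros defined above, since their call
 ** form is unusual: IBNET_ERROR and IBNET_VERBOSE expand to plain
 ** function calls, so the printf-style arguments need their own set of
 ** parentheses. An illustrative, non-original example; num_ports and
 ** dev_name are hypothetical locals:
 **   IBNET_VERBOSE(10, ("loaded %d ports on %s\n", num_ports, dev_name));
 ** )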
- */ -mca_sbgp_base_module_t *mca_sbgp_ibnet_select_procs(struct ompi_proc_t ** procs, - int n_procs_in, struct ompi_communicator_t *comm, char *key, void *output_data); - -/** - * Global component instance - */ -OMPI_MODULE_DECLSPEC extern mca_sbgp_ibnet_component_t mca_sbgp_ibnet_component; - - -END_C_DECLS - -#endif /* MCA_BCOL_ibnet_EXPORT_H */ diff --git a/ompi/mca/sbgp/ibnet/sbgp_ibnet_component.c b/ompi/mca/sbgp/ibnet/sbgp_ibnet_component.c deleted file mode 100644 index 15df331ad3..0000000000 --- a/ompi/mca/sbgp/ibnet/sbgp_ibnet_component.c +++ /dev/null @@ -1,600 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#include "ompi_config.h" -#include "infiniband/verbs.h" -#include -#include -#include -#include - -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "opal/util/argv.h" -#include "opal/include/opal/types.h" -#include "opal_stdint.h" -#include "sbgp_ibnet.h" -#include "sbgp_ibnet_mca.h" -#include "ompi/mca/common/ofacm/base.h" -#include "ompi/mca/common/ofacm/connect.h" -#include "ompi/mca/common/verbs/common_verbs.h" - -/* - * Public string showing the coll ompi_sm V2 component version number - */ -const char *mca_sbgp_ibnet_component_version_string = - "Open MPI sbgp - ibnet collective MCA component version " OMPI_VERSION; - -/* - * Local functions - */ - -static int mca_sbgp_ibnet_open(void); -static int mca_sbgp_ibnet_close(void); -static int mca_sbgp_ibnet_init_query(bool enable_progress_threads, - bool enable_mpi_threads); - -/* - * Instantiate the public struct with all of our public information - * and pointers to our public functions in it - */ - -mca_sbgp_ibnet_component_t mca_sbgp_ibnet_component = { - - /* First, fill in the super */ - - { - /* First, the mca_component_t struct containing meta - information about the component itself */ - - .sbgp_version = { - MCA_SBGP_BASE_VERSION_2_0_0, - - /* Component name and version */ - - .mca_component_name = "ibnet", - MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION), - - /* Component open and close functions */ - - .mca_open_component = mca_sbgp_ibnet_open, - .mca_close_component = mca_sbgp_ibnet_close, - .mca_register_component_params = mca_sbgp_ibnet_register_params, - }, - - .sbgp_init_query = mca_sbgp_ibnet_init_query, - .select_procs =mca_sbgp_ibnet_select_procs, - .priority = 0, - }, - - /* verbose mode */ - false, - - /* Maximum allowed number of subroups*/ - 0, - - /* Enable disable default subnet id warning */ - false, - false, - - /* IB MTU requested by user */ - 0, - - /* IB partition definition */ - 0, - - /* Keeping hca data */ - NULL, - NULL, - NULL, - NULL, - - /** Dummy argv-style list; a copy of names from the - if_[in|ex]clude list that we use for error checking (to ensure - that they all exist) */ - NULL, -}; - -static int mca_sbgp_ibnet_dummy_init_query( - bool enable_progress_threads, bool enable_mpi_threads) -{ - return OMPI_SUCCESS; -} - -/* - * Open the component - */ -static 
int mca_sbgp_ibnet_open(void) -{ - /* local variables */ - mca_sbgp_ibnet_component_t *cs = &mca_sbgp_ibnet_component; - - mca_sbgp_ibnet_component.pkey_val &= SBGP_IBNET_IB_PKEY_MASK; - - cs->total_active_ports = 0; - cs->curr_max_group_id = 100; - - OBJ_CONSTRUCT(&cs->devices, opal_list_t); - - return OMPI_SUCCESS; -} - -/* - * Close the component - */ -static int mca_sbgp_ibnet_close(void) -{ - mca_sbgp_ibnet_component_t *cs = &mca_sbgp_ibnet_component; - - OBJ_DESTRUCT(&cs->devices); - - return OMPI_SUCCESS; -} - -static void mca_sbgp_ibnet_device_constructor - (mca_sbgp_ibnet_device_t *device) -{ - /* Init OFACM stuf */ - device->ib_dev = NULL; - device->device_index = -1; - device->num_act_ports = 0; - memset(&device->ib_dev_attr, 0, sizeof(struct ibv_device_attr)); - device->cpcs= NULL; - device->num_cpcs = 0; - device->ports = NULL; -} - -static void mca_sbgp_ibnet_device_destructor - (mca_sbgp_ibnet_device_t *device) -{ - /* release memory */ - if (NULL != device->ports) { - free(device->ports); - } -} - -OBJ_CLASS_INSTANCE(mca_sbgp_ibnet_device_t, - opal_list_item_t, - mca_sbgp_ibnet_device_constructor, - mca_sbgp_ibnet_device_destructor); - -static int -get_port_list(mca_sbgp_ibnet_device_t *device, int *allowed_ports) -{ - char *name; - const char *dev_name; - int i, j, k, num_ports = 0; - - dev_name = ibv_get_device_name(device->ib_dev); - name = (char*) malloc(strlen(dev_name) + 4); - if (NULL == name) { - return 0; - } - - num_ports = 0; - if (NULL != mca_sbgp_ibnet_component.if_include_list) { - /* If only the device name is given (eg. mtdevice0,mtdevice1) use all - ports */ - i = 0; - - while (mca_sbgp_ibnet_component.if_include_list[i]) { - if (0 == strcmp(dev_name, - mca_sbgp_ibnet_component.if_include_list[i])) { - num_ports = device->ib_dev_attr.phys_port_cnt; - - IBNET_VERBOSE(10, ("if_include_list - %s.\n", mca_sbgp_ibnet_component.if_include_list[i])); - goto done; - } - ++i; - } - - /* Include only requested ports on the device */ - for (i = 1; i <= device->ib_dev_attr.phys_port_cnt; ++i) { - sprintf(name, "%s:%d", dev_name, i); - - for (j = 0; - NULL != mca_sbgp_ibnet_component.if_include_list[j]; ++j) { - if (0 == strcmp(name, - mca_sbgp_ibnet_component.if_include_list[j])) { - - IBNET_VERBOSE(10, ("Allowed port %d: idx %d; if_include_list - %s\n", - i, num_ports, mca_sbgp_ibnet_component.if_include_list[j])); - - allowed_ports[num_ports++] = i; - break; - } - } - } - } else if (NULL != mca_sbgp_ibnet_component.if_exclude_list) { - /* If only the device name is given (eg. mtdevice0,mtdevice1) exclude - all ports */ - i = 0; - while (mca_sbgp_ibnet_component.if_exclude_list[i]) { - if (0 == strcmp(dev_name, - mca_sbgp_ibnet_component.if_exclude_list[i])) { - num_ports = 0; - goto done; - } - ++i; - } - /* Exclude the specified ports on this device */ - for (i = 1; i <= device->ib_dev_attr.phys_port_cnt; ++i) { - sprintf(name,"%s:%d",dev_name,i); - for (j = 0; - NULL != mca_sbgp_ibnet_component.if_exclude_list[j]; ++j) { - if (0 == strcmp(name, - mca_sbgp_ibnet_component.if_exclude_list[j])) { - /* If found, set a sentinel value */ - j = -1; - break; - } - } - /* If we didn't find it, it's ok to include in the list */ - if (-1 != j) { - allowed_ports[num_ports++] = i; - } - } - } else { - /* Assume that all ports are allowed. num_ports will be adjusted - below to reflect whether this is true or not. 
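      For example (hypothetical values, using the "device" / "device:port"
      syntax documented for the sbgp_ibnet_if_include and
      sbgp_ibnet_if_exclude MCA parameters):
          --mca sbgp_ibnet_if_include mthca0      selects every port of mthca0
          --mca sbgp_ibnet_if_include mthca0:2    selects only port 2 of mthca0
      sbgp_ibnet_if_exclude applies the same matching in reverse.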
*/ - for (i = 1; i <= device->ib_dev_attr.phys_port_cnt; ++i) { - allowed_ports[num_ports++] = i; - } - } - -done: - - /* Remove the following from the error-checking if_list: - - bare device name - - device name suffixed with port number */ - if (NULL != mca_sbgp_ibnet_component.if_list) { - for (i = 0; NULL != mca_sbgp_ibnet_component.if_list[i]; ++i) { - /* Look for raw device name */ - if (0 == strcmp(mca_sbgp_ibnet_component.if_list[i], dev_name)) { - j = opal_argv_count(mca_sbgp_ibnet_component.if_list); - opal_argv_delete(&j, &(mca_sbgp_ibnet_component.if_list), - i, 1); - --i; - } - } - - for (i = 1; i <= device->ib_dev_attr.phys_port_cnt; ++i) { - sprintf(name, "%s:%d", dev_name, i); - for (j = 0; NULL != mca_sbgp_ibnet_component.if_list[j]; ++j) { - if (0 == strcmp(mca_sbgp_ibnet_component.if_list[j], name)) { - k = opal_argv_count(mca_sbgp_ibnet_component.if_list); - opal_argv_delete(&k, &(mca_sbgp_ibnet_component.if_list), - j, 1); - --j; - break; - } - } - } - } - - free(name); - - return num_ports; -} - -static int ibnet_init_port(struct mca_sbgp_ibnet_device_t *device, - int port_index, struct ibv_port_attr *ib_port_attr, - struct ibv_context *ib_dev_context) -{ - union ibv_gid gid; - struct mca_sbgp_ibnet_port_t *p = &device->ports[port_index]; - - /* Set port data */ - p->lmc = (1 << ib_port_attr->lmc); - p->lid = ib_port_attr->lid; - p->stat = ib_port_attr->state; - p->mtu = ib_port_attr->active_mtu; - - IBNET_VERBOSE(10, ("Setting port data (%s:%d) lid=%d, lmc=%d, stat=%d, mtu=%d\n", - ibv_get_device_name(device->ib_dev), p->id, p->lid, - p->lmc, p->stat, p->mtu)); - - if (0 != ibv_query_gid(ib_dev_context, p->id, 0, &gid)) { - IBNET_ERROR(("ibv_query_gid failed (%s:%d)\n", - ibv_get_device_name(device->ib_dev), p->id)); - return OMPI_ERR_NOT_FOUND; - } - /* set subnet data */ - p->subnet_id = ntoh64(gid.global.subnet_prefix); - -/* p->subnet_id = gid.global.subnet_prefix; */ - - IBNET_VERBOSE(10, ("my IB-only subnet_id for HCA %d %s port %d is %lx\n" PRIx64, - gid.global.subnet_prefix,ibv_get_device_name(device->ib_dev), p->id, p->subnet_id)); - - return OMPI_SUCCESS; -} - -/* Find active port */ -static mca_sbgp_ibnet_device_t* ibnet_load_ports(struct ibv_device *ib_dev, int device_index) -{ - struct ibv_context *ib_dev_context = NULL; - mca_sbgp_ibnet_device_t *device = NULL; - int *allowed_ports = NULL; - int rc, port_cnt, port, i, ret, p = 0; - -#if defined(HAVE_STRUCT_IBV_DEVICE_TRANSPORT_TYPE) - if (IBV_TRANSPORT_IB != ib_dev->transport_type) { - IBNET_VERBOSE(10, ("Skipping non IB device %s", - ibv_get_device_name(ib_dev))); - goto error; - } -#endif - - device = OBJ_NEW(mca_sbgp_ibnet_device_t); - device->ib_dev = ib_dev; - device->device_index = device_index; - ib_dev_context = ibv_open_device(ib_dev); - - if(NULL == ib_dev_context) { - IBNET_ERROR(("Error obtaining device context for %s errno says %s", - ibv_get_device_name(device->ib_dev), strerror(errno))); - goto error; - } - - if(ibv_query_device(ib_dev_context, &device->ib_dev_attr)) { - IBNET_ERROR(("error obtaining device attributes for %s errno says %s", - ibv_get_device_name(ib_dev), strerror(errno))); - goto error; - } - - allowed_ports = (int *) calloc(device->ib_dev_attr.phys_port_cnt, sizeof(int)); - if (NULL == allowed_ports) { - goto error; - } - - port_cnt = get_port_list(device, allowed_ports); - if (0 == port_cnt) { - goto error; - } - -#if OPAL_ENABLE_DEBUG - for (i = 0; i < port_cnt; ++i) { - IBNET_VERBOSE(10, ("allowed port %d with idx %d.\n", allowed_ports[i], i)); - } -#endif - - 
device->num_allowed_ports = port_cnt; - device->ports = (mca_sbgp_ibnet_port_t *) calloc(port_cnt, sizeof(mca_sbgp_ibnet_port_t)); - if (NULL == device->ports) { - goto error; - } - - /* Note ports are 1 based (i >= 1) */ - for(port = 0; port < port_cnt; port++) { - struct ibv_port_attr ib_port_attr; - - i = allowed_ports[port]; - if(ibv_query_port(ib_dev_context, i, &ib_port_attr)){ - IBNET_ERROR(("Error getting port attributes for device %s " - "port number %d errno says %s", - ibv_get_device_name(device->ib_dev), i, strerror(errno))); - continue; - } - - if(IBV_PORT_ACTIVE == ib_port_attr.state) { - /* Pasha: Need to think how we want to handle MTUs - if (ib_port_attr.active_mtu < mca_bcol_iboffload_component.mtu){ - device->mtu = ib_port_attr.active_mtu; - } - */ - /* start to put port info */ - device->ports[p].id = i; - device->ports[p].stat = ib_port_attr.state; - device->ports[p].mtu = ib_port_attr.active_mtu; - - device->ports[p].used = true; - - if (0 == mca_sbgp_ibnet_component.pkey_val) { - ret = ibnet_init_port(device, p, &ib_port_attr, ib_dev_context); - if (OMPI_SUCCESS != ret) { - IBNET_ERROR(("Device %s " - "port number %d , failed to init port, errno says %s", - ibv_get_device_name(device->ib_dev), - i, strerror(errno))); - continue; - } - } else { - uint16_t pkey,j; - device->ports[p].used = false; - - for (j = 0; j < device->ib_dev_attr.max_pkeys; j++) { - if(ibv_query_pkey(ib_dev_context, i, j, &pkey)){ - IBNET_ERROR(("error getting pkey for index %d, device %s " - "port number %d errno says %s", - j, ibv_get_device_name(device->ib_dev), i, strerror(errno))); - continue; - } - - pkey = ntohs(pkey) & MCA_SBGP_IBNET_PKEY_MASK; - if (pkey == (uint32_t) mca_sbgp_ibnet_component.pkey_val){ - ret = ibnet_init_port(device, p, &ib_port_attr, ib_dev_context); - if (OMPI_SUCCESS != ret) { - IBNET_ERROR(("Device %s " - "port number %d , failed to init port, errno says %s", - ibv_get_device_name(device->ib_dev), - i, strerror(errno))); - continue; - } - } - } - } - - p++; /* One port was loaded, go to the next one */ - } - } - - device->num_act_ports = p; - /* Update total number of active ports */ - mca_sbgp_ibnet_component.total_active_ports += p; - - if (0 != device->num_act_ports) { - ompi_common_ofacm_base_dev_desc_t dev; - /* Init dev */ - dev.ib_dev = ib_dev; - dev.ib_dev_context = ib_dev_context; - dev.capabilities = 0; - - rc = ompi_common_ofacm_base_select_for_local_port( - &dev, &device->cpcs, (int *)&device->num_cpcs); - /* If we get NOT_SUPPORTED, then no CPC was found for this - port. But that's not a fatal error -- just keep going; - let's see if we find any usable openib modules or not. */ - if (OMPI_SUCCESS != rc) { - /* All others *are* fatal. Note that we already did a - show_help in the lower layer */ - IBNET_VERBOSE(10, ("Device %s, no CPC found", - ibv_get_device_name(device->ib_dev))); - goto error; - } - } - - /* we do not continue to use the device we just collect data, - * so close it for now. 
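 * (A note on the pkey filtering above, illustrative rather than original
 * text: both the configured sbgp_ibnet_pkey value and each pkey read back
 * with ibv_query_pkey() are masked with MCA_SBGP_IBNET_PKEY_MASK (0x7fff),
 * which strips the IB full-membership bit; so, for example, a configured
 * pkey of 0x8003 and an on-port pkey of 0x0003 compare equal.)
 *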
We will open it later in the iboffload coll component */
- if(ibv_close_device(ib_dev_context)) {
- IBNET_ERROR(("Device %s, failed to close the device %s",
- ibv_get_device_name(device->ib_dev), strerror(errno)));
- }
-
- if (0 == device->num_act_ports) {
- goto error;
- }
-
- /* Pasha - I do not like the error flow here */
- free(allowed_ports);
-
- return device;
-
-error:
-
- if (NULL != allowed_ports) {
- free(allowed_ports);
- }
-
- if (NULL != device) {
- /* device came from OBJ_NEW, so drop the reference rather than
- destructing it in place; it may still be NULL if we failed
- before the allocation */
- OBJ_RELEASE(device);
- }
-
- return NULL;
-}
-
-/* Create a list of the IB HCAs that have an active port */
-static int ibnet_load_devices(void)
-{
- int num_devs, i;
- struct ibv_device **ib_devs = NULL;
-
- mca_sbgp_ibnet_device_t *device = NULL;
- mca_sbgp_ibnet_component_t *cs = &mca_sbgp_ibnet_component;
-
- IBNET_VERBOSE(7, ("Entering ibnet_load_devices"));
-
- /* Get list of devices */
- ib_devs = ompi_ibv_get_device_list(&num_devs);
-
- if(0 == num_devs || NULL == ib_devs) {
- IBNET_VERBOSE(10, ("No ib devices found"));
- /* no HCA found */
- opal_show_help("help-mpi-btl-base.txt", "btl:no-nics", true);
- return OMPI_ERROR;
- }
-
- for (i = 0; i < num_devs; i++) {
- device = ibnet_load_ports(ib_devs[i], i);
- if (NULL != device) {
- IBNET_VERBOSE(10, ("Device %s was appended to device list with index %d.\n",
- ibv_get_device_name(device->ib_dev), i));
- opal_list_append(&cs->devices,
- (opal_list_item_t *) device);
- }
- }
-
- if (opal_list_is_empty(&cs->devices)) {
- /* No relevant devices were found, return an error */
- IBNET_ERROR(("No active devices found"));
- ompi_ibv_free_device_list(ib_devs);
- return OMPI_ERROR;
- }
-
- ompi_ibv_free_device_list(ib_devs);
-
- return OMPI_SUCCESS;
-}
-
-/* query to see if the component is available for use, and can
- * satisfy the thread and progress requirements
- */
-int mca_sbgp_ibnet_init_query(bool enable_progress_threads,
- bool enable_mpi_threads)
-{
- int rc, list_count = 0;
-
- /* Parse the include and exclude lists, checking for errors */
- mca_sbgp_ibnet_component.if_list = NULL;
- mca_sbgp_ibnet_component.if_include_list = NULL;
- mca_sbgp_ibnet_component.if_exclude_list = NULL;
-
- IBNET_VERBOSE(7, ("Calling mca_sbgp_ibnet_init_query"));
-
- if (NULL != mca_sbgp_ibnet_component.if_include) {
- list_count++;
- }
-
- if (NULL != mca_sbgp_ibnet_component.if_exclude) {
- list_count++;
- }
-
- if (list_count > 1) {
- IBNET_ERROR(("Bad --mca (if_include, if_exclude) parameters!"));
- return OMPI_ERROR;
- } else if (NULL != mca_sbgp_ibnet_component.if_include) {
- mca_sbgp_ibnet_component.if_include_list =
- opal_argv_split(mca_sbgp_ibnet_component.if_include, ',');
- mca_sbgp_ibnet_component.if_list =
- opal_argv_copy(mca_sbgp_ibnet_component.if_include_list);
- } else if (NULL != mca_sbgp_ibnet_component.if_exclude) {
- mca_sbgp_ibnet_component.if_exclude_list =
- opal_argv_split(mca_sbgp_ibnet_component.if_exclude, ',');
- mca_sbgp_ibnet_component.if_list =
- opal_argv_copy(mca_sbgp_ibnet_component.if_exclude_list);
- }
-
- /* Init CPC components */
- rc = ompi_common_ofacm_base_init();
- if (OMPI_SUCCESS != rc) {
- return rc;
- }
-
- /* Load all devices and active ports */
- rc = ibnet_load_devices();
- if (OMPI_SUCCESS != rc) {
- return rc;
- }
-
- mca_sbgp_ibnet_component.super.sbgp_init_query =
- mca_sbgp_ibnet_dummy_init_query;
-
- return OMPI_SUCCESS;
-} diff --git a/ompi/mca/sbgp/ibnet/sbgp_ibnet_mca.c b/ompi/mca/sbgp/ibnet/sbgp_ibnet_mca.c deleted file mode 100644 index a9c2553c0e..0000000000 --- a/ompi/mca/sbgp/ibnet/sbgp_ibnet_mca.c +++ /dev/null @@ -1,229 +0,0 @@ -/*
- * Copyright (c) 2009-2012 Oak Ridge National
Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#include -#include -#include - -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/bcol/bcol.h" -#include "ompi/mca/bcol/base/base.h" -#include "ompi/mca/common/ofacm/base.h" - -#include "sbgp_ibnet.h" -#include "sbgp_ibnet_mca.h" - -/* - * Local flags - */ -enum { - REGINT_NEG_ONE_OK = 0x01, - REGINT_GE_ZERO = 0x02, - REGINT_GE_ONE = 0x04, - REGINT_NONZERO = 0x08, - REGINT_MAX = 0x88 -}; - -enum { - REGSTR_EMPTY_OK = 0x01, - - REGSTR_MAX = 0x88 -}; - -static mca_base_var_enum_value_t mtu_values[] = { - {IBV_MTU_512, "256B"}, - {IBV_MTU_512, "512B"}, - {IBV_MTU_1024, "1k"}, - {IBV_MTU_2048, "2k"}, - {IBV_MTU_4096, "4k"}, - {0, NULL} -}; - -/* - * utility routine for string parameter registration - */ -static int reg_string(const char* param_name, - const char* deprecated_param_name, - const char* param_desc, - const char* default_value, char **storage, - int flags) -{ - int index; - - /* the MCA variable system will not change this value */ - *storage = (char *) default_value; - index = mca_base_component_var_register(&mca_sbgp_ibnet_component.super.sbgp_version, - param_name, param_desc, MCA_BASE_VAR_TYPE_STRING, - NULL, 0, 0, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); - if (NULL != deprecated_param_name) { - (void) mca_base_var_register_synonym(index, "ompi", "sbgp", "ibnet", deprecated_param_name, - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - } - - if (0 != (flags & REGSTR_EMPTY_OK) && (NULL == *storage || 0 == strlen(*storage))) { - opal_output(0, "Bad parameter value for parameter \"%s\"", - param_name); - return OMPI_ERR_BAD_PARAM; - } - - return OMPI_SUCCESS; -} - -/* - * utility routine for integer parameter registration - */ -static int reg_int(const char* param_name, - const char* deprecated_param_name, - const char* param_desc, - int default_value, int *storage, int flags) -{ - int index; - - *storage = default_value; - index = mca_base_component_var_register(&mca_sbgp_ibnet_component.super.sbgp_version, - param_name, param_desc, MCA_BASE_VAR_TYPE_INT, - NULL, 0, 0, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); - if (NULL != deprecated_param_name) { - (void) mca_base_var_register_synonym(index, "ompi", "sbgp", "ibnet", deprecated_param_name, - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - } - - if (0 != (flags & REGINT_NEG_ONE_OK) && -1 == *storage) { - return OMPI_SUCCESS; - } - - if ((0 != (flags & REGINT_GE_ZERO) && *storage < 0) || - (0 != (flags & REGINT_GE_ONE) && *storage < 1) || - (0 != (flags & REGINT_NONZERO) && 0 == *storage)) { - opal_output(0, "Bad parameter value for parameter \"%s\"", - param_name); - return OMPI_ERR_BAD_PARAM; - } - - return OMPI_SUCCESS; -} - -/* - * utility routine for boolean parameter registration - */ -static int reg_bool(const char* param_name, - const char* deprecated_param_name, - const char* param_desc, - bool default_value, bool *storage) -{ - int index; - - *storage = default_value; - index = mca_base_component_var_register(&mca_sbgp_ibnet_component.super.sbgp_version, - param_name, param_desc, MCA_BASE_VAR_TYPE_BOOL, - NULL, 0, 0, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); - if (NULL != deprecated_param_name) { - (void) mca_base_var_register_synonym(index, "ompi", "sbgp", "ibnet", deprecated_param_name, - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - } - - 
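    /* Hypothetical usage sketch for these registration helpers (not
     * original code), matching the flag checks implemented above:
     *
     *   rc = reg_int("num_qps", NULL, "Number of QPs", 4, &storage,
     *                REGINT_GE_ONE);   // rejects values < 1 with OMPI_ERR_BAD_PARAM
     *   rc = reg_bool("use_eager", NULL, "Enable eager mode", true, &flag);
     *
     * "num_qps", "use_eager", storage and flag are made-up names; reg_bool
     * performs no range checking, which is why it takes no flags argument.
     */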
return OMPI_SUCCESS; -} - -int mca_sbgp_ibnet_register_params(void) -{ - mca_base_var_enum_t *new_enum; - char *msg; - int ret, tmp; - - ret = OMPI_SUCCESS; - -#define CHECK(expr) do { \ - tmp = (expr); \ - if (OMPI_SUCCESS != tmp) ret = tmp; \ - } while (0) - - /* register openib component parameters */ - - CHECK(reg_int("priority", NULL, - "IB offload component priority" - "(from 0(low) to 90 (high))", 90, &mca_sbgp_ibnet_component.super.priority, 0)); - - CHECK(reg_int("verbose", NULL, - "Output some verbose IB offload BTL information " - "(0 = no output, nonzero = output)", 0, &mca_sbgp_ibnet_component.verbose, 0)); - - CHECK(reg_bool("warn_default_gid_prefix", NULL, - "Warn when there is more than one active ports and at least one of them connected to the network with only default GID prefix configured (0 = do not warn; any other value = warn)", - true, &mca_sbgp_ibnet_component.warn_default_gid_prefix)); - CHECK(reg_bool("warn_nonexistent_if", NULL, - "Warn if non-existent devices and/or ports are specified in the sbgp_ibnet_if_[in|ex]clude MCA parameters (0 = do not warn; any other value = warn)", - true, &mca_sbgp_ibnet_component.warn_nonexistent_if)); - - CHECK(reg_int("max_sbgps", NULL, - "Maximum allowed number of subroups", - 100, &mca_sbgp_ibnet_component.max_sbgps, 0)); - - CHECK(reg_int("pkey", "ib_pkey_val", - "OpenFabrics partition key (pkey) value. " - "Unsigned integer decimal or hex values are allowed (e.g., \"3\" or \"0x3f\") and will be masked against the maximum allowable IB paritition key value (0x7fff)", - 0, &mca_sbgp_ibnet_component.pkey_val, 0)); - mca_sbgp_ibnet_component.pkey_val &= SBGP_IBNET_IB_PKEY_MASK; - - asprintf(&msg, "OpenFabrics MTU, in bytes (if not specified in INI files). Valid values are: %d=256 bytes, %d=512 bytes, %d=1024 bytes, %d=2048 bytes, %d=4096 bytes", - IBV_MTU_256, - IBV_MTU_512, - IBV_MTU_1024, - IBV_MTU_2048, - IBV_MTU_4096); - if (NULL == msg) { - /* Don't try to recover from this */ - return OMPI_ERR_OUT_OF_RESOURCE; - } - - CHECK(mca_base_var_enum_create("sbgp_ibnet_mtu", mtu_values, &new_enum)); - if (OPAL_SUCCESS != ret) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - mca_sbgp_ibnet_component.mtu = IBV_MTU_1024; - ret = mca_base_component_var_register(&mca_sbgp_ibnet_component.super.sbgp_version, - "mtu", msg, MCA_BASE_VAR_TYPE_INT, new_enum, - 0, 0, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, &mca_sbgp_ibnet_component.mtu); - OBJ_RELEASE(new_enum); - free(msg); - - if (0 > ret) { - return ret; - } - - (void) mca_base_var_register_synonym(ret, "ompi", "sbgp", "ibnet", "ib_mtu", - MCA_BASE_VAR_SYN_FLAG_DEPRECATED); - - CHECK(reg_string("if_include", NULL, - "Comma-delimited list of devices/ports to be used (e.g. \"mthca0,mthca1:2\"; empty value means to use all ports found). Mutually exclusive with sbgp_ibnet_if_exclude.", - NULL, &mca_sbgp_ibnet_component.if_include, - 0)); - - CHECK(reg_string("if_exclude", NULL, - "Comma-delimited list of device/ports to be excluded (empty value means to not exclude any ports). 
Mutually exclusive with sbgp_ibnet_if_include.", - NULL, &mca_sbgp_ibnet_component.if_exclude, - 0)); - - /* Register any MCA params for the connect pseudo-components */ - if (OMPI_SUCCESS == ret) { - ret = ompi_common_ofacm_base_register(&mca_sbgp_ibnet_component.super.sbgp_version); - } - - return ret; -} diff --git a/ompi/mca/sbgp/ibnet/sbgp_ibnet_mca.h b/ompi/mca/sbgp/ibnet/sbgp_ibnet_mca.h deleted file mode 100644 index 58fd8adcb2..0000000000 --- a/ompi/mca/sbgp/ibnet/sbgp_ibnet_mca.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - /** @file */ - -#ifndef MCA_SBGP_IBNET_MCA_H -#define MCA_SBGP_IBNET_MCA_H - -#include -#include "ompi_config.h" - -#define SBGP_IBNET_IB_PKEY_MASK 0x7fff - -int mca_sbgp_ibnet_register_params(void); - -#endif diff --git a/ompi/mca/sbgp/ibnet/sbgp_ibnet_module.c b/ompi/mca/sbgp/ibnet/sbgp_ibnet_module.c deleted file mode 100644 index fa5d54d171..0000000000 --- a/ompi/mca/sbgp/ibnet/sbgp_ibnet_module.c +++ /dev/null @@ -1,1029 +0,0 @@ -/* - * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. - * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - */ - -#include "ompi_config.h" -#include -#include -#include -#include -#include - -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/sbgp/ibnet/sbgp_ibnet.h" -#include "ompi/mca/common/ofacm/base.h" -#include "ompi/mca/common/ofacm/connect.h" -#include "ompi/patterns/comm/coll_ops.h" -/* - * Unused -static int ibnet_module_enable(mca_sbgp_base_module_t *module, - struct ompi_communicator_t *comm); - -*/ - -/* - * Local functions - */ -static void -mca_sbgp_ibnet_module_construct(mca_sbgp_ibnet_module_t *module) -{ - module->cgroups = NULL; - module->group_id = 0; -} - -static void -mca_sbgp_ibnet_module_destruct(mca_sbgp_ibnet_module_t *module) -{ - -} - -OBJ_CLASS_INSTANCE(mca_sbgp_ibnet_module_t, - mca_sbgp_base_module_t, - mca_sbgp_ibnet_module_construct, - mca_sbgp_ibnet_module_destruct); - -static void -mca_sbgp_ibnet_proc_construct(mca_sbgp_ibnet_proc_t *proc) -{ - /* done */ - proc->ompi_proc = 0; - proc->num_ports = 0; - proc->use_port = NULL; - proc->remote_ports_info = NULL; - proc->duty = MCA_SBGP_IBNET_NONE; -} - -static void -mca_sbgp_ibnet_proc_destruct(mca_sbgp_ibnet_proc_t *proc) -{ - /* done */ - if (NULL != proc->remote_ports_info) { - free(proc->remote_ports_info); - /* Pasha: need to check if we need - * to release some data from inside of the proc*/ - } - - if (NULL != proc->use_port) { - free(proc->use_port); - } -} - -OBJ_CLASS_INSTANCE(mca_sbgp_ibnet_proc_t, - opal_list_item_t, - mca_sbgp_ibnet_proc_construct, - mca_sbgp_ibnet_proc_destruct); - - -/* Pack all data to gather buffer */ -static int pack_gather_sbuff(char* sbuffer) -{ - int port, cpc; - coll_offload_support coll_offload_flag = OFFLOAD_CONNECTX_B0; /**< Pasha: add query for collectives offload support */ - - char* pack_ptr = sbuffer; - - mca_sbgp_ibnet_device_t *device = NULL; - uint32_t my_rank = ompi_process_info.my_name.vpid; - opal_list_t *devices = &mca_sbgp_ibnet_component.devices; - - /* Message format: - * - my rank (uint32_t) - * - number of active ports (uint32_t) - * - for each active port: - * + lid (uint16_t) - * + 
subnetid (uint64_t) - * + mtu (uint32_t) - * + colloffload (uint8_t) - * + num of cpcs (uint8_t) - * + for each cpc: (uint8_t) - * * cpc index (uint8_t) - * * cpc priority (uint8_t) - * * cpc buffer len (uint8_t) - * * cpc buffer (byte * buffer_len) - * - */ - - /* Start to put data */ - - /* Pack my rank , I need it because allgather doesn't work as expected */ - IBNET_VERBOSE(10, ("Send pack rank = %d\n", my_rank)); - IBNET_VERBOSE(10, ("packing %d of %d\n", 1, sizeof(uint32_t))); - - memcpy(pack_ptr, &my_rank, sizeof(uint32_t)); - pack_ptr += sizeof(uint32_t); - - /* Put number of ports that we send */ - IBNET_VERBOSE(10, ("Send pack num of ports = %d\n", mca_sbgp_ibnet_component.total_active_ports)); - IBNET_VERBOSE(10, ("packing %d of %d\n", 1, sizeof(uint32_t))); - - memcpy(pack_ptr, &mca_sbgp_ibnet_component.total_active_ports, sizeof(uint32_t)); - pack_ptr += sizeof(uint32_t); - - /* Go through list of device and build the message*/ - for (device = (mca_sbgp_ibnet_device_t *) opal_list_get_first(devices); - device != (mca_sbgp_ibnet_device_t *) opal_list_get_end(devices); - device = (mca_sbgp_ibnet_device_t *) opal_list_get_next((opal_list_item_t *)device)) { - for (port = 0; port < device->num_allowed_ports; ++port) { - if (!device->ports[port].used) { - continue; - } - - /* put port num */ - IBNET_VERBOSE(10, ("Send pack port num = %d\n", device->ports[port].id)); - IBNET_VERBOSE(10, ("packing %d of %d\n", 1, sizeof(uint16_t))); - - memcpy(pack_ptr, &device->ports[port].id, sizeof(uint16_t)); - pack_ptr += sizeof(uint16_t); - - /* put lid */ - IBNET_VERBOSE(10, ("Send pack lid = %d\n", device->ports[port].lid)); - IBNET_VERBOSE(10, ("packing %d of %d\n", 1, sizeof(uint16_t))); - - memcpy(pack_ptr, &device->ports[port].lid, sizeof(uint16_t)); - pack_ptr += sizeof(uint16_t); - - /* put subnetid */ - IBNET_VERBOSE(10, ("Send pack subnet id = %lx\n", device->ports[port].subnet_id)); - IBNET_VERBOSE(10, ("packing %d of %d\n", 1, sizeof(uint64_t))); - - memcpy(pack_ptr, &device->ports[port].subnet_id, sizeof(uint64_t)); - pack_ptr += sizeof(uint64_t); - - /* put default mtu */ - IBNET_VERBOSE(10, ("Send pack MTU = %d\n", device->ports[port].mtu)); - IBNET_VERBOSE(10, ("packing %d of %d\n", 1, sizeof(uint32_t))); - - memcpy(pack_ptr, &device->ports[port].mtu, sizeof(uint32_t)); - pack_ptr += sizeof(uint32_t); - - /* collectives offload support */ - IBNET_VERBOSE(10, ("Send pack collectives offload = %d\n", OFFLOAD_CONNECTX_B0)); - IBNET_VERBOSE(10, ("packing %d of %d\n", 1, sizeof(uint8_t))); - - /* Pasha: add query for collectives offload support */ - memcpy(pack_ptr, &coll_offload_flag, sizeof(uint8_t)); - pack_ptr += sizeof(uint8_t); - - /* number of cpcs for this port */ - IBNET_VERBOSE(10, ("Send pack number of cpcs = %d\n", device->num_cpcs)); - IBNET_VERBOSE(10, ("packing %d of %d\n", 1, sizeof(uint8_t))); - - memcpy(pack_ptr, &device->num_cpcs, sizeof(uint8_t)); - pack_ptr += sizeof(uint8_t); - - for (cpc = 0; cpc < device->num_cpcs; cpc++) { - uint8_t cpc_index; - uint8_t cpc_buflen; - - /* cpc index */ - cpc_index = ompi_common_ofacm_base_get_cpc_index(device->cpcs[cpc]->data.cbm_component); - - IBNET_VERBOSE(10, ("Send pack cpc index = %d\n", cpc_index)); - IBNET_VERBOSE(10, ("packing %d of %d\n", 1, sizeof(uint8_t))); - - memcpy(pack_ptr, &cpc_index, sizeof(uint8_t)); - pack_ptr += sizeof(uint8_t); - - /* cpc priority */ - IBNET_VERBOSE(10, ("Send pack cpc priority = %d\n", - device->cpcs[cpc]->data.cbm_priority)); - IBNET_VERBOSE(10, ("packing %d of %d\n", 1, 
sizeof(uint8_t))); - - memcpy(pack_ptr, &device->cpcs[cpc]->data.cbm_priority, sizeof(uint8_t)); - pack_ptr += sizeof(uint8_t); - - /* cpc buffer length in bytes */ - cpc_buflen = device->cpcs[cpc]->data.cbm_modex_message_len; - - IBNET_VERBOSE(10, ("Send pack cpc message len = %d\n", cpc_buflen)); - IBNET_VERBOSE(10, ("packing %d of %d\n", 1, sizeof(uint8_t))); - - memcpy(pack_ptr, &cpc_buflen, sizeof(uint8_t)); - pack_ptr += sizeof(uint8_t); - - /* cpc buffer */ - if (0 != cpc_buflen) { - IBNET_VERBOSE(10, ("Send pack cpc buffer len = %d\n", cpc_buflen)); - IBNET_VERBOSE(10, ("packing %d of %d\n", 1, sizeof(uint8_t))); - - memcpy(pack_ptr, device->cpcs[cpc]->data.cbm_modex_message, cpc_buflen); - pack_ptr += (size_t) cpc_buflen; - } - } - } - } - - return OMPI_SUCCESS; -} - -/* Translation vpid to ompi_proc */ -static int vpid_to_proc(ompi_vpid_t vpid, - struct ompi_proc_t ** procs, int n_procs_in, ompi_proc_t** out_proc) -{ - int i; - for (i = 0; i < n_procs_in; i++) { - if (vpid == procs[i]->proc_name.vpid) { - *out_proc = procs[i]; - return i; - } - } - - return OMPI_ERROR; -} - -static int unpack_and_load_gather_rbuff(char *rbuffer, int max_sent_bytes, - struct ompi_proc_t ** procs, int n_procs_in, opal_list_t *peers_data) -{ - - int i; - char* unpack_ptr; - - /* Message format: - * - my rank (uint32_t) - * - number of active ports (uint32_t) - * - for each active port: - * + lid (uint16_t) - * + subnetid (uint64_t) - * + mtu (uint32_t) - * + colloffload (uint8_t) - * + num of cpcs (uint8_t) - * + for each cpc: (uint8_t) - * * cpc index (uint8_t) - * * cpc priority (uint8_t) - * * cpc buffer len (uint8_t) - * * cpc buffer (byte*buffer_len) - * - */ - - /* Start to unpack data */ - for(i = 0; i < n_procs_in; i++) { - uint32_t p; - mca_sbgp_ibnet_proc_t *ibnet_proc; - - unpack_ptr = rbuffer + (size_t) (i * max_sent_bytes); - - /* create new proc */ - ibnet_proc = OBJ_NEW(mca_sbgp_ibnet_proc_t); - - IBNET_VERBOSE(10, ("element=%d unpacking %d of %d\n", i, 1, sizeof(uint32_t))); - IBNET_VERBOSE(10, ("Recive remote rank %d\n", ibnet_proc->rank)); - - memcpy(&ibnet_proc->rank, unpack_ptr, sizeof(uint32_t)); - unpack_ptr += sizeof(uint32_t); - - /* set back pointer to ompi_proc */ - ibnet_proc->ompi_proc_index = - vpid_to_proc(ibnet_proc->rank, procs, - n_procs_in, &ibnet_proc->ompi_proc); - if (OMPI_ERROR == ibnet_proc->ompi_proc_index) { - return OMPI_ERROR; - } - - IBNET_VERBOSE(10, ("element=%d unpacking %d of %d\n", i, 1, sizeof(uint32_t))); - IBNET_VERBOSE(10, ("Recive number of ports %d\n", ibnet_proc->num_ports)); - - memcpy(&ibnet_proc->num_ports, unpack_ptr, sizeof(uint32_t)); - unpack_ptr += sizeof(uint32_t); - - /* prepare place for port data*/ - ibnet_proc->remote_ports_info = calloc(ibnet_proc->num_ports, sizeof(mca_sbgp_ibnet_port_t)); - if (NULL == ibnet_proc->remote_ports_info) { - return OMPI_ERROR; - } - - /* load the data */ - for(p = 0; p < ibnet_proc->num_ports; p++) { - mca_sbgp_ibnet_port_t *port = &ibnet_proc->remote_ports_info[p]; - uint32_t cpc; - - IBNET_VERBOSE(10, ("element=%d unpacking %d of %d\n", i, 1, sizeof(uint16_t))); - IBNET_VERBOSE(10, ("Recive id %d\n", port->id)); - - memcpy(&port->id, unpack_ptr, sizeof(uint16_t)); - unpack_ptr += sizeof(uint16_t); - - IBNET_VERBOSE(10, ("element=%d unpacking %d of %d\n", i, 1, sizeof(uint16_t))); - IBNET_VERBOSE(10, ("Recive lid %d\n", port->lid)); - - memcpy(&port->lid, unpack_ptr, sizeof(uint16_t)); - unpack_ptr += sizeof(uint16_t); - - IBNET_VERBOSE(10, ("element=%d unpacking %d of %d\n", i, 1, 
sizeof(uint64_t))); - IBNET_VERBOSE(10, ("Recive subnet id %lx\n", port->subnet_id)); - - memcpy(&port->subnet_id, unpack_ptr, sizeof(uint64_t)); - unpack_ptr += sizeof(uint64_t); - - IBNET_VERBOSE(10, ("element=%d unpacking %d of %d\n", i, 1, sizeof(uint32_t))); - IBNET_VERBOSE(10, ("Recive mtu %d\n", port->mtu)); - - memcpy(&port->mtu, unpack_ptr, sizeof(uint32_t)); - unpack_ptr += sizeof(uint32_t); - - IBNET_VERBOSE(10, ("element=%d unpacking %d of %d\n", i, 1, sizeof(uint8_t))); - IBNET_VERBOSE(10, ("Recive offload %d\n", port->coll_offload)); - - memcpy(&port->coll_offload, unpack_ptr, sizeof(uint8_t)); - unpack_ptr += sizeof(uint8_t); - - IBNET_VERBOSE(10, ("element=%d unpacking %d of %d\n", i, 1, sizeof(uint8_t))); - IBNET_VERBOSE(10, ("Recive number of cpcs %d\n", port->num_cpcs)); - - memcpy(&port->num_cpcs, unpack_ptr, sizeof(uint8_t)); - unpack_ptr += sizeof(uint8_t); - - port->pm_cpc_data = calloc(port->num_cpcs, - sizeof(ompi_common_ofacm_base_module_data_t)); - if (NULL == port->pm_cpc_data) { - return OMPI_ERROR; - } - - /* load cpc data */ - for (cpc = 0; cpc < port->num_cpcs; cpc++) { - ompi_common_ofacm_base_module_data_t *cpc_data = - &port->pm_cpc_data[cpc]; - uint8_t cpc_index = -1; - - IBNET_VERBOSE(10, ("element=%d unpacking %d of %d\n", i, 1, sizeof(uint8_t))); - IBNET_VERBOSE(10, ("Recive cpc index %d\n", cpc_index)); - - memcpy(&cpc_index, unpack_ptr, sizeof(uint8_t)); - unpack_ptr += sizeof(uint8_t); - - cpc_data->cbm_component = - ompi_common_ofacm_base_get_cpc_byindex(cpc_index); - if (NULL == cpc_data->cbm_component) { - IBNET_VERBOSE(10, ("Failed to resolve cpc index %d\n", cpc_index)); - return OMPI_ERROR; - } - - IBNET_VERBOSE(10, ("element=%d unpacking %d of %d\n", i, 1, sizeof(uint8_t))); - IBNET_VERBOSE(10, ("Recive priority %d\n", cpc_data->cbm_priority)); - - memcpy(&cpc_data->cbm_priority, unpack_ptr, sizeof(uint8_t)); - unpack_ptr += sizeof(uint8_t); - - IBNET_VERBOSE(10, ("element=%d unpacking %d of %d\n", i, 1, sizeof(uint8_t))); - IBNET_VERBOSE(10, ("Recive cpc message len %d\n", cpc_data->cbm_modex_message_len)); - - memcpy(&cpc_data->cbm_modex_message_len, unpack_ptr, sizeof(uint8_t)); - unpack_ptr += sizeof(uint8_t); - - if (0 != cpc_data->cbm_modex_message_len) { - int cpc_buflen = cpc_data->cbm_modex_message_len; - - IBNET_VERBOSE(10, ("Recive cpc message data with len %d\n", cpc_buflen)); - IBNET_VERBOSE(10, ("element=%d unpacking %d of %d\n", i, cpc_buflen, cpc_buflen)); - - memcpy(&cpc_data->cbm_modex_message, unpack_ptr, cpc_buflen); - unpack_ptr += (size_t) cpc_buflen; - } - } - } - - /* Put the new proc to the list */ - opal_list_append(peers_data, (opal_list_item_t*) ibnet_proc); - } - - assert((uint32_t) n_procs_in == opal_list_get_size(peers_data)); - return OMPI_SUCCESS; -} - -static int cmp_cgroups(const void *p1, const void *p2) -{ - mca_sbgp_ibnet_connection_group_info_t *g1 = - (mca_sbgp_ibnet_connection_group_info_t *)p1; - mca_sbgp_ibnet_connection_group_info_t *g2 = - (mca_sbgp_ibnet_connection_group_info_t *)p2; - return (g2->num_procs - g1->num_procs); -} - -static int set_ibnet_proc_on_cgroup( - mca_sbgp_ibnet_connection_group_info_t *cgroup, - mca_sbgp_ibnet_proc_t *ibnet_proc, - mca_sbgp_ibnet_device_t *device, - mca_sbgp_ibnet_module_t *module) -{ - uint32_t p; - int k, rc, p_indx; /* port index in array of device */ - - for (p_indx = 0; p_indx < device->num_allowed_ports; ++p_indx) { - if (cgroup->port == device->ports[p_indx].id) { - break; - } - } - - assert(device->num_act_ports > p_indx); - - if (NULL == 
-static int set_ibnet_proc_on_cgroup(
-        mca_sbgp_ibnet_connection_group_info_t *cgroup,
-        mca_sbgp_ibnet_proc_t *ibnet_proc,
-        mca_sbgp_ibnet_device_t *device,
-        mca_sbgp_ibnet_module_t *module)
-{
-    uint32_t p;
-    int k, rc, p_indx; /* port index in the device's port array */
-
-    for (p_indx = 0; p_indx < device->num_allowed_ports; ++p_indx) {
-        if (cgroup->port == device->ports[p_indx].id) {
-            break;
-        }
-    }
-
-    assert(device->num_act_ports > p_indx);
-
-    if (NULL == ibnet_proc->use_port) {
-        ibnet_proc->use_port = calloc(module->num_cgroups, sizeof(int));
-        if (NULL == ibnet_proc->use_port) {
-            IBNET_ERROR(("Failed to allocate use_port array."));
-            return OMPI_ERROR;
-        }
-    }
-
-    IBNET_VERBOSE(10, ("Local port is %d, idx - %d.\n",
-                device->ports[p_indx].id, p_indx));
-
-    for (p = 0; p < ibnet_proc->num_ports; p++) {
-        if (device->ports[p_indx].subnet_id ==
-                ibnet_proc->remote_ports_info[p].subnet_id) {
-            ompi_common_ofacm_base_module_t *local_cpc = NULL;
-            ompi_common_ofacm_base_module_data_t *remote_cpc_data = NULL;
-
-            /* check if we have a matching cpc on both sides */
-            if (OMPI_SUCCESS !=
-                    ompi_common_ofacm_base_find_match(device->cpcs,
-                        device->num_cpcs,
-                        ibnet_proc->remote_ports_info[p].pm_cpc_data,
-                        ibnet_proc->remote_ports_info[p].num_cpcs,
-                        &local_cpc,
-                        &remote_cpc_data)) {
-                /* Failed to match, can not use the port */
-                IBNET_VERBOSE(10, ("Failed to match, can not use the port - %d.\n", p + 1));
-                continue;
-            }
-
-            for (k = 0; k < module->num_cgroups && ((p + 1) != (uint32_t) ibnet_proc->use_port[k]); ++k)
-                ;
-
-            if (k < module->num_cgroups) {
-                /* The port is already in use by another connection group */
-                continue;
-            }
-
-            /* Connection group 'cgroup' communicates with this proc
-               over its own remote port */
-            ibnet_proc->use_port[cgroup->index] = p + 1;
-
-            /* if there is no group array yet, we need to create it */
-            if (OPAL_UNLIKELY(NULL == cgroup->ibnet_procs)) {
-                cgroup->ibnet_procs = OBJ_NEW(opal_pointer_array_t);
-                rc = opal_pointer_array_init(cgroup->ibnet_procs, 10, INT_MAX, 10);
-                if (OPAL_SUCCESS != rc) {
-                    IBNET_ERROR(("Failed to allocate opal_pointer_array"));
-                    return OMPI_ERROR;
-                }
-            }
-
-            IBNET_VERBOSE(10, ("Device idx %d, local port idx %d; "
-                        "adding rank %d to the module %p, rem port %d",
-                        device->device_index, p_indx, ibnet_proc->rank,
-                        module, ibnet_proc->remote_ports_info[p].id));
-
-            /* No need to remove: opal_list_remove_item(peers_data, (opal_list_item_t*)ibnet_proc); */
-            rc = opal_pointer_array_set_item(cgroup->ibnet_procs,
-                    /* num_selected, */ cgroup->num_procs,
-                    (void *) ibnet_proc);
-            if (OPAL_SUCCESS != rc) {
-                IBNET_ERROR(("Failed to set rank %d at index %d",
-                            ibnet_proc->rank, cgroup->num_procs));
-                return OMPI_ERROR;
-            }
-
-            /* store the selected cpc data on this proc */
-            ibnet_proc->remote_ports_info[p].local_cpc = local_cpc;
-            ibnet_proc->remote_ports_info[p].remote_cpc_data = remote_cpc_data;
-
-            ++cgroup->num_procs;
-
-            /* we are done with this proc, go to the next one */
-            break;
-        }
-    }
-
-    return OMPI_SUCCESS;
-}
-
-static int setup_cgroup_all(
-        mca_sbgp_ibnet_connection_group_info_t *cgroup,
-        mca_sbgp_ibnet_device_t *device,
-        mca_sbgp_ibnet_module_t *module,
-        opal_list_t *peers_data)
-{
-    int rc;
-    mca_sbgp_ibnet_proc_t *ibnet_proc = NULL;
-
-    for (ibnet_proc = (mca_sbgp_ibnet_proc_t *) opal_list_get_first(peers_data);
-            ibnet_proc != (mca_sbgp_ibnet_proc_t *) opal_list_get_end(peers_data);
-            ibnet_proc = (mca_sbgp_ibnet_proc_t *)
-                opal_list_get_next((opal_list_item_t *) ibnet_proc)) {
-
-        rc = set_ibnet_proc_on_cgroup(cgroup, ibnet_proc, device, module);
-        if (OMPI_SUCCESS != rc) {
-            return rc;
-        }
-    }
-
-    return OMPI_SUCCESS;
-}
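setup_cgroup_all() above and setup_cgroup_node() below both funnel into
set_ibnet_proc_on_cgroup(), and the test that actually decides membership is a subnet
match between the chosen local port and any of the peer's remote ports. Stated as a
standalone predicate (a sketch; no such helper exists in this code):

static int ibnet_subnet_match(const mca_sbgp_ibnet_port_t *local_port,
                              const mca_sbgp_ibnet_proc_t *peer)
{
    uint32_t p;

    for (p = 0; p < peer->num_ports; p++) {
        if (local_port->subnet_id == peer->remote_ports_info[p].subnet_id) {
            return 1;   /* same IB subnet: candidate for this cgroup */
        }
    }
    return 0;
}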
-static int setup_cgroup_node(mca_sbgp_ibnet_connection_group_info_t *cgroup, mca_sbgp_ibnet_device_t *device,
-        mca_sbgp_ibnet_module_t *module, opal_list_t *peers_data)
-{
-    int rc, local = 0;
-    mca_sbgp_ibnet_proc_t *ibnet_proc = NULL;
-
-    for (ibnet_proc = (mca_sbgp_ibnet_proc_t *) opal_list_get_first(peers_data);
-            ibnet_proc != (mca_sbgp_ibnet_proc_t *) opal_list_get_end(peers_data);
-            ibnet_proc = (mca_sbgp_ibnet_proc_t *)
-                opal_list_get_next((opal_list_item_t *) ibnet_proc)) {
-
-        local = OPAL_PROC_ON_LOCAL_NODE(ibnet_proc->ompi_proc->super.proc_flags);
-        if (0 == local) {
-            /* the remote process resides on a different node */
-            continue;
-        }
-
-        /* the process resides on the same machine */
-        rc = set_ibnet_proc_on_cgroup(cgroup, ibnet_proc, device, module);
-        if (OMPI_SUCCESS != rc) {
-            return rc;
-        }
-    }
-
-    return OMPI_SUCCESS;
-}
-
-/* This function is the heart of the ibnet component.
- * Main purpose:
- * Run over the list of all peers and select only the "reachable" ones.
- * A peer is reachable if it has a subnet_id equal to a subnet id on one
- * of my ports. All peers that have the same number of active ports on
- * the same subnet may be grouped into a subgroup.
- * The selection logic still needs more thought; at this stage we just
- * return the list of all procs.
- */
-static int select_procs(mca_sbgp_ibnet_module_t *module, opal_list_t *peers_data)
-{
-    mca_sbgp_ibnet_device_t *device = NULL;
-    mca_sbgp_ibnet_proc_t *ibnet_proc = NULL;
-    mca_sbgp_ibnet_connection_group_info_t *cgroup = NULL;
-
-    uint32_t p = 0;
-    int i = 0, j, rc = OMPI_SUCCESS;
-    int num_grouped = 0,
-        groups_to_use = 1;
-
-    mca_sbgp_ibnet_component_t *cs = &mca_sbgp_ibnet_component;
-
-    IBNET_VERBOSE(10, ("Start to select procs.\n"));
-
-    module->num_cgroups = 0;
-
-    for (device = (mca_sbgp_ibnet_device_t *) opal_list_get_first(&cs->devices);
-            device != (mca_sbgp_ibnet_device_t *) opal_list_get_end(&cs->devices);
-            device = (mca_sbgp_ibnet_device_t *)
-                opal_list_get_next((opal_list_item_t *) device)) {
-        module->num_cgroups += device->num_act_ports;
-        IBNET_VERBOSE(10, ("Device num %d with index %d, num of active ports %d\n",
-                    ++i, device->device_index, device->num_act_ports));
-    }
-
-    module->cgroups = calloc(module->num_cgroups,
-            sizeof(mca_sbgp_ibnet_connection_group_info_t));
-    if (NULL == module->cgroups) {
-        IBNET_ERROR(("Failed to allocate cgroups"));
-        rc = OMPI_ERROR;
-        goto select_error;
-    }
-
-    IBNET_VERBOSE(10, ("Num of cgroups - %d.\n", module->num_cgroups));
-    /* 1. Run over all active ports and build a connection group
-     *    for each one */
-    for (device = (mca_sbgp_ibnet_device_t *) opal_list_get_first(&cs->devices);
-            device != (mca_sbgp_ibnet_device_t *) opal_list_get_end(&cs->devices);
-            device = (mca_sbgp_ibnet_device_t *)
-                opal_list_get_next((opal_list_item_t *) device)) {
-        /* run over the active ports on the device */
-        for (j = 0; j < device->num_act_ports; j++) {
-            cgroup = &module->cgroups[num_grouped];
-
-            /* Init the cgroup struct */
-            cgroup->device_index = device->device_index;
-            cgroup->index = num_grouped;
-            cgroup->port = device->ports[j].id;
-            cgroup->num_procs = 0;
-
-            /* Set up the communication group */
-            switch (module->mode) {
-            case MCA_SBGP_IBNET_ALL_NET:
-                rc = setup_cgroup_all(cgroup, device, module, peers_data);
-                break;
-            case MCA_SBGP_IBNET_NODE_NET:
-                rc = setup_cgroup_node(cgroup, device, module, peers_data);
-                break;
-            default:
-                rc = OMPI_ERROR;
-                IBNET_ERROR(("Module mode is unknown, fatal error"));
-            }
-
-            if (OMPI_SUCCESS != rc) {
-                IBNET_ERROR(("Failed to setup cgroup."));
-                goto select_error;
-            }
-
-            if (0 != cgroup->num_procs) {
-                ++num_grouped;
-            }
-        }
-    }
-
-    if (0 == num_grouped) {
-        /* No connection group was found */
-        IBNET_ERROR(("No connection group was found."));
-        rc = OMPI_ERROR;
-        goto select_error;
-    }
-
-    /* If we have more than a single cgroup,
-     * we need to return only groups that connect
-     * to exactly the same peers
-     */
-    if (num_grouped > 1) {
-
-        /* 2. Sort connection groups by size */
-        qsort(module->cgroups, num_grouped,
-                sizeof(mca_sbgp_ibnet_connection_group_info_t),
-                cmp_cgroups);
-
-        /* 3. How many groups have the maximal size?
-         *    The first one is maximal */
-        for (groups_to_use = 1; groups_to_use < num_grouped; groups_to_use++) {
-            if (module->cgroups[0].num_procs != module->cgroups[groups_to_use].num_procs) {
-                break;
-            }
-        }
-
-        /* Ishai - It looks like no one uses this groups_to_use value. In any case there is a bug in it. */
-        /* 4. Check that all the maximal size groups are connected to the
-         *    same peers; if not, we just use the FIRST cgroup */
-        if (groups_to_use > 1) {
-            /* we need to check that all groups connect to
-             * the same set of peers. */
-            for (j = groups_to_use - 1; j > 0; j--) {
-                for (p = 0; p < module->cgroups[0].num_procs; p++) {
-                    /* compare proc by proc.... */
-                    if (opal_pointer_array_get_item(module->cgroups[0].ibnet_procs, p) !=
-                            opal_pointer_array_get_item(module->cgroups[j].ibnet_procs, p)) {
-                        /* peers are not equal, ignore this group and go to the next one */
-                        groups_to_use--;
-                        if (j != groups_to_use) {
-                            /* it was not the last group, swap it with the last one */
-                            mca_sbgp_ibnet_connection_group_info_t tmp = module->cgroups[j];
-                            module->cgroups[j] = module->cgroups[groups_to_use];
-                            module->cgroups[groups_to_use] = tmp;
-                        }
-
-                        break; /* go to the next group */
-                    }
-                }
-            }
-        }
-    }
-
-    /* update the subgroup count */
-    module->num_cgroups = groups_to_use;
-
-    /* store the array of ranks and the group size */
-    module->super.group_size = module->cgroups[0].num_procs;
-    module->super.group_list = (int *) calloc(module->super.group_size, sizeof(int));
-    if (NULL == module->super.group_list) {
-        IBNET_ERROR(("Failed to allocate memory for group list"));
-        rc = OMPI_ERROR;
-        goto select_error;
-    }
-
-    for (i = 0; i < module->super.group_size; i++) {
-        ibnet_proc = (mca_sbgp_ibnet_proc_t *)
-            opal_pointer_array_get_item(module->cgroups[0].ibnet_procs, i);
-
-        assert(NULL != ibnet_proc);
-        IBNET_VERBOSE(10, ("Adding rank %d to group list", ibnet_proc->rank));
-
-        module->super.group_list[i] = ibnet_proc->ompi_proc_index;
-    }
-
-    /* Let the proc with the lowest index be the leader of the subgroup */
-    ibnet_proc = (mca_sbgp_ibnet_proc_t *)
-        opal_pointer_array_get_item(module->cgroups[0].ibnet_procs, 0);
-
-    assert(NULL != ibnet_proc);
-    ibnet_proc->duty = MCA_SBGP_IBNET_NODE_LEADER;
-
-#if OPAL_ENABLE_DEBUG
-    IBNET_VERBOSE(10, ("Ibnet module: size - %d, num_cgroups - %d.\n",
-                module->super.group_size, module->num_cgroups));
-
-    for (i = 0; i < module->num_cgroups; ++i) {
-        IBNET_VERBOSE(10, ("cgroup %d uses port %d.\n",
-                    i + 1, module->cgroups[i].port));
-    }
-#endif
-
-    return OMPI_SUCCESS;
-
-select_error:
-    if (NULL != module->cgroups) {
-        for (i = 0; i < num_grouped; i++) {
-            if (NULL != module->cgroups[i].ibnet_procs) {
-                /* Ishai: When do we destruct it if the function was successful - only at the end of the process? */
-                OBJ_RELEASE(module->cgroups[i].ibnet_procs);
-            }
-        }
-
-        free(module->cgroups);
-    }
-
-    if (0 != module->super.group_size &&
-            NULL != module->super.group_list) {
-        free(module->super.group_list);
-    }
-
-    for (ibnet_proc = (mca_sbgp_ibnet_proc_t *) opal_list_get_first(peers_data);
-            ibnet_proc != (mca_sbgp_ibnet_proc_t *) opal_list_get_end(peers_data);
-            ibnet_proc = (mca_sbgp_ibnet_proc_t *)
-                opal_list_get_next((opal_list_item_t *) ibnet_proc)) {
-        if (NULL != ibnet_proc->use_port) {
-            free(ibnet_proc->use_port);
-        }
-    }
-
-    return rc;
-}
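The swap-and-shrink dedup loop above decrements groups_to_use and swaps, but the group
moved into slot j is never itself re-examined (j only decreases), which is presumably
the bug the "Ishai" note refers to. A hypothetical helper that makes the peer-set
comparison explicit, so the caller can filter groups in a single forward pass:

static int cgroup_equals_first(const mca_sbgp_ibnet_connection_group_info_t *g,
                               const mca_sbgp_ibnet_connection_group_info_t *first)
{
    uint32_t p;

    for (p = 0; p < first->num_procs; p++) {
        if (opal_pointer_array_get_item(first->ibnet_procs, (int) p) !=
                opal_pointer_array_get_item(g->ibnet_procs, (int) p)) {
            return 0;   /* peer sets differ */
        }
    }
    return 1;
}

/* e.g.: keep = 1;
 *       for (j = 1; j < groups_to_use; j++)
 *           if (cgroup_equals_first(&cg[j], &cg[0])) cg[keep++] = cg[j];
 *       groups_to_use = keep; */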
-
-/* This routine is used to find the list of procs that run on the
-** same host as the calling process.
-*/
-
-#define IBNET_ALL "all"
-#define IBNET_NODE "node"
-
-static int key2mode(char *key)
-{
-    if (NULL == key) {
-        IBNET_VERBOSE(6, ("key is NULL, return MCA_SBGP_IBNET_ALL"));
-        return MCA_SBGP_IBNET_ALL_NET;
-    }
-
-    if (strlen(IBNET_ALL) == strlen(key) &&
-            0 == strncmp(IBNET_ALL, key, strlen(IBNET_ALL))) {
-        IBNET_VERBOSE(6, ("key is MCA_SBGP_IBNET_ALL"));
-        return MCA_SBGP_IBNET_ALL_NET;
-    }
-
-    if (strlen(IBNET_NODE) == strlen(key) &&
-            0 == strncmp(IBNET_NODE, key, strlen(IBNET_NODE))) {
-        IBNET_VERBOSE(6, ("key is NODE"));
-        return MCA_SBGP_IBNET_NODE_NET;
-    }
-
-    IBNET_VERBOSE(6, ("key was not detected, return MCA_SBGP_IBNET_NONE"));
-    return MCA_SBGP_IBNET_NONE_NET;
-}
-
-static int mca_sbgp_ibnet_calc_sbuff_size(void)
-{
-    int bytes_tosend = 0, port, cpc;
-    mca_sbgp_ibnet_device_t *device;
-
-    opal_list_t *devices = &mca_sbgp_ibnet_component.devices;
-
-    bytes_tosend += sizeof(uint32_t); /* OPAL_UINT32 rank */
-    bytes_tosend += sizeof(uint32_t); /* OPAL_UINT32 num of active ports */
-
-    /* Go through the list of devices and size the message */
-    for (device = (mca_sbgp_ibnet_device_t *) opal_list_get_first(devices);
-            device != (mca_sbgp_ibnet_device_t *) opal_list_get_end(devices);
-            device = (mca_sbgp_ibnet_device_t *) opal_list_get_next((opal_list_item_t *) device)) {
-        for (port = 0; port < device->num_allowed_ports; ++port) {
-            if (!device->ports[port].used) {
-                continue;
-            }
-
-            /* OPAL_UINT16 port num */
-            bytes_tosend += sizeof(uint16_t);
-
-            /* OPAL_UINT16 lid */
-            bytes_tosend += sizeof(uint16_t);
-
-            /* OPAL_UINT64 subnetid */
-            bytes_tosend += sizeof(uint64_t);
-
-            /* OPAL_UINT32 default mtu */
-            bytes_tosend += sizeof(uint32_t);
-
-            /* OPAL_UINT8 collectives offload support */
-            bytes_tosend += sizeof(uint8_t);
-
-            /* OPAL_UINT8 number of cpcs for this port */
-            bytes_tosend += sizeof(uint8_t);
-
-            for (cpc = 0; cpc < device->num_cpcs; ++cpc) {
-                /* OPAL_UINT8 cpc index */
-                bytes_tosend += sizeof(uint8_t);
-
-                /* OPAL_UINT8 cpc priority */
-                bytes_tosend += sizeof(uint8_t);
-
-                /* cpc buffer length (OPAL_UINT8) in bytes */
-                bytes_tosend += device->cpcs[cpc]->data.cbm_modex_message_len;
-                bytes_tosend += sizeof(uint8_t);
-            }
-        }
-    }
-
-    return bytes_tosend;
-}
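The accounting in mca_sbgp_ibnet_calc_sbuff_size() mirrors the pack format field for
field: each active port contributes a fixed 18-byte header (2 + 2 + 8 + 4 + 1 + 1),
plus 3 bytes of metadata and the payload per CPC. A hypothetical pair of constants
makes that arithmetic explicit:

enum {
    IBNET_PORT_HDR_BYTES = 2 * sizeof(uint16_t)   /* port num + lid     */
                         + sizeof(uint64_t)       /* subnet id          */
                         + sizeof(uint32_t)       /* mtu                */
                         + 2 * sizeof(uint8_t),   /* offload + num cpcs */
    IBNET_CPC_HDR_BYTES  = 3 * sizeof(uint8_t)    /* index, prio, len   */
};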
-
-mca_sbgp_base_module_t *mca_sbgp_ibnet_select_procs(struct ompi_proc_t **procs,
-        int n_procs_in,
-        struct ompi_communicator_t *comm,
-        char *key,
-        void *output_data
-        )
-{
-    /* local variables */
-    opal_list_t peers_data;
-    mca_sbgp_ibnet_module_t *module;
-
-    int rc;
-    char *sbuff = NULL, *rbuff = NULL;
-
-    int *sbgp_procs_ranks = NULL, *ranks_in_comm = NULL;
-    int i, my_rank_in_group = -1, my_rank, num_bytes_tosend;
-
-    struct mca_sbgp_ibnet_proc_t *ibnet_proc = NULL;
-    mca_sbgp_ibnet_component_t *cs = &mca_sbgp_ibnet_component;
-
-    /* Create the module */
-    module = OBJ_NEW(mca_sbgp_ibnet_module_t);
-    if (OPAL_UNLIKELY(NULL == module)) {
-        return NULL;
-    }
-
-    module->num_cgroups = 0;
-    module->cgroups = NULL;
-    module->mode = key2mode(key);
-
-    if (OPAL_UNLIKELY(MCA_SBGP_IBNET_NONE_NET == module->mode)) {
-        goto Error_module;
-    }
-
-    module->super.group_size = 0;
-    module->super.group_list = NULL;
-    module->super.group_comm = comm;
-    module->super.group_net = OMPI_SBGP_IBCX2;
-
-    /* Prepare the list for arriving data; construct it early so that
-     * every error path below can safely destruct it */
-    OBJ_CONSTRUCT(&peers_data, opal_list_t);
-
-    ranks_in_comm = (int *) malloc(n_procs_in * sizeof(int));
-    if (OPAL_UNLIKELY(NULL == ranks_in_comm)) {
-        IBNET_ERROR(("Cannot allocate memory.\n"));
-        goto Error;
-    }
-
-    my_rank = ompi_comm_rank(&ompi_mpi_comm_world.comm);
-
-    for (i = 0; i < n_procs_in; i++) {
-        ranks_in_comm[i] = procs[i]->proc_name.vpid;
-        if (my_rank == ranks_in_comm[i]) {
-            my_rank_in_group = i;
-        }
-    }
-
-    /* Prepare the send data */
-    num_bytes_tosend = mca_sbgp_ibnet_calc_sbuff_size();
-
-    rc = comm_allreduce_pml(&num_bytes_tosend,
-            &num_bytes_tosend, 1,
-            MPI_INT, my_rank_in_group,
-            MPI_MAX, n_procs_in,
-            ranks_in_comm, &ompi_mpi_comm_world.comm);
-    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
-        goto Error;
-    }
-
-    IBNET_VERBOSE(10, ("The size of the send buff is %d\n", num_bytes_tosend));
-
-    assert(num_bytes_tosend > 0);
-
-    /* Allocate send/recv buffers for the allgather communication */
-    sbuff = (char *) malloc(num_bytes_tosend);
-    rbuff = (char *) malloc(num_bytes_tosend * n_procs_in);
-    if (OPAL_UNLIKELY(NULL == sbuff || NULL == rbuff)) {
-        IBNET_ERROR(("Failed to allocate buffers for send/recv ibnet allgather\n"));
-        goto Error;
-    }
-
-    rc = pack_gather_sbuff(sbuff);
-    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
-        goto Error;
-    }
-
-    rc = comm_allgather_pml((void *) sbuff, (void *) rbuff,
-            num_bytes_tosend, MPI_BYTE,
-            my_rank_in_group, n_procs_in,
-            ranks_in_comm, &ompi_mpi_comm_world.comm);
-    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
-        IBNET_ERROR(("Allgather call failed.\n"));
-        goto Error;
-    }
-
-    /* Load the data into peers_data */
-    rc = unpack_and_load_gather_rbuff(rbuff, num_bytes_tosend,
-            procs, n_procs_in, &peers_data);
-    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
-        goto Error;
-    }
-
-    /* Select logic */
-    rc = select_procs(module, &peers_data);
-    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
-        goto Error;
-    }
-
-    /* Assign the group id */
-    sbgp_procs_ranks = (int *) malloc(module->super.group_size *
-            sizeof(int));
-    if (OPAL_UNLIKELY(NULL == sbgp_procs_ranks)) {
-        IBNET_ERROR(("Cannot allocate memory.\n"));
-        goto Error;
-    }
-
-    for (i = 0; i < module->super.group_size; ++i) {
-        ibnet_proc = (struct mca_sbgp_ibnet_proc_t *)
-            opal_pointer_array_get_item(
-                    module->cgroups[0].ibnet_procs, i);
-
-        sbgp_procs_ranks[i] = ibnet_proc->ompi_proc->proc_name.vpid;
-        if (my_rank == sbgp_procs_ranks[i]) {
-            my_rank_in_group = i;
-        }
-    }
-
-    assert(my_rank_in_group >= 0);
-
-    rc = comm_allreduce_pml(&cs->curr_max_group_id,
-            &cs->curr_max_group_id, 1,
-            MPI_INT, my_rank_in_group,
-            MPI_MAX, module->super.group_size,
-            sbgp_procs_ranks, &ompi_mpi_comm_world.comm);
-    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
-        goto Error;
-    }
-
-    module->group_id = cs->curr_max_group_id;
-    cs->curr_max_group_id++;
-
-    /* successful completion: clean up the temporary structures */
-    OBJ_DESTRUCT(&peers_data);
-
-    free(sbuff);
-    free(rbuff);
-
-    free(ranks_in_comm);
-    free(sbgp_procs_ranks);
-
-    IBNET_VERBOSE(10, ("Return ibnet module.\n"));
-    return (mca_sbgp_base_module_t *) module;
-
-    /* return with error */
-Error:
-    /* clean up */
-    if (NULL != module->super.group_list) {
-        free(module->super.group_list);
-        module->super.group_list = NULL;
-    }
-
-    /* clean up the temporary structures */
-    OBJ_DESTRUCT(&peers_data);
-
-    if (NULL != sbgp_procs_ranks) {
-        free(sbgp_procs_ranks);
-    }
-
-    if (NULL != ranks_in_comm) {
-        free(ranks_in_comm);
-    }
-
-    if (NULL != sbuff) {
-        free(sbuff);
-    }
-
-    if (NULL != rbuff) {
-        free(rbuff);
-    }
-
-Error_module:
-    OBJ_RELEASE(module);
-
-    return NULL;
-}
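mca_sbgp_ibnet_select_procs() implements a fixed-slot modex: ranks first agree on the
maximum per-rank payload, then allgather into equal-sized slots, so rank i's data
always starts at offset i * max_len. A hypothetical plain-MPI rendering of the same
pattern (n_procs and comm supplied by the caller; the pml helpers above do the
equivalent over a rank subset):

int my_len = mca_sbgp_ibnet_calc_sbuff_size(), max_len = 0;
MPI_Allreduce(&my_len, &max_len, 1, MPI_INT, MPI_MAX, comm);

char *sbuf = calloc(max_len, 1);                    /* zero-padded slot  */
char *rbuf = malloc((size_t) max_len * n_procs);
MPI_Allgather(sbuf, max_len, MPI_BYTE, rbuf, max_len, MPI_BYTE, comm);
/* rank i's (possibly padded) message starts at rbuf + (size_t) i * max_len */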
diff --git a/ompi/mca/sbgp/p2p/Makefile.am b/ompi/mca/sbgp/p2p/Makefile.am
deleted file mode 100644
index d7d14f795f..0000000000
--- a/ompi/mca/sbgp/p2p/Makefile.am
+++ /dev/null
@@ -1,41 +0,0 @@
-#
-# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
-# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
-# Copyright (c) 2015      Cisco Systems, Inc. All rights reserved.
-# $COPYRIGHT$
-#
-# Additional copyrights may follow
-#
-# $HEADER$
-#
-
-sources = \
-        sbgp_p2p.h \
-        sbgp_p2p_component.c \
-        sbgp_p2p_module.c
-
-
-# Make the output library in this directory, and name it either
-# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
-# (for static builds).
-
-component_noinst =
-component_install =
-if MCA_BUILD_ompi_sbgp_p2p_DSO
-component_install += mca_sbgp_p2p.la
-else
-component_noinst += libmca_sbgp_p2p.la
-endif
-
-# See ompi/mca/btl/sm/Makefile.am for an explanation of
-# libmca_common_sm.la.
-
-mcacomponentdir = $(ompilibdir)
-mcacomponent_LTLIBRARIES = $(component_install)
-mca_sbgp_p2p_la_SOURCES = $(sources)
-mca_sbgp_p2p_la_LDFLAGS = -module -avoid-version
-mca_sbgp_p2p_la_LIBADD =
-
-noinst_LTLIBRARIES = $(component_noinst)
-libmca_sbgp_p2p_la_SOURCES = $(sources)
-libmca_sbgp_p2p_la_LDFLAGS = -module -avoid-version
diff --git a/ompi/mca/sbgp/p2p/configure.m4 b/ompi/mca/sbgp/p2p/configure.m4
deleted file mode 100644
index 56cc9a06af..0000000000
--- a/ompi/mca/sbgp/p2p/configure.m4
+++ /dev/null
@@ -1,27 +0,0 @@
-# -*- shell-script -*-
-#
-# Copyright (c) 2013      Sandia National Laboratories. All rights reserved.
-# $COPYRIGHT$
-#
-# Additional copyrights may follow
-#
-# $HEADER$
-#
-
-# MCA_ompi_sbgp_p2p_POST_CONFIG(will_build)
-# ----------------------------------------
-# The p2p sbgp requires a BML endpoint tag to compile, so require it.
-# Require in POST_CONFIG instead of CONFIG so that we only require it
-# if we're not disabled.
-AC_DEFUN([MCA_ompi_sbgp_p2p_POST_CONFIG], [
-    AS_IF([test "$1" = "1"], [OMPI_REQUIRE_ENDPOINT_TAG([BML])])
-])dnl
-
-# MCA_ompi_sbgp_p2p_CONFIG(action-if-can-compile,
-#                          [action-if-cant-compile])
-# ------------------------------------------------
-# We can always build, unless we were explicitly disabled.
-AC_DEFUN([MCA_ompi_sbgp_p2p_CONFIG],[
-    AC_CONFIG_FILES([ompi/mca/sbgp/p2p/Makefile])
-    [$1]
-])dnl
diff --git a/ompi/mca/sbgp/p2p/owner.txt b/ompi/mca/sbgp/p2p/owner.txt
deleted file mode 100644
index 1c86df367b..0000000000
--- a/ompi/mca/sbgp/p2p/owner.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-#
-# owner/status file
-# owner: institution that is responsible for this package
-# status: e.g. active, maintenance, unmaintained
-#
-owner: ORNL
-status: unmaintained
diff --git a/ompi/mca/sbgp/p2p/sbgp_p2p.h b/ompi/mca/sbgp/p2p/sbgp_p2p.h
deleted file mode 100644
index f8fa5fc194..0000000000
--- a/ompi/mca/sbgp/p2p/sbgp_p2p.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
- * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
- * $COPYRIGHT$
- *
- * Additional copyrights may follow
- *
- * $HEADER$
- */
-
-
-#ifndef MCA_BCOL_p2p_EXPORT_H
-#define MCA_BCOL_p2p_EXPORT_H
-
-#include "ompi_config.h"
-
-#include "mpi.h"
-#include "ompi/mca/mca.h"
-#include "ompi/mca/sbgp/sbgp.h"
-#include "opal/mca/mpool/mpool.h"
-#include "ompi/request/request.h"
-#include "ompi/proc/proc.h"
-
-BEGIN_C_DECLS
-
-#ifdef HAVE_SCHED_YIELD
-#  include <sched.h>
-#  define SPIN sched_yield()
-#else /* no switch available */
-#  define SPIN
-#endif
-
-
-    /**
-     * Structure to hold the basic p2p sbgp component. First it holds the
-     * base sbgp component, and then holds a bunch of
-     * p2p-component-specific data (e.g., current MCA param
-     * values).
-     */
-    struct mca_sbgp_p2p_component_t {
-        /** Base sbgp component */
-        mca_sbgp_base_component_2_0_0_t super;
-    };
-
-    /**
-     * Convenience typedef
-     */
-    typedef struct mca_sbgp_p2p_component_t
-        mca_sbgp_p2p_component_t;
-
-    /*
-    ** Base sub-group module
-    **/
-    struct mca_sbgp_p2p_module_t {
-        /** Sub-group modules all inherit from mca_sbgp_base_module_t */
-        mca_sbgp_base_module_t super;
-    };
-    typedef struct mca_sbgp_p2p_module_t mca_sbgp_p2p_module_t;
-    OBJ_CLASS_DECLARATION(mca_sbgp_p2p_module_t);
-
-    /* This routine is used to find the list of procs that run on the
-    ** same host as the calling process.
-    */
-    /*
-    struct mca_sbgp_base_module_t *mca_sbgp_p2p_select_procs(struct ompi_proc_t ** procs,
-        int n_procs_in, char *key, void *output_data);
-    */
-
-    /**
-     * Global component instance
-     */
-    OMPI_MODULE_DECLSPEC extern mca_sbgp_p2p_component_t mca_sbgp_p2p_component;
-
-END_C_DECLS
-
-#endif /* MCA_BCOL_p2p_EXPORT_H */
diff --git a/ompi/mca/sbgp/p2p/sbgp_p2p_component.c b/ompi/mca/sbgp/p2p/sbgp_p2p_component.c
deleted file mode 100644
index 2fd93da404..0000000000
--- a/ompi/mca/sbgp/p2p/sbgp_p2p_component.c
+++ /dev/null
@@ -1,224 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
-/*
- * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
- * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights
- *                         reserved.
- * $COPYRIGHT$
- *
- * Additional copyrights may follow
- *
- * $HEADER$
- */
-
-/**
- * @file
- *
- */
-
-#include "ompi_config.h"
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-#include
-#ifdef HAVE_SYS_MMAN_H
-#include <sys/mman.h>
-#endif
-#include
-
-#include "ompi/constants.h"
-#include "ompi/communicator/communicator.h"
-#include "sbgp_p2p.h"
-#include "ompi/mca/bml/bml.h"
-
-
-/*
- * Public string showing the sbgp p2p component version number
- */
-const char *mca_sbgp_p2p_component_version_string =
-    "Open MPI sbgp - p2p collective MCA component version " OMPI_VERSION;
-
-
-/*
- * Local functions
- */
-
-static int p2p_register(void);
-static int p2p_open(void);
-static int p2p_close(void);
-static mca_sbgp_base_module_t * mca_sbgp_p2p_select_procs(struct ompi_proc_t ** procs,
-        int n_procs_in, struct ompi_communicator_t *comm, char *key, void *output_data);
-
-static int mca_sbgp_p2p_init_query(bool enable_progress_threads,
-        bool enable_mpi_threads);
-
-/*
- * Instantiate the public struct with all of our public information
- * and pointers to our public functions in it
- */
-
-mca_sbgp_p2p_component_t mca_sbgp_p2p_component = {
-
-    {
-        /* First, the mca_component_t struct containing meta
-           information about the component itself */
-
-        .sbgp_version = {
-            MCA_SBGP_BASE_VERSION_2_0_0,
-
-            /* Component name and version */
-            .mca_component_name = "p2p",
-            MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
-                                  OMPI_RELEASE_VERSION),
-
-            /* Component open and close functions */
-            .mca_open_component = p2p_open,
-            .mca_close_component = p2p_close,
-            .mca_register_component_params = p2p_register,
-        },
-
-        .sbgp_init_query = mca_sbgp_p2p_init_query,
-        .select_procs = mca_sbgp_p2p_select_procs,
-        .priority = 0,
-    }
-
-};
-
-static int p2p_register(void)
-{
-    mca_sbgp_p2p_component_t *cs = &mca_sbgp_p2p_component;
-
-    cs->super.priority = 90;
-    (void) mca_base_component_var_register(&cs->super.sbgp_version,
-                                           "priority", "Priority for the sbgp p2p component",
-                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
-                                           OPAL_INFO_LVL_9,
-                                           MCA_BASE_VAR_SCOPE_READONLY,
-                                           &cs->super.priority);
-
-    return OMPI_SUCCESS;
-}
-
-/*
- * Open the component
- */
-static int p2p_open(void)
-{
-    return OMPI_SUCCESS;
-}
-
-
-/*
- * Close the component
- */
-static int p2p_close(void)
-{
-    return OMPI_SUCCESS;
-}
-
-/* query to see if the component is available for use, and can
- * satisfy the thread and progress requirements
- */
-static int mca_sbgp_p2p_init_query(bool enable_progress_threads,
-        bool enable_mpi_threads)
-{
-    /* at this stage there is no reason to disqualify this component */
-
-    /* done */
-    return OMPI_SUCCESS;
-}
-
-/* This routine is used to find the list of procs that are reachable
-** from the calling process via the BTL named by 'key'.
-*/
-static mca_sbgp_base_module_t * mca_sbgp_p2p_select_procs(struct ompi_proc_t ** procs,
-        int n_procs_in,
-        struct ompi_communicator_t *comm,
-        char *key,
-        void *output_data
-        )
-{
-    /* local variables */
-    int cnt, proc, my_rank;
-    mca_sbgp_p2p_module_t *module;
-
-    /* find my rank in the group */
-    for (my_rank = -1, proc = 0 ; proc < n_procs_in ; ++proc) {
-        if (ompi_proc_local() == procs[proc]) {
-            my_rank = proc;
-        }
-    }
-
-    /* I am not in the list - so will form no local subgroup */
-    if (0 > my_rank) {
-        return NULL;
-    }
-
-    module = OBJ_NEW(mca_sbgp_p2p_module_t);
-    if (!module) {
-        return NULL;
-    }
-
-    module->super.group_size = 0;
-    module->super.group_comm = comm;
-    module->super.group_net = OMPI_SBGP_P2P;
-
-    /* allocate resources */
-    module->super.group_list = (int *) calloc (n_procs_in, sizeof (int));
-    if (NULL == module->super.group_list) {
-        goto Error;
-    }
-
-    for (cnt = 0, proc = 0 ; proc < n_procs_in ; ++proc) {
-#if defined(OMPI_PROC_ENDPOINT_TAG_BML)
-        mca_bml_base_endpoint_t* endpoint =
-            (mca_bml_base_endpoint_t*) procs[proc]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
-#endif
-
-        if (my_rank == proc || !key) {
-            module->super.group_list[cnt++] = proc;
-            continue;
-        }
-
-#if defined(OMPI_PROC_ENDPOINT_TAG_BML)
-        if (NULL != endpoint) {
-            int num_btls = mca_bml_base_btl_array_get_size(&(endpoint->btl_eager));
-
-            /* loop over btls */
-            for (int i_btl = 0 ; i_btl < num_btls ; ++i_btl) {
-                /* add the proc if one of its eager btls matches the
-                 * requested btl name */
-                if (0 == strcmp(endpoint->btl_eager.bml_btls[i_btl].btl->
-                                btl_component->btl_version.mca_component_name, key)) {
-                    module->super.group_list[cnt++] = proc;
-                    break;
-                }
-            }
-        }
-#endif
-    }
-
-    if (0 == cnt) {
-        goto Error;
-    }
-
-    module->super.group_size = cnt;
-    module->super.group_list = (int *) realloc (module->super.group_list, sizeof (int) * cnt);
-    if (NULL == module->super.group_list) {
-        /* Shouldn't ever happen */
-        goto Error;
-    }
-
-    /* successful return */
-    return (mca_sbgp_base_module_t *)module;
-
-    /* return with error */
-Error:
-    /* clean up */
-    if (NULL != module->super.group_list) {
-        free (module->super.group_list);
-        module->super.group_list = NULL;
-    }
-    OBJ_RELEASE(module);
-
-    return NULL;
-}
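The selector returns a module whose group_list holds indexes into the procs array. A
hypothetical caller asking for the subgroup of peers reachable through the "tcp" BTL
("tcp" is just an example value; any BTL component name works, and procs, n_procs_in
and comm come from the caller):

mca_sbgp_base_module_t *grp =
    mca_sbgp_p2p_component.super.select_procs(procs, n_procs_in, comm, "tcp", NULL);
if (NULL != grp) {
    for (int i = 0; i < grp->group_size; ++i) {
        int proc_index = grp->group_list[i];   /* index into procs[] */
        (void) proc_index;
    }
}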
diff --git a/ompi/mca/sbgp/p2p/sbgp_p2p_module.c b/ompi/mca/sbgp/p2p/sbgp_p2p_module.c
deleted file mode 100644
index 40a1c104bb..0000000000
--- a/ompi/mca/sbgp/p2p/sbgp_p2p_module.c
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
- * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
- * $COPYRIGHT$
- *
- * Additional copyrights may follow
- *
- * $HEADER$
- */
-
-/**
- * @file
- *
- */
-
-#include "ompi_config.h"
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-#include
-#ifdef HAVE_SYS_MMAN_H
-#include <sys/mman.h>
-#endif
-#include
-#include
-
-#include "ompi/constants.h"
-#include "ompi/communicator/communicator.h"
-#include "ompi/mca/sbgp/p2p/sbgp_p2p.h"
-
-/*
- * Local functions
- */
-static void
-mca_sbgp_p2p_module_construct(mca_sbgp_p2p_module_t *module)
-{
-}
-
-static void
-mca_sbgp_p2p_module_destruct(mca_sbgp_p2p_module_t *module)
-{
-    /* done */
-}
-
-
-OBJ_CLASS_INSTANCE(mca_sbgp_p2p_module_t,
-                   mca_sbgp_base_module_t,
-                   mca_sbgp_p2p_module_construct,
-                   mca_sbgp_p2p_module_destruct);
diff --git a/ompi/mca/sbgp/sbgp.h b/ompi/mca/sbgp/sbgp.h
deleted file mode 100644
index c128051b2e..0000000000
--- a/ompi/mca/sbgp/sbgp.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
-/*
- * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
- * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2015      Los Alamos National Security, LLC. All rights
- *                         reserved.
- * $COPYRIGHT$
- *
- * Additional copyrights may follow
- *
- * $HEADER$
- */
-
-#ifndef MCA_SBGP_H
-#define MCA_SBGP_H
-
-#include "ompi_config.h"
-#include "opal/class/opal_list.h"
-#include "ompi/mca/mca.h"
-#include "ompi/communicator/communicator.h"
-
-#include "opal/util/show_help.h"
-
-#if defined(c_plusplus) || defined(__cplusplus)
-extern "C" {
-#endif
-
-/**
- * List of supported network types
- */
-
-typedef int (*mca_sbgp_component_init_query_fn_t)
-    (bool enable_progress_threads, bool enable_mpi_threads);
-
-typedef enum {
-    OMPI_SBGP_MUMA   = 1 << 0, /* Muma */
-    OMPI_SBGP_SOCKET = 1 << 1, /* CPU socket */
-    OMPI_SBGP_P2P    = 1 << 2, /* Point to point networks */
-    OMPI_SBGP_IBCX2  = 1 << 3, /* InfiniBand ConnectX-2 */
-    OMPI_SBGP_IB     = 1 << 4  /* InfiniBand */
-} mca_sbgp_net_type;
-
-/*
- * Interface function for the routine that extracts subgroups
- *
- * @param procs (IN)        List of mpi processes to filter
- * @param n_procs_in (IN)   Number of input processes
- * @param key (IN)          optional key
- * @param output_data (OUT) component specific output
- * @return                  module, NULL if one is not created.
- *
- */
-
-struct mca_sbgp_base_module_2_0_0_t {
-
-    /** Sub-group modules all inherit from opal_object */
-    opal_object_t super;
-
-    /* group size */
-    int group_size;
-
-    /* largest power of 2 in group */
-    int pow_2;
-
-    /* number of levels in the tree */
-    int n_levels_pow2;
-
-    /* my index in the group list,
-     * pointer to my rank */
-    int my_index;
-
-    /* List of ranks. Actually we return to ML an array of
-     * indexes into ompi_proc, and ML is responsible for
-     * replacing the indexes with ranks */
-    int *group_list;
-
-    /* pointer to the *parent* communicator.
-     * Not sure if we really need it now: I know my rank via my index,
-     * and the ompi_proc can be cached on the sbgp module.
-     * For ib it is not needed */
-    struct ompi_communicator_t *group_comm;
-
-    /* network supported by this group */
-    mca_sbgp_net_type group_net;
-
-    /* FIXME:
-     * I don't know where to add the use_hdl flag since the
-     * mca_bcol_basesmuma_comm_query takes just two input parameters.
-     */
-    bool use_hdl;
-
-};
-typedef struct mca_sbgp_base_module_2_0_0_t mca_sbgp_base_module_2_0_0_t;
-typedef struct mca_sbgp_base_module_2_0_0_t mca_sbgp_base_module_t;
-/* typedef mca_sbgp_base_module_2_0_0_t mca_sbgp_base_module_t; */
-OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_sbgp_base_module_t);
-
-typedef mca_sbgp_base_module_t *(*mca_sbgp_create_subgroup_fn_t)(
-        struct ompi_proc_t ** procs, int n_procs_in,
-        struct ompi_communicator_t *comm, char *key,
-        void *output_data
-        );
-
-/**
- * Subgrouping component interface
- *
- * Component interface for the sub-group framework. A public
- * instance of this structure, called
- * mca_sbgp_[component_name]_component, must exist in any sub-group
- * component.
- */
-struct mca_sbgp_base_component_2_0_0_t {
-    /** Base component description */
-    mca_base_component_t sbgp_version;
-
-    /** Sbgp component init query function */
-    mca_sbgp_component_init_query_fn_t sbgp_init_query;
-
-    /** process selection function */
-    mca_sbgp_create_subgroup_fn_t select_procs;
-
-    /** priority */
-    int priority;
-
-};
-typedef struct mca_sbgp_base_component_2_0_0_t mca_sbgp_base_component_2_0_0_t;
-typedef struct mca_sbgp_base_component_2_0_0_t mca_sbgp_base_component;
-
-
-/*
- * Macro for use in components that are of type sbgp
- */
-#define MCA_SBGP_BASE_VERSION_2_0_0 \
-    OMPI_MCA_BASE_VERSION_2_1_0("sbgp", 2, 0, 0)
-
-#if defined(c_plusplus) || defined(__cplusplus)
-}
-#endif
-#endif /* MCA_SBGP_H */
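Putting the interface together: the framework calls sbgp_init_query() once at startup
and select_procs() per communicator to carve out a subgroup. A skeletal, entirely
hypothetical component instance (compare with the real p2p instance above; the name
"example" and both functions are illustrative only):

static int example_init_query(bool enable_progress_threads,
                              bool enable_mpi_threads)
{
    return OMPI_SUCCESS;            /* nothing to check */
}

static mca_sbgp_base_module_t *example_select_procs(
        struct ompi_proc_t **procs, int n_procs_in,
        struct ompi_communicator_t *comm, char *key, void *output_data)
{
    return NULL;                    /* never forms a subgroup */
}

mca_sbgp_base_component_2_0_0_t mca_sbgp_example_component = {
    .sbgp_version = {
        MCA_SBGP_BASE_VERSION_2_0_0,
        .mca_component_name = "example",
        MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
                              OMPI_RELEASE_VERSION),
    },
    .sbgp_init_query = example_init_query,
    .select_procs    = example_select_procs,
    .priority        = 0,
};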