
Merge pull request #2658 from rhc54/topic/removal

Remove the bcol, coll/ml, and sbgp code as stale and lacking a maintainer
This commit is contained in:
Ralph Castain 2017-01-03 20:34:09 -08:00 committed by GitHub
parent dadc6fbaf6 66131b4183
commit 5737a45b35
163 changed files: 0 additions and 54840 deletions


@@ -1,35 +0,0 @@
#
# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# main library setup
noinst_LTLIBRARIES = libmca_bcol.la
libmca_bcol_la_SOURCES =
# header setup
nobase_ompi_HEADERS =
nobase_nodist_ompi_HEADERS =
# local files
headers = bcol.h
libmca_bcol_la_SOURCES += $(headers) $(nodist_headers)
# Conditionally install the header files
if WANT_INSTALL_HEADERS
nobase_ompi_HEADERS += $(headers)
nobase_nodist_ompi_HEADERS += $(nodist_headers)
ompidir = $(ompiincludedir)/ompi/mca/bcol
else
ompidir = $(includedir)
endif
include base/Makefile.am
distclean-local:
rm -f base/static-components.h


@@ -1,16 +0,0 @@
#
# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
headers += \
base/base.h
libmca_bcol_la_SOURCES += \
base/bcol_base_frame.c \
base/bcol_base_init.c


@@ -1,49 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BCOL_BASE_H
#define MCA_BCOL_BASE_H
#include "ompi_config.h"
#include "ompi/mca/mca.h"
#include "opal/class/opal_list.h"
#include "ompi/mca/bcol/bcol.h"
/*
* Global functions for BCOL
*/
BEGIN_C_DECLS
OMPI_DECLSPEC extern opal_list_t mca_bcol_base_components_in_use;
OMPI_DECLSPEC extern char *ompi_bcol_bcols_string;
OMPI_DECLSPEC extern mca_base_framework_t ompi_bcol_base_framework;
OMPI_DECLSPEC int mca_bcol_base_init(bool enable_progress_threads, bool enable_mpi_threads);
struct mca_bcol_base_module_t;
OMPI_DECLSPEC int mca_bcol_base_bcol_fns_table_init(struct mca_bcol_base_module_t *bcol_module);
OMPI_DECLSPEC int mca_bcol_base_fn_table_construct(struct mca_bcol_base_module_t *bcol_module);
OMPI_DECLSPEC int mca_bcol_base_fn_table_destroy(struct mca_bcol_base_module_t *bcol_module);
OMPI_DECLSPEC int mca_bcol_base_set_attributes(struct mca_bcol_base_module_t *bcol_module,
mca_bcol_base_coll_fn_comm_attributes_t *comm_attribs,
mca_bcol_base_coll_fn_invoke_attributes_t *inv_attribs,
mca_bcol_base_module_collective_fn_primitives_t bcol_fn,
mca_bcol_base_module_collective_fn_primitives_t progress_fn);
END_C_DECLS
#endif /* MCA_BCOL_BASE_H */


@@ -1,374 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <stdio.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#include "ompi/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/util/argv.h"
#include "ompi/mca/bcol/bcol.h"
#include "ompi/mca/bcol/base/base.h"
#include "ompi/include/ompi/constants.h"
#include "opal/mca/mpool/mpool.h"
#include "opal/class/opal_list.h"
/*
* The following file was created by configure. It contains extern
* statements and the definition of an array of pointers to each
* component's public mca_base_component_t struct.
*/
#include "ompi/mca/bcol/base/static-components.h"
static int mca_bcol_base_open(mca_base_open_flag_t flags);
static int mca_bcol_base_close (void);
static int mca_bcol_base_register(mca_base_register_flag_t flags);
/*
* Global variables
*/
MCA_BASE_FRAMEWORK_DECLARE(ompi, bcol, NULL, mca_bcol_base_register, mca_bcol_base_open, mca_bcol_base_close,
mca_bcol_base_static_components, 0);
OMPI_DECLSPEC opal_list_t mca_bcol_base_components_in_use = {{0}};
OMPI_DECLSPEC char *ompi_bcol_bcols_string = NULL;
OMPI_DECLSPEC int bcol_mpool_compatibility[BCOL_SIZE][BCOL_SIZE] = {{0}};
OMPI_DECLSPEC int bcol_mpool_index[BCOL_SIZE][BCOL_SIZE] = {{0}};
static void bcol_base_module_constructor(mca_bcol_base_module_t *module)
{
int fnc;
module->bcol_component = NULL;
module->network_context = NULL;
module->context_index = -1;
module->supported_mode = 0;
module->init_module = NULL;
module->sbgp_partner_module = NULL;
module->squence_number_offset = 0;
module->n_poll_loops = 0;
for (fnc = 0; fnc < BCOL_NUM_OF_FUNCTIONS; fnc++) {
module->bcol_function_table[fnc] = NULL;
module->small_message_thresholds[fnc] = BCOL_THRESHOLD_UNLIMITED;
}
module->set_small_msg_thresholds = NULL;
module->header_size = 0;
module->bcol_memory_init = NULL;
module->next_inorder = NULL;
mca_bcol_base_fn_table_construct(module);
}
static void bcol_base_module_destructor(mca_bcol_base_module_t *module)
{
int fnc;
module->bcol_component = NULL;
module->context_index = -1;
module->init_module = NULL;
module->sbgp_partner_module = NULL;
module->squence_number_offset = 0;
module->n_poll_loops = 0;
for (fnc = 0; fnc < BCOL_NUM_OF_FUNCTIONS; fnc++) {
module->bcol_function_table[fnc] = NULL;
}
module->bcol_memory_init = NULL;
}
OBJ_CLASS_INSTANCE(mca_bcol_base_module_t,
opal_object_t,
bcol_base_module_constructor,
bcol_base_module_destructor);
static void bcol_base_network_context_constructor(bcol_base_network_context_t *nc)
{
nc->context_id = -1;
nc->context_data = NULL;
}
static void bcol_base_network_context_destructor(bcol_base_network_context_t *nc)
{
nc->context_id = -1;
nc->context_data = NULL;
nc->register_memory_fn = NULL;
nc->deregister_memory_fn = NULL;
}
OBJ_CLASS_INSTANCE(bcol_base_network_context_t,
opal_object_t,
bcol_base_network_context_constructor,
bcol_base_network_context_destructor);
/* get the list of bcol components to use */
static int mca_bcol_base_set_components_to_use(opal_list_t *bcol_components_avail,
opal_list_t *bcol_components_in_use)
{
/* local variables */
const mca_base_component_t *b_component;
mca_base_component_list_item_t *b_cli;
mca_base_component_list_item_t *b_clj;
char **bcols_requested;
const char *b_component_name;
/* split the request for the bcol modules */
bcols_requested = opal_argv_split(ompi_bcol_bcols_string, ',');
if (NULL == bcols_requested) {
return OMPI_ERROR;
}
/* Initialize list */
OBJ_CONSTRUCT(bcol_components_in_use, opal_list_t);
/* figure out basic collective modules to use */
/* loop over list of components requested */
for (int i = 0 ; bcols_requested[i] ; ++i) {
/* loop over discovered components */
OPAL_LIST_FOREACH(b_cli, bcol_components_avail, mca_base_component_list_item_t) {
b_component = b_cli->cli_component;
b_component_name = b_component->mca_component_name;
if (0 == strcmp (b_component_name, bcols_requested[i])) {
/* found selected component */
b_clj = OBJ_NEW(mca_base_component_list_item_t);
if (NULL == b_clj) {
opal_argv_free (bcols_requested);
return OPAL_ERR_OUT_OF_RESOURCE;
}
b_clj->cli_component = b_component;
opal_list_append(bcol_components_in_use,
(opal_list_item_t *) b_clj);
break;
} /* end check for bcol component */
}
}
/* Note: need to add error checking to make sure all requested functions
* were found */
/* release resources */
opal_argv_free (bcols_requested);
return OMPI_SUCCESS;
}
static int mca_bcol_base_register(mca_base_register_flag_t flags)
{
/* figure out which bcol and sbgp components will actually be used */
/* get list of sub-grouping functions to use */
ompi_bcol_bcols_string = "basesmuma,basesmuma,iboffload,ptpcoll,ugni";
(void) mca_base_var_register("ompi", "bcol", "base", "string",
"Default set of basic collective components to use",
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_bcol_bcols_string);
return OMPI_SUCCESS;
}
/**
* Function for finding and opening either all MCA components, or the one
* that was specifically requested via a MCA parameter.
*/
static int mca_bcol_base_open(mca_base_open_flag_t flags)
{
int ret;
/* Open up all available components */
if (OMPI_SUCCESS !=
(ret = mca_base_framework_components_open(&ompi_bcol_base_framework, flags))) {
return ret;
}
ret = mca_bcol_base_set_components_to_use(&ompi_bcol_base_framework.framework_components,
&mca_bcol_base_components_in_use);
if (OMPI_SUCCESS != ret) {
return ret;
}
/* memory registration compatibilities */
bcol_mpool_compatibility[BCOL_SHARED_MEMORY_UMA][BCOL_SHARED_MEMORY_UMA]=1;
bcol_mpool_compatibility[BCOL_SHARED_MEMORY_UMA][BCOL_SHARED_MEMORY_SOCKET]=1;
bcol_mpool_compatibility[BCOL_SHARED_MEMORY_UMA][BCOL_POINT_TO_POINT]=1;
bcol_mpool_compatibility[BCOL_SHARED_MEMORY_UMA][BCOL_IB_OFFLOAD]=1;
bcol_mpool_compatibility[BCOL_SHARED_MEMORY_SOCKET][BCOL_SHARED_MEMORY_UMA]=1;
bcol_mpool_compatibility[BCOL_POINT_TO_POINT] [BCOL_SHARED_MEMORY_UMA]=1;
bcol_mpool_compatibility[BCOL_IB_OFFLOAD] [BCOL_SHARED_MEMORY_UMA]=1;
return OMPI_SUCCESS;
}
static int mca_bcol_base_close (void)
{
opal_list_item_t *item;
while (NULL != (item = opal_list_remove_first (&mca_bcol_base_components_in_use))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&mca_bcol_base_components_in_use);
return mca_base_framework_components_close(&ompi_bcol_base_framework, NULL);
}
/*
* Prototype implementation of selection logic
*/
int mca_bcol_base_fn_table_construct(struct mca_bcol_base_module_t *bcol_module){
int bcol_fn;
/* Call all init functions */
/* Create a function table */
for (bcol_fn = 0; bcol_fn < BCOL_NUM_OF_FUNCTIONS; bcol_fn++){
/* Create a list object for each bcol type list */
OBJ_CONSTRUCT(&(bcol_module->bcol_fns_table[bcol_fn]), opal_list_t);
}
return OMPI_SUCCESS;
}
int mca_bcol_base_fn_table_destroy(struct mca_bcol_base_module_t *bcol_module){
int bcol_fn;
for (bcol_fn = 0; bcol_fn < BCOL_NUM_OF_FUNCTIONS; bcol_fn++){
/* gvm FIX: Go through the list and destroy each item */
/* Destroy the function table object for each bcol type list */
OBJ_DESTRUCT(&(bcol_module->bcol_fns_table[bcol_fn]));
}
return OMPI_SUCCESS;
}
int mca_bcol_base_set_attributes(struct mca_bcol_base_module_t *bcol_module,
mca_bcol_base_coll_fn_comm_attributes_t *arg_comm_attribs,
mca_bcol_base_coll_fn_invoke_attributes_t *arg_inv_attribs,
mca_bcol_base_module_collective_fn_primitives_t bcol_fn,
mca_bcol_base_module_collective_fn_primitives_t progress_fn
)
{
mca_bcol_base_coll_fn_comm_attributes_t *comm_attribs = NULL;
mca_bcol_base_coll_fn_invoke_attributes_t *inv_attribs = NULL;
struct mca_bcol_base_coll_fn_desc_t *fn_filtered = NULL;
int coll_type;
comm_attribs = malloc(sizeof(mca_bcol_base_coll_fn_comm_attributes_t));
if (NULL == comm_attribs) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
inv_attribs = malloc(sizeof(mca_bcol_base_coll_fn_invoke_attributes_t));
if (NULL == inv_attribs) {
free(comm_attribs);
return OMPI_ERR_OUT_OF_RESOURCE;
}
coll_type = comm_attribs->bcoll_type = arg_comm_attribs->bcoll_type;
comm_attribs->comm_size_min = arg_comm_attribs->comm_size_min;
comm_attribs->comm_size_max = arg_comm_attribs->comm_size_max;
comm_attribs->data_src = arg_comm_attribs->data_src;
comm_attribs->waiting_semantics = arg_comm_attribs->waiting_semantics;
inv_attribs->bcol_msg_min = arg_inv_attribs->bcol_msg_min;
inv_attribs->bcol_msg_max = arg_inv_attribs->bcol_msg_max ;
inv_attribs->datatype_bitmap = arg_inv_attribs->datatype_bitmap ;
inv_attribs->op_types_bitmap = arg_inv_attribs->op_types_bitmap;
fn_filtered = OBJ_NEW(mca_bcol_base_coll_fn_desc_t);
fn_filtered->coll_fn = bcol_fn;
fn_filtered->progress_fn = progress_fn;
fn_filtered->comm_attr = comm_attribs;
fn_filtered->inv_attr = inv_attribs;
opal_list_append(&(bcol_module->bcol_fns_table[coll_type]),(opal_list_item_t*)fn_filtered);
return OMPI_SUCCESS;
}
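The descriptors appended by mca_bcol_base_set_attributes above are later filtered by the caller (the ml collectives layer) against communicator size and message length. A hedged sketch of what such a lookup could look like, using only the fields populated in this file; lookup_coll_fn is a hypothetical helper, not part of the framework:
/* hypothetical helper (illustration only): return the first registered
 * descriptor whose communicator-size and message-size ranges match */
static mca_bcol_base_coll_fn_desc_t *
lookup_coll_fn(struct mca_bcol_base_module_t *bcol_module, int coll_type,
               int comm_size, size_t msg_size)
{
    mca_bcol_base_coll_fn_desc_t *fn;

    OPAL_LIST_FOREACH(fn, &(bcol_module->bcol_fns_table[coll_type]),
                      mca_bcol_base_coll_fn_desc_t) {
        if (comm_size >= fn->comm_attr->comm_size_min &&
            comm_size <= fn->comm_attr->comm_size_max &&
            msg_size  >= (size_t) fn->inv_attr->bcol_msg_min &&
            msg_size  <= (size_t) fn->inv_attr->bcol_msg_max) {
            return fn;   /* fn->coll_fn / fn->progress_fn are the entry points */
        }
    }
    return NULL;
}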
int mca_bcol_base_bcol_fns_table_init(struct mca_bcol_base_module_t *bcol_module){
int ret, bcol_init_fn;
for (bcol_init_fn =0; bcol_init_fn < BCOL_NUM_OF_FUNCTIONS; bcol_init_fn++) {
if (NULL != bcol_module->bcol_function_init_table[bcol_init_fn]) {
ret = (bcol_module->bcol_function_init_table[bcol_init_fn]) (bcol_module);
if (OMPI_SUCCESS != ret) {
return OMPI_ERROR;
}
}
}
return OMPI_SUCCESS;
}
static void mca_bcol_base_coll_fn_desc_constructor(mca_bcol_base_coll_fn_desc_t *fn)
{
fn->comm_attr = NULL;
fn->inv_attr = NULL;
}
static void mca_bcol_base_coll_fn_desc_destructor(mca_bcol_base_coll_fn_desc_t *fn)
{
if (fn->comm_attr) {
free(fn->comm_attr);
}
if (fn->inv_attr) {
free(fn->inv_attr);
}
}
OBJ_CLASS_INSTANCE(mca_bcol_base_coll_fn_desc_t,
opal_list_item_t,
mca_bcol_base_coll_fn_desc_constructor,
mca_bcol_base_coll_fn_desc_destructor);
static void lmngr_block_constructor(mca_bcol_base_lmngr_block_t *item)
{
item->base_addr = NULL;
}
static void lnmgr_block_destructor(mca_bcol_base_lmngr_block_t *item)
{
/* I have nothing to do here */
}
OBJ_CLASS_INSTANCE(mca_bcol_base_lmngr_block_t,
opal_list_item_t,
lmngr_block_constructor,
lnmgr_block_destructor);


@@ -1,45 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/mca/mca.h"
#include "opal/mca/base/base.h"
#include "ompi/mca/bcol/bcol.h"
#include "ompi/mca/bcol/base/base.h"
#include "ompi/include/ompi/constants.h"
int mca_bcol_base_init(bool enable_progress_threads, bool enable_mpi_threads)
{
mca_bcol_base_component_t *bcol_component;
mca_base_component_list_item_t *cli;
int ret;
OPAL_LIST_FOREACH(cli, &mca_bcol_base_components_in_use, mca_base_component_list_item_t) {
bcol_component = (mca_bcol_base_component_t *) cli->cli_component;
if (false == bcol_component->init_done) {
ret = bcol_component->collm_init_query(true, true);
if (OMPI_SUCCESS != ret) {
return ret;
}
bcol_component->init_done = true;
}
}
return OMPI_SUCCESS;
}


@@ -1,7 +0,0 @@
#
# owner/status file
# owner: institution that is responsible for this package
# status: e.g. active, maintenance, unmaintained
#
owner: ORNL
status: unmaintained


@@ -1,66 +0,0 @@
#
# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
sources = \
bcol_basesmuma.h \
bcol_basesmuma_utils.h \
bcol_basesmuma_bcast.c \
bcol_basesmuma_component.c \
bcol_basesmuma_module.c \
bcol_basesmuma_buf_mgmt.c \
bcol_basesmuma_mem_mgmt.c \
bcol_basesmuma_fanin.c \
bcol_basesmuma_fanout.c \
bcol_basesmuma_progress.c \
bcol_basesmuma_reduce.h \
bcol_basesmuma_reduce.c \
bcol_basesmuma_allreduce.c \
bcol_basesmuma_setup.c \
bcol_basesmuma_rd_barrier.c \
bcol_basesmuma_rd_nb_barrier.c \
bcol_basesmuma_rk_barrier.c \
bcol_basesmuma_utils.c \
bcol_basesmuma_bcast_prime.c \
bcol_basesmuma_lmsg_knomial_bcast.c \
bcol_basesmuma_lmsg_bcast.c \
bcol_basesmuma_gather.c \
bcol_basesmuma_allgather.c \
bcol_basesmuma_smcm.h \
bcol_basesmuma_smcm.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
component_noinst =
component_install =
if MCA_BUILD_ompi_bcol_basesmuma_DSO
component_install += mca_bcol_basesmuma.la
else
component_noinst += libmca_bcol_basesmuma.la
endif
# See ompi/mca/btl/sm/Makefile.am for an explanation of
# libmca_common_sm.la.
AM_CPPFLAGS = $(btl_portals_CPPFLAGS)
mcacomponentdir = $(ompilibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_bcol_basesmuma_la_SOURCES = $(sources)
mca_bcol_basesmuma_la_LDFLAGS = -module -avoid-version $(btl_portals_LDFLAGS)
mca_bcol_basesmuma_la_LIBADD = \
$(btl_portals_LIBS)
noinst_LTLIBRARIES = $(component_noinst)
libmca_bcol_basesmuma_la_SOURCES =$(sources)
libmca_bcol_basesmuma_la_LDFLAGS = -module -avoid-version $(btl_portals_LDFLAGS)

The diff for this file is not shown because it is too large.


@@ -1,352 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/include/ompi/constants.h"
#include "ompi/mca/bcol/base/base.h"
#include "ompi/mca/bcol/bcol.h"
#include "ompi/mca/bcol/basesmuma/bcol_basesmuma.h"
/*
#define IS_AGDATA_READY(peer, my_flag, my_sequence_number)\
(((peer)->sequence_number == (my_sequence_number) && \
(peer)->flags[ALLGATHER_FLAG][bcol_id] >= (my_flag) \
)? true : false )
*/
#define CALC_ACTIVE_REQUESTS(active_requests,peers, tree_order) \
do{ \
for( j = 0; j < (tree_order - 1); j++){ \
if( 0 > peers[j] ) { \
/* set the bit */ \
*active_requests ^= (1<<j); \
} \
} \
}while(0)
/*
* Recursive K-ing allgather
*/
/*
* Recursive k-ing algorithm
* Example: k = 3, n = 9
*
* Number of exchange steps = log_k(n)
* Number of peers exchanged with in each step = k - 1 (the radix is k)
*
* (a standalone worked example for k = 3, n = 9 follows the init function below)
*/
int bcol_basesmuma_k_nomial_allgather_init(bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args)
{
mca_bcol_basesmuma_module_t *bcol_module = (mca_bcol_basesmuma_module_t *) const_args->bcol_module;
netpatterns_k_exchange_node_t *exchange_node = &bcol_module->knomial_allgather_tree;
int bcol_id = (int) bcol_module->super.bcol_id;
uint32_t buffer_index = input_args->buffer_index;
int *active_requests =
&(bcol_module->ml_mem.nb_coll_desc[buffer_index].active_requests);
int *iteration = &bcol_module->ml_mem.nb_coll_desc[buffer_index].iteration;
int *status = &bcol_module->ml_mem.nb_coll_desc[buffer_index].status;
int leading_dim, buff_idx, idx;
int64_t sequence_number = input_args->sequence_num;
int my_rank = bcol_module->super.sbgp_partner_module->my_index;
volatile mca_bcol_basesmuma_payload_t *data_buffs;
/* control structures */
volatile mca_bcol_basesmuma_header_t *my_ctl_pointer;
volatile int8_t ready_flag;
/* initialize the iteration counter */
buff_idx = input_args->src_desc->buffer_index;
leading_dim = bcol_module->colls_no_user_data.size_of_group;
idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0);
data_buffs=(volatile mca_bcol_basesmuma_payload_t *)
bcol_module->colls_with_user_data.data_buffs+idx;
/* Set pointer to current proc ctrl region */
my_ctl_pointer = data_buffs[my_rank].ctl_struct;
/* initialize headers and ready flag */
BASESMUMA_HEADER_INIT(my_ctl_pointer, ready_flag, sequence_number, bcol_id);
/* initialize these */
*iteration = -1;
*active_requests = 0;
*status = ready_flag;
if (EXTRA_NODE == exchange_node->node_type) {
/* I am ready at this level */
opal_atomic_wmb ();
my_ctl_pointer->flags[ALLGATHER_FLAG][bcol_id] = ready_flag;
}
return bcol_basesmuma_k_nomial_allgather_progress (input_args, const_args);
}
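To make the step counts in the algorithm comment above concrete: with radix k the exchange runs for ceil(log_k(n)) rounds, and each rank has at most k - 1 outstanding peers per round, which is why the progress function below builds its max_requests mask as 2^(k-1) - 1. A minimal standalone sketch, separate from the removed sources, reproducing the k = 3, n = 9 example:
#include <stdio.h>

int main(void)
{
    int k = 3, n = 9;               /* the example from the comment above */
    int rounds = 0, span = 1;
    int max_requests = 0;

    while (span < n) {              /* ceil(log_k(n)) rounds */
        span *= k;
        ++rounds;
    }
    for (int i = 0; i < k - 1; ++i) {
        max_requests ^= (1 << i);   /* same bit trick as the progress code */
    }
    printf("k=%d n=%d -> %d rounds, max_requests mask=0x%x\n",
           k, n, rounds, max_requests);
    return 0;   /* prints: k=3 n=9 -> 2 rounds, max_requests mask=0x3 */
}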
/* allgather progress function */
int bcol_basesmuma_k_nomial_allgather_progress(bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args)
{
/* local variables */
int8_t flag_offset;
uint32_t buffer_index = input_args->buffer_index;
volatile int8_t ready_flag;
mca_bcol_basesmuma_module_t *bcol_module = (mca_bcol_basesmuma_module_t *) const_args->bcol_module;
netpatterns_k_exchange_node_t *exchange_node = &bcol_module->knomial_allgather_tree;
int group_size = bcol_module->colls_no_user_data.size_of_group;
int *list_connected = bcol_module->super.list_n_connected; /* critical for hierarchical colls */
int bcol_id = (int) bcol_module->super.bcol_id;
mca_bcol_basesmuma_component_t *cm = &mca_bcol_basesmuma_component;
int *active_requests =
&(bcol_module->ml_mem.nb_coll_desc[buffer_index].active_requests);
int *iteration = &bcol_module->ml_mem.nb_coll_desc[buffer_index].iteration;
int *status = &bcol_module->ml_mem.nb_coll_desc[buffer_index].status;
int leading_dim, idx, buff_idx;
int i, j, probe;
int knt;
int src;
int recv_offset, recv_len;
int max_requests = 0; /* critical to set this */
int pow_k, tree_order;
int64_t sequence_number=input_args->sequence_num;
int my_rank = bcol_module->super.sbgp_partner_module->my_index;
int pack_len = input_args->count * input_args->dtype->super.size;
void *data_addr = (void*)(
(unsigned char *) input_args->sbuf +
(size_t) input_args->sbuf_offset);
volatile mca_bcol_basesmuma_payload_t *data_buffs;
volatile char *peer_data_pointer;
/* control structures */
volatile mca_bcol_basesmuma_header_t *my_ctl_pointer;
volatile mca_bcol_basesmuma_header_t *peer_ctl_pointer;
#if 0
fprintf(stderr,"%d: entering sm allgather progress active requests %d iter %d ready_flag %d\n", my_rank,
*active_requests, *iteration, *status);
#endif
buff_idx = input_args->src_desc->buffer_index;
leading_dim=bcol_module->colls_no_user_data.size_of_group;
idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0);
data_buffs=(volatile mca_bcol_basesmuma_payload_t *)
bcol_module->colls_with_user_data.data_buffs+idx;
/* Set pointer to current proc ctrl region */
my_ctl_pointer = data_buffs[my_rank].ctl_struct;
/* increment the starting flag by one and return */
/* flag offset seems unnecessary here */
flag_offset = my_ctl_pointer->starting_flag_value[bcol_id];
ready_flag = *status;
my_ctl_pointer->sequence_number = sequence_number;
/* k-nomial parameters */
tree_order = exchange_node->tree_order;
pow_k = exchange_node->log_tree_order;
/* calculate the maximum number of requests
* at each level each rank communicates with
* at most (k - 1) peers
* so if we set k - 1 bit fields in "max_requests", then
* we have max_request == 2^(k - 1) -1
*/
for(i = 0; i < (tree_order - 1); i++){
max_requests ^= (1<<i);
}
/* let's begin the collective, starting with extra ranks and their
* respective proxies
*/
if (OPAL_UNLIKELY(-1 == *iteration)) {
if (EXTRA_NODE == exchange_node->node_type) {
/* If I'm in here, then I must be looking for data */
ready_flag = flag_offset + 1 + pow_k + 2;
src = exchange_node->rank_extra_sources_array[0];
peer_data_pointer = data_buffs[src].payload;
peer_ctl_pointer = data_buffs[src].ctl_struct;
/* calculate the count */
for (i = 0, knt = 0 ; i < group_size ; ++i){
knt += list_connected[i];
}
for (i = 0 ; i < cm->num_to_probe ; ++i) {
if (IS_PEER_READY(peer_ctl_pointer, ready_flag, sequence_number, ALLGATHER_FLAG, bcol_id)) {
/* we receive the entire message */
opal_atomic_mb ();
memcpy (data_addr, (void *) peer_data_pointer, knt * pack_len);
goto FINISHED;
}
}
/* haven't found it, state is saved, bail out */
return BCOL_FN_STARTED;
} else if (0 < exchange_node->n_extra_sources) {
/* I am a proxy for someone */
src = exchange_node->rank_extra_sources_array[0];
peer_data_pointer = data_buffs[src].payload;
peer_ctl_pointer = data_buffs[src].ctl_struct;
/* calculate the offset */
for (i = 0, knt = 0 ; i < src ; ++i){
knt += list_connected[i];
}
/* probe for extra rank's arrival */
for (i = 0 ; i < cm->num_to_probe ; ++i) {
if (IS_PEER_READY(peer_ctl_pointer, ready_flag, sequence_number, ALLGATHER_FLAG, bcol_id)) {
opal_atomic_mb ();
/* copy it in */
memcpy ((void *) ((uintptr_t) data_addr + knt * pack_len),
(void *) ((uintptr_t) peer_data_pointer + knt * pack_len),
pack_len * list_connected[src]);
break;
}
}
if (i == cm->num_to_probe) {
return BCOL_FN_STARTED;
}
}
/* bump the ready flag to indicate extra node exchange complete */
++ready_flag;
*iteration = 0;
}
/* start the recursive k - ing phase */
for (i = *iteration ; i < pow_k ; ++i) {
/* I am ready at this level */
opal_atomic_wmb ();
my_ctl_pointer->flags[ALLGATHER_FLAG][bcol_id] = ready_flag;
if (0 == *active_requests) {
/* flip some bits, if we don't have active requests from a previous visit */
CALC_ACTIVE_REQUESTS(active_requests,exchange_node->rank_exchanges[i],tree_order);
}
for (j = 0; j < (tree_order - 1); ++j) {
/* recv phase */
src = exchange_node->rank_exchanges[i][j];
if (src < 0) {
/* then not a valid rank, continue */
continue;
}
if (!(*active_requests&(1<<j))) {
/* then this peer hasn't been processed at this level */
peer_data_pointer = data_buffs[src].payload;
peer_ctl_pointer = data_buffs[src].ctl_struct;
recv_offset = exchange_node->payload_info[i][j].r_offset * pack_len;
recv_len = exchange_node->payload_info[i][j].r_len * pack_len;
/* I am putting the probe loop as the innermost loop to achieve
* better temporal locality
*/
for (probe = 0 ; probe < cm->num_to_probe ; ++probe) {
if (IS_PEER_READY(peer_ctl_pointer, ready_flag, sequence_number, ALLGATHER_FLAG, bcol_id)) {
/* flip the request's bit */
*active_requests ^= (1<<j);
/* copy the data */
memcpy((void *)((unsigned char *) data_addr + recv_offset),
(void *)((unsigned char *) peer_data_pointer + recv_offset),
recv_len);
break;
}
}
}
}
if( max_requests == *active_requests ){
/* bump the ready flag */
ready_flag++;
/* reset the active requests for the next level */
*active_requests = 0;
/* calculate the number of active requests
* logically makes sense to do it here. We don't
* want to inadvertently flip a bit to zero that we
* set previously
*/
} else {
/* state is saved, hop out
*/
*status = my_ctl_pointer->flags[ALLGATHER_FLAG][bcol_id];
*iteration = i;
return BCOL_FN_STARTED;
}
}
/* bump the flag one more time for the extra rank */
ready_flag = flag_offset + 1 + pow_k + 2;
/* finish off the last piece, send the data back to the extra */
if( 0 < exchange_node->n_extra_sources ) {
/* simply announce my arrival */
opal_atomic_wmb ();
my_ctl_pointer->flags[ALLGATHER_FLAG][bcol_id] = ready_flag;
}
FINISHED:
/* bump this up for others to see */
my_ctl_pointer->starting_flag_value[bcol_id]++;
return BCOL_FN_COMPLETE;
}
/* Register allgather functions to the BCOL function table,
* so they can be selected
*/
int bcol_basesmuma_allgather_init(mca_bcol_base_module_t *super)
{
mca_bcol_base_coll_fn_comm_attributes_t comm_attribs;
mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs;
comm_attribs.bcoll_type = BCOL_ALLGATHER;
comm_attribs.comm_size_min = 0;
comm_attribs.comm_size_max = 1024 * 1024;
comm_attribs.waiting_semantics = NON_BLOCKING;
inv_attribs.bcol_msg_min = 0;
inv_attribs.bcol_msg_max = 20000; /* range 1 */
inv_attribs.datatype_bitmap = 0xffffffff;
inv_attribs.op_types_bitmap = 0xffffffff;
comm_attribs.data_src = DATA_SRC_KNOWN;
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
bcol_basesmuma_k_nomial_allgather_init,
bcol_basesmuma_k_nomial_allgather_progress);
return OMPI_SUCCESS;
}


@@ -1,611 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/constants.h"
#include "ompi/op/op.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h"
#include "opal/include/opal_stdint.h"
#include "ompi/mca/bcol/base/base.h"
#include "bcol_basesmuma.h"
static int bcol_basesmuma_allreduce_intra_fanin_fanout_progress (bcol_function_args_t *input_args, mca_bcol_base_function_t *c_input_args);
int bcol_basesmuma_allreduce_init(mca_bcol_base_module_t *super)
{
mca_bcol_base_coll_fn_comm_attributes_t comm_attribs;
mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs;
comm_attribs.bcoll_type = BCOL_ALLREDUCE;
comm_attribs.comm_size_min = 0;
comm_attribs.comm_size_max = 1048576;
comm_attribs.data_src = DATA_SRC_KNOWN;
/* selection logic at the ml level specifies a
* request for a non-blocking algorithm
* however, these algorithms are blocking
* following what was done at the p2p level
* we will specify non-blocking, but beware,
* these algorithms are blocking and will not make use
* of the progress engine
*/
comm_attribs.waiting_semantics = NON_BLOCKING;
inv_attribs.bcol_msg_min = 0;
inv_attribs.bcol_msg_max = 20000;
inv_attribs.datatype_bitmap = 0xffffffff;
inv_attribs.op_types_bitmap = 0xffffffff;
/* Set attributes for fanin fanout algorithm */
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
bcol_basesmuma_allreduce_intra_fanin_fanout,
bcol_basesmuma_allreduce_intra_fanin_fanout_progress);
inv_attribs.bcol_msg_min = 20000;
inv_attribs.bcol_msg_max = 10485760; /* range 4 */
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
bcol_basesmuma_allreduce_intra_fanin_fanout,
bcol_basesmuma_allreduce_intra_fanin_fanout_progress);
/* Differs only in comm size */
comm_attribs.data_src = DATA_SRC_UNKNOWN;
comm_attribs.waiting_semantics = BLOCKING;
comm_attribs.comm_size_min = 0;
comm_attribs.comm_size_max = 8;
/* Set attributes for recursive doubling algorithm */
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
bcol_basesmuma_allreduce_intra_recursive_doubling,
NULL);
return OMPI_SUCCESS;
}
/*
* Small data fanin reduce
* ML buffers are used for both payload and control structures
* This functions works with hierarchical allreduce and
* progress engine
*/
static inline int reduce_children (mca_bcol_basesmuma_module_t *bcol_module, volatile void *rbuf, netpatterns_tree_node_t *my_reduction_node,
int *iteration, volatile mca_bcol_basesmuma_header_t *my_ctl_pointer, ompi_datatype_t *dtype,
volatile mca_bcol_basesmuma_payload_t *data_buffs, int count, struct ompi_op_t *op, int process_shift)
{
volatile mca_bcol_basesmuma_header_t *child_ctl_pointer;
int bcol_id = (int) bcol_module->super.bcol_id;
int64_t sequence_number = my_ctl_pointer->sequence_number;
int8_t ready_flag = my_ctl_pointer->ready_flag;
int group_size = bcol_module->colls_no_user_data.size_of_group;
if (LEAF_NODE != my_reduction_node->my_node_type) {
volatile char *child_data_pointer;
volatile void *child_rbuf;
/* for each child */
/* my_result_data = child_result_data (op) my_source_data */
for (int child = *iteration ; child < my_reduction_node->n_children ; ++child) {
int child_rank = my_reduction_node->children_ranks[child] + process_shift;
if (group_size <= child_rank){
child_rank -= group_size;
}
child_ctl_pointer = data_buffs[child_rank].ctl_struct;
if (!IS_PEER_READY(child_ctl_pointer, ready_flag, sequence_number, ALLREDUCE_FLAG, bcol_id)) {
*iteration = child;
return BCOL_FN_STARTED;
}
child_data_pointer = data_buffs[child_rank].payload;
child_rbuf = child_data_pointer + child_ctl_pointer->roffsets[bcol_id];
ompi_op_reduce(op, (void *)child_rbuf, (void *)rbuf, count, dtype);
} /* end child loop */
}
if (ROOT_NODE != my_reduction_node->my_node_type) {
opal_atomic_wmb ();
my_ctl_pointer->flags[ALLREDUCE_FLAG][bcol_id] = ready_flag;
}
/* done with this step. move on to fan out */
*iteration = -1;
return BCOL_FN_COMPLETE;
}
static int allreduce_fanout (mca_bcol_basesmuma_module_t *bcol_module, volatile mca_bcol_basesmuma_header_t *my_ctl_pointer,
volatile void *my_data_pointer, int process_shift, volatile mca_bcol_basesmuma_payload_t *data_buffs,
int sequence_number, int group_size, int rbuf_offset, size_t pack_len)
{
volatile mca_bcol_basesmuma_header_t *parent_ctl_pointer;
int bcol_id = (int) bcol_module->super.bcol_id;
int8_t ready_flag = my_ctl_pointer->ready_flag + 1;
netpatterns_tree_node_t *my_fanout_read_tree;
volatile void *parent_data_pointer;
int my_fanout_parent, my_rank;
void *parent_rbuf, *rbuf;
my_rank = bcol_module->super.sbgp_partner_module->my_index;
my_fanout_read_tree = &(bcol_module->fanout_read_tree[my_rank]);
if (ROOT_NODE != my_fanout_read_tree->my_node_type) {
my_fanout_parent = my_fanout_read_tree->parent_rank + process_shift;
if (group_size <= my_fanout_parent) {
my_fanout_parent -= group_size;
}
rbuf = (void *)((char *) my_data_pointer + rbuf_offset);
/*
* Get parent payload data and control data.
* Get the pointer to the base address of the parent's payload buffer.
* Get the parent's control buffer.
*/
parent_data_pointer = data_buffs[my_fanout_parent].payload;
parent_ctl_pointer = data_buffs[my_fanout_parent].ctl_struct;
parent_rbuf = (void *) ((char *) parent_data_pointer + rbuf_offset);
/* Wait until parent signals that data is ready */
/* The order of conditions checked in this loop is important, as it can
* result in a race condition.
*/
if (!IS_PEER_READY(parent_ctl_pointer, ready_flag, sequence_number, ALLREDUCE_FLAG, bcol_id)) {
return BCOL_FN_STARTED;
}
assert (parent_ctl_pointer->flags[ALLREDUCE_FLAG][bcol_id] == ready_flag);
/* Copy the data from the parent's shared buffer into my buffer */
memcpy ((void *) rbuf, (const void*) parent_rbuf, pack_len);
}
if (LEAF_NODE != my_fanout_read_tree->my_node_type) {
opal_atomic_wmb ();
/* Signal to children that they may read the data from my shared buffer (bump the ready flag) */
my_ctl_pointer->flags[ALLREDUCE_FLAG][bcol_id] = ready_flag;
}
my_ctl_pointer->starting_flag_value[bcol_id] += 1;
return BCOL_FN_COMPLETE;
}
static int bcol_basesmuma_allreduce_intra_fanin_fanout_progress (bcol_function_args_t *input_args, mca_bcol_base_function_t *c_input_args)
{
mca_bcol_basesmuma_module_t *bcol_module = (mca_bcol_basesmuma_module_t *) c_input_args->bcol_module;
int buff_idx = input_args->src_desc->buffer_index;
int *iteration = &bcol_module->ml_mem.nb_coll_desc[buff_idx].iteration;
void *data_addr = (void *) input_args->src_desc->data_addr;
int my_node_index, my_rank, group_size, leading_dim, idx;
volatile mca_bcol_basesmuma_header_t *my_ctl_pointer;
int64_t sequence_number = input_args->sequence_num;
volatile mca_bcol_basesmuma_payload_t *data_buffs;
struct ompi_datatype_t *dtype = input_args->dtype;
netpatterns_tree_node_t *my_reduction_node;
struct ompi_op_t *op = input_args->op;
volatile void *my_data_pointer;
int count = input_args->count;
int rc, process_shift;
ptrdiff_t lb, extent;
volatile void *rbuf;
/* get addressing information */
my_rank = bcol_module->super.sbgp_partner_module->my_index;
group_size = bcol_module->colls_no_user_data.size_of_group;
leading_dim = bcol_module->colls_no_user_data.size_of_group;
idx = SM_ARRAY_INDEX(leading_dim,buff_idx,0);
/* Align node index to around sbgp root */
process_shift = input_args->root;
my_node_index = my_rank - input_args->root;
if (0 > my_node_index ) {
my_node_index += group_size;
}
data_buffs = (volatile mca_bcol_basesmuma_payload_t *) bcol_module->colls_with_user_data.data_buffs + idx;
/* Get control structure and payload buffer */
my_ctl_pointer = data_buffs[my_rank].ctl_struct;
my_data_pointer = (volatile char *) data_addr;
rbuf = (volatile void *)((char *) my_data_pointer + input_args->rbuf_offset);
/***************************
* Fan into root phase
***************************/
my_reduction_node = &(bcol_module->reduction_tree[my_node_index]);
if (-1 != *iteration) {
rc = reduce_children (bcol_module, rbuf, my_reduction_node, iteration, my_ctl_pointer,
dtype, data_buffs, count, op, process_shift);
if (BCOL_FN_COMPLETE != rc) {
return rc;
}
}
/* there might be non-contig dtype - so compute the length with get_extent */
ompi_datatype_get_extent(dtype, &lb, &extent);
/***************************
* Fan out from root
***************************/
/* all nodes will have the result after fanout */
input_args->result_in_rbuf = true;
/* Signal that you are ready for fanout phase */
return allreduce_fanout (bcol_module, my_ctl_pointer, my_data_pointer, process_shift, data_buffs,
sequence_number, group_size, input_args->rbuf_offset, count * (size_t) extent);
}
/**
* Shared memory blocking allreduce.
*/
int bcol_basesmuma_allreduce_intra_fanin_fanout(bcol_function_args_t *input_args, mca_bcol_base_function_t *c_input_args)
{
/* local variables */
mca_bcol_basesmuma_module_t *bcol_module = (mca_bcol_basesmuma_module_t *) c_input_args->bcol_module;
int buff_idx = input_args->src_desc->buffer_index;
int *iteration = &bcol_module->ml_mem.nb_coll_desc[buff_idx].iteration;
void *data_addr = (void *) input_args->src_desc->data_addr;
volatile mca_bcol_basesmuma_header_t *my_ctl_pointer;
volatile mca_bcol_basesmuma_payload_t *data_buffs;
struct ompi_datatype_t *dtype = input_args->dtype;
int bcol_id = (int) bcol_module->super.bcol_id;
int rc, my_rank, leading_dim, idx;
volatile void *my_data_pointer;
volatile void *sbuf, *rbuf;
int8_t ready_flag;
/* get addressing information */
my_rank = bcol_module->super.sbgp_partner_module->my_index;
leading_dim = bcol_module->colls_no_user_data.size_of_group;
idx = SM_ARRAY_INDEX(leading_dim, buff_idx, 0);
data_buffs = (volatile mca_bcol_basesmuma_payload_t *) bcol_module->colls_with_user_data.data_buffs + idx;
/* Get control structure */
my_ctl_pointer = data_buffs[my_rank].ctl_struct;
my_data_pointer = (volatile char *) data_addr;
rbuf = (volatile void *)((char *) my_data_pointer + input_args->rbuf_offset);
sbuf = (volatile void *)((char *) my_data_pointer + input_args->sbuf_offset);
/* Setup resource recycling */
/* Set for multiple instances of bcols */
BASESMUMA_HEADER_INIT(my_ctl_pointer, ready_flag, input_args->sequence_num, bcol_id);
if (sbuf != rbuf) {
rc = ompi_datatype_copy_content_same_ddt (dtype, input_args->count, (char *)rbuf,
(char *)sbuf);
if( 0 != rc ) {
return OMPI_ERROR;
}
}
*iteration = 0;
my_ctl_pointer->ready_flag = ready_flag;
return bcol_basesmuma_allreduce_intra_fanin_fanout_progress (input_args, c_input_args);
}
/* this thing uses the old bcol private control structures */
int bcol_basesmuma_allreduce_intra_recursive_doubling(bcol_function_args_t *input_args,
mca_bcol_base_function_t *c_input_args)
{
int my_rank,group_size,my_node_index;
int pair_rank, exchange, extra_rank, payload_len;
size_t dt_size;
int read_offset, write_offset;
volatile void *my_data_pointer;
volatile mca_bcol_basesmuma_ctl_struct_t *my_ctl_pointer = NULL,
*partner_ctl_pointer = NULL,
*extra_ctl_pointer = NULL;
volatile void *my_read_pointer, *my_write_pointer, *partner_read_pointer,
*extra_rank_readwrite_data_pointer,*extra_rank_read_data_pointer;
mca_bcol_basesmuma_module_t* bcol_module =
(mca_bcol_basesmuma_module_t *)c_input_args->bcol_module;
int8_t ready_flag;
int sbuf_offset,rbuf_offset,flag_offset;
int root,count;
struct ompi_op_t *op;
int64_t sequence_number=input_args->sequence_num;
struct ompi_datatype_t *dtype;
int first_instance = 0;
int leading_dim,idx;
int buff_idx;
mca_bcol_basesmuma_ctl_struct_t **ctl_structs;
/*volatile void **data_buffs;*/
volatile mca_bcol_basesmuma_payload_t *data_buffs;
netpatterns_pair_exchange_node_t *my_exchange_node;
/*
* Get addressing information
*/
buff_idx = input_args->src_desc->buffer_index;
my_rank = bcol_module->super.sbgp_partner_module->my_index;
group_size = bcol_module->colls_no_user_data.size_of_group;
leading_dim = bcol_module->colls_no_user_data.size_of_group;
idx = SM_ARRAY_INDEX(leading_dim,buff_idx,0);
/*
* Get SM control structures and payload buffers
*/
ctl_structs = (mca_bcol_basesmuma_ctl_struct_t **)
bcol_module->colls_with_user_data.ctl_buffs+idx;
/*data_buffs = (volatile void **)
bcol_module->colls_with_user_data.data_buffs+idx;*/
data_buffs = (volatile mca_bcol_basesmuma_payload_t *)
bcol_module->colls_with_user_data.data_buffs + idx;
/*
* Get control structure and payload buffer
*/
my_ctl_pointer = ctl_structs[my_rank];
if (my_ctl_pointer->sequence_number < sequence_number) {
first_instance=1;
}
my_data_pointer = data_buffs[my_rank].payload;
/*
* Align node index to around sbgp root
*/
root = input_args->root;
my_node_index = my_rank - root;
if (0 > my_node_index) {
my_node_index += group_size;
}
/*
* Get data from arguments
*/
sbuf_offset = input_args->sbuf_offset;
rbuf_offset = input_args->rbuf_offset;
op = input_args->op;
count = input_args->count;
dtype = input_args->dtype;
/*
* Get my node for the reduction tree
*/
my_exchange_node = &(bcol_module->recursive_doubling_tree);
if (first_instance) {
my_ctl_pointer->index = 1;
my_ctl_pointer->starting_flag_value = 0;
flag_offset = 0;
my_ctl_pointer->flag = -1;
/*
for( i = 0; i < NUM_SIGNAL_FLAGS; i++){
my_ctl_pointer->flags[ALLREDUCE_FLAG] = -1;
}
*/
} else {
my_ctl_pointer->index++;
flag_offset = my_ctl_pointer->starting_flag_value;
}
/* signal that I have arrived */
/* opal_atomic_wmb (); */
my_ctl_pointer->sequence_number = sequence_number;
/* If this buffer is used more than once by an sm module in
* a given collective, we will need to distinguish between instances so
* that we pick up the right data.
*/
ready_flag = flag_offset + sequence_number + 1;
/*
* Set up pointers for using during recursive doubling phase
*/
read_offset = sbuf_offset;
write_offset = rbuf_offset;
fprintf(stderr,"read offset %d write offset %d\n",read_offset,write_offset);
my_read_pointer = (volatile void *)((char *) my_data_pointer + read_offset);
my_write_pointer = (volatile void *)((char *) my_data_pointer + write_offset);
/*
* When the group size is not a power of 2, the extra nodes' data is copied and
* reduced by partner exchange nodes.
* Extra nodes: nodes with rank at or above the nearest power of 2
* Exchange nodes: nodes with rank below the nearest power of 2 that
* partner with extra nodes during the reduction
* (a small standalone example classifying ranks for n = 6 follows this function)
*/
if (0 < my_exchange_node->n_extra_sources) {
/*
* Signal extra node that data is ready
*/
opal_atomic_wmb ();
my_ctl_pointer->flag = ready_flag;
if (EXCHANGE_NODE == my_exchange_node->node_type) {
extra_rank = my_exchange_node->rank_extra_source;
extra_ctl_pointer = ctl_structs[extra_rank];
extra_rank_readwrite_data_pointer = (void *) ((char *) data_buffs[extra_rank].payload +
read_offset);
/*
* Wait for data to get ready
*/
while (!((sequence_number == extra_ctl_pointer->sequence_number) &&
(extra_ctl_pointer->flag >= ready_flag))){
}
ompi_op_reduce(op,(void *)extra_rank_readwrite_data_pointer,
(void *)my_read_pointer, count, dtype);
}
}
/* -- Exchange node that reduces with an extra node --: signal to the extra node that its data has been read
* -- Exchange node that doesn't reduce data with an extra node --: this assignment
* is used so it can sync with other nodes during the exchange phase
* -- Extra node --: it can pass to the next phase
*/
ready_flag++;
/*my_ctl_pointer->flags[ALLREDUCE_FLAG] = ready_flag;*/
my_ctl_pointer->flag = ready_flag;
/*
* Exchange data with all the nodes that are less than max_power_2
*/
for (exchange=0 ; exchange < my_exchange_node->n_exchanges ; exchange++) {
int tmp=0;
/*my_ctl_pointer->flags[ALLREDUCE_FLAG] = ready_flag;*/
my_ctl_pointer->flag = ready_flag;
pair_rank=my_exchange_node->rank_exchanges[exchange];
partner_ctl_pointer = ctl_structs[pair_rank];
partner_read_pointer = (volatile void *) ((char *)data_buffs[pair_rank].payload + read_offset);
my_read_pointer = (volatile void *)((char *) my_data_pointer + read_offset);
my_write_pointer = (volatile void *)((char *) my_data_pointer + write_offset);
/*
* Wait for partner to be ready, so we can read
*/
/*
JSL ---- FIX ME !!!!! MAKE ME COMPLIANT WITH NEW BUFFERS
while (!IS_ALLREDUCE_PEER_READY(partner_ctl_pointer,
ready_flag, sequence_number)) {
}
*/
/*
* Perform reduction operation
*/
ompi_3buff_op_reduce(op,(void *)my_read_pointer, (void *)partner_read_pointer,
(void *)my_write_pointer, count, dtype);
/*
* Signal that I am done reading my partner's data
*/
ready_flag++;
/*my_ctl_pointer->flags[ALLREDUCE_FLAG] = ready_flag;*/
my_ctl_pointer->flag = ready_flag;
while (ready_flag > partner_ctl_pointer->flag){
opal_progress();
}
/*
* Swap read and write offsets
*/
tmp = read_offset;
read_offset = write_offset;
write_offset = tmp;
}
/*
* Copy data in from the "extra" source, if need be
*/
if (0 < my_exchange_node->n_extra_sources) {
if (EXTRA_NODE == my_exchange_node->node_type) {
int extra_rank_read_offset=-1,my_write_offset=-1;
/* Offset the ready flag to sync with
* the exchange node, which might be going through exchange phases
* unlike the extra node
*/
ready_flag = ready_flag + my_exchange_node->log_2;
if (my_exchange_node->log_2%2) {
extra_rank_read_offset = rbuf_offset;
my_write_offset = rbuf_offset;
} else {
extra_rank_read_offset = sbuf_offset;
my_write_offset = sbuf_offset;
}
my_write_pointer = (volatile void*)((char *)my_data_pointer + my_write_offset);
extra_rank = my_exchange_node->rank_extra_source;
extra_ctl_pointer = ctl_structs[extra_rank];
extra_rank_read_data_pointer = (volatile void *) ((char *)data_buffs[extra_rank].payload +
extra_rank_read_offset);
/*
* Wait for the exchange node to be ready
*/
ompi_datatype_type_size(dtype, &dt_size);
payload_len = count*dt_size;
#if 0
fix me JSL !!!!!
while (!IS_DATA_READY(extra_ctl_pointer, ready_flag, sequence_number)){
}
#endif
memcpy((void *)my_write_pointer,(const void *)
extra_rank_read_data_pointer, payload_len);
ready_flag++;
/*my_ctl_pointer->flags[ALLREDUCE_FLAG] = ready_flag;*/
my_ctl_pointer->flag = ready_flag;
} else {
/*
* Signal parent that data is ready
*/
opal_atomic_wmb ();
/*my_ctl_pointer->flags[ALLREDUCE_FLAG] = ready_flag;*/
my_ctl_pointer->flag = ready_flag;
/* wait until child is done to move on - this buffer will
* be reused for the next stripe, so don't want to move
* on too quick.
*/
extra_rank = my_exchange_node->rank_extra_source;
extra_ctl_pointer = ctl_structs[extra_rank];
}
}
input_args->result_in_rbuf = my_exchange_node->log_2 & 1;
my_ctl_pointer->starting_flag_value += 1;
return BCOL_FN_COMPLETE;
}
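The extra/exchange classification described in the comment inside the function above can be illustrated with a small standalone sketch, separate from the removed sources, for a group of n = 6 ranks (the largest power of two not exceeding n is 4, so ranks 4 and 5 are extra); the exact pairing used by the netpatterns code may differ in detail:
#include <stdio.h>

int main(void)
{
    int n = 6, pow2 = 1;

    while (pow2 * 2 <= n) {
        pow2 *= 2;                  /* pow2 = 4 for n = 6 */
    }
    for (int r = 0; r < n; ++r) {
        if (r >= pow2) {
            printf("rank %d: EXTRA, proxied by rank %d\n", r, r - pow2);
        } else if (r < n - pow2) {
            printf("rank %d: EXCHANGE, reduces data from extra rank %d\n", r, r + pow2);
        } else {
            printf("rank %d: EXCHANGE, no extra partner\n", r);
        }
    }
    return 0;
}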


@@ -1,487 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/constants.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/bcol/bcol.h"
#include "ompi/mca/bcol/base/base.h"
#include "bcol_basesmuma.h"
#define __TEST_BLOCKING__ 1
#define __TEST_WAIT__ 0
#define __TEST_TEST__ 0
/* debug
* #include "opal/sys/timer.h"
*
* extern uint64_t timers[7];
* end debug */
/* debug */
/* end debug */
int bcol_basesmuma_bcast_init(mca_bcol_base_module_t *super)
{
mca_bcol_base_coll_fn_comm_attributes_t comm_attribs;
mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs;
comm_attribs.bcoll_type = BCOL_BCAST;
comm_attribs.comm_size_min = 0;
comm_attribs.comm_size_max = 1048576;
comm_attribs.data_src = DATA_SRC_KNOWN;
comm_attribs.waiting_semantics = NON_BLOCKING;
inv_attribs.bcol_msg_min = 0;
inv_attribs.bcol_msg_max = 20000; /* range 1 */
inv_attribs.datatype_bitmap = 0xffffffff;
inv_attribs.op_types_bitmap = 0xffffffff;
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
bcol_basesmuma_bcast_k_nomial_knownroot,
bcol_basesmuma_bcast_k_nomial_knownroot);
inv_attribs.bcol_msg_min = 10000000;
inv_attribs.bcol_msg_max = 10485760; /* range 4 */
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
bcol_basesmuma_bcast_k_nomial_knownroot,
bcol_basesmuma_bcast_k_nomial_knownroot);
comm_attribs.data_src = DATA_SRC_UNKNOWN;
inv_attribs.bcol_msg_min = 0;
inv_attribs.bcol_msg_max = 20000; /* range 1 */
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
bcol_basesmuma_bcast_k_nomial_anyroot,
bcol_basesmuma_bcast_k_nomial_anyroot);
comm_attribs.data_src = DATA_SRC_UNKNOWN;
inv_attribs.bcol_msg_min = 10000000;
inv_attribs.bcol_msg_max = 10485760; /* range 4 */
#ifdef __PORTALS_AVAIL__
comm_attribs.waiting_semantics = BLOCKING;
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
bcol_basesmuma_lmsg_scatter_allgather_portals_bcast,
bcol_basesmuma_lmsg_scatter_allgather_portals_bcast);
comm_attribs.waiting_semantics = NON_BLOCKING;
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
bcol_basesmuma_lmsg_scatter_allgather_portals_nb_bcast,
bcol_basesmuma_lmsg_scatter_allgather_portals_nb_bcast);
comm_attribs.data_src = DATA_SRC_KNOWN;
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
bcol_basesmuma_lmsg_scatter_allgather_portals_nb_knownroot_bcast,
bcol_basesmuma_lmsg_scatter_allgather_portals_nb_knownroot_bcast);
#else
/*
if (super->use_hdl) {
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
bcol_basesmuma_hdl_zerocopy_bcast,
bcol_basesmuma_hdl_zerocopy_bcast);
} else { */
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, NULL, NULL);
/*
bcol_basesmuma_binary_scatter_allgather_segment,
bcol_basesmuma_binary_scatter_allgather_segment);
*/
/* } */
#endif
return OMPI_SUCCESS;
}
/* includes shared memory optimization */
/**
* Shared memory blocking broadcast - fanout, for small data buffers.
* This routine assumes that buf (the input buffer) is a single-writer,
* multi-reader (SWMR) shared memory buffer owned by the calling rank,
* which is the only rank that can write to this buffer.
* It is also assumed that the buffers are registered and fragmented
* at the ML level and that buf is sufficiently large to hold the data.
*
*
* @param buf - SWMR shared buffer within a sbgp that the
* executing rank can write to.
* @param count - the number of elements in the shared buffer.
* @param dtype - the datatype of a shared buffer element.
* @param root - the index within the sbgp of the root.
* @param module - basesmuma module.
*/
int bcol_basesmuma_bcast(bcol_function_args_t *input_args,
mca_bcol_base_function_t *c_input_args)
{
/* local variables */
int group_size, process_shift, my_node_index;
int my_rank;
int rc = OMPI_SUCCESS;
int my_fanout_parent;
int leading_dim, buff_idx, idx;
volatile int8_t ready_flag;
int count=input_args->count;
struct ompi_datatype_t* dtype=input_args->dtype;
int root=input_args->root;
int64_t sequence_number=input_args->sequence_num;
mca_bcol_basesmuma_module_t* bcol_module=
(mca_bcol_basesmuma_module_t *)c_input_args->bcol_module;
int bcol_id = (int) bcol_module->super.bcol_id;
volatile mca_bcol_basesmuma_payload_t *data_buffs;
volatile char* parent_data_pointer;
mca_bcol_basesmuma_header_t *my_ctl_pointer;
volatile mca_bcol_basesmuma_header_t *parent_ctl_pointer;
netpatterns_tree_node_t* my_fanout_read_tree;
size_t pack_len = 0, dt_size;
void *data_addr = (void *)((unsigned char *)input_args->src_desc->data_addr );
#if 0
fprintf(stderr,"Entering sm broadcast input_args->sbuf_offset %d \n",input_args->sbuf_offset);
fflush(stderr);
#endif
/* we will work only on packed data - so compute the length*/
ompi_datatype_type_size(dtype, &dt_size);
pack_len=count*dt_size;
buff_idx = input_args->src_desc->buffer_index;
/* Get addressing information */
my_rank = bcol_module->super.sbgp_partner_module->my_index;
group_size = bcol_module->colls_no_user_data.size_of_group;
leading_dim=bcol_module->colls_no_user_data.size_of_group;
idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0);
data_buffs=(volatile mca_bcol_basesmuma_payload_t *)
bcol_module->colls_with_user_data.data_buffs+idx;
/* Align node index to around sbgp root */
process_shift = root;
my_node_index = my_rank - root;
if(0 > my_node_index ) {
my_node_index += group_size;
}
/* get my node for the bcast tree */
my_fanout_read_tree = &(bcol_module->fanout_read_tree[my_node_index]);
my_fanout_parent = my_fanout_read_tree->parent_rank + process_shift;
if(group_size <= my_fanout_parent){
my_fanout_parent -= group_size;
}
/* Set pointer to current proc ctrl region */
/*my_ctl_pointer = ctl_structs[my_rank]; */
my_ctl_pointer = data_buffs[my_rank].ctl_struct;
/* setup resource recycling */
BASESMUMA_HEADER_INIT(my_ctl_pointer, ready_flag, sequence_number, bcol_id);
/*
* Fan out from root
*/
if(ROOT_NODE == my_fanout_read_tree->my_node_type) {
input_args->result_in_rbuf = false;
/* Root should only signal it is ready */
my_ctl_pointer->flags[BCAST_FLAG][bcol_id] = ready_flag;
}else if(LEAF_NODE == my_fanout_read_tree->my_node_type) {
input_args->result_in_rbuf = false;
/*
* Get parent payload data and control data.
* Get the pointer to the base address of the parent's payload buffer.
* Get the parent's control buffer.
*/
parent_data_pointer = data_buffs[my_fanout_parent].payload;
parent_ctl_pointer = data_buffs[my_fanout_parent].ctl_struct;
/* Wait until parent signals that data is ready */
/* The order of conditions checked in this loop is important, as it can
* result in a race condition.
*/
while (!IS_PEER_READY(parent_ctl_pointer, ready_flag, sequence_number, BCAST_FLAG, bcol_id)){
opal_progress();
}
/* Copy the data from the parent's shared buffer into my buffer */
memcpy(data_addr, (void *)parent_data_pointer, pack_len);
if( 0 != rc ) {
return OMPI_ERROR;
}
}else{
input_args->result_in_rbuf = false;
/* Interior node */
/* Get parent payload data and control data */
parent_data_pointer = data_buffs[my_fanout_parent].payload;
parent_ctl_pointer = data_buffs[my_fanout_parent].ctl_struct;
/* Wait until parent signals that data is ready */
/* The order of conditions checked in this loop is important, as it can
* result in a race condition.
*/
while (!IS_PEER_READY(parent_ctl_pointer, ready_flag, sequence_number, BCAST_FLAG, bcol_id)){
opal_progress();
}
/* Copy the data from the parent's shared buffer into my buffer */
memcpy(data_addr, (void *)parent_data_pointer,pack_len);
/* Signal to children that they may read the data from my shared buffer */
opal_atomic_wmb ();
my_ctl_pointer->flags[BCAST_FLAG][bcol_id] = ready_flag;
}
/* if I am the last instance of a basesmuma function in this collective,
* release the resources */
my_ctl_pointer->starting_flag_value[bcol_id]++;
return rc;
}
/* zero-copy large message communication methods */
#if 0
int bcol_basesmuma_hdl_zerocopy_bcast(bcol_function_args_t *input_args,
mca_bcol_base_function_t *c_input_args)
{
/* local variables */
int group_size, process_shift, my_node_index;
int my_rank, first_instance=0, flag_offset;
int rc = OMPI_SUCCESS;
int my_fanout_parent;
int leading_dim, buff_idx, idx;
volatile int64_t ready_flag;
int count=input_args->count;
struct ompi_datatype_t* dtype=input_args->dtype;
int root=input_args->root;
int64_t sequence_number=input_args->sequence_num;
mca_bcol_basesmuma_module_t* bcol_module=
(mca_bcol_basesmuma_module_t *)c_input_args->bcol_module;
netpatterns_tree_node_t* my_fanout_read_tree;
size_t pack_len = 0, dt_size;
void *data_addr = (void *)((unsigned char *)input_args->src_desc->data_addr);
struct mca_hdl_base_descriptor_t *hdl_desc;
struct mca_hdl_base_segment_t *hdl_seg;
int ret, completed, ridx/*remote rank index*/;
bool status;
volatile mca_bcol_basesmuma_ctl_struct_t **ctl_structs;
mca_bcol_basesmuma_ctl_struct_t *my_ctl_pointer= NULL;
volatile mca_bcol_basesmuma_ctl_struct_t *parent_ctl_pointer= NULL;
volatile mca_bcol_basesmuma_ctl_struct_t *child_ctl_pointer= NULL;
struct mca_hdl_base_module_t* hdl = bcol_module->hdl_module[0];
/* we will work only on packed data - so compute the length*/
ompi_datatype_type_size(dtype, &dt_size);
pack_len = count * dt_size;
buff_idx = input_args->src_desc->buffer_index;
/* Get addressing information */
my_rank = bcol_module->super.sbgp_partner_module->my_index;
group_size = bcol_module->colls_no_user_data.size_of_group;
leading_dim=bcol_module->colls_no_user_data.size_of_group;
idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0);
ctl_structs = (volatile mca_bcol_basesmuma_ctl_struct_t **)
bcol_module->colls_with_user_data.ctl_buffs+idx;
my_ctl_pointer = ctl_structs[my_rank];
/* Align node index to around sbgp root */
process_shift = root;
my_node_index = my_rank - root;
if(0 > my_node_index ) {
my_node_index += group_size;
}
/* get my node for the bcast tree */
my_fanout_read_tree = &(bcol_module->fanout_read_tree[my_node_index]);
my_fanout_parent = my_fanout_read_tree->parent_rank + process_shift;
if(group_size <= my_fanout_parent){
my_fanout_parent -= group_size;
}
/* setup resource recycling */
if( my_ctl_pointer->sequence_number < sequence_number ) {
first_instance = 1;
}
if( first_instance ) {
/* Signal arrival */
my_ctl_pointer->flag = -1;
my_ctl_pointer->index = 1;
/* this does not need to use any flag values , so only need to
* set the value for subsequent values that may need this */
my_ctl_pointer->starting_flag_value = 0;
flag_offset = 0;
} else {
/* only one thread at a time will be making progress on this
* collective, so no need to make this atomic */
my_ctl_pointer->index++;
}
/* increment the starting flag by one and return */
flag_offset = my_ctl_pointer->starting_flag_value;
ready_flag = flag_offset + sequence_number + 1;
my_ctl_pointer->sequence_number = sequence_number;
hdl_desc = (mca_hdl_base_descriptor_t *)
malloc (sizeof (mca_hdl_base_descriptor_t) * 1);
/*prepare a hdl data segment*/
hdl_seg = (mca_hdl_base_segment_t*)
malloc ( sizeof (mca_hdl_base_segment_t) * 1);
hdl_seg->seg_addr.pval = input_args->sbuf;
hdl_seg->seg_len = pack_len;
hdl->endpoint->ready_flag = ready_flag;
hdl->endpoint->local_ctrl = my_ctl_pointer;
hdl->endpoint->sbgp_contextid =
bcol_module->super.sbgp_partner_module->group_comm->c_contextid;
/*
* Fan out from root
*/
if(ROOT_NODE == my_fanout_read_tree->my_node_type) {
input_args->result_in_rbuf = false;
hdl_desc->des_src = hdl_seg;
hdl_desc->des_src_cnt = 1;
hdl_desc->isroot = true;
/* As per the general semantics, there may be multiple pairs of send/recv
 * along the topology tree */
for (ridx = 0; ridx < my_fanout_read_tree->n_children; ridx++) {
child_ctl_pointer =
ctl_structs[my_fanout_read_tree->children_ranks[ridx]];
hdl->endpoint->remote_ctrl = child_ctl_pointer;
ret = hdl->hdl_send(hdl, hdl->endpoint, hdl_desc);
if (ret != OMPI_SUCCESS) {
BASESMUMA_VERBOSE(1, ("send eror on rank %d ........", my_rank));
goto exit_ERROR;
}
}
}else if(LEAF_NODE == my_fanout_read_tree->my_node_type) {
input_args->result_in_rbuf = false;
/*
* Get parent payload data and control data.
* Get the pointer to the base address of the parent's payload buffer.
* Get the parent's control buffer.
*/
parent_ctl_pointer = ctl_structs[my_fanout_parent];
hdl_desc->des_dst = hdl_seg;
hdl_desc->des_dst_cnt = 1;
hdl_desc->isroot = false;
hdl->endpoint->remote_ctrl = parent_ctl_pointer;
#if __TEST_BLOCKING__
ret = hdl->hdl_recv(hdl, hdl->endpoint, hdl_desc);
#else
ret = hdl->hdl_recvi(hdl, hdl->endpoint, NULL, 0, 0, &hdl_desc);
#endif
#if __TEST_WAIT__
ret = hdl->hdl_wait(hdl, hdl->endpoint, hdl_desc);
BASESMUMA_VERBOSE(1,("wait on rank %d is done!", my_rank));
#endif
if (OMPI_SUCCESS != ret) {
BASESMUMA_VERBOSE(1, ("recvi eror on rank %d ........", my_rank));
goto exit_ERROR;
}
status = false;
#if __TEST_TEST__
while (!status) {
hdl->hdl_test(&hdl_desc, &completed, &status);
opal_progress();
BASESMUMA_VERBOSE(1, ("test on rank %d ........", my_rank));
}
#endif
goto Release;
}else{
input_args->result_in_rbuf = false;
/* Interior node */
/* Get parent payload data and control data */
parent_ctl_pointer = ctl_structs[my_fanout_parent];
hdl_desc->des_dst = hdl_seg;
hdl_desc->des_dst_cnt = 1;
hdl_desc->isroot = false;
hdl->endpoint->remote_ctrl = parent_ctl_pointer;
ret = hdl->hdl_recv(hdl, hdl->endpoint, hdl_desc);
if (OMPI_SUCCESS != ret) {
BASESMUMA_VERBOSE(1, ("recv error on rank %d ........", my_rank));
goto exit_ERROR;
}
/* Signal to children that they may read the data from my shared buffer */
opal_atomic_wmb ();
hdl_desc->des_src = hdl_seg;
hdl_desc->des_src_cnt = 1;
for (ridx = 0; ridx < my_fanout_read_tree->n_children; ridx++) {
child_ctl_pointer =
ctl_structs[my_fanout_read_tree->children_ranks[ridx]];
hdl->endpoint->remote_ctrl = child_ctl_pointer;
ret = hdl->hdl_send(hdl, hdl->endpoint, hdl_desc);
if (ret != OMPI_SUCCESS) {
BASESMUMA_VERBOSE(1, ("send eror on rank %d ........", my_rank));
goto exit_ERROR;
}
}
goto Release;
}
Release:
/* if I am the last instance of a basesmuma function in this collective,
 * release the resources */
if (IS_LAST_BCOL_FUNC(c_input_args)) {
rc = bcol_basesmuma_free_buff(
&(bcol_module->colls_with_user_data),
sequence_number);
}
my_ctl_pointer->starting_flag_value += 1;
return BCOL_FN_COMPLETE;
exit_ERROR:
return OMPI_ERROR;
}
#endif

View file

@ -1,895 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/constants.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h"
#include "bcol_basesmuma_utils.h"
#include "bcol_basesmuma.h"
/* debug
* #include "opal/sys/timer.h"
*
* extern uint64_t timers[7];
* end debug */
/* debug */
#include <unistd.h>
/* end debug */
/* includes shared memory optimization */
#define BCOL_BASESMUMA_SM_PROBE(src_list, n_src, my_index, matched, src) \
do { \
int j; \
for( j = 0; j < n_src; j++) { \
parent_ctl_pointer = data_buffs[src_list[j]].ctl_struct; \
parent_data_pointer = data_buffs[src_list[j]].payload; \
if( IS_DATA_READY(parent_ctl_pointer,ready_flag,sequence_number)) { \
src = src_list[j]; \
matched = 1; \
break; \
} \
} \
} while(0)
/*
#define IS_LARGE_DATA_READY(peer, my_flag, my_sequence_number) \
(((peer)->sequence_number == (my_sequence_number) && \
(peer)->flags[BCAST_FLAG] >= (my_flag) \
)? true : false )
*/
/*
#define IS_KNOWN_ROOT_DATA_READY(peer, my_flag, my_sequence_number) \
(((peer)->sequence_number == (my_sequence_number) && \
(peer)->flags[BCAST_FLAG][bcol_id] >= (my_flag) \
)? true : false )
*/
#define BCOL_BASESMUMA_SM_LARGE_MSG_PROBE(src_list, n_src, my_index, matched, src, flag_index, bcol_id) \
do { \
int j; \
for( j = 0; j < n_src; j++) { \
/* fprintf(stderr,"my_rank %d and %d\n",my_rank,1); */ \
if(src_list[j] != -1) { \
parent_ctl_pointer = ctl_structs[src_list[j]]; \
parent_data_pointer = (void *) data_buffs[src_list[j]].ctl_struct; \
/*fprintf(stderr,"my_rank %d ready flag %d partner flag %d and %d\n",my_rank,ready_flag,parent_ctl_pointer->flag,2); */ \
if( IS_PEER_READY(parent_ctl_pointer,ready_flag,sequence_number, flag_index, bcol_id)) { \
src = src_list[j]; \
matched = 1; \
index = j; \
/* fprintf(stderr,"found it from %d!\n",src);*/ \
break; \
} \
} \
} \
} while(0)
#define K_NOMIAL_DATA_SRC(radix, my_group_index, group_size, group_root, data_src, radix_mask) \
do { \
int relative_rank = (my_group_index >= group_root) ? my_group_index - group_root : \
my_group_index - group_root + group_size; \
radix_mask = 1; \
while (radix_mask < group_size) { \
if (relative_rank % (radix * radix_mask)) { \
data_src = relative_rank/(radix * radix_mask) * (radix * radix_mask) + group_root; \
if (data_src >= group_size) data_src -= group_size; \
break; \
} \
radix_mask *= radix; \
} \
} while (0)
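/*
 * Illustrative worked example for K_NOMIAL_DATA_SRC (comment added for
 * clarity, not part of the original source).  With the component default
 * radix = 3, group_size = 8 and group_root = 0:
 *   - my_group_index = 5: relative_rank = 5, 5 % 3 != 0, so
 *     data_src = (5/3)*3 + 0 = 3 (rank 5 reads from rank 3);
 *   - my_group_index = 6: relative_rank = 6, 6 % 3 == 0 but 6 % 9 != 0, so
 *     data_src = (6/9)*9 + 0 = 0 (rank 6 reads from the root).
 */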
int bcol_basesmuma_bcast_k_nomial_knownroot(bcol_function_args_t *input_args,
mca_bcol_base_function_t *c_input_args)
{
/* local variables */
mca_bcol_basesmuma_module_t* bcol_module=
(mca_bcol_basesmuma_module_t *)c_input_args->bcol_module;
mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component;
int i, matched = 0;
int group_size;
int my_rank;
int leading_dim,
buff_idx,
idx;
int count = input_args->count;
struct ompi_datatype_t* dtype = input_args->dtype;
int64_t sequence_number = input_args->sequence_num;
int radix =
mca_bcol_basesmuma_component.k_nomial_radix;
int radix_mask;
int16_t data_src = -1;
volatile int8_t ready_flag;
int bcol_id = (int) bcol_module->super.bcol_id;
volatile mca_bcol_basesmuma_payload_t *data_buffs;
volatile char* parent_data_pointer;
volatile mca_bcol_basesmuma_header_t *parent_ctl_pointer;
volatile mca_bcol_basesmuma_header_t *my_ctl_pointer;
size_t pack_len = 0;
void *data_addr = (void *)((unsigned char *)input_args->src_desc->data_addr +
input_args->sbuf_offset);
#if 0
fprintf(stderr,"Entering nb-sm broadcast input_args->sbuf_offset %d \n",input_args->sbuf_offset);
fflush(stderr);
#endif
/* we will work only on packed data - so compute the length*/
BASESMUMA_VERBOSE(3, ("Calling bcol_basesmuma_bcast_k_nomial_knownroot"));
pack_len = mca_bcol_base_get_buff_length(dtype, count);
/* Some hierarchical algorithms have data that is accumulated at each step;
 * this factor accounts for that.
 */
pack_len = pack_len*input_args->hier_factor;
buff_idx = input_args->buffer_index;
/* Get addressing information */
my_rank = bcol_module->super.sbgp_partner_module->my_index;
group_size = bcol_module->colls_no_user_data.size_of_group;
leading_dim = bcol_module->colls_no_user_data.size_of_group;
idx = SM_ARRAY_INDEX(leading_dim,buff_idx,0);
data_buffs = (volatile mca_bcol_basesmuma_payload_t *)
bcol_module->colls_with_user_data.data_buffs + idx;
/* Set pointer to current proc ctrl region */
my_ctl_pointer = data_buffs[my_rank].ctl_struct;
/* setup resource recycling */
BASESMUMA_HEADER_INIT(my_ctl_pointer, ready_flag, sequence_number, bcol_id);
/* removing dependence on sequence number */
/* I believe this is resolved now with the signaling flags */
/*
ready_temp = 1 + (int8_t) flag_offset + (int8_t) bcol_id;
if( ready_temp >= my_ctl_pointer->flags[BCAST_FLAG][bcol_id]) {
ready_flag = ready_temp;
} else {
ready_flag = my_ctl_pointer->flags[BCAST_FLAG][bcol_id];
}
opal_atomic_wmb ();
my_ctl_pointer->sequence_number = sequence_number;
*/
/* non-blocking broadcast algorithm */
/* If I am the root, then signal ready flag */
if(input_args->root_flag) {
BASESMUMA_VERBOSE(10,("I am the root of the data"));
/*
* signal ready flag
*/
opal_atomic_wmb ();
my_ctl_pointer->flags[BCAST_FLAG][bcol_id] = ready_flag;
/* root is finished */
goto Release;
}
/* Calculate source of the data */
K_NOMIAL_DATA_SRC(radix, my_rank, group_size,
input_args->root_route->rank, data_src, radix_mask);
parent_ctl_pointer = data_buffs[data_src].ctl_struct;
parent_data_pointer = data_buffs[data_src].payload;
for( i = 0; i < cs->num_to_probe && 0 == matched; i++) {
if(IS_PEER_READY(parent_ctl_pointer,ready_flag,sequence_number, BCAST_FLAG, bcol_id)) {
matched = 1;
break;
}
}
/* If not matched, then hop out and put me on progress list */
if(0 == matched ) {
BASESMUMA_VERBOSE(10,("Shared memory probe didn't find a match"));
return BCOL_FN_NOT_STARTED;
}
/* else, we found our root within the group ... */
BASESMUMA_VERBOSE(10,("Shared memory probe was matched, the root is %d", data_src));
/* copy the data */
memcpy(data_addr, (void *) parent_data_pointer, pack_len);
/* set the memory barrier to ensure completion */
opal_atomic_wmb ();
/* signal that I am done */
my_ctl_pointer->flags[BCAST_FLAG][bcol_id] = ready_flag;
Release:
my_ctl_pointer->starting_flag_value[bcol_id]++;
return BCOL_FN_COMPLETE;
}
/**
* Shared memory non-blocking Broadcast - K-nomial fan-out for small data buffers.
* This routine assumes that buf (the input buffer) is a single writer
* multi reader (SWMR) shared memory buffer owned by the calling rank
 * which is the only rank that can write to this buffer.
* It is also assumed that the buffers are registered and fragmented
* at the ML level and that buf is sufficiently large to hold the data.
*
*
* @param buf - SWMR shared buffer within a sbgp that the
* executing rank can write to.
* @param count - the number of elements in the shared buffer.
* @param dtype - the datatype of a shared buffer element.
* @param root - the index within the sbgp of the root.
* @param module - basesmuma module.
*/
int bcol_basesmuma_bcast_k_nomial_anyroot(bcol_function_args_t *input_args,
mca_bcol_base_function_t *c_input_args)
{
/* local variables */
mca_bcol_basesmuma_module_t* bcol_module=
(mca_bcol_basesmuma_module_t *)c_input_args->bcol_module;
mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component;
int i;
int group_size;
int my_rank;
int leading_dim, buff_idx, idx;
int count=input_args->count;
struct ompi_datatype_t* dtype=input_args->dtype;
int64_t sequence_number=input_args->sequence_num;
int radix = cs->k_nomial_radix;
int radix_mask;
int relative_rank;
int pow_k_group_size;
volatile int8_t ready_flag;
int bcol_id = (int) bcol_module->super.bcol_id;
volatile mca_bcol_basesmuma_payload_t *data_buffs;
volatile void* parent_data_pointer;
volatile mca_bcol_basesmuma_header_t *child_ctl_pointer;
volatile mca_bcol_basesmuma_header_t *my_ctl_pointer;
size_t pack_len = 0;
void *data_addr = (void *)((unsigned char *)input_args->src_desc->data_addr +
input_args->sbuf_offset);
#if 0
fprintf(stderr,"Entering nb-sm broadcast input_args->sbuf_offset %d \n",input_args->sbuf_offset);
fflush(stderr);
#endif
/* we will work only on packed data - so compute the length*/
pack_len = mca_bcol_base_get_buff_length(dtype, count);
buff_idx = input_args->buffer_index;
/* Get addressing information */
my_rank = bcol_module->super.sbgp_partner_module->my_index;
group_size = bcol_module->colls_no_user_data.size_of_group;
leading_dim=bcol_module->colls_no_user_data.size_of_group;
idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0);
/* get pow_k_levels and pow_k_group_size */
pow_k_group_size = bcol_module->pow_k;
data_buffs=(volatile mca_bcol_basesmuma_payload_t *)
bcol_module->colls_with_user_data.data_buffs+idx;
/* Set pointer to current proc ctrl region */
my_ctl_pointer = data_buffs[my_rank].ctl_struct;
BASESMUMA_HEADER_INIT(my_ctl_pointer, ready_flag, sequence_number, bcol_id);
/* non-blocking broadcast algorithm */
/* If I am the root, then signal ready flag */
if(input_args->root_flag) {
BASESMUMA_VERBOSE(10,("I am the root of the data"));
/*
* set the radix_mask */
radix_mask = pow_k_group_size;
/* send to children */
opal_atomic_wmb ();
BASESMUMA_K_NOMIAL_SEND_CHILDREN(radix_mask,
radix,0,
my_rank,group_size, ready_flag);
/* root is finished */
goto Release;
}
/* If I am not the root, then poll on possible "senders'" control structs */
for( i = 0; i < cs->num_to_probe; i++) {
if( ready_flag == my_ctl_pointer->flags[BCAST_FLAG][bcol_id]) {
/* else, we found our root within the group ... */
parent_data_pointer = data_buffs[my_ctl_pointer->src].payload;
BASESMUMA_VERBOSE(5,("%d found it from %d \n",my_rank,my_ctl_pointer->src));
/* memcopy the data */
memcpy(data_addr, (void *) parent_data_pointer, pack_len);
/* compute my relative rank */
relative_rank = (my_rank - my_ctl_pointer->src) < 0 ? my_rank -
my_ctl_pointer->src + group_size : my_rank - my_ctl_pointer->src;
/* compute my radix mask */
radix_mask = 1;
while(radix_mask < group_size ){
if( 0 != relative_rank % (radix*radix_mask)) {
/* found it */
break;
}
radix_mask *= radix;
}
/* go one step back */
radix_mask /= radix;
/* send to children */
opal_atomic_wmb ();
BASESMUMA_K_NOMIAL_SEND_CHILDREN(radix_mask,
radix, relative_rank,
my_rank, group_size, ready_flag);
/* bail */
goto Release;
}
}
/* If not matched, then hop out and put me on progress list */
BASESMUMA_VERBOSE(10,("Shared memory probe didn't find a match"));
/*fprintf(stderr,"bcol_id %d Not started\n",bcol_id);*/
return BCOL_FN_NOT_STARTED;
Release:
my_ctl_pointer->starting_flag_value[bcol_id]++;
return BCOL_FN_COMPLETE;
}
/* non-blocking binary scatter allgather anyroot algorithm for large data
* broadcast
*/
#if 0
/* prototype code for shared memory scatter/allgather algorithm. Signaling scheme
* works, should be used as a reference for other types of shared memory scatter/allgather
* algorithms.
*/
int bcol_basesmuma_binary_scatter_allgather_segment(bcol_function_args_t *input_args,
mca_bcol_base_function_t *c_input_args)
{
/* local variables */
int i, j;
int length;
int start;
int my_rank, parent_rank;
int partner;
int src = -1;
int matched = 0;
int group_size;
int first_instance=0;
int leading_dim, buff_idx, idx;
int64_t sequence_number=input_args->sequence_num;
int64_t ready_flag;
int64_t local_offset;
int flag_offset;
int pow_2, pow_2_levels;
int index = -1;
mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component;
mca_bcol_basesmuma_module_t *bcol_module =
(mca_bcol_basesmuma_module_t *) c_input_args->bcol_module;
/* use the old control structs for large messages,
* otherwise we will destroy the shared memory
* optimization
*/
mca_bcol_basesmuma_ctl_struct_t **ctl_structs;
mca_bcol_basesmuma_ctl_struct_t *my_ctl_pointer;
mca_bcol_basesmuma_ctl_struct_t *parent_ctl_pointer; /* binomial fanout */
mca_bcol_basesmuma_ctl_struct_t *partner_ctl_pointer; /* recursive double */
/* for now, we use the payload buffer for single fragment */
volatile mca_bcol_basesmuma_payload_t *data_buffs;
volatile void *parent_data_pointer; /* binomial scatter */
volatile void *partner_data_pointer; /* recursive double */
uint32_t fragment_size; /* ml buffer size for now */
/* we will transfer the entire buffer,
* so start at the base address of the ml buffer
*/
void *data_addr = (void *) ((unsigned char *) input_args->src_desc->base_data_addr);
#if 0
fprintf(stderr,"AAA Entering nb-sm large msg broadcast input_args->frag_size %d \n",input_args->frag_size);
fflush(stderr);
#endif
buff_idx = input_args->src_desc->buffer_index;
group_size = bcol_module->colls_no_user_data.size_of_group;
leading_dim=bcol_module->colls_no_user_data.size_of_group;
/* get the largest power of two that is smaller than
* or equal to the group size
*/
pow_2_levels = bcol_module->pow_2_levels;
pow_2 = bcol_module->pow_2;
/* get the fragment size
*/
/* still just the size of the entire buffer */
fragment_size = input_args->buffer_size;
idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0);
my_rank = bcol_module->super.sbgp_partner_module->my_index;
/* grab the control structs */
ctl_structs = (mca_bcol_basesmuma_ctl_struct_t **)
bcol_module->colls_with_user_data.ctl_buffs+idx;
/* grab the data buffs */
data_buffs = (mca_bcol_basesmuma_payload_t *)
bcol_module->colls_with_user_data.data_buffs+idx;
my_ctl_pointer = ctl_structs[my_rank];
if(my_ctl_pointer->sequence_number < sequence_number) {
first_instance = 1;
}
if(first_instance) {
my_ctl_pointer->flag = -1;
my_ctl_pointer->index = 1;
my_ctl_pointer->starting_flag_value = 0;
flag_offset = 0;
} else {
my_ctl_pointer->index++;
}
/* increment the starting flag by one and return */
flag_offset = my_ctl_pointer->starting_flag_value;
ready_flag = flag_offset + sequence_number + 1;
my_ctl_pointer->sequence_number = sequence_number;
/* am I the root */
if(input_args->root_flag) {
/* if I've already been here, then
* hop down to the allgather
*/
if(ALLGATHER == my_ctl_pointer->status) {
goto Allgather;
}
BASESMUMA_VERBOSE(10,("I am the root of the data"));
/* debug print */
/*fprintf(stderr,"I am the root %d\n",my_rank);*/
/*
* signal ready flag
*/
/* set the offset into the buffer */
my_ctl_pointer->offset = 0;
/* how many children do I have */
my_ctl_pointer->n_sends = pow_2_levels;
/* my data length */
my_ctl_pointer->length = fragment_size;
/* important that these be set before my children
* see the ready flag raised
*/
opal_atomic_wmb ();
my_ctl_pointer->flag = ready_flag;
/* root is finished */
if( my_rank < pow_2 ) {
/* if I'm in the power of two group,
* then goto the allgather
*/
my_ctl_pointer->status = ALLGATHER;
goto Allgather;
} else {
/* if I'm not, then I'm done and release */
goto Release;
}
}
/* what phase am I participating in
*/
switch(my_ctl_pointer->status) {
case SCATTER:
goto Scatter;
break;
case ALLGATHER:
goto Allgather;
break;
case EXTRA_RANK:
goto Extra;
break;
default:
break;
}
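/*
 * Phase flow summary (comment added for clarity): ranks inside the largest
 * power-of-two subgroup run the binomial SCATTER phase and then the
 * recursive-doubling ALLGATHER phase; ranks at or above pow_2 take the
 * EXTRA_RANK path below and simply copy the fully assembled result from
 * their power-of-two partner.  On re-entry of this non-blocking routine the
 * saved status resumes the collective at the matching label.
 */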
Extra:
/* am I part of the non-power-of-2 group */
if( my_rank >= pow_2 ) {
/* find parent to copy from */
parent_rank = my_rank&(pow_2-1);
parent_ctl_pointer = ctl_structs[parent_rank];
/* start at the base */
parent_data_pointer = (void *) data_buffs[parent_rank].ctl_struct;
/* now, I need to do some arithmetic to
* arrive at the value everyone else does
* when they have completed the algorithm
*/
/* compute ready flag value to poll on */
ready_flag = ready_flag + pow_2_levels;
/* start to poll */
for( i = 0; i< cs->num_to_probe; i++) {
if(IS_LARGE_DATA_READY(parent_ctl_pointer,ready_flag, sequence_number)) {
/* copy the data and bail */
memcpy(data_addr,(void *)parent_data_pointer,fragment_size);
goto Release;
}
/*
else {
opal_progress();
}
*/
}
my_ctl_pointer->status = EXTRA_RANK;
/* hop out and put me onto a progress queue */
return BCOL_FN_NOT_STARTED;
}
Scatter:
/* on first entry, compute the list of possible sources */
if( NULL == my_ctl_pointer->src_ptr ) {
my_ctl_pointer->src_ptr = (int *) malloc(sizeof(int)*(pow_2_levels+1));
for( i = 0; i < pow_2_levels; i++) {
my_ctl_pointer->src_ptr[i] = my_rank ^ (1<<i);
}
/* am I participating in the non-power of two */
if((my_rank+pow_2) < group_size) {
/* extra rank that I'm paired with */
my_ctl_pointer->src_ptr[i] = my_rank + pow_2;
} else {
/* no extra rank to worry about */
my_ctl_pointer->src_ptr[i] = -1;
}
}
/* If I am not the root, then poll on possible "senders'" control structs */
for( i = 0; i < cs->num_to_probe && 0 == matched; i++) {
/* Shared memory iprobe */
BCOL_BASESMUMA_SM_LARGE_MSG_PROBE(my_ctl_pointer->src_ptr, pow_2_levels+1,
my_rank, matched, src);
}
/* If not matched, then hop out and put me on progress list */
if(0 == matched ) {
BASESMUMA_VERBOSE(10,("Shared memory probe didn't find a match"));
my_ctl_pointer->status = SCATTER;
return BCOL_FN_NOT_STARTED;
} else if ( src >= pow_2 ){
/* If matched from an extra rank, then get the whole message from partner */
memcpy((void *) data_addr, (void *) parent_data_pointer,
parent_ctl_pointer->length);
/* now I am the pseudo-root in the power-of-two group */
my_ctl_pointer->offset = 0;
my_ctl_pointer->length = parent_ctl_pointer->length;
my_ctl_pointer->n_sends = parent_ctl_pointer->n_sends;
/* set the memory barrier */
opal_atomic_wmb ();
/* fire the ready flag */
my_ctl_pointer->flag = ready_flag;
my_ctl_pointer->status = ALLGATHER;
/* go to the allgather */
goto Allgather;
}
/* we need to see whether this is really
* who we are looking for
*/
for( i = 0; i < parent_ctl_pointer->n_sends; i++) {
/* debug print */
/*
fprintf(stderr,"I am %d checking on a hit from %d with n_sends %d\n",my_rank,src,parent_ctl_pointer->n_sends);
fflush(stderr);
*/
/* end debug */
if( my_rank == (src^(1<<i))) {
/* we found our root within the group ... */
BASESMUMA_VERBOSE(10,("Shared memory probe was matched, the root is %d", src));
/* this is who I've been looking for */
my_ctl_pointer->n_sends = i;
if ( i > 0) {
/* compute the size of the chunk to copy */
length = (parent_ctl_pointer->length)/
(1<<(parent_ctl_pointer->n_sends - my_ctl_pointer->n_sends));
my_ctl_pointer->length = length;
my_ctl_pointer->offset =
parent_ctl_pointer->offset+length;
/*fprintf(stderr,"%d's offset %d and length %d \n",my_rank,my_ctl_pointer->offset,length);*/
/* now we can copy the data */
memcpy((void *) ((uint64_t) data_addr+my_ctl_pointer->offset),
(void *) ((uint64_t) parent_data_pointer+(uint64_t) parent_ctl_pointer->offset +
(uint64_t) length),
(size_t)length);
} else {
/* this "trick" takes care of the first level
 * of recursive doubling
*/
length = parent_ctl_pointer->length/
(1<<(parent_ctl_pointer->n_sends - 1));
my_ctl_pointer->length = length;
my_ctl_pointer->offset = parent_ctl_pointer->offset;
/*fprintf(stderr,"%d's offset %d and length %d\n",my_rank,my_ctl_pointer->offset,length);*/
/* now we can copy the data */
memcpy((void *) ((uint64_t) data_addr+my_ctl_pointer->offset),
(void *) ((uint64_t) parent_data_pointer+(uint64_t) my_ctl_pointer->offset),
(size_t)length);
}
/* set the memory barrier to ensure completion */
opal_atomic_wmb ();
/* signal that I am done */
my_ctl_pointer->flag = ready_flag;
/* set my status */
my_ctl_pointer->status = ALLGATHER;
/* time for allgather phase */
goto Allgather;
}
}
/* this is not who we are looking for,
* mark as false positive so we don't
* poll here again
*/
my_ctl_pointer->src_ptr[index] = -1;
/* probably we should jump out and put onto progress list */
my_ctl_pointer->status = SCATTER;
return BCOL_FN_NOT_STARTED;
Allgather:
/* zip it back up - we have already taken care of first level */
/* needed for non-blocking conditional */
matched = 0;
/* get my local_offset */
local_offset = my_ctl_pointer->offset;
/* bump the ready flag */
ready_flag++;
/* first level of zip up */
length = 2*fragment_size/pow_2;
/* first level of zip-up
* already includes first level of
* recursive doubling
*/
start = 1;
/* for non-blocking, check to see if I need to reset the state */
if(my_ctl_pointer->flag >= ready_flag) {
/* then reset the state */
ready_flag = my_ctl_pointer->flag;
start = my_ctl_pointer->start;
/* get the local offset */
local_offset = my_ctl_pointer->offset_zip;
/* compute the correct length */
length = length*(1<<(start - 1));
/* careful! skip over the opal_atomic_wmb () to avoid the
* cost on every re-entry
*/
goto Loop;
}
opal_atomic_wmb ();
/* I am ready, set the flag */
my_ctl_pointer->flag = ready_flag;
Loop:
for( i = start; i < pow_2_levels; i++) {
/* get my partner for this level */
partner = my_rank^(1<<i);
partner_ctl_pointer = ctl_structs[partner];
partner_data_pointer = (void *) data_buffs[partner].ctl_struct;
/* is data ready */
for( j = 0; j < cs->num_to_probe && matched == 0; j++) {
if(IS_LARGE_DATA_READY(partner_ctl_pointer, ready_flag, sequence_number)) {
/* debug prints
fprintf(stderr,"666 I am %d and sequence num is %d partner is %d ready_flag %d parent ready_flag %d buff_idx %d partner_offset %d\n",
my_rank,sequence_number,partner, ready_flag,partner_ctl_pointer->flag,buff_idx,partner_ctl_pointer->offset);
*/
/* debug print */
#if 0
fprintf(stderr,"I am %d and sequence num is %d partner is %d ready_flag %d parent ready_flag %d buff_idx %d \n",
my_rank,sequence_number,partner, ready_flag,parent_ctl_pointer->flag,buff_idx);
#endif
/* end debug prints */
assert(partner_ctl_pointer->flag >= ready_flag);
/* found it */
matched = 1;
/* only copy it, if you sit at a lower level in the tree */
if( my_ctl_pointer->n_sends <= partner_ctl_pointer->n_sends ) {
/* calculate the local offset based on partner's remote offset */
if( partner_ctl_pointer->offset < my_ctl_pointer->offset ) {
/* then I'm looking "up" the tree */
local_offset -= length;
/* debug print */
/*fprintf(stderr,"I am %d and partner is %d partner offset %d length %d \n",my_rank,partner, local_offset,length);*/
/* end debug */
memcpy((void *) ((uint64_t) data_addr + (uint64_t) local_offset),
(void *) ((uint64_t) partner_data_pointer + (uint64_t) local_offset),
length);
} else {
/* I'm looking "down" the tree */
local_offset += length;
/* debug print */
/*fprintf(stderr,"I am %d and partner is %d partner offset %d length %d \n",my_rank,partner, local_offset,length);*/
/* end debug */
memcpy((void *) ((uint64_t) data_addr + (uint64_t) local_offset),
(void *) ((uint64_t) partner_data_pointer + (uint64_t) local_offset),
length);
/* reset my local offset */
local_offset -= length;
}
}
/* bump the ready flag */
ready_flag++;
/* ensure completion */
opal_atomic_wmb ();
/* fire the flag for the next level */
my_ctl_pointer->flag = ready_flag;
/* double the length */
length *= 2;
}
}
/* check to see what kind of progress I've made */
if( 0 == matched ) {
/* save state, hop out and try again later */
my_ctl_pointer->start = i;
/* save the local offset */
my_ctl_pointer->offset_zip = local_offset;
/* put in progress queue */
return BCOL_FN_STARTED;
}
/* else, start next level of recursive doubling */
matched = 0;
}
/* cleanup */
if(NULL != my_ctl_pointer->src_ptr) {
free(my_ctl_pointer->src_ptr);
my_ctl_pointer->src_ptr = NULL;
}
Release:
/* If I am the last instance, release the resource */
/*
if( IS_LAST_BCOL_FUNC(c_input_args)) {
rc = bcol_basesmuma_free_buff(
&(bcol_module->colls_with_user_data),
sequence_number);
}
*/
my_ctl_pointer->starting_flag_value++;
my_ctl_pointer->status = FINISHED;
return BCOL_FN_COMPLETE;
}
#endif
#if 0
int mca_bcol_basesmuma_bcast_binomial_scatter_allgather(void *desc)
{
/* local variables */
int rc, n_frags_sent;
uint32_t stripe_number;
int count, count_processed;
size_t dt_size;
uint32_t n_data_segments_to_schedule;
ompi_datatype_t *dtype;
message_descriptor_t *message_descriptor;
mca_bcol_basesmuma_module_t *bcol_module;
int pipe_depth;
/* get the full message descriptor */
/* compute the number of fragments to send */
/* start to fill the pipeline */
return OMPI_SUCCESS;
}
#endif

View file

@ -1,486 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2013-2014 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
*/
#include "ompi_config.h"
#include "ompi/constants.h"
#include "ompi/mca/bcol/bcol.h"
#include "ompi/mca/bcol/base/base.h"
#include "ompi/patterns/comm/coll_ops.h"
#include "opal/dss/dss.h"
#include "bcol_basesmuma.h"
/*
* With support for nonblocking collectives, we don't have an upper
* limit on the number of outstanding collectives per communicator.
* Also, since we want to avoid communication to figure out which
* buffers other ranks in the group will use, we will rely on the
* fact that collective operations are called in the same order
* in each process, to assign a unique ID to each collective operation.
* We use this to create a static mapping from the index to the buffer
* that will be used. Also, because there is no limit to the number of
* outstanding collective operations, we use a generation index for each
* memory bank, so the collective will use the buffer only when the
* correct generation of the bank is ready for use.
*/
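/*
 * Illustrative mapping example (comment added for clarity; it assumes
 * SHIFT_DOWN is a right shift and the component defaults of 2 banks x 16
 * buffers per bank, i.e. mask = 31, log2_num_buffs_per_mem_bank = 4,
 * log2_number_of_buffs = 5): buff_id = 37 gives buffer index = 37 & 31 = 5,
 * memory bank = 5 >> 4 = 0, generation = 37 >> 5 = 1.  The index is only
 * handed out once that bank's bank_gen_counter has reached generation 1.
 */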
int bcol_basesmuma_get_buff_index( sm_buffer_mgmt *buff_block,
uint64_t buff_id )
{
/* local variables */
int memory_bank;
uint64_t generation;
int index=-1;
/* get the bank index that will be used */
memory_bank=buff_id& buff_block->mask;
memory_bank = memory_bank SHIFT_DOWN buff_block->log2_num_buffs_per_mem_bank;
/* get the generation of the bank this maps to */
generation = buff_id SHIFT_DOWN (buff_block->log2_number_of_buffs);
/* check to see if the bank is available */
if( generation == buff_block->ctl_buffs_mgmt[memory_bank].
bank_gen_counter ) {
/* get the buffer index that will be returned */
index=buff_id & buff_block->mask;
/* no in-use counter increment, as the mapping is static, and
 * all we need to know is the number of collectives that complete */
} else {
/* progress communications so that resources can be freed up */
opal_progress();
}
/* return */
return index;
}
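/*
 * Minimal usage sketch (not part of the original source): the function above
 * returns -1 while the bank's generation counter lags behind the requested
 * generation, so a caller that must obtain a buffer would typically spin on
 * it; opal_progress() is already invoked inside on a generation mismatch.
 */
static inline int bcol_basesmuma_get_buff_index_blocking(sm_buffer_mgmt *buff_block,
                                                         uint64_t buff_id)
{
    int index = -1;
    while (-1 == index) {
        /* retry until the bank generation catches up and an index is returned */
        index = bcol_basesmuma_get_buff_index(buff_block, buff_id);
    }
    return index;
}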
/* release the shared memory buffers
* buf_id is the unique ID assigned to the particular buffer
*/
int bcol_basesmuma_free_buff( sm_buffer_mgmt * buff_block,
uint64_t buff_id )
{
/* local variables */
int ret=OMPI_SUCCESS;
int memory_bank;
uint64_t generation;
mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component;
/* get the bank index that will be used */
memory_bank=buff_id& buff_block->mask;
memory_bank = memory_bank SHIFT_DOWN buff_block->log2_num_buffs_per_mem_bank;
/* get the generation of the bank this maps to */
generation = buff_id SHIFT_DOWN (buff_block->log2_number_of_buffs);
/* the generation counter should not change until all resources
* associated with this bank have been freed.
*/
assert(generation == buff_block->ctl_buffs_mgmt[memory_bank].bank_gen_counter);
(void)generation; // silence compiler warning
/*
* increment counter of completed buffers
*/
OPAL_THREAD_ADD32(&(buff_block->ctl_buffs_mgmt[memory_bank].n_buffs_freed),
1);
/*
* If I am the last to checkin - initiate resource recycling
*/
if( buff_block->ctl_buffs_mgmt[memory_bank].n_buffs_freed ==
buff_block->ctl_buffs_mgmt[memory_bank].number_of_buffers ) {
/* Lock to ensure atomic recycling of resources */
OPAL_THREAD_LOCK(&(buff_block->ctl_buffs_mgmt[memory_bank].mutex));
/* make sure someone else did not already get to this */
if( buff_block->ctl_buffs_mgmt[memory_bank].n_buffs_freed !=
buff_block->ctl_buffs_mgmt[memory_bank].number_of_buffers ) {
/* release lock and exit */
OPAL_THREAD_UNLOCK(&(buff_block->ctl_buffs_mgmt[memory_bank].mutex));
} else {
sm_nbbar_desc_t *p_sm_nb_desc = NULL;
/* initiate the freeing of resources. Need to make sure the other
* ranks in the group are also done with their resources before this
* block is made available for use again.
* No one else will try to allocate from this block or free back to
* this block until the next generation counter has been incremented,
* so we will just reset the number of freed buffers to 0, so no one else
* will try to also initiate the recycling of these resources
*/
buff_block->ctl_buffs_mgmt[memory_bank].n_buffs_freed=0;
/* Start the nonblocking barrier */
p_sm_nb_desc = &(buff_block->ctl_buffs_mgmt[memory_bank].nb_barrier_desc);
p_sm_nb_desc->coll_buff = buff_block;
bcol_basesmuma_rd_nb_barrier_init_admin(p_sm_nb_desc);
if( NB_BARRIER_DONE !=
buff_block->ctl_buffs_mgmt[memory_bank].
nb_barrier_desc.collective_phase) {
opal_list_t *list=&(cs->nb_admin_barriers);
opal_list_item_t *append_item;
/* put this onto the progression list */
OPAL_THREAD_LOCK(&(cs->nb_admin_barriers_mutex));
append_item=(opal_list_item_t *)
&(buff_block->ctl_buffs_mgmt[memory_bank].nb_barrier_desc);
opal_list_append(list,append_item);
OPAL_THREAD_UNLOCK(&(cs->nb_admin_barriers_mutex));
/* progress communications so that resources can be freed up */
opal_progress();
} else {
/* mark the block as available */
(buff_block->ctl_buffs_mgmt[memory_bank].bank_gen_counter)++;
}
/* get out of here */
OPAL_THREAD_UNLOCK(&(buff_block->ctl_buffs_mgmt[memory_bank].mutex));
}
}
/* return */
return ret;
}
/*
* Allocate buffers for storing non-blocking collective descriptions, required
* for making code re-entrant
*
*/
static int init_nb_coll_buff_desc(mca_bcol_basesmuma_nb_coll_buff_desc_t **desc,
void *base_addr, uint32_t num_banks,
uint32_t num_buffers_per_bank,
uint32_t size_buffer,
uint32_t header_size,
int group_size,
int pow_k)
{
uint32_t i, j, ci;
mca_bcol_basesmuma_nb_coll_buff_desc_t *tmp_desc = NULL;
int k_nomial_radix = mca_bcol_basesmuma_component.k_nomial_radix;
int pow_k_val = (0 == pow_k) ? 1 : pow_k;
int num_to_alloc = (k_nomial_radix - 1) * pow_k_val * 2 + 1 ;
*desc = (mca_bcol_basesmuma_nb_coll_buff_desc_t *)calloc(num_banks * num_buffers_per_bank, sizeof(mca_bcol_basesmuma_nb_coll_buff_desc_t));
if (NULL == *desc) {
return OMPI_ERROR;
}
tmp_desc = *desc;
for (i = 0; i < num_banks; i++) {
for (j = 0; j < num_buffers_per_bank; j++) {
ci = i * num_buffers_per_bank + j;
tmp_desc[ci].bank_index = i;
tmp_desc[ci].buffer_index = j;
/* *2 is for gather session +1 for extra peer */
tmp_desc[ci].requests = (ompi_request_t **)
calloc(num_to_alloc, sizeof(ompi_request_t *));
tmp_desc[ci].data_addr = (void *)
((unsigned char*)base_addr + ci * size_buffer + header_size);
BASESMUMA_VERBOSE(10, ("ml memory cache setup %d %d - %p", i, j, tmp_desc[ci].data_addr));
}
}
return OMPI_SUCCESS;
}
/*
* Free buffers for storing non-blocking collective descriptions.
*
*/
void cleanup_nb_coll_buff_desc(mca_bcol_basesmuma_nb_coll_buff_desc_t **desc,
uint32_t num_banks,
uint32_t num_buffers_per_bank)
{
uint32_t ci;
if (NULL != *desc) {
for (ci=0; ci<num_banks*num_buffers_per_bank; ci++) {
if (NULL != ((*desc)[ci]).requests) {
free(((*desc)[ci]).requests);
((*desc))[ci].requests = NULL;
}
}
free(*desc);
*desc = NULL;
}
}
#if 1
/* New init function used for new control scheme where we put the control
* struct at the top of the payload buffer
*/
int bcol_basesmuma_bank_init_opti(struct mca_bcol_base_memory_block_desc_t *payload_block,
uint32_t data_offset,
mca_bcol_base_module_t *bcol_module,
void *reg_data)
{
/* assumption here is that the block has been registered with
* sm bcol hence has been mapped by each process, need to be
* sure that memory is mapped amongst sm peers
*/
/* local variables */
int ret = OMPI_SUCCESS, i, j;
sm_buffer_mgmt *pload_mgmt;
mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component;
bcol_basesmuma_registration_data_t *sm_reg_data =
(bcol_basesmuma_registration_data_t *) reg_data;
mca_bcol_basesmuma_module_t *sm_bcol =
(mca_bcol_basesmuma_module_t *) bcol_module;
mca_bcol_base_memory_block_desc_t *ml_block = payload_block;
size_t malloc_size;
bcol_basesmuma_smcm_file_t input_file;
int leading_dim,loop_limit,buf_id;
unsigned char *base_ptr;
mca_bcol_basesmuma_module_t *sm_bcol_module=
(mca_bcol_basesmuma_module_t *)bcol_module;
int my_idx, array_id;
mca_bcol_basesmuma_header_t *ctl_ptr;
void **results_array=NULL, *mem_offset;
mca_bcol_basesmuma_local_mlmem_desc_t *ml_mem = &sm_bcol_module->ml_mem;
/* first, we get a pointer to the payload buffer management struct */
pload_mgmt = &(sm_bcol->colls_with_user_data);
/* go ahead and get the header size that is cached on the payload block
*/
sm_bcol->total_header_size = data_offset;
/* allocate memory for pointers to mine and my peers' payload buffers
* difference here is that now we use our new data struct
*/
malloc_size = ml_block->num_banks*ml_block->num_buffers_per_bank*
pload_mgmt->size_of_group *sizeof(mca_bcol_basesmuma_payload_t);
pload_mgmt->data_buffs = (mca_bcol_basesmuma_payload_t *) malloc(malloc_size);
if( !pload_mgmt->data_buffs) {
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto exit_ERROR;
}
/* allocate some memory to hold the offsets */
results_array = (void **) malloc(pload_mgmt->size_of_group * sizeof (void *));
if (NULL == results_array) {
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto exit_ERROR;
}
/* setup the input file for the shared memory connection manager */
input_file.file_name = sm_reg_data->file_name;
input_file.size = sm_reg_data->size;
input_file.size_ctl_structure = 0;
input_file.data_seg_alignment = BASESMUMA_CACHE_LINE_SIZE;
input_file.mpool_size = sm_reg_data->size;
/* call the connection manager and map my shared memory peers' file
*/
ret = bcol_basesmuma_smcm_allgather_connection(
sm_bcol,
sm_bcol->super.sbgp_partner_module,
&(cs->sm_connections_list),
&(sm_bcol->payload_backing_files_info),
sm_bcol->super.sbgp_partner_module->group_comm,
input_file, cs->payload_base_fname,
false);
if( OMPI_SUCCESS != ret ) {
goto exit_ERROR;
}
/* now we exchange offset info - don't assume symmetric virtual memory
*/
mem_offset = (void *) ((uintptr_t) ml_block->block->base_addr -
(uintptr_t) cs->sm_payload_structs->data_addr);
/* call into the exchange offsets function */
ret=comm_allgather_pml(&mem_offset, results_array, sizeof (void *), MPI_BYTE,
sm_bcol_module->super.sbgp_partner_module->my_index,
sm_bcol_module->super.sbgp_partner_module->group_size,
sm_bcol_module->super.sbgp_partner_module->group_list,
sm_bcol_module->super.sbgp_partner_module->group_comm);
if( OMPI_SUCCESS != ret ) {
goto exit_ERROR;
}
/* convert memory offset to virtual address in current rank */
leading_dim = pload_mgmt->size_of_group;
loop_limit = ml_block->num_banks*ml_block->num_buffers_per_bank;
for (i=0;i< sm_bcol_module->super.sbgp_partner_module->group_size;i++) {
/* get the base pointer */
int array_id=SM_ARRAY_INDEX(leading_dim,0,i);
if( i == sm_bcol_module->super.sbgp_partner_module->my_index) {
/* me */
base_ptr=cs->sm_payload_structs->map_addr;
} else {
base_ptr=sm_bcol_module->payload_backing_files_info[i]->
sm_mmap->map_addr;
}
/* first, set the pointer to the control struct */
pload_mgmt->data_buffs[array_id].ctl_struct=(mca_bcol_basesmuma_header_t *)
(uintptr_t)(((uint64_t)(uintptr_t)results_array[array_id])+(uint64_t)(uintptr_t)base_ptr);
/* second, calculate where to set the data pointer */
pload_mgmt->data_buffs[array_id].payload=(void *)
(uintptr_t)((uint64_t)(uintptr_t) pload_mgmt->data_buffs[array_id].ctl_struct +
(uint64_t)(uintptr_t) data_offset);
for( buf_id = 1 ; buf_id < loop_limit ; buf_id++ ) {
int array_id_m1=SM_ARRAY_INDEX(leading_dim,(buf_id-1),i);
array_id=SM_ARRAY_INDEX(leading_dim,buf_id,i);
/* now, play the same game as above
*
* first, set the control struct's position */
pload_mgmt->data_buffs[array_id].ctl_struct=(mca_bcol_basesmuma_header_t *)
(uintptr_t)(((uint64_t)(uintptr_t)(pload_mgmt->data_buffs[array_id_m1].ctl_struct) +
(uint64_t)(uintptr_t)ml_block->size_buffer));
/* second, set the payload pointer */
pload_mgmt->data_buffs[array_id].payload =(void *)
(uintptr_t)((uint64_t)(uintptr_t) pload_mgmt->data_buffs[array_id].ctl_struct +
(uint64_t)(uintptr_t) data_offset);
}
}
/* done with the index array */
free (results_array);
results_array = NULL;
/* initialize my control structures!! */
my_idx = sm_bcol_module->super.sbgp_partner_module->my_index;
leading_dim = sm_bcol_module->super.sbgp_partner_module->group_size;
for( buf_id = 0; buf_id < loop_limit; buf_id++){
array_id = SM_ARRAY_INDEX(leading_dim,buf_id,my_idx);
ctl_ptr = pload_mgmt->data_buffs[array_id].ctl_struct;
/* initialize the data structures */
for( j = 0; j < SM_BCOLS_MAX; j++){
for( i = 0; i < NUM_SIGNAL_FLAGS; i++){
ctl_ptr->flags[i][j] = -1;
}
}
ctl_ptr->sequence_number = -1;
ctl_ptr->src = -1;
}
/* setup the data structures needed for releasing the payload
* buffers back to the ml level
*/
for( i=0 ; i < (int) ml_block->num_banks ; i++ ) {
sm_bcol->colls_with_user_data.
ctl_buffs_mgmt[i].nb_barrier_desc.ml_memory_block_descriptor=
ml_block;
}
ml_mem->num_banks = ml_block->num_banks;
ml_mem->bank_release_counter = calloc(ml_block->num_banks, sizeof(uint32_t));
ml_mem->num_buffers_per_bank = ml_block->num_buffers_per_bank;
ml_mem->size_buffer = ml_block->size_buffer;
/* pointer to ml level descriptor */
ml_mem->ml_mem_desc = ml_block;
if (OMPI_SUCCESS != init_nb_coll_buff_desc(&ml_mem->nb_coll_desc,
ml_block->block->base_addr,
ml_mem->num_banks,
ml_mem->num_buffers_per_bank,
ml_mem->size_buffer,
data_offset,
sm_bcol_module->super.sbgp_partner_module->group_size,
sm_bcol_module->pow_k)) {
BASESMUMA_VERBOSE(10, ("Failed to allocate memory descriptors for storing state of non-blocking collectives\n"));
return OMPI_ERROR;
}
return OMPI_SUCCESS;
exit_ERROR:
if (NULL != results_array) {
free(results_array);
}
return ret;
}
#endif
/* Basesmuma interface function used for buffer release */
#if 0
/* gvm
* A collective operation calls this routine to release the payload buffer.
* All processes in the shared memory sub-group of a bcol should call the non-blocking
* barrier on the last payload buffer of a memory bank. On the completion
* of the non-blocking barrier, the ML callback is called which is responsible
* for recycling the memory bank.
*/
mca_bcol_basesmuma_module_t *sm_bcol_module
int bcol_basesmuma_free_payload_buff(
struct mca_bcol_base_memory_block_desc_t *block,
sm_buffer_mgmt *ctl_mgmt,
uint64_t buff_id)
{
/* local variables */
int ret = OMPI_SUCCESS;
memory_bank = BANK_FROM_BUFFER_IDX(buff_id);
ctl_mgmt->ctl_buffs_mgmt[memory_bank].n_buffs_freed++;
OPAL_THREAD_ADD32(&(ctl_mgmt->ctl_buffs_mgmt[memory_bank].n_buffs_freed),1);
if (ctl_mgmt->ctl_buffs_mgmt[memory_bank].n_buffs_freed == block->size_buffers_bank){
/* start non-blocking barrier */
bcol_basesmuma_rd_nb_barrier_init_admin(
&(ctl_mgmt->ctl_buffs_mgmt[memory_bank].nb_barrier_desc));
if (NB_BARRIER_DONE !=
ctl_mgmt->ctl_buffs_mgmt[memory_bank].
nb_barrier_desc.collective_phase){
/* progress the barrier */
opal_progress();
}
else{
/* free the buffer - i.e. initiate callback to ml level */
block->ml_release_cb(block,memory_bank);
}
}
return ret;
}
#endif

View file

@ -1,380 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
*/
#include "ompi_config.h"
#include "ompi/constants.h"
#include "ompi/communicator/communicator.h"
#include "opal/mca/mpool/base/base.h"
#include "ompi/mca/bcol/bcol.h"
#include "ompi/mca/bcol/base/base.h"
#include "opal/align.h"
#include "bcol_basesmuma.h"
/*
* Public string showing the coll ompi_sm V2 component version number
*/
const char *mca_bcol_basesmuma_component_version_string =
"Open MPI bcol - basesmuma collective MCA component version " OMPI_VERSION;
/*
* Local functions
*/
static int basesmuma_register(void);
static int basesmuma_open(void);
static int basesmuma_close(void);
static int mca_bcol_basesmuma_deregister_ctl_sm(
mca_bcol_basesmuma_component_t *bcol_component);
static inline int mca_bcol_basesmuma_param_register_int(
const char* param_name, int default_value, int *storage)
{
*storage = default_value;
return mca_base_component_var_register(&mca_bcol_basesmuma_component.super.bcol_version, param_name,
NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, storage);
}
static inline int mca_bcol_basesmuma_param_register_bool(
const char* param_name, bool default_value, bool *storage)
{
*storage = default_value;
return mca_base_component_var_register(&mca_bcol_basesmuma_component.super.bcol_version, param_name,
NULL, MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, storage);
}
/*
* Instantiate the public struct with all of our public information
* and pointers to our public functions in it
*/
mca_bcol_basesmuma_component_t mca_bcol_basesmuma_component = {
/* First, fill in the super */
{
/* First, the mca_component_t struct containing meta
information about the component itself */
.bcol_version = {
MCA_BCOL_BASE_VERSION_2_0_0,
/* Component name and version */
.mca_component_name = "basesmuma",
MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
OMPI_RELEASE_VERSION),
/* Component open and close functions */
.mca_open_component = basesmuma_open,
.mca_close_component = basesmuma_close,
.mca_register_component_params = basesmuma_register,
},
/* Initialization / querying functions */
.collm_init_query = mca_bcol_basesmuma_init_query,
.collm_comm_query = mca_bcol_basesmuma_comm_query,
.init_done = false,
.need_ordering = false,
.priority = 0, /* (default) priority */
},
};
/*
* Register the component
*/
static int basesmuma_register(void)
{
mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component;
/* set component priority */
mca_bcol_basesmuma_param_register_int("priority", 90, &cs->super.priority);
/* Number of memory banks */
mca_bcol_basesmuma_param_register_int("basesmuma_num_ctl_banks", 2,
&cs->basesmuma_num_mem_banks);
/* Number of regions per memory bank */
mca_bcol_basesmuma_param_register_int("basesmuma_num_buffs_per_bank", 16,
&cs->basesmuma_num_regions_per_bank);
/* number of polling loops to allow pending resources to
* complete their work
*/
mca_bcol_basesmuma_param_register_int("n_poll_loops", 4, &cs->n_poll_loops);
/* Number of groups supported */
mca_bcol_basesmuma_param_register_int("n_groups_supported", 100,
&cs->n_groups_supported);
/* order of fanin tree */
mca_bcol_basesmuma_param_register_int("radix_fanin", 2, &cs->radix_fanin);
/* order of fanout tree */
mca_bcol_basesmuma_param_register_int("radix_fanout", 2, &cs->radix_fanout);
/* order of read tree */
mca_bcol_basesmuma_param_register_int("radix_read_tree", 3,
&cs->radix_read_tree);
/* order of reduction fanout tree */
mca_bcol_basesmuma_param_register_int("order_reduction_tree", 2,
&cs->order_reduction_tree);
/* k-nomial radix */
mca_bcol_basesmuma_param_register_int("k_nomial_radix", 3, &cs->k_nomial_radix);
/* number of polling loops for non-blocking algorithms */
mca_bcol_basesmuma_param_register_int("num_to_probe", 10, &cs->num_to_probe);
/* radix of the k-ary scatter tree */
mca_bcol_basesmuma_param_register_int("scatter_kary_radix", 4,
&cs->scatter_kary_radix);
/* register parameters controlling message fragmentation */
mca_bcol_basesmuma_param_register_int("min_frag_size", getpagesize(),
&cs->super.min_frag_size);
mca_bcol_basesmuma_param_register_int("max_frag_size", FRAG_SIZE_NO_LIMIT,
&cs->super.max_frag_size);
/* by default use pre-registered shared memory segments */
/* RLG NOTE: When we have a systematic way to handle single memory
* copy semantics, we need to update this logic
*/
mca_bcol_basesmuma_param_register_bool("can_use_user_buffers", false,
&cs->super.can_use_user_buffers);
mca_bcol_basesmuma_param_register_int("verbose", 0, &cs->verbose);
return OMPI_SUCCESS;
}
/*
* Open the component
*/
static int basesmuma_open(void)
{
/* local variables */
mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component;
int ret = OMPI_SUCCESS;
opal_mutex_t *mutex_ptr;
int dummy;
/*
* Make sure that the number of banks is a power of 2
*/
cs->basesmuma_num_mem_banks=
ompi_roundup_to_power_radix(2,cs->basesmuma_num_mem_banks, &dummy);
if ( 0 == cs->basesmuma_num_mem_banks ) {
ret=OMPI_ERROR;
goto exit_ERROR;
}
/*
 * Make sure that the number of buffers is a power of 2
*/
cs->basesmuma_num_regions_per_bank=
ompi_roundup_to_power_radix(2,cs->basesmuma_num_regions_per_bank, &dummy);
if ( 0 == cs->basesmuma_num_regions_per_bank ) {
ret=OMPI_ERROR;
goto exit_ERROR;
}
/* Portals initialization */
cs->portals_init = false;
cs->portals_info = NULL;
/*
* initialization
*/
cs->sm_ctl_structs=NULL;
OBJ_CONSTRUCT(&(cs->sm_connections_list),opal_list_t);
OBJ_CONSTRUCT(&(cs->nb_admin_barriers),opal_list_t);
mutex_ptr= &(cs->nb_admin_barriers_mutex);
OBJ_CONSTRUCT(mutex_ptr, opal_mutex_t);
/* Control structures object construct
*/
OBJ_CONSTRUCT(&(cs->ctl_structures), opal_list_t);
/* shared memory has not been registered yet */
cs->mpool_inited = false;
/* initialize base file names */
cs->clt_base_fname="sm_ctl_mem_";
cs->payload_base_fname="sm_payload_mem_";
/* initialize the size of the shared memory scratch region */
cs->my_scratch_shared_memory_size=getpagesize();
cs->my_scratch_shared_memory=NULL;
cs->scratch_offset_from_base_ctl_file=0;
/*
 * register the progress function
*/
ret=opal_progress_register(bcol_basesmuma_progress);
if (MPI_SUCCESS != ret) {
opal_output(ompi_bcol_base_framework.framework_output, "failed to register the progress function");
}
return ret;
exit_ERROR:
return ret;
}
/*
* release the control structure backing file
*/
static int mca_bcol_basesmuma_deregister_ctl_sm(mca_bcol_basesmuma_component_t *bcol_component)
{
if (NULL != bcol_component->sm_ctl_structs) {
OBJ_RELEASE(bcol_component->sm_ctl_structs);
}
return OMPI_SUCCESS;
}
/*
* Close the component
*/
static int basesmuma_close(void)
{
int ret;
bcol_basesmuma_registration_data_t *net_ctx;
bcol_base_network_context_t *net_reg;
mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component;
/* gvm Leak FIX */
OPAL_LIST_DESTRUCT (&cs->ctl_structures);
/* deregister the progress function */
ret=opal_progress_unregister(bcol_basesmuma_progress);
if (MPI_SUCCESS != ret) {
opal_output(ompi_bcol_base_framework.framework_output, "failed to unregister the progress function");
}
/* remove the control structure backing file */
ret=mca_bcol_basesmuma_deregister_ctl_sm(&mca_bcol_basesmuma_component);
if (MPI_SUCCESS != ret) {
opal_output(ompi_bcol_base_framework.framework_output, "failed to remove control structure backing file");
}
/* remove the network contexts - only one network context defined for
* this component.
*/
/* file_name is returned by asprintf, so we need to free the resource */
if(mca_bcol_basesmuma_component.super.network_contexts ) {
net_reg=(bcol_base_network_context_t *)
mca_bcol_basesmuma_component.super.network_contexts[0];
if(net_reg) {
net_ctx=(bcol_basesmuma_registration_data_t *)net_reg->context_data;
if( net_ctx) {
if(net_ctx->file_name) {
free(net_ctx->file_name);
}
free(net_ctx);
}
free(net_reg);
}
free(mca_bcol_basesmuma_component.super.network_contexts);
mca_bcol_basesmuma_component.super.network_contexts=NULL;
}
/* normal return */
return OMPI_SUCCESS;
}
/* query to see if the component is available for use, and can
* satisfy the thread and progress requirements
*/
int mca_bcol_basesmuma_init_query(bool enable_progress_threads,
bool enable_mpi_threads)
{
/* done */
return OMPI_SUCCESS;
}
/* This routine is used to allocate shared memory for the shared
* memory control regions.
*/
int mca_bcol_basesmuma_allocate_sm_ctl_memory(mca_bcol_basesmuma_component_t *cs)
{
/* local variables */
int name_length, ret = OMPI_SUCCESS;
size_t ctl_length;
char *name;
size_t page_size = getpagesize ();
/* set the file name */
name_length=asprintf(&name,
"%s"OPAL_PATH_SEP"%s""%0d",
ompi_process_info.job_session_dir,
cs->clt_base_fname,
(int)getpid());
if( 0 > name_length ) {
return OMPI_ERROR;
}
/* make sure name is not too long */
if ( OPAL_PATH_MAX < (name_length-1) ) {
free (name);
return OMPI_ERROR;
}
/* compute segment length */
ctl_length=(cs->basesmuma_num_mem_banks*
cs->basesmuma_num_regions_per_bank+cs->basesmuma_num_mem_banks)
*sizeof(mca_bcol_basesmuma_ctl_struct_t)*cs->n_groups_supported;
/* need two banks of memory per group - for algorithms that have
* user payload, and those that don't
*/
ctl_length*=2;
/* add space for internal library management purposes */
ctl_length+=cs->my_scratch_shared_memory_size;
/* round up to multiple of page size */
ctl_length = OPAL_ALIGN(ctl_length, page_size, size_t);
/* allocate the shared file */
cs->sm_ctl_structs=bcol_basesmuma_smcm_mem_reg (NULL, ctl_length, getpagesize(), name);
if( !cs->sm_ctl_structs) {
opal_output (ompi_bcol_base_framework.framework_output,
"In mca_bcol_basesmuma_allocate_sm_ctl_memory failed to allocathe backing file %s\n", name);
ret = OMPI_ERR_OUT_OF_RESOURCE;
}
/* free the memory allocated by asprintf for the file name -
* in mca_base_smcm_mem_reg this name is copied into a new
* memory location */
free (name);
/* successful return */
return ret;
}
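/*
 * Worked size example (comment added for clarity, using the component
 * defaults registered above: 2 banks, 16 buffers per bank, 100 groups):
 * (2*16 + 2) * sizeof(mca_bcol_basesmuma_ctl_struct_t) * 100 = 3400 control
 * structs, doubled to 6800 to cover both the user-data and no-user-data
 * banks, plus one page of scratch space, then rounded up to a multiple of
 * the page size.
 */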

View file

@ -1,218 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/* Recursive doubling blocking barrier */
#include "ompi_config.h"
#include "ompi/constants.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/bcol/bcol.h"
#include "ompi/patterns/net/netpatterns.h"
#include "opal/sys/atomic.h"
#include "ompi/mca/bcol/base/base.h"
#include "bcol_basesmuma.h"
/********************************************************************************/
/********************************** New Fan-In **********************************/
/********************************************************************************/
static int bcol_basesmuma_fanin_new(bcol_function_args_t *input_args,
mca_bcol_base_function_t *c_input_args)
{
/* local variables */
int64_t sequence_number;
mca_bcol_basesmuma_module_t* bcol_module =
(mca_bcol_basesmuma_module_t *) c_input_args->bcol_module;
int i, child_rank, idx, n_children, probe,
my_rank = bcol_module->super.sbgp_partner_module->my_index,
leading_dim = bcol_module->colls_no_user_data.size_of_group;
int8_t ready_flag;
int8_t bcol_id = (int8_t) bcol_module->super.bcol_id;
int buff_index = input_args->buffer_index;
int *active_requests =
&(bcol_module->ml_mem.nb_coll_desc[buff_index].active_requests);
mca_bcol_basesmuma_component_t *cm = &mca_bcol_basesmuma_component;
int matched = 0;
volatile mca_bcol_basesmuma_payload_t *ctl_structs;
/* control structures */
volatile mca_bcol_basesmuma_header_t *my_ctl;
volatile mca_bcol_basesmuma_header_t *child_ctl;
netpatterns_tree_node_t *my_tree_node = &(bcol_module->fanin_node);
/* Figure out - what instance of the basesmuma bcol I am */
sequence_number = input_args->sequence_num;
idx = SM_ARRAY_INDEX(leading_dim, buff_index, 0);
ctl_structs = (volatile mca_bcol_basesmuma_payload_t *)
bcol_module->colls_with_user_data.data_buffs + idx;
my_ctl = ctl_structs[my_rank].ctl_struct;
/* Init the header */
BASESMUMA_HEADER_INIT(my_ctl, ready_flag, sequence_number, bcol_id);
/* Cache num of children value in a local variable */
n_children = my_tree_node->n_children;
/* initialize the active requests */
*active_requests = 0;
/* create a bit map for children */
for( i = 0; i < n_children; i++){
*active_requests ^= (1<<i);
}
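/*
 * Illustrative example (comment added for clarity): with n_children = 3 the
 * loop above leaves active_requests = 0b111; each child that is observed
 * ready clears its bit below, and the fan-in completes once the mask
 * reaches 0.
 */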
/* Wait until my children arrive */
for (i = 0; i < n_children; ++i) {
matched = 0;
/* Get child ctl struct */
child_rank = my_tree_node->children_ranks[i];
child_ctl = ctl_structs[child_rank].ctl_struct;
/* I'm sacrificing cache for concurrency */
for( probe = 0; probe < cm->num_to_probe && (0 == matched); probe++){
if(IS_PEER_READY(child_ctl, ready_flag, sequence_number,BARRIER_FANIN_FLAG, bcol_id)) {
matched = 1;
/* flip the bit */
*active_requests ^= (1<<i);
}
}
}
if(0 == *active_requests ) {
if(ROOT_NODE != my_tree_node->my_node_type){
/* I have no more active requests,
signal my parent */
my_ctl->flags[BARRIER_FANIN_FLAG][bcol_id] = ready_flag;
}
} else {
return BCOL_FN_STARTED;
}
my_ctl->starting_flag_value[bcol_id]++;
return BCOL_FN_COMPLETE;
}
static int bcol_basesmuma_fanin_new_progress(bcol_function_args_t *input_args,
mca_bcol_base_function_t *c_input_args)
{
/* local variables */
int64_t sequence_number;
mca_bcol_basesmuma_module_t* bcol_module =
(mca_bcol_basesmuma_module_t *) c_input_args->bcol_module;
int i, child_rank, flag_offset, idx, n_children, probe,
my_rank = bcol_module->super.sbgp_partner_module->my_index,
leading_dim = bcol_module->colls_no_user_data.size_of_group;
int8_t ready_flag;
int8_t bcol_id = (int8_t) bcol_module->super.bcol_id;
int buff_index = input_args->buffer_index;
int *active_requests =
&(bcol_module->ml_mem.nb_coll_desc[buff_index].active_requests);
mca_bcol_basesmuma_component_t *cm = &mca_bcol_basesmuma_component;
int matched = 0;
volatile mca_bcol_basesmuma_payload_t *ctl_structs;
/* control structures */
volatile mca_bcol_basesmuma_header_t *my_ctl;
volatile mca_bcol_basesmuma_header_t *child_ctl;
netpatterns_tree_node_t *my_tree_node = &(bcol_module->fanin_node);
sequence_number = input_args->sequence_num;
idx = SM_ARRAY_INDEX(leading_dim, buff_index, 0);
ctl_structs = (volatile mca_bcol_basesmuma_payload_t *)
bcol_module->colls_with_user_data.data_buffs + idx;
my_ctl = ctl_structs[my_rank].ctl_struct;
flag_offset = my_ctl->starting_flag_value[bcol_id];
ready_flag = flag_offset + 1;
my_ctl->sequence_number = sequence_number;
/* Cache num of children value in a local variable */
n_children = my_tree_node->n_children;
/* Wait until my children arrive */
for (i = 0; i < n_children; ++i) {
matched = 0;
/* Get child ctl struct */
if ( 1 == ((*active_requests >> i)&1) ) {
child_rank = my_tree_node->children_ranks[i];
child_ctl = ctl_structs[child_rank].ctl_struct;
/* I'm sacrificing cache for concurrency */
for( probe = 0; probe < cm->num_to_probe && (0 == matched); probe++){
if(IS_PEER_READY(child_ctl, ready_flag, sequence_number, BARRIER_FANIN_FLAG,bcol_id)) {
matched = 1;
/* flip the bit */
*active_requests ^= (1<<i);
}
}
}
}
if(0 == *active_requests ){
if(ROOT_NODE != my_tree_node->my_node_type){
/* If I am not the root of the fanin tree,
then signal my parent */
my_ctl->flags[BARRIER_FANIN_FLAG][bcol_id] = ready_flag;
}
} else {
return BCOL_FN_STARTED;
}
my_ctl->starting_flag_value[bcol_id]++;
return BCOL_FN_COMPLETE;
}
int bcol_basesmuma_fanin_init(mca_bcol_base_module_t *super)
{
mca_bcol_base_coll_fn_comm_attributes_t comm_attribs;
mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs;
BASESMUMA_VERBOSE(10, ("Basesmuma Fan-In register.\n"));
comm_attribs.bcoll_type = BCOL_FANIN;
comm_attribs.comm_size_min = 0;
comm_attribs.comm_size_max = 1024 * 1024;
comm_attribs.waiting_semantics = NON_BLOCKING;
inv_attribs.bcol_msg_min = 0;
inv_attribs.bcol_msg_max = 20000; /* range 1 */
inv_attribs.datatype_bitmap = 0xffffffff;
inv_attribs.op_types_bitmap = 0xffffffff;
comm_attribs.data_src = DATA_SRC_KNOWN;
mca_bcol_base_set_attributes(super,
&comm_attribs, &inv_attribs,
bcol_basesmuma_fanin_new,
bcol_basesmuma_fanin_new_progress);
return OMPI_SUCCESS;
}
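
The fan-in above tracks its outstanding children with a one-bit-per-child mask kept in active_requests: every bit is set when the collective starts, each bit is flipped off as the corresponding child's ready flag is observed, and the fan-in is complete when the mask reaches zero. A minimal standalone sketch of that bookkeeping (hypothetical names, not part of the bcol API):

#include <stdio.h>

int main(void)
{
    int n_children = 3;
    unsigned int pending = 0;

    /* mark every child as outstanding, as the fan-in does at start-up */
    for (int i = 0; i < n_children; i++) {
        pending ^= (1u << i);
    }

    /* pretend children 1 and 2 were observed ready on this progress pass */
    int arrived[] = { 1, 2 };
    for (int k = 0; k < 2; k++) {
        pending ^= (1u << arrived[k]);  /* flip the bit for an arrived child */
    }

    if (0 == pending) {
        printf("all children arrived, signal the parent\n");
    } else {
        printf("still waiting, pending mask = 0x%x\n", pending);
    }
    return 0;
}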

View file

@ -1,123 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/* Recursive doubling blocking barrier */
#include "ompi_config.h"
#include "ompi/constants.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/bcol/bcol.h"
#include "ompi/patterns/net/netpatterns.h"
#include "opal/sys/atomic.h"
#include "ompi/mca/bcol/base/base.h"
#include "bcol_basesmuma.h"
/***********************************************************************************/
/*********************************** New Fan-Out ***********************************/
/***********************************************************************************/
static int bcol_basesmuma_fanout_new(
bcol_function_args_t *input_args,
mca_bcol_base_function_t *c_input_args)
{
/* local variables */
int64_t sequence_number;
mca_bcol_basesmuma_module_t* bcol_module =
(mca_bcol_basesmuma_module_t *) c_input_args->bcol_module;
int idx, probe,
my_rank = bcol_module->super.sbgp_partner_module->my_index,
leading_dim = bcol_module->colls_no_user_data.size_of_group;
int8_t ready_flag;
int8_t bcol_id = (int8_t) bcol_module->super.bcol_id;
int buff_index = input_args->buffer_index;
mca_bcol_basesmuma_component_t *cm = &mca_bcol_basesmuma_component;
volatile mca_bcol_basesmuma_payload_t *ctl_structs;
/* control structures */
volatile mca_bcol_basesmuma_header_t *my_ctl;
volatile mca_bcol_basesmuma_header_t *parent_ctl;
netpatterns_tree_node_t *my_tree_node = &(bcol_module->fanin_node);
/* Figure out what instance of the basesmuma bcol I am */
sequence_number = input_args->sequence_num;
idx = SM_ARRAY_INDEX(leading_dim, buff_index, 0);
ctl_structs = (volatile mca_bcol_basesmuma_payload_t *)
bcol_module->colls_with_user_data.data_buffs + idx;
my_ctl = ctl_structs[my_rank].ctl_struct;
/* init the header */
BASESMUMA_HEADER_INIT(my_ctl, ready_flag, sequence_number, bcol_id);
/* Wait on my parent to arrive */
if (my_tree_node->n_parents) {
parent_ctl = ctl_structs[my_tree_node->parent_rank].ctl_struct;
for( probe = 0; probe < cm->num_to_probe; probe++){
if (IS_PEER_READY(parent_ctl, ready_flag, sequence_number, BARRIER_FANOUT_FLAG, bcol_id)) {
/* signal my children */
my_ctl->flags[BARRIER_FANOUT_FLAG][bcol_id] = ready_flag;
/* bump the starting flag */
my_ctl->starting_flag_value[bcol_id]++;
return BCOL_FN_COMPLETE;
}
}
} else {
/* I am the root of the fanout */
my_ctl->flags[BARRIER_FANOUT_FLAG][bcol_id] = ready_flag;
/* bump the starting flag */
my_ctl->starting_flag_value[bcol_id]++;
return BCOL_FN_COMPLETE;
}
return BCOL_FN_STARTED;
}
int bcol_basesmuma_fanout_init(mca_bcol_base_module_t *super)
{
mca_bcol_base_coll_fn_comm_attributes_t comm_attribs;
mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs;
BASESMUMA_VERBOSE(10, ("Basesmuma Fan-Out register.\n"));
comm_attribs.bcoll_type = BCOL_FANOUT;
comm_attribs.comm_size_min = 0;
comm_attribs.comm_size_max = 1024 * 1024;
comm_attribs.waiting_semantics = NON_BLOCKING;
inv_attribs.bcol_msg_min = 0;
inv_attribs.bcol_msg_max = 20000; /* range 1 */
inv_attribs.datatype_bitmap = 0xffffffff;
inv_attribs.op_types_bitmap = 0xffffffff;
comm_attribs.data_src = DATA_SRC_KNOWN;
mca_bcol_base_set_attributes(super,
&comm_attribs, &inv_attribs,
bcol_basesmuma_fanout_new,
bcol_basesmuma_fanout_new);
return OMPI_SUCCESS;
}
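
Both the fan-in and the fan-out decide whether a peer has arrived with the same kind of test: the peer must be working on my sequence number, and its published flag for this phase must have reached the value I am waiting for (the large-message IS_SG_DATA_READY macro later in this commit spells out the same predicate). A simplified sketch with hypothetical types, not the real bcol control structures:

#include <stdint.h>
#include <stdio.h>

typedef struct {
    volatile int64_t sequence_number;  /* which collective instance the peer is in */
    volatile int64_t flag;             /* monotonically increasing phase marker */
} peer_ctl_t;

static int peer_is_ready(const peer_ctl_t *peer, int64_t my_flag, int64_t my_sequence)
{
    return peer->sequence_number == my_sequence && peer->flag >= my_flag;
}

int main(void)
{
    peer_ctl_t peer = { .sequence_number = 42, .flag = 3 };
    printf("ready for flag 3? %d\n", peer_is_ready(&peer, 3, 42));  /* 1 */
    printf("ready for flag 4? %d\n", peer_is_ready(&peer, 4, 42));  /* 0: peer not there yet */
    return 0;
}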

File diff not shown because it is too large.

File diff not shown because it is too large.

View file

@ -1,626 +0,0 @@
#ifdef __PORTALS_AVAIL__
#define __PORTALS_ENABLE__
#include <unistd.h>
#include "ompi_config.h"
#include "ompi/constants.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h"
#include "bcol_basesmuma_utils.h"
#include "bcol_basesmuma_portals.h"
#include "bcol_basesmuma.h"
#if 0
struct scatter_allgather_nb_bcast_state_t
{
/* local variables */
uint64_t length;
int my_rank, src, matched;
int *src_list;
int group_size;
int64_t ready_flag;
int pow_2, pow_2_levels;
int src_list_index;
uint64_t fragment_size; /* user buffer size */
/* Input argument variables */
void *my_userbuf;
int64_t sequence_number;
/* Extra source variables */
bool secondary_root;
int partner , extra_partner;
/* Scatter Allgather offsets */
uint64_t local_sg_offset , global_sg_offset , partner_offset ;
/* Portals messaging relevant variables */
ptl_handle_eq_t allgather_eq_h;
ptl_handle_eq_t read_eq;
ptl_event_t allgather_event;
bool msg_posted;
/* OMPI module and component variables */
mca_bcol_basesmuma_component_t *cs;
mca_bcol_basesmuma_module_t *bcol_module;
/* Control structure and payload variables */
volatile mca_bcol_basesmuma_ctl_struct_t **ctl_structs;
volatile mca_bcol_basesmuma_ctl_struct_t *my_ctl_pointer;
volatile mca_bcol_basesmuma_ctl_struct_t *parent_ctl_pointer; /* scatter source */
volatile mca_bcol_basesmuma_ctl_struct_t *extra_partner_ctl_pointer; /* scatter source */
int phase;
};
typedef struct scatter_allgather_nb_bcast_state_t sg_state_t;
#endif
bool blocked_post = false;
#define IS_SG_DATA_READY(peer, my_flag, my_sequence_number) \
(((peer)->sequence_number == (my_sequence_number) && \
(peer)->flags[BCAST_FLAG] >= (my_flag) \
)? true : false )
#define SG_LARGE_MSG_PROBE(src_list, n_src, src_list_index, matched, \
src, data_buffs, data_src_ctl_pointer, \
data_src_lmsg_ctl_pointer, ready_flag, \
sequence_number) \
do { \
int j; \
for( j = 0; j < n_src; j++) { \
if(src_list[j] != -1) { \
data_src_ctl_pointer = data_buffs[src_list[j]].ctl_struct; \
data_src_lmsg_ctl_pointer = (mca_bcol_basesmuma_portal_buf_addr_t*) \
data_buffs[src_list[j]].payload; \
if( IS_SG_DATA_READY(data_src_ctl_pointer,ready_flag,sequence_number)) { \
src = src_list[j]; \
matched = 1; \
src_list_index = j; \
break; \
} \
} \
} \
} while(0)
#define SG_LARGE_MSG_NB_PROBE(src_list, n_src, src_list_index, matched, \
src, ctl_structs, data_src_ctl_pointer, \
ready_flag, sequence_number) \
do { \
int j; \
for( j = 0; j < n_src; j++) { \
if(src_list[j] != -1) { \
data_src_ctl_pointer = ctl_structs[src_list[j]]; \
if( IS_SG_DATA_READY(data_src_ctl_pointer,ready_flag,sequence_number)) { \
src = src_list[j]; \
matched = 1; \
src_list_index = j; \
break; \
} \
} \
} \
} while(0)
static inline __opal_attribute_always_inline__
int wait_for_peers(int my_rank, int npeers, volatile mca_bcol_basesmuma_payload_t *data_buffs,
int flag_value, int sn)
{
int *peers_list = NULL;
int counter = 0, diter = 0;
volatile mca_bcol_basesmuma_header_t *peer_ctl_pointer = NULL;
peers_list = (int *)malloc(sizeof(int) * npeers);
for (diter = 0; diter < npeers; diter++ ){
peers_list[diter] = my_rank ^ (1<<diter);
assert(peers_list[diter] != -1);
}
counter = 0;
while (counter < npeers) {
for (diter = 0; diter < npeers; diter++){
if (-1 != peers_list[diter]) {
peer_ctl_pointer = data_buffs[peers_list[diter]].ctl_struct;
if (IS_SG_DATA_READY(peer_ctl_pointer, flag_value, sn)) {
counter++;
peers_list[diter] = -1;
}
}
}
opal_progress();
}
free(peers_list);
return 0;
}
static inline __opal_attribute_always_inline__
int wait_for_peers_nb(int my_rank, int npeers,
volatile mca_bcol_basesmuma_ctl_struct_t **ctl_structs,
volatile int flag_value, int sn)
{
int *peers_list = NULL;
int counter = 0, diter = 0;
volatile mca_bcol_basesmuma_ctl_struct_t *peer_ctl_pointer = NULL;
peers_list = (int *)malloc(sizeof(int) * npeers);
for (diter = 0; diter < npeers; diter++ ){
peers_list[diter] = my_rank ^ (1<<diter);
assert(peers_list[diter] != -1);
}
counter = 0;
while (counter < npeers) {
for (diter = 0; diter < npeers; diter++){
if (-1 != peers_list[diter]) {
peer_ctl_pointer = ctl_structs[peers_list[diter]];
if (IS_SG_DATA_READY(peer_ctl_pointer, flag_value, sn)) {
counter++;
peers_list[diter] = -1;
}
}
}
opal_progress();
}
free(peers_list);
return 0;
}
static inline __opal_attribute_always_inline__
int wait_for_post_complete_nb(int my_rank, int npeers,
volatile mca_bcol_basesmuma_ctl_struct_t **ctl_structs,
int flag_value, int sn)
{
/* int *peers_list = NULL; */
int peers_list[MAX_SM_GROUP_SIZE];
int counter = 0, diter = 0;
volatile mca_bcol_basesmuma_ctl_struct_t *peer_ctl_pointer = NULL;
/* peers_list = (int *)malloc(sizeof(int) * npeers); */
assert(npeers < MAX_SM_GROUP_SIZE);
for (diter = 0; diter < npeers; diter++ ){
peers_list[diter] = my_rank ^ (1<<diter);
assert(peers_list[diter] != -1);
}
counter = 0;
for (diter = 0; diter < npeers; diter++){
peer_ctl_pointer = ctl_structs[peers_list[diter]];
if (IS_SG_DATA_READY(peer_ctl_pointer, flag_value, sn)) {
counter++;
}
}
/* free(peers_list); */
return counter;
}
static inline __opal_attribute_always_inline__
int sg_large_msg_probe(sg_state_t *sg_state)
{
int j,n_src = sg_state->pow_2_levels+1;
for( j = 0; j < n_src; j++) {
if(sg_state->src_list[j] != -1) {
sg_state->parent_ctl_pointer = sg_state->ctl_structs[sg_state->src_list[j]];
BASESMUMA_VERBOSE(5,("Parent %d ctl pointer (parent=%p, my ctl=%p) flag %ld",
sg_state->src_list[j], (void *) sg_state->parent_ctl_pointer,
(void *) sg_state->my_ctl_pointer,
(long) sg_state->parent_ctl_pointer->flag));
if (IS_SG_DATA_READY(sg_state->parent_ctl_pointer,
sg_state->ready_flag, sg_state->sequence_number)) {
sg_state->src = sg_state->src_list[j];
sg_state->matched = 1;
sg_state->src_list_index = j;
break;
}
}
}
return 0;
}
/*
* I will post the message for all of my children
*/
static inline __opal_attribute_always_inline__
int sm_portals_root_scatter(sg_state_t *sg_state)
{
int extra_src_posts = -1, scatter_posts = -1, allgather_posts = -1,
total_msg_posts = -1;
BASESMUMA_VERBOSE(10,("I am the root of the data"));
sg_state->my_ctl_pointer->offset = 0;
sg_state->my_ctl_pointer->n_sends = sg_state->pow_2_levels;
sg_state->my_ctl_pointer->length = sg_state->fragment_size;
extra_src_posts = (sg_state->my_rank + sg_state->pow_2 < sg_state->group_size ) ? 1: 0;
scatter_posts = sg_state->my_ctl_pointer->n_sends;
allgather_posts = sg_state->pow_2_levels - 1;
total_msg_posts = scatter_posts + allgather_posts + extra_src_posts ;
if ( total_msg_posts <= 0) {
BASESMUMA_VERBOSE(10,("No need to post the data "));
return OMPI_SUCCESS;
}
mca_bcol_basesmuma_portals_post_msg(sg_state->cs,
&sg_state->my_ctl_pointer->portals_buf_addr,
sg_state->my_userbuf, sg_state->fragment_size,
PTL_EQ_NONE,
total_msg_posts,
blocked_post,
PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE |
PTL_MD_OP_GET | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE);
/*
mca_bcol_basesmuma_portals_post_msg(sg_state->cs,
&sg_state->my_ctl_pointer->portals_buf_addr,
sg_state->my_userbuf, sg_state->fragment_size,
sg_state->allgather_eq_h,
total_msg_posts,
blocked_post,
PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE |
PTL_MD_OP_GET | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE);
*/
sg_state->msg_posted = true ;
/*
opal_atomic_wmb();
*/
sg_state->my_ctl_pointer->flag = sg_state->ready_flag;
return OMPI_SUCCESS;
}
/*
* I'm the root, but my rank is outside the power-of-two group, so copy to the
* partner who will act as the secondary root
*/
static inline __opal_attribute_always_inline__
int sm_portals_extra_root_scatter(sg_state_t *sg_state)
{
int scatter_partner = -1;
volatile mca_bcol_basesmuma_ctl_struct_t *scatter_partner_ctl_pointer = NULL;
int total_msg_posts = 1;
if ( total_msg_posts <= 0) {
BASESMUMA_VERBOSE(10,("No need to post the data "));
}
else {
mca_bcol_basesmuma_portals_post_msg(sg_state->cs,
&sg_state->my_ctl_pointer->portals_buf_addr,
sg_state->my_userbuf, sg_state->fragment_size,
PTL_EQ_NONE,
total_msg_posts,
blocked_post,
PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE | PTL_MD_OP_GET
| PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE);
sg_state->msg_posted = true ;
}
opal_atomic_wmb();
sg_state->my_ctl_pointer->flag = sg_state->ready_flag;
scatter_partner = sg_state->my_rank - sg_state->pow_2;
scatter_partner_ctl_pointer =
sg_state->ctl_structs[scatter_partner];
while(!IS_SG_DATA_READY(scatter_partner_ctl_pointer, sg_state->ready_flag,
sg_state->sequence_number)){
opal_progress();
}
return OMPI_SUCCESS;
}
/*
* Gets the message from the partner (rank beyond the power-of-two group) and
* posts the message, acting as the root
*/
static inline __opal_attribute_always_inline__
int sm_portals_secondary_root_scatter(sg_state_t *sg_state)
{
volatile mca_bcol_basesmuma_ctl_struct_t *extra_src_ctl_pointer = NULL;
int scatter_posts, allgather_posts, extra_src_posts, total_msg_posts;
sg_state->secondary_root = true;
BASESMUMA_VERBOSE(10,("I am the secondary root for the data"));
sg_state->my_ctl_pointer->offset = 0;
sg_state->my_ctl_pointer->n_sends = sg_state->pow_2_levels;
sg_state->my_ctl_pointer->length = sg_state->fragment_size;
extra_src_ctl_pointer = sg_state->ctl_structs[sg_state->src];
mca_bcol_basesmuma_portals_get_msg_fragment(sg_state->cs,
sg_state->read_eq,
&sg_state->my_ctl_pointer->portals_buf_addr,
&extra_src_ctl_pointer->portals_buf_addr, 0,
0, sg_state->fragment_size);
extra_src_posts = 0;
scatter_posts = sg_state->my_ctl_pointer->n_sends;
allgather_posts = sg_state->pow_2_levels - 1;
total_msg_posts = scatter_posts + allgather_posts + extra_src_posts ;
if (total_msg_posts > 0) {
mca_bcol_basesmuma_portals_post_msg(sg_state->cs,
&sg_state->my_ctl_pointer->portals_buf_addr,
sg_state->my_userbuf, sg_state->fragment_size,
PTL_EQ_NONE,
total_msg_posts,
blocked_post,
PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE | PTL_MD_OP_GET
| PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE);
sg_state->msg_posted = true ;
}
opal_atomic_wmb();
sg_state->my_ctl_pointer->flag = sg_state->ready_flag;
return OMPI_SUCCESS;
}
/*
* Internode Scatter: Get data from my parent and post for my children
*/
static inline __opal_attribute_always_inline__
int sm_portals_internode_scatter(sg_state_t *sg_state)
{
int scatter_posts, allgather_posts, extra_src_posts,
total_msg_posts;
uint64_t local_offset, remote_offset;
/* compute the size of the chunk to copy */
sg_state->length = (sg_state->parent_ctl_pointer->length)/
(1<<(sg_state->parent_ctl_pointer->n_sends - sg_state->my_ctl_pointer->n_sends));
sg_state->my_ctl_pointer->length = sg_state->length;
sg_state->my_ctl_pointer->offset =
sg_state->parent_ctl_pointer->offset + sg_state->length;
local_offset = sg_state->my_ctl_pointer->offset;
remote_offset = sg_state->parent_ctl_pointer->offset +
sg_state->length;
mca_bcol_basesmuma_portals_get_msg_fragment(sg_state->cs,
sg_state->read_eq,
&sg_state->my_ctl_pointer->portals_buf_addr,
&sg_state->parent_ctl_pointer->portals_buf_addr,local_offset,
remote_offset,sg_state->length);
/* Now post the message for other children to read */
extra_src_posts = (sg_state->my_rank + sg_state->pow_2 <
sg_state->group_size ) ? 1: 0;
scatter_posts = sg_state->my_ctl_pointer->n_sends;
allgather_posts = sg_state->pow_2_levels - 1;
total_msg_posts = scatter_posts + allgather_posts + extra_src_posts ;
if (total_msg_posts > 0) {
mca_bcol_basesmuma_portals_post_msg(sg_state->cs, &sg_state->my_ctl_pointer->portals_buf_addr,
sg_state->my_userbuf, sg_state->my_ctl_pointer->portals_buf_addr.userbuf_length,
PTL_EQ_NONE,
total_msg_posts,
blocked_post,
PTL_MD_EVENT_START_DISABLE| PTL_MD_EVENT_END_DISABLE
| PTL_MD_OP_GET | PTL_MD_MANAGE_REMOTE | PTL_MD_TRUNCATE | PTL_MD_EVENT_AUTO_UNLINK_ENABLE);
sg_state->msg_posted = true;
}
/*
opal_atomic_wmb();
*/
sg_state->my_ctl_pointer->flag = sg_state->ready_flag;
return OMPI_SUCCESS;
}
/*
* Bcast's Allgather Phase:
* Combines data from all processes using recursive doubling algorithm
*/
static inline __opal_attribute_always_inline__
int sm_portals_bcasts_allgather_phase(sg_state_t *sg_state)
{
int ag_loop, partner;
volatile mca_bcol_basesmuma_ctl_struct_t *partner_ctl_pointer = NULL; /* recursive double */
for( ag_loop = 1; ag_loop < sg_state->pow_2_levels; ag_loop++) {
/* get my partner for this level */
partner = sg_state->my_rank^(1<<ag_loop);
partner_ctl_pointer = sg_state->ctl_structs[partner];
/* Block until partner is at this level of recursive-doubling stage */
while(!IS_SG_DATA_READY(partner_ctl_pointer, sg_state->ready_flag,
sg_state->sequence_number)) {
opal_progress();
}
assert(partner_ctl_pointer->flag >= sg_state->ready_flag);
if (partner_ctl_pointer->offset < sg_state->my_ctl_pointer->offset) {
sg_state->global_sg_offset -= sg_state->length;
sg_state->local_sg_offset = sg_state->global_sg_offset;
} else {
sg_state->local_sg_offset = sg_state->global_sg_offset + sg_state->length;
}
BASESMUMA_VERBOSE(10,("Allgather Phase: Get message from process %d, length %d",
partner, sg_state->length));
mca_bcol_basesmuma_portals_get_msg_fragment(sg_state->cs,
sg_state->read_eq,
&sg_state->my_ctl_pointer->portals_buf_addr,
&partner_ctl_pointer->portals_buf_addr,sg_state->local_sg_offset,
sg_state->local_sg_offset, sg_state->length);
sg_state->ready_flag++;
opal_atomic_wmb();
sg_state->my_ctl_pointer->flag = sg_state->ready_flag;
/* Block until partner is at this level of recursive-doubling stage */
while(!IS_SG_DATA_READY(partner_ctl_pointer, sg_state->ready_flag,
sg_state->sequence_number)) {
opal_progress();
}
/* double the length */
sg_state->length *= 2;
}
return OMPI_SUCCESS;
}
static inline __opal_attribute_always_inline__
int init_sm_group_info(sg_state_t *sg_state, int buff_idx)
{
int idx, leading_dim;
int first_instance=0;
int flag_offset;
/* Get addressing information */
sg_state->group_size = sg_state->bcol_module->colls_no_user_data.size_of_group;
leading_dim = sg_state->bcol_module->colls_no_user_data.size_of_group;
idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0);
BASESMUMA_VERBOSE(1,("My buffer idx %d group size %d, leading dim %d, idx %d",
buff_idx,sg_state->group_size,leading_dim,idx));
/* grab the ctl buffs */
sg_state->ctl_structs = (volatile mca_bcol_basesmuma_ctl_struct_t **)
sg_state->bcol_module->colls_with_user_data.ctl_buffs+idx;
sg_state->my_rank = sg_state->bcol_module->super.sbgp_partner_module->my_index;
sg_state->my_ctl_pointer = sg_state->ctl_structs[sg_state->my_rank];
if (sg_state->my_ctl_pointer->sequence_number < sg_state->sequence_number) {
first_instance = 1;
}
if(first_instance) {
sg_state->my_ctl_pointer->flag = -1;
sg_state->my_ctl_pointer->index = 1;
sg_state->my_ctl_pointer->starting_flag_value = 0;
flag_offset = 0;
} else {
sg_state->my_ctl_pointer->index++;
}
/* For bcast we should have only one entry to this bcol
assert(sg_state->my_ctl_pointer->flag == -1);
*/
/* increment the starting flag by one and return */
flag_offset = sg_state->my_ctl_pointer->starting_flag_value;
sg_state->ready_flag = flag_offset + sg_state->sequence_number + 1;
sg_state->my_ctl_pointer->sequence_number = sg_state->sequence_number;
return OMPI_SUCCESS;
}
static inline __opal_attribute_always_inline__
int init_sm_portals_sg_info(sg_state_t *sg_state)
{
/* Get portals info*/
mca_bcol_basesmuma_portal_proc_info_t *portals_info;
int rc = OMPI_SUCCESS;
int sg_matchbits;
portals_info = (mca_bcol_basesmuma_portal_proc_info_t*)sg_state->cs->portals_info;
sg_matchbits = sg_state->sequence_number ;
/* Construct my portal buffer address and copy to payload buffer */
mca_bcol_basesmuma_construct_portal_address(&sg_state->my_ctl_pointer->portals_buf_addr,
portals_info->portal_id.nid,
portals_info->portal_id.pid,
sg_matchbits,
sg_state->bcol_module->super.sbgp_partner_module->group_comm->c_contextid);
sg_state->my_ctl_pointer->portals_buf_addr.userbuf = sg_state->my_userbuf;
sg_state->my_ctl_pointer->portals_buf_addr.userbuf_length = sg_state->fragment_size;
return OMPI_SUCCESS;
}
static inline __opal_attribute_always_inline__
int compute_src_from_root(int group_root, int my_group_rank, int pow2, int
group_size)
{
int root, relative_rank, src, i;
if (group_root < pow2) {
root = group_root;
} else {
/* the source of the data is an extra node;
the real root is represented by some rank from
the pow2 group */
root = group_root - pow2;
/* shortcut for the case when my rank is root for the group */
if (my_group_rank == root) {
return group_root;
}
}
relative_rank = (my_group_rank - root) < 0 ? my_group_rank - root + pow2 :
my_group_rank - root;
for (i = 1; i < pow2; i<<=1) {
if (relative_rank & i) {
src = my_group_rank ^ i;
if (src >= pow2)
src -= pow2;
return src;
}
}
return -1;
}
int bcol_basesmuma_lmsg_scatter_allgather_portals_bcast(bcol_function_args_t *input_args,
mca_bcol_base_function_t *c_input_args);
int bcol_basesmuma_lmsg_scatter_allgather_portals_nb_bcast(bcol_function_args_t *input_args,
mca_bcol_base_function_t *c_input_args);
int bcol_basesmuma_lmsg_scatter_allgather_portals_nb_knownroot_bcast(bcol_function_args_t *input_args,
mca_bcol_base_function_t *c_input_args);
#endif
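
The scatter-allgather broadcast above finishes with a recursive-doubling allgather: at level L each rank exchanges with the partner my_rank XOR (1 << L), and the amount of data it holds doubles after every exchange. A standalone sketch of that schedule, with the rank, level count, and fragment size chosen only for illustration:

#include <stdio.h>

int main(void)
{
    int my_rank = 5;              /* assumed rank within the power-of-two subgroup */
    int pow_2_levels = 4;         /* assumed: 16-rank group, log2(16) levels */
    unsigned long length = 4096;  /* assumed size of the scattered piece */

    for (int level = 1; level < pow_2_levels; level++) {
        int partner = my_rank ^ (1 << level);
        printf("level %d: fetch %lu bytes from rank %d\n", level, length, partner);
        length *= 2;  /* each exchange doubles the contiguous region this rank holds */
    }
    return 0;
}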

View file

@ -1,452 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
/* #define __PORTALS_AVAIL__ */
#ifdef __PORTALS_AVAIL__
#define __PORTALS_ENABLE__
#include "ompi/mca/bcol/basesmuma/bcol_basesmuma.h"
#include "ompi/constants.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h"
#include "bcol_basesmuma_utils.h"
#include "bcol_basesmuma_portals.h"
/* debug */
#include <unistd.h>
/* end debug */
/**
* Shared memory non-blocking Broadcast - K-nomial fan-out for small data buffers.
* This routine assumes that buf (the input buffer) is a single-writer
* multi-reader (SWMR) shared memory buffer owned by the calling rank,
* which is the only rank that can write to this buffer.
* It is also assumed that the buffers are registered and fragmented
* at the ML level and that buf is sufficiently large to hold the data.
*
*
* @param buf - SWMR shared buffer within a sbgp that the
* executing rank can write to.
* @param count - the number of elements in the shared buffer.
* @param dtype - the datatype of a shared buffer element.
* @param root - the index within the sbgp of the root.
* @param module - basesmuma module.
*/
int bcol_basesmuma_lmsg_bcast_k_nomial_anyroot(bcol_function_args_t *input_args,
mca_bcol_base_function_t *c_input_args)
{
#if 0
/* local variables */
mca_bcol_basesmuma_module_t* bcol_module=
(mca_bcol_basesmuma_module_t *)c_input_args->bcol_module;
mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component;
int i, matched = 0;
int src=-1;
int group_size;
int my_rank, first_instance=0, flag_offset;
int rc = OMPI_SUCCESS;
int leading_dim, buff_idx, idx;
int count=input_args->count;
struct ompi_datatype_t* dtype=input_args->dtype;
int64_t sequence_number=input_args->sequence_num;
volatile int64_t ready_flag;
volatile mca_bcol_basesmuma_payload_t *data_buffs;
volatile char* parent_data_pointer;
volatile mca_bcol_basesmuma_header_t *parent_ctl_pointer;
volatile mca_bcol_basesmuma_header_t *my_ctl_pointer;
void *userbuf = (void *)((unsigned char *)input_args->userbuf);
size_t pack_len = 0, dt_size;
struct mca_bcol_basesmuma_portal_buf_addr_t *my_lmsg_ctl_pointer = NULL;
struct mca_bcol_basesmuma_portal_buf_addr_t *parent_lmsg_ctl_pointer = NULL;
mca_bcol_basesmuma_portal_proc_info_t *portals_info;
portals_info = (mca_bcol_basesmuma_portal_proc_info_t*)cs->portals_info;
/* we will work only on packed data - so compute the length*/
ompi_datatype_type_size(dtype, &dt_size);
pack_len=count*dt_size;
buff_idx = input_args->src_desc->buffer_index;
/* Get addressing information */
my_rank = bcol_module->super.sbgp_partner_module->my_index;
group_size = bcol_module->colls_no_user_data.size_of_group;
leading_dim=bcol_module->colls_no_user_data.size_of_group;
idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0);
data_buffs=(volatile mca_bcol_basesmuma_payload_t *)
bcol_module->colls_with_user_data.data_buffs+idx;
/* Set pointer to current proc ctrl region */
my_ctl_pointer = data_buffs[my_rank].ctl_struct;
my_lmsg_ctl_pointer = (mca_bcol_basesmuma_portal_buf_addr_t*) data_buffs[my_rank].payload;
/* setup resource recycling */
if( my_ctl_pointer->sequence_number < sequence_number ) {
first_instance=1;
}
if( first_instance ) {
/* Signal arrival */
my_ctl_pointer->flag = -1;
my_ctl_pointer->index=1;
/* this does not need to use any flag values , so only need to
* set the value for subsequent values that may need this */
my_ctl_pointer->starting_flag_value=0;
flag_offset=0;
} else {
/* only one thread at a time will be making progress on this
* collective, so no need to make this atomic */
my_ctl_pointer->index++;
}
/* increment the starting flag by one and return */
flag_offset = my_ctl_pointer->starting_flag_value;
ready_flag = flag_offset + sequence_number + 1;
my_ctl_pointer->sequence_number = sequence_number;
/* Construct my portal buffer address and copy to payload buffer */
mca_bcol_basesmuma_construct_portal_address(my_lmsg_ctl_pointer,
portals_info->portal_id.nid,
portals_info->portal_id.pid,
sequence_number,
bcol_module->super.sbgp_partner_module->group_comm->c_contextid);
/* non-blocking broadcast algorithm */
/* If I am the root, then signal ready flag */
if(input_args->root_flag) {
ptl_handle_eq_t eq_h;
ptl_event_t event;
int ret;
BASESMUMA_VERBOSE(10,("I am the root of the data"));
/* create an event queue for the incoming buffer */
ret = PtlEQAlloc(((mca_bcol_basesmuma_portal_proc_info_t*)
cs->portals_info)->ni_h, MAX_PORTAL_EVENTS_IN_Q, PTL_EQ_HANDLER_NONE, &eq_h);
if (ret != PTL_OK) {
fprintf(stderr, "PtlEQAlloc() failed: %d \n",ret);
return OMPI_ERR_OUT_OF_RESOURCE;
}
/* Post the message using portal copy */
mca_bcol_basesmuma_portals_post_msg_nb_nopers(cs, my_lmsg_ctl_pointer, userbuf,
pack_len, eq_h, my_lmsg_ctl_pointer->nsends);
/*
* signal ready flag
*/
my_ctl_pointer->flag = ready_flag;
/* wait for a response from the client */
mca_bcol_basesmuma_portals_wait_event_nopers(eq_h, POST_MSG_EVENT,
&event, my_lmsg_ctl_pointer->nsends);
/* free the event queue */
ret = PtlEQFree(eq_h);
if (ret != PTL_OK) {
fprintf(stderr, "PtlEQFree() failed: %d )\n",ret);
}
/* root is finished */
goto Release;
}
/* If I am not the root, then poll on possible "senders'" control structs */
for( i = 0; i < cs->num_to_probe && 0 == matched; i++) {
/* Shared memory iprobe */
/*
BCOL_BASESMUMA_SM_PROBE(bcol_module->src, bcol_module->src_size,
my_rank, matched, src);
*/
do {
int j, n_src, my_index;
n_src = bcol_module->src_size;
for( j = 0; j < n_src; j++) {
parent_ctl_pointer = data_buffs[bcol_module->src[j]].ctl_struct;
parent_lmsg_ctl_pointer = (mca_bcol_basesmuma_portal_buf_addr_t *)
data_buffs[bcol_module->src[j]].payload;
if (IS_DATA_READY(parent_ctl_pointer,ready_flag,sequence_number)) {
src = bcol_module->src[j];
matched = 1;
break;
}
}
} while(0);
}
/* If not matched, then hop out and put me on progress list */
if(0 == matched ) {
BASESMUMA_VERBOSE(10,("Shared memory probe didn't find a match"));
return BCOL_FN_NOT_STARTED;
}
/* else, we found our root within the group ... */
BASESMUMA_VERBOSE(10,("Shared memory probe was matched, the root is %d", src));
/* receive the data from sender */
/* get the data buff */
/* taken care of in the macro */
/*parent_data_pointer = data_buffs[src].payload;*/
/* copy the data */
mca_bcol_basesmuma_portals_get_msg(cs, parent_lmsg_ctl_pointer, userbuf, pack_len);
/* set the memory barrier to ensure completion */
opal_atomic_wmb ();
/* signal that I am done */
my_ctl_pointer->flag = ready_flag;
/* am I the last one? If so, release buffer */
Release:
my_ctl_pointer->starting_flag_value++;
return BCOL_FN_COMPLETE;
#endif
}
#if 0
#define BASESMUMA_K_NOMIAL_SEND_SIGNAL(radix_mask, radix, my_relative_index, \
my_group_index, group_size,sm_data_buffs,sender_ready_flag, \
num_pending_sends) \
{ \
int k, rc; \
int dst; \
int comm_dst; \
volatile mca_bcol_basesmuma_header_t *recv_ctl_pointer = NULL; \
volatile mca_bcol_basesmuma_portal_buf_addr_t *recv_lmsg_ctl_pointer = NULL; \
\
num_pending_sends = 0; \
while(radix_mask > 0) { \
/* For each level of tree, do sends */ \
for (k = 1; \
k < radix && my_relative_index + radix_mask * k < group_size; \
++k) { \
\
dst = my_group_index + radix_mask * k; \
if (dst >= group_size) { \
dst -= group_size; \
} \
/* Signal the children to get data */ \
recv_ctl_pointer = data_buffs[dst].ctl; \
recv_lmsg_ctl_pointer = (mca_bcol_basesmuma_portal_buf_addr_t *) \
data_buffs[dst].payload; \
recv_lmsg_ctl_pointer->src_index = my_group_index; \
recv_lmsg_ctl_pointer->flag = sender_ready_flag; \
++num_pending_sends; \
} \
radix_mask /= radix; \
} \
\
}
int bcol_basesmuma_lmsg_bcast_k_nomial_anyroot(bcol_function_args_t *input_args,
mca_bcol_base_function_t *c_input_args)
{
/* local variables */
mca_bcol_basesmuma_module_t* bcol_module=
(mca_bcol_basesmuma_module_t *)c_input_args->bcol_module;
mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component;
int i, matched = 0;
int src=-1;
int group_size;
int my_rank, first_instance=0, flag_offset;
int rc = OMPI_SUCCESS;
int leading_dim, buff_idx, idx;
int count=input_args->count;
struct ompi_datatype_t* dtype=input_args->dtype;
int64_t sequence_number=input_args->sequence_num;
volatile int64_t ready_flag;
volatile mca_bcol_basesmuma_payload_t *data_buffs;
volatile char* parent_data_pointer;
volatile mca_bcol_basesmuma_header_t *parent_ctl_pointer;
volatile mca_bcol_basesmuma_header_t *my_ctl_pointer;
void *userbuf = (void *)((unsigned char *)input_args->userbuf);
size_t pack_len = 0, dt_size;
struct mca_bcol_basesmuma_portal_buf_addr_t *my_lmsg_ctl_pointer = NULL;
struct mca_bcol_basesmuma_portal_buf_addr_t *parent_lmsg_ctl_pointer = NULL;
mca_bcol_basesmuma_portal_proc_info_t *portals_info;
portals_info = (mca_bcol_basesmuma_portal_proc_info_t*)cs->portals_info;
/* we will work only on packed data - so compute the length*/
ompi_datatype_type_size(dtype, &dt_size);
pack_len=count*dt_size;
buff_idx = input_args->src_desc->buffer_index;
/* Get addressing information */
my_rank = bcol_module->super.sbgp_partner_module->my_index;
group_size = bcol_module->colls_no_user_data.size_of_group;
leading_dim=bcol_module->colls_no_user_data.size_of_group;
idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0);
data_buffs=(volatile mca_bcol_basesmuma_payload_t *)
bcol_module->colls_with_user_data.data_buffs+idx;
/* Set pointer to current proc ctrl region */
my_ctl_pointer = data_buffs[my_rank].ctl_struct;
my_lmsg_ctl_pointer = (mca_bcol_basesmuma_portal_buf_addr_t*) data_buffs[my_rank].payload;
/* setup resource recycling */
if( my_ctl_pointer->sequence_number < sequence_number ) {
first_instance=1;
}
if( first_instance ) {
/* Signal arrival */
my_ctl_pointer->flag = -1;
my_ctl_pointer->index=1;
/* this does not need to use any flag values , so only need to
* set the value for subsequent values that may need this */
my_ctl_pointer->starting_flag_value=0;
flag_offset=0;
} else {
/* only one thread at a time will be making progress on this
* collective, so no need to make this atomic */
my_ctl_pointer->index++;
}
/* increment the starting flag by one and return */
flag_offset = my_ctl_pointer->starting_flag_value;
ready_flag = flag_offset + sequence_number + 1;
my_ctl_pointer->sequence_number = sequence_number;
/* Construct my portal buffer address and copy to payload buffer */
mca_bcol_basesmuma_construct_portal_address(my_lmsg_ctl_pointer,
portals_info->portal_id.nid,
portals_info->portal_id.pid,
sequence_number,
bcol_module->super.sbgp_partner_module->group_comm->c_contextid);
my_lmsg_ctl_pointer->userbuf = userbuf;
my_lmsg_ctl_pointer->userbuf_length = pack_len;
/* create an event queue */
ret = PtlEQAlloc(((mca_bcol_basesmuma_portal_proc_info_t*)
cs->portals_info)->ni_h, MAX_PORTAL_EVENTS_IN_Q, PTL_EQ_HANDLER_NONE, &eq_h);
/* non-blocking broadcast algorithm */
/* If I am the root, then signal ready flag */
if(input_args->root_flag) {
ptl_handle_eq_t eq_h;
ptl_event_t event;
int ret;
int root_radix_mask = sm_module->pow_knum;
BASESMUMA_VERBOSE(10,("I am the root of the data"));
if (ret != PTL_OK) {
fprintf(stderr, "PtlEQAlloc() failed: %d \n",ret);
return OMPI_ERR_OUT_OF_RESOURCE;
}
BASESMUMA_K_NOMIAL_SEND_SIGNAL(root_radix_mask, radix, 0,
my_rank, group_size, data_buffs, ready_flag, nsends) ;
mca_bcol_basesmuma_portals_post_msg_nb_nopers(cs, my_lmsg_ctl_pointer, userbuf,
pack_len, eq_h, nsends);
/* wait for a response from the client */
mca_bcol_basesmuma_portals_wait_event_nopers(eq_h, POST_MSG_EVENT,
&event, nsends);
/* root is finished */
goto Release;
}
/* I'm not the root, so wait until someone posts data and
* compute where to get the data from */
while (my_ctl_pointer->flag != ready_flag) ;
my_data_source_index = lmsg_ctl_pointer->src_index;
parent_lmsg_ctl_pointer = (mca_bcol_basesmuma_portal_buf_addr_t *)
data_buffs[my_data_source_index].payload;
mca_bcol_basesmuma_portals_get_msg(cs, parent_lmsg_ctl_pointer, userbuf, pack_len);
/* I am done getting data, should I send the data to someone */
my_relative_index = (my_rank - my_data_source_index) < 0 ? my_rank -
my_data_source_index + group_size : my_rank - my_data_source_index;
/*
* 2. Locate myself in the tree:
* calculate number of radix steps that we should to take
*/
radix_mask = 1;
while (radix_mask < group_size) {
if (0 != my_relative_index % (radix * radix_mask)) {
/* I found my level in tree */
break;
}
radix_mask *= radix;
}
/* go one step back */
radix_mask /=radix;
BASESMUMA_K_NOMIAL_SEND_SIGNAL(radix_mask, radix, my_relative_index,
my_rank, group_size,data_buffs,ready_flag,nsends)
mca_bcol_basesmuma_portals_post_msg_nb_nopers(cs, my_lmsg_ctl_pointer, userbuf,
pack_len, eq_h, nsends);
/* wait for children to read */
mca_bcol_basesmuma_portals_wait_event_nopers(eq_h, POST_MSG_EVENT,
&event, nsends);
Release:
/* free the event queue */
ret = PtlEQFree(eq_h);
if (ret != PTL_OK) {
fprintf(stderr, "PtlEQFree() failed: %d )\n",ret);
}
my_ctl_pointer->starting_flag_value++;
return BCOL_FN_COMPLETE;
}
#endif
#endif
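
The BASESMUMA_K_NOMIAL_SEND_SIGNAL macro above walks the k-nomial tree from the largest radix power down, signalling up to radix-1 ranks at each level, each offset from the sender by the current radix mask. A rough standalone sketch of which ranks get signalled (hypothetical demo, not part of the bcol code):

#include <stdio.h>

static void demo_knomial_targets(int my_index, int my_relative_index,
                                 int radix, int radix_mask, int group_size)
{
    while (radix_mask > 0) {
        for (int k = 1; k < radix && my_relative_index + radix_mask * k < group_size; ++k) {
            int dst = (my_index + radix_mask * k) % group_size;  /* wrap like the macro does */
            printf("signal rank %d (mask %d)\n", dst, radix_mask);
        }
        radix_mask /= radix;
    }
}

int main(void)
{
    /* e.g. the root of an 8-rank radix-2 tree signals ranks 4, 2, 1 */
    demo_knomial_targets(0 /* my_index */, 0 /* relative index */, 2, 8, 8);
    return 0;
}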

View file

@ -1,101 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "bcol_basesmuma.h"
/* Shared memory registration function: Calls into the "shared memory
connection manager" (aka - smcm) and registers a chunk of memory by
opening and mmaping a file.
@input:
void *context_data - shared memory specific data needed by the registration
function.
void *base - pointer to the base memory address.
size_t size - size of the memory chunk to be registered with sm.
void **reg_desc - registration data is cached here.
@output:
returns OMPI_SUCCESS on successful registration.
returns OMPI_ERROR on failure.
*/
int mca_bcol_basesmuma_register_sm(void *context_data, void *base, size_t size,
void **reg_desc)
{
/* local variables */
int ret = OMPI_SUCCESS;
mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component;
bcol_basesmuma_registration_data_t *sm_reg =
(bcol_basesmuma_registration_data_t*) context_data;
/* cache some info on sm_reg aka "context_data", you'll need it later */
sm_reg->base_addr = base;
sm_reg->size = size;
/* call into the shared memory registration function in smcm
* we need to be sure that the memory is page aligned in order
* to "map_fixed"
*/
sm_reg->sm_mmap = bcol_basesmuma_smcm_mem_reg(base, size,
sm_reg->data_seg_alignment,
sm_reg->file_name);
if(NULL == sm_reg->sm_mmap) {
opal_output (ompi_bcol_base_framework.framework_output, "Bcol_basesmuma memory registration error");
return OMPI_ERROR;
}
/* don't let other communicators re-register me! */
cs->mpool_inited = true;
/* alias back to component */
cs->sm_payload_structs = sm_reg->sm_mmap;
return ret;
}
/* Shared memory deregistration function - deregisters memory by munmapping it and removing the
shared memory file.
Basic steps (please let me know if this is incompatible with your notion of deregistration
or if it causes problems on cleanup):
1. munmap the shared memory file.
2. set the base pointer to the mmaped memory to NULL.
3. permanently remove the shared memory file from the directory.
*/
int mca_bcol_basesmuma_deregister_sm(void *context_data, void *reg)
{
/* local variables */
bcol_basesmuma_registration_data_t *sm_reg =
(bcol_basesmuma_registration_data_t*) context_data;
if (sm_reg->sm_mmap) {
OBJ_RELEASE(sm_reg->sm_mmap);
}
/* set the pointer to NULL */
sm_reg->base_addr = NULL;
return OMPI_SUCCESS;
}
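
The smcm routine called above, bcol_basesmuma_smcm_mem_reg, is not part of this hunk; as a rough illustration of the open-and-mmap pattern the comment describes, here is a minimal POSIX sketch with a hypothetical backing-file name and only basic error handling:

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
    const char *file_name = "/tmp/bcol_demo_payload";  /* hypothetical path */
    size_t size = 1 << 20;                             /* 1 MiB backing region */

    int fd = open(file_name, O_CREAT | O_RDWR, 0600);
    if (fd < 0) { perror("open"); return 1; }

    /* size the backing file, then map it shared so other local ranks can attach */
    if (ftruncate(fd, (off_t) size) != 0) { perror("ftruncate"); return 1; }
    void *base = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (MAP_FAILED == base) { perror("mmap"); return 1; }

    printf("mapped %zu bytes at %p backed by %s\n", size, base, file_name);

    munmap(base, size);
    close(fd);
    unlink(file_name);
    return 0;
}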

View file

@ -1,687 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
*/
#include "ompi_config.h"
#include "ompi/constants.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/bcol/bcol.h"
#include "ompi/mca/bcol/base/base.h"
#include "ompi/patterns/net/netpatterns.h"
#include "opal/util/show_help.h"
#include "opal/align.h"
#include "ompi/mca/bcol/basesmuma/bcol_basesmuma_reduce.h"
#include "bcol_basesmuma.h"
#include "bcol_basesmuma_utils.h"
#ifdef __PORTALS_AVAIL__
#include "bcol_basesmuma_portals.h"
#endif
/*
* Local functions
*/
static int alloc_lmsg_reduce_offsets_array(mca_bcol_basesmuma_module_t *sm_module)
{
int rc = OMPI_SUCCESS, i = 0;
netpatterns_k_exchange_node_t *k_node = &sm_module->knomial_exchange_tree;
int n_exchanges = k_node->n_exchanges;
/* Precalculate the allreduce offsets */
if (0 < k_node->n_exchanges) {
sm_module->reduce_offsets = (int **)malloc(n_exchanges * sizeof(int*));
if (!sm_module->reduce_offsets) {
rc = OMPI_ERROR;
return rc;
}
for (i=0; i < n_exchanges ; i++) {
sm_module->reduce_offsets[i] = (int *)malloc (sizeof(int) * NOFFSETS);
if (!sm_module->reduce_offsets[i]){
rc = OMPI_ERROR;
return rc;
}
}
}
return rc;
}
static int free_lmsg_reduce_offsets_array(mca_bcol_basesmuma_module_t *sm_module)
{
int rc = OMPI_SUCCESS, i = 0;
netpatterns_k_exchange_node_t *k_node = &sm_module->knomial_exchange_tree;
int n_exchanges = k_node->n_exchanges;
if (sm_module->reduce_offsets) {
for (i=0; i < n_exchanges; i++) {
free (sm_module->reduce_offsets[i]);
}
free(sm_module->reduce_offsets);
}
return rc;
}
static void
mca_bcol_basesmuma_module_construct(mca_bcol_basesmuma_module_t *module)
{
/* initialize all values to 0 */
memset((void*)((uintptr_t) module + sizeof (module->super)), 0, sizeof (*module) - sizeof (module->super));
module->super.bcol_component = (mca_bcol_base_component_t *) &mca_bcol_basesmuma_component;
module->super.list_n_connected = NULL;
module->super.hier_scather_offset = 0;
}
static void
mca_bcol_basesmuma_module_destruct(mca_bcol_basesmuma_module_t *sm_module)
{
/* local variables */
mca_sbgp_base_module_t *sbgp_module = sm_module->super.sbgp_partner_module;
mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component;
/*
* release allocated resources
*/
/* ...but not until you're sure you have no outstanding collectives */
while(0 != opal_list_get_size(&(cs->nb_admin_barriers))) {
opal_progress();
}
#ifdef __PORTALS_AVAIL__
/* Remove portals bcast specific resources */
if ( PTL_OK != PtlEQFree(sm_module->sg_state.read_eq)) {
BASESMUMA_VERBOSE(10,("PtlEQFree() failed: )"));
}
#endif
/* Remove Lmsg Reduce Offsets Array */
free_lmsg_reduce_offsets_array(sm_module);
/* collective topology data */
if( sm_module->fanout_read_tree) {
for (int i = 0 ; i < sm_module->super.size_of_subgroup ; i++ ) {
if(0 < sm_module->fanout_read_tree[i].n_children ) {
free(sm_module->fanout_read_tree[i].children_ranks);
sm_module->fanout_read_tree[i].children_ranks=NULL;
}
}
free(sm_module->fanout_read_tree);
sm_module->fanout_read_tree=NULL;
}
/* gvm Leak FIX: Reduction_tree[].children_ranks has
* to be removed. I don't know how to get the size (which is
* the size of the subgroup) of the reduction_tree array
*/
if( sm_module->reduction_tree) {
for (int i = 0 ; i < sm_module->super.size_of_subgroup ; i++ ) {
if(0 < sm_module->reduction_tree[i].n_children ) {
free(sm_module->reduction_tree[i].children_ranks);
sm_module->reduction_tree[i].children_ranks=NULL;
}
}
free(sm_module->reduction_tree);
sm_module->reduction_tree=NULL;
}
/* gvm Leak FIX */
if (sm_module->fanout_node.children_ranks){
free(sm_module->fanout_node.children_ranks);
sm_module->fanout_node.children_ranks = NULL;
}
if (sm_module->fanin_node.children_ranks){
free(sm_module->fanin_node.children_ranks);
sm_module->fanin_node.children_ranks = NULL;
}
/* colls_no_user_data resources */
if(sm_module->colls_no_user_data.ctl_buffs_mgmt){
free(sm_module->colls_no_user_data.ctl_buffs_mgmt);
sm_module->colls_no_user_data.ctl_buffs_mgmt=NULL;
}
if(sm_module->colls_no_user_data.ctl_buffs){
free(sm_module->colls_no_user_data.ctl_buffs);
sm_module->colls_no_user_data.ctl_buffs=NULL;
}
/* return control */
opal_list_append (&cs->ctl_structures, (opal_list_item_t *) sm_module->no_userdata_ctl);
/* colls_with_user_data resources */
/*
*debug print */
/*
fprintf(stderr,"AAA colls_with_user_data.ctl_buffs %p \n",
sm_module->colls_with_user_data.ctl_buffs_mgmt);
end debug */
if(sm_module->colls_with_user_data.ctl_buffs_mgmt){
free(sm_module->colls_with_user_data.ctl_buffs_mgmt);
sm_module->colls_with_user_data.ctl_buffs_mgmt=NULL;
}
if(sm_module->colls_with_user_data.ctl_buffs){
free(sm_module->colls_with_user_data.ctl_buffs);
sm_module->colls_with_user_data.ctl_buffs=NULL;
}
if(sm_module->shared_memory_scratch_space) {
free(sm_module->shared_memory_scratch_space);
sm_module->shared_memory_scratch_space=NULL;
}
/* return control */
opal_list_append (&cs->ctl_structures, (opal_list_item_t *) sm_module->userdata_ctl);
#if 1
if(sm_module->scatter_kary_tree) {
for (int i = 0 ; i < sm_module->super.size_of_subgroup ; i++ ) {
if(0 < sm_module->scatter_kary_tree[i].n_children) {
free(sm_module->scatter_kary_tree[i].children_ranks);
sm_module->scatter_kary_tree[i].children_ranks=NULL;
}
}
free(sm_module->scatter_kary_tree);
}
#endif
if(NULL != sm_module->super.list_n_connected ){
free(sm_module->super.list_n_connected);
sm_module->super.list_n_connected = NULL;
}
cleanup_nb_coll_buff_desc(&sm_module->ml_mem.nb_coll_desc,
sm_module->ml_mem.num_banks,
sm_module->ml_mem.num_buffers_per_bank);
for (int i = 0; i < BCOL_NUM_OF_FUNCTIONS; i++){
/* gvm FIX: Go through the list and destroy each item */
/* Destroy the function table object for each bcol type list */
OPAL_LIST_DESTRUCT((&sm_module->super.bcol_fns_table[i]));
}
if (NULL != sm_module->payload_backing_files_info) {
bcol_basesmuma_smcm_release_connections (sm_module, sbgp_module, &cs->sm_connections_list,
&sm_module->payload_backing_files_info);
}
if (NULL != sm_module->ctl_backing_files_info) {
bcol_basesmuma_smcm_release_connections (sm_module, sbgp_module, &cs->sm_connections_list,
&sm_module->ctl_backing_files_info);
}
if (NULL != sm_module->ml_mem.bank_release_counter) {
free(sm_module->ml_mem.bank_release_counter);
sm_module->ml_mem.bank_release_counter = NULL;
}
if (NULL != sm_module->colls_with_user_data.data_buffs) {
free((void *)sm_module->colls_with_user_data.data_buffs);
sm_module->colls_with_user_data.data_buffs = NULL;
}
/* free the k-nomial allgather tree here */
netpatterns_cleanup_recursive_knomial_allgather_tree_node(&sm_module->knomial_allgather_tree);
netpatterns_cleanup_recursive_doubling_tree_node(&sm_module->recursive_doubling_tree);
netpatterns_cleanup_recursive_knomial_tree_node(&sm_module->knomial_exchange_tree);
/* done */
}
static void bcol_basesmuma_set_small_msg_thresholds(struct mca_bcol_base_module_t *super)
{
mca_bcol_basesmuma_module_t *basesmuma_module =
(mca_bcol_basesmuma_module_t *) super;
size_t basesmuma_offset = bcol_basesmuma_data_offset_calc(basesmuma_module);
/* Set the Allreduce threshold, for Basesmuma it equals to ML buffer size - data offset */
super->small_message_thresholds[BCOL_ALLREDUCE] =
basesmuma_module->ml_mem.ml_mem_desc->size_buffer - basesmuma_offset;
/* Set the Bcast threshold, for Basesmuma it equals to ML buffer size - data offset */
super->small_message_thresholds[BCOL_BCAST] =
basesmuma_module->ml_mem.ml_mem_desc->size_buffer - basesmuma_offset;
/* Set the Gather threshold, for Basesmuma it equals to ML buffer size - data offset */
super->small_message_thresholds[BCOL_GATHER] =
(basesmuma_module->ml_mem.ml_mem_desc->size_buffer - basesmuma_offset) /
ompi_comm_size(basesmuma_module->super.sbgp_partner_module->group_comm);
/* Set the ALLgather threshold, for Basesmuma it equals to ML buffer size - data offset */
super->small_message_thresholds[BCOL_ALLGATHER] =
(basesmuma_module->ml_mem.ml_mem_desc->size_buffer - basesmuma_offset) /
ompi_comm_size(basesmuma_module->super.sbgp_partner_module->group_comm);
/* Set the Reduce threshold, for Basesmuma it equals to ML buffer size - data offset */
super->small_message_thresholds[BCOL_REDUCE] =
basesmuma_module->ml_mem.ml_mem_desc->size_buffer - basesmuma_offset;
/* Set the Scatter threshold, for Basesmuma it equals to ML buffer size - data offset */
super->small_message_thresholds[BCOL_SCATTER] =
basesmuma_module->ml_mem.ml_mem_desc->size_buffer - basesmuma_offset;
}
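
Every threshold above is the ML payload buffer size minus the basesmuma data offset, with the gather and allgather thresholds further divided by the communicator size. A back-of-the-envelope sketch with made-up numbers:

#include <stdio.h>

int main(void)
{
    size_t buffer_size = 65536;   /* assumed ML payload buffer size */
    size_t data_offset = 512;     /* assumed basesmuma data offset */
    int    comm_size   = 16;      /* assumed ranks in the shared-memory group */

    size_t usable = buffer_size - data_offset;
    printf("bcast/allreduce/reduce/scatter threshold: %zu bytes\n", usable);
    printf("gather/allgather threshold: %zu bytes\n", usable / comm_size);
    return 0;
}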
/* setup memory management and collective routines */
static void load_func(mca_bcol_base_module_t *super)
{
int fnc;
/* Loading memory management and collective functions */
for (fnc = 0; fnc < BCOL_NUM_OF_FUNCTIONS; fnc++) {
super->bcol_function_table[fnc] = NULL;
}
/*super->bcol_function_table[BCOL_BARRIER] = bcol_basesmuma_recursive_double_barrier;*/
#ifdef __PORTALS_AVAIL__
super->bcol_function_table[BCOL_BCAST] = bcol_basesmuma_lmsg_scatter_allgather_portals_bcast;
/* super->bcol_function_table[BCOL_BCAST] =
bcol_basesmuma_lmsg_bcast_k_nomial_anyroot; */
#endif
/*super->bcol_function_table[BCOL_BCAST] = bcol_basesmuma_bcast;*/
/*super->bcol_function_table[BCOL_BCAST] = bcol_basesmuma_binary_scatter_allgather_segment;*/
/*super->bcol_function_table[BCOL_BCAST] = bcol_basesmuma_bcast_k_nomial_anyroot;*/
super->bcol_function_table[BCOL_BCAST] = bcol_basesmuma_bcast;
#ifdef __PORTALS_AVAIL__
super->bcol_function_table[BCOL_BCAST] =
bcol_basesmuma_lmsg_scatter_allgather_portals_bcast;
#endif
/* super->bcol_function_table[BCOL_ALLREDUCE] = bcol_basesmuma_allreduce_intra_fanin_fanout; */
super->bcol_function_table[BCOL_ALLREDUCE] = bcol_basesmuma_allreduce_intra_recursive_doubling;
super->bcol_function_table[BCOL_REDUCE] = bcol_basesmuma_reduce_intra_fanin_old;
/* memory management */
super->bcol_memory_init = bcol_basesmuma_bank_init_opti;
super->k_nomial_tree = bcol_basesmuma_setup_knomial_tree;
/* Set thresholds */
super->set_small_msg_thresholds = bcol_basesmuma_set_small_msg_thresholds;
}
static void load_func_with_choices(mca_bcol_base_module_t *super)
{
int fnc;
/* Loading memory management and collective functions */
for (fnc=0; fnc < BCOL_NUM_OF_FUNCTIONS; fnc++) {
super->bcol_function_init_table[fnc] = NULL;
}
super->bcol_function_init_table[BCOL_FANIN] = bcol_basesmuma_fanin_init;
super->bcol_function_init_table[BCOL_FANOUT] = bcol_basesmuma_fanout_init;
super->bcol_function_init_table[BCOL_BARRIER] = bcol_basesmuma_barrier_init;
super->bcol_function_init_table[BCOL_BCAST] = bcol_basesmuma_bcast_init;
super->bcol_function_init_table[BCOL_ALLREDUCE] = bcol_basesmuma_allreduce_init;
super->bcol_function_init_table[BCOL_REDUCE] = bcol_basesmuma_reduce_init;
super->bcol_function_init_table[BCOL_GATHER] = bcol_basesmuma_gather_init;
super->bcol_function_init_table[BCOL_ALLGATHER] = bcol_basesmuma_allgather_init;
super->bcol_function_init_table[BCOL_SYNC] = bcol_basesmuma_memsync_init;
/* memory management */
super->bcol_memory_init = bcol_basesmuma_bank_init_opti;
super->k_nomial_tree = bcol_basesmuma_setup_knomial_tree;
}
static int load_recursive_knomial_info(mca_bcol_basesmuma_module_t
*sm_module)
{
int rc = OMPI_SUCCESS;
rc = netpatterns_setup_recursive_knomial_tree_node(sm_module->super.sbgp_partner_module->group_size,
sm_module->super.sbgp_partner_module->my_index,
mca_bcol_basesmuma_component.k_nomial_radix,
&sm_module->knomial_exchange_tree);
return rc;
}
int bcol_basesmuma_setup_knomial_tree(mca_bcol_base_module_t *super)
{
mca_bcol_basesmuma_module_t *sm_module = (mca_bcol_basesmuma_module_t *) super;
return netpatterns_setup_recursive_knomial_allgather_tree_node(sm_module->super.sbgp_partner_module->group_size,
sm_module->super.sbgp_partner_module->my_index,
mca_bcol_basesmuma_component.k_nomial_radix,
super->list_n_connected,
&sm_module->knomial_allgather_tree);
}
/* query to see if the module is available for use on the given
* communicator, and if so, what it's priority is. This is where
* the backing shared-memory file is created.
*/
mca_bcol_base_module_t **
mca_bcol_basesmuma_comm_query(mca_sbgp_base_module_t *module, int *num_modules)
{
/* local variables */
mca_bcol_base_module_t **sm_modules = NULL;
mca_bcol_basesmuma_module_t *sm_module;
bcol_basesmuma_registration_data_t *sm_reg_data;
int ret, my_rank, name_length;
char *name;
int i;
int bcast_radix;
mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component;
/*mca_base_component_list_item_t *hdl_cli = NULL;*/
/*int hdl_num;*/
/* at this point I think there is only a single shared
memory bcol that we need to be concerned with */
/* No group, no modules */
if (OPAL_UNLIKELY(NULL == module)) {
return NULL;
}
/* allocate and initialize an sm_bcol module */
sm_module = OBJ_NEW(mca_bcol_basesmuma_module_t);
/* set the subgroup */
sm_module->super.sbgp_partner_module=module;
(*num_modules)=1;
cs->super.n_net_contexts = *num_modules;
sm_module->reduction_tree = NULL;
sm_module->fanout_read_tree = NULL;
ret=netpatterns_setup_recursive_doubling_tree_node(
module->group_size,module->my_index,
&(sm_module->recursive_doubling_tree));
if(OMPI_SUCCESS != ret) {
opal_output (ompi_bcol_base_framework.framework_output, "Error setting up recursive_doubling_tree \n");
return NULL;
}
/* setup the fanin tree - this is used only as part of a hierarchical
* barrier, so will set this up with rank 0 as the root */
my_rank=module->my_index;
ret=netpatterns_setup_narray_tree(cs->radix_fanin,
my_rank,module->group_size,&(sm_module->fanin_node));
if(OMPI_SUCCESS != ret) {
opal_output (ompi_bcol_base_framework.framework_output, "Error setting up fanin tree \n");
return NULL;
}
/* setup the fanout tree - this is used only as part of a hierarchical
* barrier, so will set this up with rank 0 as the root */
ret=netpatterns_setup_narray_tree(cs->radix_fanout,
my_rank,module->group_size,&(sm_module->fanout_node));
if(OMPI_SUCCESS != ret) {
opal_output (ompi_bcol_base_framework.framework_output, "Error setting up fanout tree \n");
return NULL;
}
/*
* Setup the broadcast tree - this is used only as part of a hierarchical
* bcast, so will set this up with rank 0 as the root.
*/
/* set the radix of the bcast tree */
bcast_radix = cs->radix_read_tree;
/* initialize fan-out read tree */
sm_module->fanout_read_tree=(netpatterns_tree_node_t*) malloc(
sizeof(netpatterns_tree_node_t)*module->group_size);
if( NULL == sm_module->fanout_read_tree ) {
goto Error;
}
for(i = 0; i < module->group_size; i++){
ret = netpatterns_setup_narray_tree(bcast_radix,
i, module->group_size, &(sm_module->fanout_read_tree[i]));
if(OMPI_SUCCESS != ret) {
goto Error;
}
}
ret = load_recursive_knomial_info(sm_module);
if (OMPI_SUCCESS != ret) {
BASESMUMA_VERBOSE(10, ("Failed to load recursive knomial tree"));
goto Error;
}
/* Allocate offsets array for lmsg reduce */
ret = alloc_lmsg_reduce_offsets_array(sm_module);
if (OMPI_SUCCESS != ret) {
BASESMUMA_VERBOSE(10, ("Failed to allocate reduce offsets array"));
goto Error;
}
/* initialize reduction tree */
sm_module->reduction_tree=(netpatterns_tree_node_t *) malloc(
sizeof(netpatterns_tree_node_t )*module->group_size);
if( NULL == sm_module->reduction_tree ) {
goto Error;
}
ret=netpatterns_setup_multinomial_tree(
cs->order_reduction_tree,module->group_size,
sm_module->reduction_tree);
if( MPI_SUCCESS != ret ) {
goto Error;
}
/* get largest power of k for given group size */
sm_module->pow_k_levels = pow_sm_k(cs->k_nomial_radix,
sm_module->super.sbgp_partner_module->group_size,
&(sm_module->pow_k));
/* get largest power of 2 for a given group size
* used in scatter allgather
*/
sm_module->pow_2_levels = pow_sm_k(2,
sm_module->super.sbgp_partner_module->group_size,
&(sm_module->pow_2));
/*
* setup scatter data
*/
sm_module->scatter_kary_radix=cs->scatter_kary_radix;
sm_module->scatter_kary_tree=NULL;
ret=netpatterns_setup_narray_tree_contigous_ranks(
sm_module->scatter_kary_radix,
sm_module->super.sbgp_partner_module->group_size,
&(sm_module->scatter_kary_tree));
if(OMPI_SUCCESS != ret) {
opal_output (ompi_bcol_base_framework.framework_output, "In mca_bcol_basesmuma_comm_query, scatter k-ary tree setup failed \n");
return NULL;
}
/* setup the module shared memory management */
ret=base_bcol_basesmuma_setup_library_buffers(sm_module, cs);
if(OMPI_SUCCESS != ret) {
opal_output (ompi_bcol_base_framework.framework_output, "In base_bcol_basesmuma_setup_library_buffers, the mpool was not successfully set up!\n");
return NULL;
}
/* setup the collectives and memory management */
/* check to see whether or not the mpool has been inited */
/* allocate some space for the network contexts */
if(!cs->mpool_inited) {
/* if it's empty, then fill it for first time */
cs->super.network_contexts = (bcol_base_network_context_t **)
malloc((cs->super.n_net_contexts)*
sizeof(bcol_base_network_context_t *));
/* you need to do some basic setup - define the file name,
* set data seg alignment and size of cntl structure in sm
* file.
*/
/* give the payload sm file a name */
name_length=asprintf(&name,
"%s"OPAL_PATH_SEP"0%s%0d",
ompi_process_info.job_session_dir,
cs->payload_base_fname,
(int)getpid());
if( 0 > name_length ) {
opal_output (ompi_bcol_base_framework.framework_output, "Failed to assign the shared memory payload file a name\n");
return NULL;
}
/* make sure name is not too long */
if ( OPAL_PATH_MAX < (name_length-1) ) {
opal_output (ompi_bcol_base_framework.framework_output, "Shared memory file name is too long!\n");
return NULL;
}
/* set the name and alignment characteristics */
sm_reg_data = (bcol_basesmuma_registration_data_t *) malloc(
sizeof(bcol_basesmuma_registration_data_t));
sm_reg_data->file_name = name;
sm_reg_data->data_seg_alignment = getpagesize();
sm_reg_data->size_ctl_structure = 0;
cs->super.network_contexts[0] = (bcol_base_network_context_t *)
malloc(sizeof(bcol_base_network_context_t));
cs->super.network_contexts[0]->context_data =
(void *) sm_reg_data;
cs->super.network_contexts[0]->
register_memory_fn = mca_bcol_basesmuma_register_sm;
cs->super.network_contexts[0]->
deregister_memory_fn = mca_bcol_basesmuma_deregister_sm;
sm_module->super.network_context = cs->super.network_contexts[0];
} else {
sm_module->super.network_context = cs->super.network_contexts[0];
}
/* Set the header size */
sm_module->super.header_size = sizeof(mca_bcol_basesmuma_header_t);
/*initialize the hdl module if it's to be enabled*/
#if 0
if (module->use_hdl) {
sm_module->super.use_hdl = module->use_hdl;
hdl_cli = (mca_base_component_list_item_t *)
opal_list_get_first(&mca_hdl_base_components_in_use);
sm_module->hdl_module = ((mca_hdl_base_component_t*)
hdl_cli->cli_component)->hdl_comm_query(sm_module, &hdl_num);
if (1 != hdl_num || sm_module->hdl_module == NULL) {
ML_ERROR(("hdl modules are not successfully initialized!\n"));
goto Error;
}
} else {
sm_module->hdl_module = NULL;
}
#else
sm_module->hdl_module = NULL;
#endif
/* collective setup */
load_func(&(sm_module->super));
load_func_with_choices(&(sm_module->super));
/*
* This initializes all collective algorithms
*/
ret = mca_bcol_base_bcol_fns_table_init(&(sm_module->super));
if (OMPI_SUCCESS != ret) {
goto Error;
}
sm_module->super.supported_mode = 0;
/* NTH: this is not set anywhere on the trunk as of 08/13/13 */
#if 0
if (module->use_hdl) {
sm_module->super.supported_mode = MCA_BCOL_BASE_ZERO_COPY;
}
#endif
/* Initializes portals library required for basesmuma large message */
#ifdef __PORTALS_AVAIL__
/* Enable zero copy mode */
sm_module->super.supported_mode = MCA_BCOL_BASE_ZERO_COPY;
ret = mca_bcol_basesmuma_portals_init(cs);
if (OMPI_SUCCESS != ret) {
return NULL;
}
sm_module->sg_state.phase = INIT;
ret = PtlEQAlloc(((mca_bcol_basesmuma_portal_proc_info_t*)
cs->portals_info)->ni_h, MAX_PORTAL_EVENTS_IN_Q,
PTL_EQ_HANDLER_NONE, &sm_module->sg_state.read_eq);
if (ret != PTL_OK) {
BASESMUMA_VERBOSE(10,( "PtlEQAlloc() failed: %d",ret));
return NULL;
}
#endif
/* blocking recursive double barrier test */
/*
{
opal_output (ompi_bcol_base_framework.framework_output, "BBB About to hit the barrier test\n");
int rc;
bcol_function_args_t bogus;
rc = bcol_basesmuma_rd_barrier_init(&(sm_module->super));
rc = bcol_basesmuma_recursive_double_barrier(
&bogus, &(sm_module->super));
}
*/
/* in this case we only expect a single network context.
in the future we should loop over the contexts */
sm_modules = (mca_bcol_base_module_t **) malloc(sizeof(mca_bcol_base_module_t *));
if( !sm_modules ) {
opal_output (ompi_bcol_base_framework.framework_output, "In base_bcol_masesmuma_setup_library_buffers failed to allocate memory for sm_modules\n");
return NULL;
}
sm_modules[0] = &(sm_module->super);
return sm_modules;
Error:
/* cleanup */
if( sm_module->reduction_tree ) {
free(sm_module->reduction_tree);
sm_module->reduction_tree=NULL;
}
return NULL;
}
OBJ_CLASS_INSTANCE(mca_bcol_basesmuma_module_t,
mca_bcol_base_module_t,
mca_bcol_basesmuma_module_construct,
mca_bcol_basesmuma_module_destruct);

View file

@ -1,74 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
*/
#include "ompi_config.h"
#include "ompi/constants.h"
#include "bcol_basesmuma.h"
/* the progress function to be called from the opal progress function
*/
int bcol_basesmuma_progress(void)
{
/* local variables */
volatile int32_t *cntr;
mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component;
/* check to see if release of memory blocks needs to be done */
if( opal_list_get_size(&(cs->nb_admin_barriers)) ) {
sm_nbbar_desc_t *item_ptr;
opal_list_t *list=&(cs->nb_admin_barriers);
/* process only if the list is non-empty */
if( !OPAL_THREAD_TRYLOCK(&cs->nb_admin_barriers_mutex)) {
for (item_ptr = (sm_nbbar_desc_t*) opal_list_get_first(list);
item_ptr != (sm_nbbar_desc_t*) opal_list_get_end(list);
item_ptr = (sm_nbbar_desc_t*) opal_list_get_next(item_ptr) )
{
bcol_basesmuma_rd_nb_barrier_progress_admin(item_ptr);
/* check to see if the barrier is complete */
if( NB_BARRIER_DONE == item_ptr->collective_phase ) {
/* barrier is complete - remove from the list. No need
* to put it on another list, as it is part of the memory
* bank control structure, and will be picked up
* again when needed.
*/
int index=
item_ptr->pool_index;
/* old way - ctl_struct specific */
/*
volatile uint64_t *cntr= (volatile uint64_t *)
&(item_ptr->sm_module->colls_no_user_data.
ctl_buffs_mgmt[index].bank_gen_counter);
*/
cntr= (volatile int32_t *) &(item_ptr->coll_buff->
ctl_buffs_mgmt[index].bank_gen_counter);
item_ptr=(sm_nbbar_desc_t*)opal_list_remove_item((opal_list_t *)list,
( opal_list_item_t *)item_ptr);
/* increment the generation number */
OPAL_THREAD_ADD32(cntr,1);
}
}
OPAL_THREAD_UNLOCK(&cs->nb_admin_barriers_mutex);
}
}
return OMPI_SUCCESS;
}
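/*
 * Illustrative sketch (assumption, not part of the removed file): the shape of
 * the progress loop above - walk a list of pending nonblocking barrier
 * descriptors, advance each one, and when one completes unlink it and bump the
 * generation counter of its memory bank. Plain C with a hypothetical
 * singly-linked list standing in for opal_list_t:
 */
#include <stdbool.h>
#include <stdio.h>

struct pending_desc {
    struct pending_desc *next;
    int remaining_steps;          /* stand-in for the barrier phases */
    int *bank_gen_counter;        /* counter to bump on completion */
};

static bool advance(struct pending_desc *d)
{
    if (d->remaining_steps > 0) {
        d->remaining_steps--;
    }
    return 0 == d->remaining_steps;
}

static void progress_sketch(struct pending_desc **head)
{
    struct pending_desc **link = head;
    while (NULL != *link) {
        struct pending_desc *d = *link;
        if (advance(d)) {
            *link = d->next;              /* unlink the finished descriptor */
            (*d->bank_gen_counter)++;     /* release the bank for reuse */
        } else {
            link = &d->next;
        }
    }
}

int main(void)
{
    int gen = 0;
    struct pending_desc b = { NULL, 2, &gen };
    struct pending_desc a = { &b, 1, &gen };
    struct pending_desc *head = &a;
    while (NULL != head) {
        progress_sketch(&head);
    }
    printf("generation counter = %d\n", gen);   /* prints 2 */
    return 0;
}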

View file

@ -1,218 +0,0 @@
/*
* Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/* Recursive doubling blocking barrier */
#include "ompi_config.h"
#include "ompi/constants.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/bcol/bcol.h"
#include "ompi/patterns/net/netpatterns.h"
#include "opal/sys/atomic.h"
#include "bcol_basesmuma.h"
#if 0
int bcol_basesmuma_recursive_double_barrier(bcol_function_args_t *input_args,
mca_bcol_base_function_t *c_input_args)
{
/* local variables */
int ret=OMPI_SUCCESS, idx, leading_dim, loop_cnt, exchange, flag_to_set;
int pair_rank, flag_offset;
mca_bcol_basesmuma_ctl_struct_t **ctl_structs;
netpatterns_pair_exchange_node_t *my_exchange_node;
int extra_rank, my_rank, pow_2;
volatile mca_bcol_basesmuma_ctl_struct_t *partner_ctl;
volatile mca_bcol_basesmuma_ctl_struct_t *my_ctl;
int64_t sequence_number;
bool found;
int buff_index, first_instance=0;
mca_bcol_basesmuma_module_t* bcol_module =
(mca_bcol_basesmuma_module_t *)c_input_args->bcol_module;
#if 0
fprintf(stderr,"Entering the sm rd barrier\n");
fflush(stderr);
#endif
/* get the pointer to the segment of control structures */
my_exchange_node=&(bcol_module->recursive_doubling_tree);
my_rank=bcol_module->super.sbgp_partner_module->my_index;
pow_2=bcol_module->super.sbgp_partner_module->pow_2;
/* figure out what instance of the basesmuma bcol I am */
leading_dim=bcol_module->colls_no_user_data.size_of_group;
sequence_number=input_args->sequence_num - c_input_args->bcol_module->squence_number_offset;
buff_index=sequence_number & (bcol_module->colls_no_user_data.mask);
idx=SM_ARRAY_INDEX(leading_dim,buff_index,0);
ctl_structs=(mca_bcol_basesmuma_ctl_struct_t **)
bcol_module->colls_no_user_data.ctl_buffs+idx;
my_ctl=ctl_structs[my_rank];
if( my_ctl->sequence_number < sequence_number ) {
first_instance=1;
}
/* get the pool index */
if( first_instance ) {
idx = -1;
while( idx == -1 ) {
idx=bcol_basesmuma_get_buff_index(
&(bcol_module->colls_no_user_data),sequence_number);
}
if( -1 == idx ){
return ORTE_ERR_TEMP_OUT_OF_RESOURCE;
}
my_ctl->index=1;
/* this does not need to use any flag values, so we only need to
* set the value for subsequent calls that may need it */
my_ctl->starting_flag_value=0;
flag_offset=0;
} else {
/* only one thread at a time will be making progress on this
* collective, so no need to make this atomic */
my_ctl->index++;
flag_offset=my_ctl->starting_flag_value;
}
/* signal that I have arrived */
my_ctl->flag = -1;
/* don't need to set this flag anymore */
my_ctl->sequence_number = sequence_number;
/* opal_atomic_wmb ();*/
if(0 < my_exchange_node->n_extra_sources) {
if (EXCHANGE_NODE == my_exchange_node->node_type) {
volatile int64_t *partner_sn;
int cnt=0;
/* I will participate in the exchange - wait for signal from extra
** process */
extra_rank = my_exchange_node->rank_extra_source;
partner_ctl=(volatile mca_bcol_basesmuma_ctl_struct_t *)ctl_structs[extra_rank];
/*partner_ctl=ctl_structs[extra_rank];*/
partner_sn=(volatile int64_t *)&(partner_ctl->sequence_number);
/* spin n iterations until partner registers */
loop_cnt=0;
found=false;
while( !found )
{
if( *partner_sn >= sequence_number ) {
found=true;
}
cnt++;
if( cnt == 1000 ) {
opal_progress();
cnt=0;
}
}
} else {
/* Nothing to do, already registered that I am here */
}
}
for(exchange = 0; exchange < my_exchange_node->n_exchanges; exchange++) {
volatile int64_t *partner_sn;
volatile int *partner_flag;
int cnt=0;
/* rank of exchange partner */
pair_rank = my_rank ^ ( 1 SHIFT_UP exchange );
partner_ctl=ctl_structs[pair_rank];
partner_sn=(volatile int64_t *)&(partner_ctl->sequence_number);
partner_flag=(volatile int *)&(partner_ctl->flag);
/* signal that I am at iteration exchange of the algorithm */
flag_to_set=flag_offset+exchange;
my_ctl->flag = flag_to_set;
/* check to see if the partner has arrived */
/* spin n iterations until partner registers */
found=false;
while( !found )
{
if( (*partner_sn > sequence_number) ||
( *partner_sn == sequence_number &&
*partner_flag >= flag_to_set ) ) {
found=true;
} else {
cnt++;
if( cnt == 1000 ) {
opal_progress();
cnt=0;
}
}
}
}
if(0 < my_exchange_node->n_extra_sources) {
if ( EXTRA_NODE == my_exchange_node->node_type ) {
int cnt=0;
/* I will not participate in the exchange -
* wait for signal from extra partner */
extra_rank = my_exchange_node->rank_extra_source;
partner_ctl=ctl_structs[extra_rank];
flag_to_set=flag_offset+my_exchange_node->log_2;
/* spin n iterations until partner registers */
found=false;
while( !found )
{
if (IS_PEER_READY(partner_ctl, flag_to_set, sequence_number)){
found=true;
} else {
cnt++;
if( cnt == 1000 ) {
opal_progress();
cnt=0;
}
}
}
} else {
/* signal the extra rank that I am done with the recursive
* doubling phase.
*/
flag_to_set=flag_offset+my_exchange_node->log_2;
my_ctl->flag = flag_to_set;
}
}
/* if I am the last instance of a basesmuma function in this collective,
* release the resources */
if (IS_LAST_BCOL_FUNC(c_input_args)){
idx=bcol_basesmuma_free_buff(
&(bcol_module->colls_no_user_data),
sequence_number);
} else {
/* increment flag value - so next sm collective in the hierarchy
* will not collide with the current one, as they share the
* control structure */
my_ctl->starting_flag_value+=(my_exchange_node->log_2+1);
}
/* return */
return ret;
}
#endif
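/*
 * Illustrative sketch (assumption, not part of the removed file): the pairing
 * rule the recursive-doubling barrier above relies on. For a power-of-two
 * group, rank r exchanges with r ^ (1 << exchange) at step "exchange", so
 * after log2(n) steps every rank has transitively heard from every other rank.
 */
#include <stdio.h>

int main(void)
{
    const int n = 8;                       /* power-of-two group size */
    for (int rank = 0; rank < n; rank++) {
        printf("rank %d exchanges with:", rank);
        for (int exchange = 0; (1 << exchange) < n; exchange++) {
            printf(" %d", rank ^ (1 << exchange));
        }
        printf("\n");
    }
    return 0;
}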

View file

@ -1,462 +0,0 @@
/*
* Copyright (c) 2009-2012 UT-Battelle, LLC. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
/* we need make cleanup with all these includes START */
#include <unistd.h>
#include <sys/types.h>
#include "ompi/constants.h"
#include "ompi/mca/bcol/bcol.h"
#include "bcol_basesmuma.h"
#include "opal/sys/atomic.h"
#include "ompi/patterns/net/netpatterns.h"
#include "ompi/mca/bcol/base/base.h"
/*
* Initialize nonblocking barrier. This is code specific for handling
* the recycling of data, and uses only a single set of control buffers.
* It also assumes that for a given process, only a single outstanding
* barrier operation will occur for a given control structure,
* with the sequence number being used for potential overlap in time
* between successive barrier calls on different processes.
*/
int bcol_basesmuma_rd_nb_barrier_init_admin(
sm_nbbar_desc_t *sm_desc)
{
/* local variables */
int ret=OMPI_SUCCESS, idx, leading_dim, loop_cnt, exchange;
int pair_rank;
mca_bcol_basesmuma_ctl_struct_t **ctl_structs;
netpatterns_pair_exchange_node_t *my_exchange_node;
int extra_rank, my_rank;
mca_bcol_basesmuma_ctl_struct_t volatile *partner_ctl;
mca_bcol_basesmuma_ctl_struct_t volatile *my_ctl;
int64_t bank_genaration;
bool found;
int pool_index=sm_desc->pool_index;
mca_bcol_basesmuma_module_t *bcol_module=sm_desc->sm_module;
/* get the pointer to the segment of control structures */
idx=sm_desc->coll_buff->number_of_buffs+pool_index;
leading_dim=sm_desc->coll_buff->size_of_group;
idx=SM_ARRAY_INDEX(leading_dim,idx,0);
ctl_structs=(mca_bcol_basesmuma_ctl_struct_t **)
sm_desc->coll_buff->ctl_buffs+idx;
bank_genaration= sm_desc->coll_buff->ctl_buffs_mgmt[pool_index].bank_gen_counter;
my_exchange_node=&(bcol_module->recursive_doubling_tree);
my_rank=bcol_module->super.sbgp_partner_module->my_index;
my_ctl=ctl_structs[my_rank];
/* debug print */
/*
{
int ii;
for(ii = 0; ii < 6; ii++) {
fprintf(stderr,"UUU ctl_struct[%d] := %p\n",ii,
bcol_module->colls_no_user_data.ctl_buffs[ii]);
fflush(stderr);
}
}
*/
/* end debug */
/* signal that I have arrived */
my_ctl->flag = -1;
opal_atomic_wmb ();
/* don't need to set this flag anymore */
my_ctl->sequence_number = bank_genaration;
if(0 < my_exchange_node->n_extra_sources) {
if (EXCHANGE_NODE == my_exchange_node->node_type) {
volatile int64_t *partner_sn;
/* I will participate in the exchange - wait for signal from extra
** process */
extra_rank = my_exchange_node->rank_extra_source;
partner_ctl=ctl_structs[extra_rank];
partner_sn=(volatile int64_t *)&(partner_ctl->sequence_number);
/* spin n iterations until partner registers */
loop_cnt=0;
found=false;
while( loop_cnt < bcol_module->super.n_poll_loops )
{
if( *partner_sn >= bank_genaration ) {
found=true;
break;
}
loop_cnt++;
}
if( !found ) {
/* set restart parameters */
sm_desc->collective_phase=NB_PRE_PHASE;
return OMPI_SUCCESS;
}
} else {
/* Nothing to do, already registered that I am here */
}
}
for(exchange = 0; exchange < my_exchange_node->n_exchanges; exchange++) {
volatile int64_t *partner_sn;
volatile int *partner_flag;
/* rank of exchange partner */
pair_rank = my_rank ^ ( 1 SHIFT_UP exchange );
partner_ctl=ctl_structs[pair_rank];
partner_sn=(volatile int64_t *)&(partner_ctl->sequence_number);
partner_flag=(volatile int *)&(partner_ctl->flag);
/* signal that I am at iteration exchange of the algorithm */
my_ctl->flag = exchange;
/* check to see if the partner has arrived */
/* spin n iterations until partner registers */
loop_cnt=0;
found=false;
while( loop_cnt < bcol_module->super.n_poll_loops )
{
if( (*partner_sn > bank_genaration) ||
( *partner_sn == bank_genaration &&
*partner_flag >= exchange ) ) {
found=true;
break;
}
loop_cnt++;
}
if( !found ) {
/* set restart parameters */
sm_desc->collective_phase=NB_RECURSIVE_DOUBLING;
sm_desc->recursive_dbl_iteration=exchange;
return OMPI_SUCCESS;
}
}
if(0 < my_exchange_node->n_extra_sources) {
if ( EXTRA_NODE == my_exchange_node->node_type ) {
volatile int64_t *partner_sn;
volatile int *partner_flag;
/* I will not participate in the exchange -
* wait for signal from extra partner */
extra_rank = my_exchange_node->rank_extra_source;
partner_ctl=ctl_structs[extra_rank];
partner_sn=(volatile int64_t *)&(partner_ctl->sequence_number);
partner_flag=(volatile int *)&(partner_ctl->flag);
/* spin n iterations until partner registers */
loop_cnt=0;
found=false;
while( loop_cnt < bcol_module->super.n_poll_loops )
{
if( (*partner_sn > bank_genaration) ||
( (*partner_sn == bank_genaration) &&
(*partner_flag == (my_exchange_node->log_2)) ) ) {
found=true;
break;
}
loop_cnt++;
}
if( !found ) {
/* set restart parameters */
sm_desc->collective_phase=NB_POST_PHASE;
return OMPI_SUCCESS;
}
} else {
/* signal the extra rank that I am done with the recursive
* doubling phase.
*/
my_ctl->flag = my_exchange_node->n_exchanges;
}
}
/* set the barrier as complete */
sm_desc->collective_phase=NB_BARRIER_DONE;
/* return */
return ret;
}
/* admin nonblocking barrier - progress function */
int bcol_basesmuma_rd_nb_barrier_progress_admin(
sm_nbbar_desc_t *sm_desc)
{
/* local variables */
int ret=OMPI_SUCCESS, idx, leading_dim, loop_cnt, exchange;
int pair_rank, start_index, restart_phase;
mca_bcol_basesmuma_ctl_struct_t **ctl_structs;
netpatterns_pair_exchange_node_t *my_exchange_node;
int extra_rank, my_rank;
mca_bcol_basesmuma_ctl_struct_t volatile *partner_ctl;
mca_bcol_basesmuma_ctl_struct_t volatile *my_ctl;
int64_t bank_genaration;
int pool_index=sm_desc->pool_index;
bool found;
mca_bcol_basesmuma_module_t *bcol_module=sm_desc->sm_module;
/* get the pointer to the segment of control structures */
idx = sm_desc->coll_buff->number_of_buffs+pool_index;
leading_dim = sm_desc->coll_buff->size_of_group;
idx = SM_ARRAY_INDEX(leading_dim,idx,0);
ctl_structs = (mca_bcol_basesmuma_ctl_struct_t **)
sm_desc->coll_buff->ctl_buffs+idx;
bank_genaration = sm_desc->coll_buff->ctl_buffs_mgmt[pool_index].bank_gen_counter;
my_exchange_node=&(bcol_module->recursive_doubling_tree);
my_rank=bcol_module->super.sbgp_partner_module->my_index;
my_ctl=ctl_structs[my_rank];
/* check to make sure that this should be progressed */
if( ( sm_desc->collective_phase == NB_BARRIER_INACTIVE ) ||
( sm_desc->collective_phase == NB_BARRIER_DONE ) )
{
return OMPI_SUCCESS;
}
/* set the restart up - and jump to the correct place in the algorithm */
restart_phase=sm_desc->collective_phase;
if ( NB_PRE_PHASE == restart_phase ) {
start_index=0;
} else if ( NB_RECURSIVE_DOUBLING == restart_phase ) {
start_index=sm_desc->recursive_dbl_iteration;
goto Exchange_phase;
} else {
goto Post_phase;
}
if(0 < my_exchange_node->n_extra_sources) {
if (EXCHANGE_NODE == my_exchange_node->node_type) {
volatile int64_t *partner_sn;
/* I will participate in the exchange - wait for signal from extra
** process */
extra_rank = my_exchange_node->rank_extra_source;
partner_ctl=ctl_structs[extra_rank];
partner_sn=(volatile int64_t *)&(partner_ctl->sequence_number);
/* spin n iterations until partner registers */
loop_cnt=0;
while( loop_cnt < bcol_module->super.n_poll_loops )
{
found=false;
if( *partner_sn >= bank_genaration ) {
found=true;
break;
}
loop_cnt++;
}
if( !found ) {
/* set restart parameters */
sm_desc->collective_phase=NB_PRE_PHASE;
return OMPI_SUCCESS;
}
} else {
/* Nothing to do, already registered that I am here */
}
}
Exchange_phase:
for(exchange = start_index;
exchange < my_exchange_node->n_exchanges; exchange++) {
volatile int64_t *partner_sn;
volatile int *partner_flag;
/* rank of exchange partner */
pair_rank = my_rank ^ ( 1 SHIFT_UP exchange );
partner_ctl=ctl_structs[pair_rank];
partner_sn=(volatile int64_t *)&(partner_ctl->sequence_number);
partner_flag=(volatile int *)&(partner_ctl->flag);
/* signal that I am at iteration exchange of the algorithm */
my_ctl->flag = exchange;
/* check to see if the partner has arrived */
/* spin n iterations until partner registers */
loop_cnt=0;
found=false;
while( loop_cnt < bcol_module->super.n_poll_loops )
{
if( (*partner_sn > bank_genaration) ||
( (*partner_sn == bank_genaration) &&
(*partner_flag >= exchange) ) ) {
found=true;
break;
}
loop_cnt++;
}
if( !found ) {
/* set restart parameters */
sm_desc->collective_phase=NB_RECURSIVE_DOUBLING;
sm_desc->recursive_dbl_iteration=exchange;
return OMPI_SUCCESS;
}
}
Post_phase:
if(0 < my_exchange_node->n_extra_sources) {
if ( EXTRA_NODE == my_exchange_node->node_type ) {
volatile int64_t *partner_sn;
volatile int *partner_flag;
/* I will not participate in the exchange -
* wait for signal from extra partner */
extra_rank = my_exchange_node->rank_extra_source;
partner_ctl=ctl_structs[extra_rank];
partner_sn=(volatile int64_t *)&(partner_ctl->sequence_number);
partner_flag=(volatile int *)&(partner_ctl->flag);
/* spin n iterations until partner registers */
loop_cnt=0;
found=false;
while( loop_cnt < bcol_module->super.n_poll_loops )
{
if( (*partner_sn > bank_genaration) ||
( *partner_sn == bank_genaration &&
*partner_flag == (my_exchange_node->log_2) ) ) {
found=true;
break;
}
loop_cnt++;
}
if( !found ) {
/* set restart parameters */
sm_desc->collective_phase=NB_POST_PHASE;
return OMPI_SUCCESS;
}
} else {
/* signal the extra rank that I am done with the recursive
* doubling phase.
*/
my_ctl->flag = my_exchange_node->n_exchanges;
}
}
/* set the barrier as complete */
sm_desc->collective_phase=NB_BARRIER_DONE;
/* return */
return ret;
}
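/*
 * Illustrative sketch (hypothetical names, not part of the removed file): the
 * restart discipline the two admin-barrier routines above follow. Each phase
 * polls a bounded number of times; if the peer has not arrived, the current
 * phase is saved in the descriptor and control returns to the caller, which
 * re-enters through the progress routine later instead of blocking.
 */
#include <stdbool.h>
#include <stdio.h>

enum nb_phase { NB_PRE, NB_EXCHANGE, NB_DONE };

struct nb_desc {
    enum nb_phase phase;
    int iteration;
};

static bool peer_arrived(int iteration)
{
    (void) iteration;
    return false;                  /* pretend the peer never shows up */
}

static int nb_step(struct nb_desc *desc, int n_poll_loops)
{
    for (int loop = 0; loop < n_poll_loops; loop++) {
        if (peer_arrived(desc->iteration)) {
            desc->phase = NB_DONE;
            return 0;
        }
    }
    desc->phase = NB_EXCHANGE;     /* remember where to resume */
    return 1;                      /* not done yet - caller retries later */
}

int main(void)
{
    struct nb_desc desc = { NB_PRE, 0 };
    printf("needs restart: %d, saved phase: %d\n",
           nb_step(&desc, 4), (int) desc.phase);
    return 0;
}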
static int bcol_basesmuma_memsync(bcol_function_args_t *input_args,
mca_bcol_base_function_t *c_input_args)
{
int rc;
int memory_bank = input_args->root;
mca_bcol_basesmuma_module_t* bcol_module =
(mca_bcol_basesmuma_module_t *)c_input_args->bcol_module;
sm_buffer_mgmt *buff_block = &(bcol_module->colls_with_user_data);
sm_nbbar_desc_t *sm_desc = &(buff_block->ctl_buffs_mgmt[memory_bank].nb_barrier_desc);
sm_desc->coll_buff = buff_block;
/*
printf("XXX SYNC call\n");
*/
rc = bcol_basesmuma_rd_nb_barrier_init_admin(
sm_desc);
if (OMPI_SUCCESS != rc) {
return rc;
}
if (NB_BARRIER_DONE != sm_desc->collective_phase) {
mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component;
opal_list_t *list=&(cs->nb_admin_barriers);
opal_list_item_t *append_item;
/* put this onto the progression list */
OPAL_THREAD_LOCK(&(cs->nb_admin_barriers_mutex));
append_item=(opal_list_item_t *)
&(buff_block->ctl_buffs_mgmt[memory_bank].nb_barrier_desc);
opal_list_append(list,append_item);
OPAL_THREAD_UNLOCK(&(cs->nb_admin_barriers_mutex));
/* progress communications so that resources can be freed up */
return BCOL_FN_STARTED;
}
/* Done - bump the counter */
(buff_block->ctl_buffs_mgmt[memory_bank].bank_gen_counter)++;
/*
printf("XXX SYNC call done \n");
*/
return BCOL_FN_COMPLETE;
}
static int bcol_basesmuma_memsync_progress(bcol_function_args_t *input_args,
mca_bcol_base_function_t *c_input_args)
{
int memory_bank = input_args->root;
mca_bcol_basesmuma_module_t* bcol_module =
(mca_bcol_basesmuma_module_t *)c_input_args->bcol_module;
sm_buffer_mgmt *buff_block = &(bcol_module->colls_with_user_data);
sm_nbbar_desc_t *sm_desc = &(buff_block->ctl_buffs_mgmt[memory_bank].nb_barrier_desc);
/* I do not have to do anything, since the
progress is done by the basesmuma progress engine */
if (NB_BARRIER_DONE != sm_desc->collective_phase) {
return BCOL_FN_STARTED;
}
return BCOL_FN_COMPLETE;
}
int bcol_basesmuma_memsync_init(mca_bcol_base_module_t *super)
{
mca_bcol_base_coll_fn_comm_attributes_t comm_attribs;
mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs;
comm_attribs.bcoll_type = BCOL_SYNC;
comm_attribs.comm_size_min = 0;
comm_attribs.comm_size_max = 1024 * 1024;
comm_attribs.waiting_semantics = NON_BLOCKING;
inv_attribs.bcol_msg_min = 0;
inv_attribs.bcol_msg_max = 20000; /* range 1 */
inv_attribs.datatype_bitmap = 0xffffffff;
inv_attribs.op_types_bitmap = 0xffffffff;
comm_attribs.data_src = DATA_SRC_KNOWN;
mca_bcol_base_set_attributes(super,
&comm_attribs, &inv_attribs,
bcol_basesmuma_memsync,
bcol_basesmuma_memsync_progress);
return OMPI_SUCCESS;
}

View file

@ -1,382 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/constants.h"
#include "ompi/op/op.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/bcol/base/base.h"
#include "ompi/mca/bcol/bcol.h"
#include "opal/include/opal_stdint.h"
#include "bcol_basesmuma.h"
#include "bcol_basesmuma_reduce.h"
/**
* gvm - Shared memory reduce
*/
static int bcol_basesmuma_reduce_intra_fanin_progress(bcol_function_args_t *input_args,
mca_bcol_base_function_t *c_input_args);
int bcol_basesmuma_reduce_init(mca_bcol_base_module_t *super)
{
mca_bcol_base_coll_fn_comm_attributes_t comm_attribs;
mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs;
comm_attribs.bcoll_type = BCOL_REDUCE;
comm_attribs.comm_size_min = 0;
comm_attribs.comm_size_max = 1048576;
comm_attribs.data_src = DATA_SRC_KNOWN;
comm_attribs.waiting_semantics = NON_BLOCKING;
inv_attribs.bcol_msg_min = 0;
inv_attribs.bcol_msg_max = 20000;
inv_attribs.datatype_bitmap = 0x11111111;
inv_attribs.op_types_bitmap = 0x11111111;
/* Set attributes for fanin fanout algorithm */
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, bcol_basesmuma_reduce_intra_fanin,
bcol_basesmuma_reduce_intra_fanin_progress);
inv_attribs.bcol_msg_min = 10000000;
inv_attribs.bcol_msg_max = 10485760; /* range 4 */
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, NULL, NULL);
return OMPI_SUCCESS;
}
/*
* Small data fanin reduce
* ML buffers are used for both payload and control structures
* This functions works with hierarchical allreduce and
* progress engine
*/
static inline int reduce_children (mca_bcol_basesmuma_module_t *bcol_module, volatile void *rbuf, netpatterns_tree_node_t *my_reduction_node,
int *iteration, volatile mca_bcol_basesmuma_header_t *my_ctl_pointer, ompi_datatype_t *dtype,
volatile mca_bcol_basesmuma_payload_t *data_buffs, int count, struct ompi_op_t *op, int process_shift) {
volatile mca_bcol_basesmuma_header_t * child_ctl_pointer;
int bcol_id = (int) bcol_module->super.bcol_id;
int64_t sequence_number = my_ctl_pointer->sequence_number;
int8_t ready_flag = my_ctl_pointer->ready_flag;
int group_size = bcol_module->colls_no_user_data.size_of_group;
if (LEAF_NODE != my_reduction_node->my_node_type) {
volatile char *child_data_pointer;
volatile void *child_rbuf;
/* for each child */
/* my_result_data = child_result_data (op) my_source_data */
for (int child = *iteration ; child < my_reduction_node->n_children ; ++child) {
int child_rank = my_reduction_node->children_ranks[child] + process_shift;
if (group_size <= child_rank){
child_rank -= group_size;
}
child_ctl_pointer = data_buffs[child_rank].ctl_struct;
child_data_pointer = data_buffs[child_rank].payload;
if (!IS_PEER_READY(child_ctl_pointer, ready_flag, sequence_number, REDUCE_FLAG, bcol_id)) {
*iteration = child;
return BCOL_FN_STARTED;
}
child_rbuf = child_data_pointer + child_ctl_pointer->roffsets[bcol_id];
ompi_op_reduce(op,(void *)child_rbuf,(void *)rbuf, count, dtype);
} /* end child loop */
}
if (ROOT_NODE != my_reduction_node->my_node_type) {
opal_atomic_wmb ();
my_ctl_pointer->flags[REDUCE_FLAG][bcol_id] = ready_flag;
}
return BCOL_FN_COMPLETE;
}
static int bcol_basesmuma_reduce_intra_fanin_progress(bcol_function_args_t *input_args,
mca_bcol_base_function_t *c_input_args)
{
mca_bcol_basesmuma_module_t* bcol_module =
(mca_bcol_basesmuma_module_t *)c_input_args->bcol_module;
netpatterns_tree_node_t *my_reduction_node;
int my_rank, my_node_index;
struct ompi_datatype_t *dtype = input_args->dtype;
int leading_dim, idx;
/* Buffer index */
int buff_idx = input_args->src_desc->buffer_index;
int *iteration = &bcol_module->ml_mem.nb_coll_desc[buff_idx].iteration;
volatile mca_bcol_basesmuma_payload_t *data_buffs;
volatile mca_bcol_basesmuma_header_t *my_ctl_pointer;
void *data_addr = (void *)input_args->src_desc->data_addr;
volatile void *rbuf;
/* get addressing information */
my_rank = bcol_module->super.sbgp_partner_module->my_index;
leading_dim = bcol_module->colls_no_user_data.size_of_group;
idx = SM_ARRAY_INDEX(leading_dim, buff_idx, 0);
data_buffs = (volatile mca_bcol_basesmuma_payload_t *)
bcol_module->colls_with_user_data.data_buffs + idx;
/* Get control structure and payload buffer */
my_ctl_pointer = data_buffs[my_rank].ctl_struct;
my_node_index = my_rank - input_args->root;
if (0 > my_node_index) {
int group_size = bcol_module->colls_no_user_data.size_of_group;
my_node_index += group_size;
}
my_reduction_node = bcol_module->reduction_tree + my_node_index;
rbuf = (volatile void *)((uintptr_t) data_addr + input_args->rbuf_offset);
return reduce_children (bcol_module, rbuf, my_reduction_node, iteration, my_ctl_pointer, dtype,
data_buffs, input_args->count, input_args->op, input_args->root);
}
int bcol_basesmuma_reduce_intra_fanin(bcol_function_args_t *input_args,
mca_bcol_base_function_t *c_input_args)
{
/* local variables */
int rc=BCOL_FN_COMPLETE;
int my_rank,group_size,my_node_index;
mca_bcol_basesmuma_module_t* bcol_module =
(mca_bcol_basesmuma_module_t *)c_input_args->bcol_module;
netpatterns_tree_node_t *my_reduction_node;
volatile int8_t ready_flag;
int bcol_id = (int) bcol_module->super.bcol_id;
volatile void *sbuf,*rbuf;
int sbuf_offset,rbuf_offset;
int root,count;
int64_t sequence_number=input_args->sequence_num;
struct ompi_datatype_t *dtype;
int leading_dim,idx;
/* Buffer index */
int buff_idx = input_args->src_desc->buffer_index;
int *iteration = &bcol_module->ml_mem.nb_coll_desc[buff_idx].iteration;
volatile mca_bcol_basesmuma_payload_t *data_buffs;
volatile char * my_data_pointer;
volatile mca_bcol_basesmuma_header_t *my_ctl_pointer;
void *data_addr = (void *)input_args->src_desc->data_addr;
#if 0
fprintf(stderr,"777 entering sm reduce \n");
#endif
/* get addressing information */
my_rank=bcol_module->super.sbgp_partner_module->my_index;
group_size=bcol_module->colls_no_user_data.size_of_group;
leading_dim=bcol_module->colls_no_user_data.size_of_group;
idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0);
data_buffs = (volatile mca_bcol_basesmuma_payload_t *)
bcol_module->colls_with_user_data.data_buffs+idx;
/* fprintf(stderr,"AAA the devil!!\n"); */
/* Get control structure and payload buffer */
my_ctl_pointer = data_buffs[my_rank].ctl_struct;
my_data_pointer = (volatile char *)data_addr;
/* Align node index to around sbgp root */
root = input_args->root;
my_node_index = my_rank - root;
if (0 > my_node_index) {
my_node_index += group_size;
}
/* get arguments */
sbuf_offset = input_args->sbuf_offset;
rbuf_offset = input_args->rbuf_offset;
sbuf = (volatile void *)(my_data_pointer + sbuf_offset);
data_buffs[my_rank].payload = (void*)sbuf;
rbuf = (volatile void *)(my_data_pointer + rbuf_offset);
count = input_args->count;
dtype = input_args->dtype;
/* Cache my rbuf_offset */
my_ctl_pointer->roffsets[bcol_id] = rbuf_offset;
/* get my node for the reduction tree */
my_reduction_node=&(bcol_module->reduction_tree[my_node_index]);
/* init the header */
BASESMUMA_HEADER_INIT(my_ctl_pointer, ready_flag, sequence_number, bcol_id);
input_args->result_in_rbuf = (ROOT_NODE == my_reduction_node->my_node_type);
/* set starting point for progress loop */
*iteration = 0;
my_ctl_pointer->ready_flag = ready_flag;
if (sbuf != rbuf) {
rc = ompi_datatype_copy_content_same_ddt(dtype, count, (char *)rbuf,
(char *)sbuf);
if( 0 != rc ) {
return OMPI_ERROR;
}
}
rc = reduce_children (bcol_module, rbuf, my_reduction_node, iteration, my_ctl_pointer, dtype,
data_buffs, count, input_args->op, root);
/* Flag value if other bcols are called */
my_ctl_pointer->starting_flag_value[bcol_id]++;
/* Recycle payload buffers */
return rc;
}
/* Small data fanin reduce
* Uses SM buffer (backed by SM file) for both control structures and
* payload
*
* NTH: How does this differ from the new one? Can we replace this
* with a call to the new init and then a call to the new progress until
* complete?
*/
int bcol_basesmuma_reduce_intra_fanin_old(bcol_function_args_t *input_args,
mca_bcol_base_function_t *c_input_args)
{
/* local variables */
int rc=OMPI_SUCCESS;
int my_rank,group_size,process_shift,my_node_index;
int n_children,child;
mca_bcol_basesmuma_module_t* bcol_module =
(mca_bcol_basesmuma_module_t *)c_input_args->bcol_module;
netpatterns_tree_node_t *my_reduction_node;
volatile int8_t ready_flag;
volatile void *sbuf,*rbuf;
int sbuf_offset,rbuf_offset;
int root,count;
struct ompi_op_t *op;
int64_t sequence_number=input_args->sequence_num;
struct ompi_datatype_t *dtype;
int leading_dim,idx;
int buff_idx;
int child_rank;
int bcol_id = (int) bcol_module->super.bcol_id;
volatile mca_bcol_basesmuma_payload_t *data_buffs;
volatile char * my_data_pointer;
volatile char * child_data_pointer;
volatile mca_bcol_basesmuma_header_t *my_ctl_pointer;
volatile mca_bcol_basesmuma_header_t * child_ctl_pointer;
#if 0
fprintf(stderr,"Entering fanin reduce \n");
#endif
/* Buffer index */
buff_idx = input_args->src_desc->buffer_index;
/* get addressing information */
my_rank=bcol_module->super.sbgp_partner_module->my_index;
group_size=bcol_module->colls_no_user_data.size_of_group;
leading_dim=bcol_module->colls_no_user_data.size_of_group;
idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0);
/*ctl_structs=(mca_bcol_basesmuma_ctl_struct_t **)
bcol_module->colls_with_user_data.ctl_buffs+idx;*/
data_buffs = (volatile mca_bcol_basesmuma_payload_t *)
bcol_module->colls_with_user_data.data_buffs+idx;
/* Get control structure and payload buffer */
my_ctl_pointer = data_buffs[my_rank].ctl_struct;
my_data_pointer = (volatile char *) data_buffs[my_rank].payload;
/* Align node index to around sbgp root */
root = input_args->root;
process_shift = root;
my_node_index = my_rank - root;
if (0 > my_node_index ) {
my_node_index += group_size;
}
/* get arguments */
sbuf_offset = input_args->sbuf_offset;
rbuf_offset = input_args->rbuf_offset;
sbuf = (volatile void *)(my_data_pointer + sbuf_offset);
rbuf = (volatile void *)(my_data_pointer + rbuf_offset);
op = input_args->op;
count = input_args->count;
dtype = input_args->dtype;
/* get my node for the reduction tree */
my_reduction_node=&(bcol_module->reduction_tree[my_node_index]);
n_children=my_reduction_node->n_children;
/* init the header */
BASESMUMA_HEADER_INIT(my_ctl_pointer, ready_flag, sequence_number, bcol_id);
input_args->result_in_rbuf = (ROOT_NODE == my_reduction_node->my_node_type);
rc = ompi_datatype_copy_content_same_ddt(dtype, count, (char *)rbuf,
(char *)sbuf);
if (0 != rc) {
return OMPI_ERROR;
}
if (LEAF_NODE != my_reduction_node->my_node_type) {
volatile void *child_rbuf;
/* for each child */
/* my_result_data = child_result_data (op) my_source_data */
for (child = 0 ; child < n_children ; ++child) {
child_rank = my_reduction_node->children_ranks[child];
child_rank += process_shift;
/* wrap around */
if( group_size <= child_rank ){
child_rank-=group_size;
}
/*child_ctl_pointer = ctl_structs[child_rank];*/
child_ctl_pointer = data_buffs[child_rank].ctl_struct;
child_data_pointer = data_buffs[child_rank].payload;
child_rbuf = child_data_pointer + rbuf_offset;
/* wait until the child's data is ready for use */
while (!IS_PEER_READY(child_ctl_pointer, ready_flag, sequence_number, REDUCE_FLAG, bcol_id)) {
opal_progress();
}
/* apply collective operation */
ompi_op_reduce(op,(void *)child_rbuf,(void *)rbuf, count,dtype);
} /* end child loop */
}
if (ROOT_NODE != my_reduction_node->my_node_type) {
opal_atomic_wmb ();
my_ctl_pointer->flags[REDUCE_FLAG][bcol_id] = ready_flag;
}
my_ctl_pointer->starting_flag_value[bcol_id]++;
return rc;
}
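/*
 * Illustrative sketch (assumption, not part of the removed file): a sequential
 * model of the fan-in reduction implemented above. Children are combined into
 * their parent's buffer; after the sweep the root (rank 0 here) holds the
 * reduction over the whole group. A heap-style parent formula stands in for
 * the netpatterns reduction tree.
 */
#include <stdio.h>

#define RADIX 2
#define GROUP 7

int main(void)
{
    int result[GROUP];
    for (int r = 0; r < GROUP; r++) {
        result[r] = r + 1;                 /* each rank's own contribution */
    }
    /* walk from the bottom of the tree up so every child is final before
     * its parent reads it */
    for (int r = GROUP - 1; r > 0; r--) {
        int parent = (r - 1) / RADIX;
        result[parent] += result[r];
    }
    printf("root holds %d (expected %d)\n", result[0], GROUP * (GROUP + 1) / 2);
    return 0;
}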

View file

@ -1,92 +0,0 @@
#ifndef __BASESMUMA_REDUCE_H_
#define __BASESMUMA_REDUCE_H_
#include "ompi_config.h"
#include "ompi/mca/bcol/basesmuma/bcol_basesmuma.h"
#include "ompi/constants.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h"
#include "bcol_basesmuma_utils.h"
#include <unistd.h>
enum {
BLOCK_OFFSET = 0,
LOCAL_REDUCE_SEG_OFFSET,
BLOCK_COUNT,
SEG_SIZE,
NOFFSETS
};
int compute_knomial_reduce_offsets(int group_index, int count, struct
ompi_datatype_t *dtype,int k_radix,int n_exchanges,
int **offsets);
int compute_knomial_reduce_offsets_reverse(int group_index, int count, struct
ompi_datatype_t *dtype,int k_radix,int n_exchanges,
int **offsets);
int bcol_basesmuma_lmsg_reduce_recursivek_scatter_reduce(mca_bcol_basesmuma_module_t *sm_module,
const int buffer_index, void *sbuf,
void *rbuf,
struct ompi_op_t *op,
const int count, struct ompi_datatype_t *dtype,
const int relative_group_index,
const int padded_start_byte,
volatile int8_t ready_flag,
volatile mca_bcol_basesmuma_payload_t *data_buffs);
int bcol_basesmuma_lmsg_reduce_knomial_gather(mca_bcol_basesmuma_module_t *basesmuma_module,
const int buffer_index,
void *sbuf,void *rbuf, int count, struct
ompi_datatype_t *dtype,
const int my_group_index,
const int padded_start_byte,
volatile int8_t rflag,
volatile mca_bcol_basesmuma_payload_t *data_buffs);
int bcol_basesmuma_lmsg_reduce_extra_root(mca_bcol_basesmuma_module_t *sm_module,
const int buffer_index, void *sbuf,
void *rbuf,
struct ompi_op_t *op,
const int count, struct ompi_datatype_t *dtype,
const int relative_group_index,
const int padded_start_byte,
volatile int8_t rflag,
volatile mca_bcol_basesmuma_payload_t *data_buffs);
int bcol_basesmuma_lmsg_reduce_extra_non_root(mca_bcol_basesmuma_module_t *sm_module,
const int buffer_index, void *sbuf,
void *rbuf,
int root,
struct ompi_op_t *op,
const int count, struct ompi_datatype_t *dtype,
const int relative_group_index,
const int group_size,
const int padded_start_byte,
volatile int8_t rflag,
volatile mca_bcol_basesmuma_payload_t *data_buffs);
int bcol_basesmuma_lmsg_reduce(bcol_function_args_t *input_args,
mca_bcol_base_function_t *c_input_args);
int bcol_basesmuma_lmsg_reduce_extra(bcol_function_args_t *input_args,
mca_bcol_base_function_t *c_input_args);
void basesmuma_reduce_recv(int my_group_index, int peer,
void *recv_buffer,
int recv_size,
volatile int8_t ready_flag_val,
volatile mca_bcol_basesmuma_payload_t *data_buffs);
void basesmuma_reduce_send(int my_group_index,
int peer,
void *send_buffer,
int snd_size,
int send_offset,
volatile int8_t ready_flag_val,
volatile mca_bcol_basesmuma_payload_t *data_buffs);
#endif

View file

@ -1,442 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/include/ompi/constants.h"
#include "ompi/mca/bcol/bcol.h"
#include "ompi/mca/bcol/base/base.h"
#include "ompi/mca/bcol/basesmuma/bcol_basesmuma.h"
/*
#define IS_BARRIER_READY(peer, my_flag, my_sequence_number)\
(((peer)->sequence_number == (my_sequence_number) && \
(peer)->flags[BARRIER_RKING_FLAG][bcol_id] >= (my_flag) \
)? true : false )
*/
#define CALC_ACTIVE_REQUESTS(active_requests,peers, tree_order) \
do{ \
for( j = 0; j < (tree_order - 1); j++){ \
if( 0 > peers[j] ) { \
/* set the bit */ \
*active_requests ^= (1<<j); \
} \
} \
}while(0)
/*
* Recursive K-ing barrier
*/
/*
*
* Recursive k-ing algorithm
* Example: k=3, n=9
*
*
* Number of exchange steps = log_k(n)
* Number of steps in exchange step = k (radix)
*
*/
int bcol_basesmuma_k_nomial_barrier_init(bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args)
{
/* local variables */
int flag_offset = 0;
volatile int8_t ready_flag;
mca_bcol_basesmuma_module_t *bcol_module = (mca_bcol_basesmuma_module_t *) const_args->bcol_module;
netpatterns_k_exchange_node_t *exchange_node = &bcol_module->knomial_allgather_tree;
mca_bcol_basesmuma_component_t *cm = &mca_bcol_basesmuma_component;
uint32_t buffer_index = input_args->buffer_index;
int *active_requests =
&(bcol_module->ml_mem.nb_coll_desc[buffer_index].active_requests);
int *iteration = &bcol_module->ml_mem.nb_coll_desc[buffer_index].iteration;
int *status = &bcol_module->ml_mem.nb_coll_desc[buffer_index].status;
int leading_dim, buff_idx, idx;
int bcol_id = (int) bcol_module->super.bcol_id;
int i, j, probe;
int src;
int pow_k, tree_order;
int max_requests = 0; /* important to initialize this */
bool matched;
int64_t sequence_number=input_args->sequence_num;
int my_rank = bcol_module->super.sbgp_partner_module->my_index;
volatile mca_bcol_basesmuma_payload_t *data_buffs;
/* control structures */
volatile mca_bcol_basesmuma_header_t *my_ctl_pointer;
volatile mca_bcol_basesmuma_header_t *peer_ctl_pointer;
#if 0
fprintf(stderr,"entering sm barrier sn = %d buff index = %d\n",sequence_number,input_args->buffer_index);
#endif
/* initialize the iteration counter */
buff_idx = input_args->buffer_index;
leading_dim = bcol_module->colls_no_user_data.size_of_group;
idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0);
data_buffs=(volatile mca_bcol_basesmuma_payload_t *)
bcol_module->colls_with_user_data.data_buffs+idx;
/* Set pointer to current proc ctrl region */
my_ctl_pointer = data_buffs[my_rank].ctl_struct;
/* init the header */
BASESMUMA_HEADER_INIT(my_ctl_pointer, ready_flag, sequence_number, bcol_id);
/* initialize these */
*iteration = 0;
*active_requests = 0;
*status = 0;
/* k-nomial parameters */
tree_order = exchange_node->tree_order;
pow_k = exchange_node->log_tree_order;
/* calculate the maximum number of requests
* at each level each rank communicates with
* at most (k - 1) peers
* so if we set k - 1 bit fields in "max_requests", then
* we have max_request == 2^(k - 1) -1
*/
for(i = 0; i < (tree_order - 1); i++){
max_requests ^= (1<<i);
}
/* let's begin the collective, starting with extra ranks and their
* respective proxies
*/
if( EXTRA_NODE == exchange_node->node_type ) {
/* then I will signal to my proxy rank*/
my_ctl_pointer->flags[BARRIER_RKING_FLAG][bcol_id] = ready_flag;
ready_flag = flag_offset + 1 + pow_k + 2;
/* now, poll for completion */
src = exchange_node->rank_extra_sources_array[0];
peer_ctl_pointer = data_buffs[src].ctl_struct;
for( i = 0; i < cm->num_to_probe ; i++ ) {
if(IS_PEER_READY(peer_ctl_pointer, ready_flag, sequence_number, BARRIER_RKING_FLAG, bcol_id)){
goto FINISHED;
}
}
/* cache state and bail */
*iteration = -1;
return BCOL_FN_STARTED;
}else if ( 0 < exchange_node->n_extra_sources ) {
/* I am a proxy for someone */
src = exchange_node->rank_extra_sources_array[0];
peer_ctl_pointer = data_buffs[src].ctl_struct;
/* probe for extra rank's arrival */
for( i = 0, matched = false ; i < cm->num_to_probe && !matched ; i++) {
if(IS_PEER_READY(peer_ctl_pointer,ready_flag, sequence_number, BARRIER_RKING_FLAG, bcol_id)){
/* copy it in */
matched = true;
break;
}
}
if (!matched) {
*status = ready_flag;
*iteration = -1;
return BCOL_FN_STARTED;
}
}
/* bump the ready flag */
ready_flag++;
/* we start the recursive k - ing phase */
for( *iteration = 0; *iteration < pow_k; (*iteration)++) {
/* announce my arrival */
my_ctl_pointer->flags[BARRIER_RKING_FLAG][bcol_id] = ready_flag;
/* calculate the number of active requests */
CALC_ACTIVE_REQUESTS(active_requests,exchange_node->rank_exchanges[*iteration],tree_order);
/* Now post the recv's */
for( j = 0; j < (tree_order - 1); j++ ) {
/* recv phase */
src = exchange_node->rank_exchanges[*iteration][j];
if( src < 0 ) {
/* then not a valid rank, continue */
continue;
}
peer_ctl_pointer = data_buffs[src].ctl_struct;
if( !(*active_requests&(1<<j))) {
/* then the bit hasn't been set, thus this peer
* hasn't been processed at this level
* I am putting the probe loop as the innermost loop to achieve
* better temporal locality; this comes at a cost to asynchronicity
* but should get better cache performance
*/
for( probe = 0; probe < cm->num_to_probe ; probe++){
if(IS_PEER_READY(peer_ctl_pointer,ready_flag, sequence_number, BARRIER_RKING_FLAG, bcol_id)){
/* set this request's bit */
*active_requests ^= (1<<j);
break;
}
}
}
}
if( max_requests == *active_requests ){
/* bump the ready flag */
ready_flag++;
/*reset the active requests */
*active_requests = 0;
} else {
/* cache the state and hop out
* only the iteration needs to be tracked
*/
*status = my_ctl_pointer->flags[BARRIER_RKING_FLAG][bcol_id];
return BCOL_FN_STARTED;
}
}
/* bump the flag one more time for the extra rank */
ready_flag = flag_offset + 1 + pow_k + 2;
/* finish off the last piece, send the data back to the extra */
if( 0 < exchange_node->n_extra_sources ) {
/* simply announce my arrival */
my_ctl_pointer->flags[BARRIER_RKING_FLAG][bcol_id] = ready_flag;
}
FINISHED:
my_ctl_pointer->starting_flag_value[bcol_id]++;
return BCOL_FN_COMPLETE;
}
/* allgather progress function */
int bcol_basesmuma_k_nomial_barrier_progress(bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args)
{
/* local variables */
int flag_offset;
volatile int8_t ready_flag;
mca_bcol_basesmuma_module_t *bcol_module = (mca_bcol_basesmuma_module_t *) const_args->bcol_module;
netpatterns_k_exchange_node_t *exchange_node = &bcol_module->knomial_allgather_tree;
mca_bcol_basesmuma_component_t *cm = &mca_bcol_basesmuma_component;
uint32_t buffer_index = input_args->buffer_index;
int *active_requests =
&(bcol_module->ml_mem.nb_coll_desc[buffer_index].active_requests);
int *iteration = &bcol_module->ml_mem.nb_coll_desc[buffer_index].iteration;
int *status = &bcol_module->ml_mem.nb_coll_desc[buffer_index].status;
int *iter = iteration; /* double alias */
int leading_dim, idx, buff_idx;
int i, j, probe;
int src;
int max_requests = 0; /* critical to set this */
int pow_k, tree_order;
int bcol_id = (int) bcol_module->super.bcol_id;
bool matched;
int64_t sequence_number=input_args->sequence_num;
int my_rank = bcol_module->super.sbgp_partner_module->my_index;
volatile mca_bcol_basesmuma_payload_t *data_buffs;
/* control structures */
volatile mca_bcol_basesmuma_header_t *my_ctl_pointer;
volatile mca_bcol_basesmuma_header_t *peer_ctl_pointer;
#if 0
fprintf(stderr,"%d: entering sm allgather progress active requests %d iter %d ready_flag %d\n",my_rank,
*active_requests,*iter,*status);
#endif
buff_idx = buffer_index;
leading_dim=bcol_module->colls_no_user_data.size_of_group;
idx=SM_ARRAY_INDEX(leading_dim,buff_idx,0);
data_buffs=(volatile mca_bcol_basesmuma_payload_t *)
bcol_module->colls_with_user_data.data_buffs+idx;
my_ctl_pointer = data_buffs[my_rank].ctl_struct;
/* increment the starting flag by one and return */
flag_offset = my_ctl_pointer->starting_flag_value[bcol_id];
ready_flag = *status;
/* k-nomial parameters */
tree_order = exchange_node->tree_order;
pow_k = exchange_node->log_tree_order;
/* calculate the maximum number of requests
* at each level each rank communicates with
* at most (k - 1) peers
* so if we set k - 1 bit fields in "max_requests", then
* we have max_request == 2^(k - 1) -1
*/
for(i = 0; i < (tree_order - 1); i++){
max_requests ^= (1<<i);
}
/* let's begin the collective, starting with extra ranks and their
* respective proxies
*/
if( EXTRA_NODE == exchange_node->node_type ) {
/* If I'm in here, then I must be looking for data */
ready_flag = flag_offset + 1 + pow_k + 2;
src = exchange_node->rank_extra_sources_array[0];
peer_ctl_pointer = data_buffs[src].ctl_struct;
for( i = 0; i < cm->num_to_probe ; i++ ) {
if(IS_PEER_READY(peer_ctl_pointer, ready_flag, sequence_number, BARRIER_RKING_FLAG, bcol_id)){
goto FINISHED;
}
}
/* haven't found it, state is cached, bail out */
return BCOL_FN_STARTED;
}else if ( ( -1 == *iteration ) && (0 < exchange_node->n_extra_sources) ) {
/* I am a proxy for someone */
src = exchange_node->rank_extra_sources_array[0];
peer_ctl_pointer = data_buffs[src].ctl_struct;
/* probe for extra rank's arrival */
for( i = 0, matched = false ; i < cm->num_to_probe && !matched ; i++) {
if(IS_PEER_READY(peer_ctl_pointer,ready_flag, sequence_number, BARRIER_RKING_FLAG, bcol_id)){
matched = true;
/* bump the flag */
ready_flag++;
*iteration = 0;
break;
}
}
if (!matched) {
return BCOL_FN_STARTED;
}
}
/* start the recursive k - ing phase */
for( *iter=*iteration; *iter < pow_k; (*iter)++) {
/* I am ready at this level */
my_ctl_pointer->flags[BARRIER_RKING_FLAG][bcol_id] = ready_flag;
if( 0 == *active_requests ) {
/* flip some bits, if we don't have active requests from a previous visit */
CALC_ACTIVE_REQUESTS(active_requests,exchange_node->rank_exchanges[*iter],tree_order);
}
for( j = 0; j < (tree_order - 1); j++ ) {
/* recv phase */
src = exchange_node->rank_exchanges[*iter][j];
if( src < 0 ) {
/* then not a valid rank, continue
*/
continue;
}
peer_ctl_pointer = data_buffs[src].ctl_struct;
if( !(*active_requests&(1<<j))){
/* I am putting the probe loop as the innermost loop to achieve
* better temporal locality
*/
for( probe = 0; probe < cm->num_to_probe ; probe++){
if(IS_PEER_READY(peer_ctl_pointer,ready_flag, sequence_number, BARRIER_RKING_FLAG, bcol_id)){
/* flip the request's bit */
*active_requests ^= (1<<j);
break;
}
}
}
}
if( max_requests == *active_requests ){
/* bump the ready flag */
ready_flag++;
/* reset the active requests for the next level */
*active_requests = 0;
/* calculate the number of active requests
* logically makes sense to do it here. We don't
* want to inadvertently flip a bit to zero that we
* set previously
*/
} else {
/* state is saved, hop out
*/
*status = my_ctl_pointer->flags[BARRIER_RKING_FLAG][bcol_id];
return BCOL_FN_STARTED;
}
}
/* bump the flag one more time for the extra rank */
ready_flag = flag_offset + 1 + pow_k + 2;
/* finish off the last piece, send the data back to the extra */
if( 0 < exchange_node->n_extra_sources ) {
/* simply announce my arrival */
my_ctl_pointer->flags[BARRIER_RKING_FLAG][bcol_id] = ready_flag;
}
FINISHED:
my_ctl_pointer->starting_flag_value[bcol_id]++;
return BCOL_FN_COMPLETE;
}
/* Register k-nomial barrier functions to the BCOL function table,
* so they can be selected
*/
int bcol_basesmuma_barrier_init(mca_bcol_base_module_t *super)
{
mca_bcol_base_coll_fn_comm_attributes_t comm_attribs;
mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs;
comm_attribs.bcoll_type = BCOL_BARRIER;
comm_attribs.comm_size_min = 0;
comm_attribs.comm_size_max = 1024 * 1024;
comm_attribs.waiting_semantics = NON_BLOCKING;
inv_attribs.bcol_msg_min = 0;
inv_attribs.bcol_msg_max = 20000; /* range 1 */
inv_attribs.datatype_bitmap = 0xffffffff;
inv_attribs.op_types_bitmap = 0xffffffff;
comm_attribs.data_src = DATA_SRC_KNOWN;
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
bcol_basesmuma_k_nomial_barrier_init,
bcol_basesmuma_k_nomial_barrier_progress);
return OMPI_SUCCESS;
}
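/*
 * Illustrative sketch (assumption, not part of the removed file): the
 * bookkeeping the recursive k-ing barrier above depends on. With radix k and a
 * power-of-k group of size n there are log_k(n) levels, each rank waits on
 * k-1 peers per level, and arrival is tracked in a (k-1)-bit mask whose
 * "all set" value is 2^(k-1) - 1.
 */
#include <stdio.h>

int main(void)
{
    const int k = 3, n = 9;
    int levels = 0;
    for (int span = 1; span < n; span *= k) {
        levels++;
    }
    int max_requests = 0;
    for (int i = 0; i < k - 1; i++) {
        max_requests |= (1 << i);          /* set k-1 bits */
    }
    printf("k=%d n=%d -> %d levels, %d peers/level, full mask 0x%x\n",
           k, n, levels, k - 1, max_requests);
    return 0;
}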

View file

@ -1,588 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2013-2014 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
*/
#include "ompi_config.h"
#include "mpi.h"
#include "ompi/constants.h"
#include "ompi/communicator/communicator.h"
#include "opal/mca/mpool/base/base.h"
#include "ompi/mca/bcol/bcol.h"
#include "ompi/mca/bcol/base/base.h"
#include "ompi/patterns/comm/coll_ops.h"
#include "opal/class/opal_object.h"
#include "opal/dss/dss.h"
#include "bcol_basesmuma.h"
int base_bcol_basesmuma_setup_ctl_struct(
mca_bcol_basesmuma_module_t *sm_bcol_module,
mca_bcol_basesmuma_component_t *cs,
sm_buffer_mgmt *ctl_mgmt);
/* this is the new one, uses the pml allgather */
int base_bcol_basesmuma_exchange_offsets(
mca_bcol_basesmuma_module_t *sm_bcol_module,
void **result_array, uint64_t mem_offset, int loop_limit,
int leading_dim)
{
int ret=OMPI_SUCCESS,i;
int count;
int index_in_group;
char *send_buff;
char *recv_buff;
uint64_t rem_mem_offset;
/* malloc some memory */
count = sizeof(uint64_t) + sizeof(int);
send_buff = (char *) malloc(count);
recv_buff = (char *) malloc(count *
sm_bcol_module->super.sbgp_partner_module->group_size);
/* exchange the base pointer for the control structures - gather
* everyone else's information.
*/
/* pack the offset of the allocated region */
memcpy((void *) send_buff, (void *) &(sm_bcol_module->super.sbgp_partner_module->my_index), sizeof(int));
memcpy((void *) (send_buff+ sizeof(int)), (void *) &(mem_offset), sizeof(uint64_t));
/* get the offsets from all procs, so can setup the control data
* structures.
*/
ret=comm_allgather_pml((void *) send_buff,(void *) recv_buff,count,
MPI_BYTE,
sm_bcol_module->super.sbgp_partner_module->my_index,
sm_bcol_module->super.sbgp_partner_module->group_size,
sm_bcol_module->super.sbgp_partner_module->group_list,
sm_bcol_module->super.sbgp_partner_module->group_comm);
if( OMPI_SUCCESS != ret ) {
goto exit_ERROR;
}
/* get the control structure offsets within the shared memory
* region and populate the control structures - we do not assume
* any symmetry in memory layout of each process
*/
/* loop over the procs in the group */
for(i = 0; i < sm_bcol_module->super.sbgp_partner_module->group_size; i++){
int array_id;
/* get this peer's index in the group */
memcpy((void *) &index_in_group, (void *) (recv_buff + i*count) , sizeof(int));
/* get the offset */
memcpy((void *) &rem_mem_offset, (void *) (recv_buff + i*count + sizeof(int)), sizeof(uint64_t));
array_id=SM_ARRAY_INDEX(leading_dim,0,index_in_group);
result_array[array_id]=(void *)(uintptr_t)rem_mem_offset;
}
exit_ERROR:
/* clean up */
if( NULL != send_buff ) {
free(send_buff);
send_buff = NULL;
}
if( NULL != recv_buff ) {
free(recv_buff);
recv_buff = NULL;
}
return ret;
}
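/*
 * Illustrative sketch (not part of the removed file): the fixed buffer layout
 * the offset exchange above relies on - an int group index followed by a
 * 64-bit offset, copied byte-wise with memcpy so no struct padding leaks into
 * the buffer that comm_allgather_pml() moves around.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    char buf[sizeof(int) + sizeof(uint64_t)];
    int my_index = 3;
    uint64_t mem_offset = 0x1000;

    /* pack */
    memcpy(buf, &my_index, sizeof(int));
    memcpy(buf + sizeof(int), &mem_offset, sizeof(uint64_t));

    /* unpack (what each rank does for every peer's entry) */
    int index_in_group;
    uint64_t rem_mem_offset;
    memcpy(&index_in_group, buf, sizeof(int));
    memcpy(&rem_mem_offset, buf + sizeof(int), sizeof(uint64_t));

    printf("peer %d placed its block at offset 0x%" PRIx64 "\n",
           index_in_group, rem_mem_offset);
    return 0;
}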
#if 0
int base_bcol_basesmuma_exchange_offsets(
mca_bcol_basesmuma_module_t *sm_bcol_module,
void **result_array, uint64_t mem_offset, int loop_limit,
int leading_dim)
{
int ret=OMPI_SUCCESS,i,dummy;
int index_in_group, pcnt;
opal_list_t peers;
ompi_namelist_t *peer;
ompi_proc_t *proc_temp, *my_id;
opal_buffer_t *send_buffer = OBJ_NEW(opal_buffer_t);
opal_buffer_t *recv_buffer = OBJ_NEW(opal_buffer_t);
uint64_t rem_mem_offset;
/* exchange the base pointer for the control structures - gather
* everyone else's information.
*/
/* get list of procs that will participate in the communication */
OBJ_CONSTRUCT(&peers, opal_list_t);
for (i = 0; i < sm_bcol_module->super.sbgp_partner_module->group_size; i++) {
/* get the proc info */
proc_temp = ompi_comm_peer_lookup(
sm_bcol_module->super.sbgp_partner_module->group_comm,
sm_bcol_module->super.sbgp_partner_module->group_list[i]);
peer = OBJ_NEW(ompi_namelist_t);
peer->name.jobid = proc_temp->proc_name.jobid;
peer->name.vpid = proc_temp->proc_name.vpid;
opal_list_append(&peers,&peer->super); /* this is with the new field called "super" in ompi_namelist_t struct */
}
/* pack up the data into the allgather send buffer */
if (NULL == send_buffer || NULL == recv_buffer) {
opal_output (ompi_bcol_base_framework.framework_output, "Cannot allocate memory for sbuffer or rbuffer\n");
ret = OMPI_ERROR;
goto exit_ERROR;
}
/* get my proc information */
my_id = ompi_proc_local();
/* pack my information */
ret = opal_dss.pack(send_buffer,
&(sm_bcol_module->super.sbgp_partner_module->my_index),1,OPAL_UINT32);
if (OMPI_SUCCESS != ret) {
opal_output (ompi_bcol_base_framework.framework_output, "Error packing my_index!!\n");
goto exit_ERROR;
}
/* pack the offset of the allocated region */
ret = opal_dss.pack(send_buffer,&(mem_offset),1,OPAL_UINT64);
if (OMPI_SUCCESS != ret) {
goto exit_ERROR;
}
/* get the offsets from all procs, so can setup the control data
* structures.
*/
if (OMPI_SUCCESS != (ret = ompi_rte_allgather_list(&peers, send_buffer, recv_buffer))) {
opal_output (ompi_bcol_base_framework.framework_output, "ompi_rte_allgather_list returned error %d\n", ret);
goto exit_ERROR;
}
/* unpack the dummy */
pcnt=1;
ret = opal_dss.unpack(recv_buffer,&dummy, &pcnt, OPAL_INT32);
if (OMPI_SUCCESS != ret) {
opal_output (ompi_bcol_base_framework.framework_output, "unpack returned error %d for dummy\n",ret);
goto exit_ERROR;
}
/* get the control structure offsets within the shared memory
* region and populate the control structures - we do not assume
* any symmetry in memory layout of each process
*/
/* loop over the procs in the group */
for(i = 0; i < sm_bcol_module->super.sbgp_partner_module->group_size; i++){
int array_id;
pcnt=1;
ret = opal_dss.unpack(recv_buffer,&index_in_group, &pcnt, OPAL_UINT32);
if (OMPI_SUCCESS != ret) {
opal_output (ompi_bcol_base_framework.framework_output, "unpack returned error %d for remote index_in_group\n",ret);
goto exit_ERROR;
}
/* get the offset */
pcnt=1;
ret = opal_dss.unpack(recv_buffer,&rem_mem_offset, &pcnt, OPAL_UINT64);
if (OMPI_SUCCESS != ret) {
opal_output (ompi_bcol_base_framework.framework_output, "unpack returned error %d for remote memory offset\n",ret);
goto exit_ERROR;
}
array_id=SM_ARRAY_INDEX(leading_dim,0,index_in_group);
result_array[array_id]=(void *)rem_mem_offset;
}
/* clean up */
peer=(ompi_namelist_t *)opal_list_remove_first(&peers);
while( NULL !=peer) {
OBJ_RELEASE(peer);
peer=(ompi_namelist_t *)opal_list_remove_first(&peers);
}
OBJ_DESTRUCT(&peers);
if( send_buffer ) {
OBJ_RELEASE(send_buffer);
}
if( recv_buffer ) {
OBJ_RELEASE(recv_buffer);
}
return ret;
exit_ERROR:
/* free peer list */
peer=(ompi_namelist_t *)opal_list_remove_first(&peers);
while( NULL !=peer) {
OBJ_RELEASE(peer);
peer=(ompi_namelist_t *)opal_list_remove_first(&peers);
}
OBJ_DESTRUCT(&peers);
if( send_buffer ) {
OBJ_RELEASE(send_buffer);
}
if( recv_buffer ) {
OBJ_RELEASE(recv_buffer);
}
return ret;
}
#endif
static int base_bcol_basesmuma_exchange_ctl_params(
mca_bcol_basesmuma_module_t *sm_bcol_module,
mca_bcol_basesmuma_component_t *cs,
sm_buffer_mgmt *ctl_mgmt, list_data_t *data_blk)
{
int ret=OMPI_SUCCESS,i,loop_limit;
int leading_dim, buf_id;
void *mem_offset;
unsigned char *base_ptr;
mca_bcol_basesmuma_ctl_struct_t *ctl_ptr;
/* data block base offset in the mapped file */
mem_offset = (void *)((uintptr_t)data_blk->data -
(uintptr_t)cs->sm_ctl_structs->data_addr);
/* number of buffers in data block */
loop_limit=cs->basesmuma_num_mem_banks+ctl_mgmt->number_of_buffs;
leading_dim=ctl_mgmt->size_of_group;
ret=comm_allgather_pml(&mem_offset, ctl_mgmt->ctl_buffs, sizeof(void *),
MPI_BYTE, sm_bcol_module->super.sbgp_partner_module->my_index,
sm_bcol_module->super.sbgp_partner_module->group_size,
sm_bcol_module->super.sbgp_partner_module->group_list,
sm_bcol_module->super.sbgp_partner_module->group_comm);
if( OMPI_SUCCESS != ret ) {
goto exit_ERROR;
}
#if 0
ret=base_bcol_basesmuma_exchange_offsets( sm_bcol_module,
(void **)ctl_mgmt->ctl_buffs, mem_offset, loop_limit, leading_dim);
if( OMPI_SUCCESS != ret ) {
goto exit_ERROR;
}
#endif
/* convert memory offset to virtual address in current rank */
for (i=0;i< sm_bcol_module->super.sbgp_partner_module->group_size;i++) {
/* get the base pointer */
int array_id=SM_ARRAY_INDEX(leading_dim,0,i);
if( i == sm_bcol_module->super.sbgp_partner_module->my_index) {
/* me */
base_ptr=cs->sm_ctl_structs->map_addr;
} else {
base_ptr=sm_bcol_module->ctl_backing_files_info[i]->sm_mmap->map_addr;
}
ctl_mgmt->ctl_buffs[array_id]=(void *)
(uintptr_t)(((uint64_t)(uintptr_t)ctl_mgmt->ctl_buffs[array_id])+(uint64_t)(uintptr_t)base_ptr);
for( buf_id = 1 ; buf_id < loop_limit ; buf_id++ ) {
int array_id_m1=SM_ARRAY_INDEX(leading_dim,(buf_id-1),i);
array_id=SM_ARRAY_INDEX(leading_dim,buf_id,i);
ctl_mgmt->ctl_buffs[array_id]=(void *) (uintptr_t)((uint64_t)(uintptr_t)(ctl_mgmt->ctl_buffs[array_id_m1])+
(uint64_t)(uintptr_t)sizeof(mca_bcol_basesmuma_ctl_struct_t));
}
}
    /* initialize my control structures */
for( buf_id = 0 ; buf_id < loop_limit ; buf_id++ ) {
int my_idx=sm_bcol_module->super.sbgp_partner_module->my_index;
int array_id=SM_ARRAY_INDEX(leading_dim,buf_id,my_idx);
ctl_ptr = (mca_bcol_basesmuma_ctl_struct_t *)
ctl_mgmt->ctl_buffs[array_id];
        /* initialize the data structures - RLG: this is only one of the data
         * structures that need to be initialized; more are missing */
ctl_ptr->sequence_number=-1;
ctl_ptr->flag=-1;
ctl_ptr->index=0;
ctl_ptr->src_ptr = NULL;
}
return ret;
exit_ERROR:
return ret;
}
static int base_bcol_basesmuma_setup_ctl (mca_bcol_basesmuma_module_t *sm_bcol_module,
mca_bcol_basesmuma_component_t *cs)
{
    const int my_index = sm_bcol_module->super.sbgp_partner_module->my_index;
bcol_basesmuma_smcm_file_t input_file;
int ret;
/* exchange remote addressing information if it has not already been done */
if (NULL == sm_bcol_module->ctl_backing_files_info) {
input_file.file_name=cs->sm_ctl_structs->map_path;
input_file.size=cs->sm_ctl_structs->map_size;
input_file.size_ctl_structure=0;
input_file.data_seg_alignment=BASESMUMA_CACHE_LINE_SIZE;
input_file.mpool_size=cs->sm_ctl_structs->map_size;
ret = bcol_basesmuma_smcm_allgather_connection(sm_bcol_module,
sm_bcol_module->super.sbgp_partner_module,
&(cs->sm_connections_list),
&(sm_bcol_module->ctl_backing_files_info),
sm_bcol_module->super.sbgp_partner_module->group_comm,
input_file, cs->clt_base_fname,
false);
if (OMPI_SUCCESS != ret) {
return ret;
}
}
    /* fill in the pointers to the other ranks' scratch shared memory */
if (NULL == sm_bcol_module->shared_memory_scratch_space) {
sm_bcol_module->shared_memory_scratch_space =
calloc (sm_bcol_module->super.sbgp_partner_module->group_size, sizeof (void *));
if (!sm_bcol_module->shared_memory_scratch_space) {
opal_output (ompi_bcol_base_framework.framework_output, "Cannot allocate memory for shared_memory_scratch_space.");
return OMPI_ERR_OUT_OF_RESOURCE;
}
for (int i = 0 ; i < sm_bcol_module->super.sbgp_partner_module->group_size ; ++i) {
if (i == my_index) {
/* local file data is not cached in this list */
continue;
}
sm_bcol_module->shared_memory_scratch_space[i] =
(void *)((intptr_t) sm_bcol_module->ctl_backing_files_info[i]->sm_mmap +
cs->scratch_offset_from_base_ctl_file);
}
sm_bcol_module->shared_memory_scratch_space[my_index] =
(void *)((intptr_t) cs->sm_ctl_structs->map_addr + cs->scratch_offset_from_base_ctl_file);
}
return OMPI_SUCCESS;
}
int base_bcol_basesmuma_setup_ctl_struct(
mca_bcol_basesmuma_module_t *sm_bcol_module,
mca_bcol_basesmuma_component_t *cs,
sm_buffer_mgmt *ctl_mgmt)
{
int n_ctl, n_levels;
int n_ctl_structs;
size_t malloc_size;
/*
     * set up my no-user-data control structures
*/
/* number of banks and regions per bank are already a power of 2 */
n_ctl_structs=cs->basesmuma_num_mem_banks*
cs->basesmuma_num_regions_per_bank;
/* initialize the control structure management struct -
* for collectives without user data
*---------------------------------------------------------------
*/
ctl_mgmt->number_of_buffs=n_ctl_structs;
ctl_mgmt->num_mem_banks=
cs->basesmuma_num_mem_banks;
ctl_mgmt->num_buffs_per_mem_bank=
cs->basesmuma_num_regions_per_bank;
ctl_mgmt->size_of_group=
sm_bcol_module->super.sbgp_partner_module->group_size;
ompi_roundup_to_power_radix(2,cs->basesmuma_num_regions_per_bank,&n_levels);
ctl_mgmt->log2_num_buffs_per_mem_bank=n_levels;
ompi_roundup_to_power_radix(2,n_ctl_structs,&n_levels);
ctl_mgmt->log2_number_of_buffs=n_levels;
ctl_mgmt->mask=n_ctl_structs-1;
sm_bcol_module->super.n_poll_loops=cs->n_poll_loops;
malloc_size=
(ctl_mgmt->number_of_buffs +
ctl_mgmt->num_mem_banks ) *
ctl_mgmt->size_of_group *
sizeof(void *);
ctl_mgmt->ctl_buffs = malloc(malloc_size);
if (!ctl_mgmt->ctl_buffs) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
/*
     * set up the no-data buffer management data
*/
n_ctl = ctl_mgmt->num_mem_banks;
ctl_mgmt->ctl_buffs_mgmt = (mem_bank_management_t *) calloc (n_ctl, sizeof (mem_bank_management_t));
if (!ctl_mgmt->ctl_buffs_mgmt) {
opal_output (ompi_bcol_base_framework.framework_output, "Cannot allocate memory for ctl_buffs_mgmt");
free (ctl_mgmt->ctl_buffs);
ctl_mgmt->ctl_buffs = NULL;
return OMPI_ERR_OUT_OF_RESOURCE;
}
/* initialize each individual element */
for (int i = 0 ; i < n_ctl ; ++i) {
opal_list_item_t *item;
opal_mutex_t *mutex_ptr;
ctl_mgmt->ctl_buffs_mgmt[i].available_buffers=
ctl_mgmt->num_buffs_per_mem_bank;
ctl_mgmt->ctl_buffs_mgmt[i].number_of_buffers=
ctl_mgmt->num_buffs_per_mem_bank;
mutex_ptr = &(ctl_mgmt->ctl_buffs_mgmt[i].mutex);
OBJ_CONSTRUCT(mutex_ptr, opal_mutex_t);
ctl_mgmt->ctl_buffs_mgmt[i].index_shared_mem_ctl_structs=i;
item = (opal_list_item_t *)&(ctl_mgmt->ctl_buffs_mgmt[i].nb_barrier_desc);
OBJ_CONSTRUCT(item, opal_list_item_t);
ctl_mgmt->ctl_buffs_mgmt[i].nb_barrier_desc.sm_module =
sm_bcol_module;
ctl_mgmt->ctl_buffs_mgmt[i].nb_barrier_desc.pool_index = i;
/* get the sm_buffer_mgmt pointer for the control structures */
ctl_mgmt->ctl_buffs_mgmt[i].nb_barrier_desc.coll_buff = ctl_mgmt;
}
return OMPI_SUCCESS;
}
/*
* this function initializes the internal scratch buffers and control
 * structures that will be used by the module. It also initializes
* the payload buffer management structures.
*/
int base_bcol_basesmuma_setup_library_buffers(
mca_bcol_basesmuma_module_t *sm_bcol_module,
mca_bcol_basesmuma_component_t *cs)
{
int ret=OMPI_SUCCESS,i;
int n_ctl_structs;
size_t ctl_segement_size,total_memory;
int max_elements;
unsigned char *data_ptr;
/* */
/* setup the control struct memory */
if(!cs->sm_ctl_structs) {
ret = mca_bcol_basesmuma_allocate_sm_ctl_memory(cs);
if(OMPI_SUCCESS != ret) {
opal_output (ompi_bcol_base_framework.framework_output, "In bcol_comm_query mca_bcol_basesmuma_allocate_sm_ctl_memory failed\n");
return ret;
}
/*
* put the memory onto the free list - we have worried about
* alignment in the mpool allocation, and assume that the
         * ctl structures have the appropriate size to maintain alignment
*/
/* figure out segment size */
n_ctl_structs=cs->basesmuma_num_mem_banks*
cs->basesmuma_num_regions_per_bank;
/* add memory for the control structure used for recycling the banks */
n_ctl_structs+=cs->basesmuma_num_mem_banks;
ctl_segement_size=n_ctl_structs*
sizeof(mca_bcol_basesmuma_ctl_struct_t);
total_memory=cs->sm_ctl_structs->map_size - (
(char *)(cs->sm_ctl_structs->data_addr)-
(char *)(cs->sm_ctl_structs->map_addr));
total_memory-=cs->my_scratch_shared_memory_size;
max_elements=total_memory/ctl_segement_size;
/* populate the free list */
data_ptr=cs->sm_ctl_structs->data_addr;
for( i=0 ; i < max_elements ; i++ ) {
list_data_t *item = OBJ_NEW(list_data_t);
if( !item ) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
item->data=(void *)data_ptr;
opal_list_append(&(cs->ctl_structures),(opal_list_item_t *)item);
data_ptr+=ctl_segement_size;
}
/* set the scratch memory pointer and offset */
cs->my_scratch_shared_memory=(char *)data_ptr;
cs->scratch_offset_from_base_ctl_file=(size_t)
((char *)data_ptr-(char *)cs->sm_ctl_structs->map_addr);
/* At this stage the memory is mapped and ready to use by the local rank.
* However, the memory of other processes has not yet been mmaped into the
* memory of this process.
*/
}
    /* initialize no_userdata_ctl */
sm_bcol_module->no_userdata_ctl=(list_data_t *)
opal_list_remove_last(&(cs->ctl_structures));
if (!sm_bcol_module->no_userdata_ctl) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
    /* initialize userdata_ctl */
sm_bcol_module->userdata_ctl = (list_data_t *)
opal_list_remove_last(&(cs->ctl_structures));
if (!sm_bcol_module->userdata_ctl) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
ret = base_bcol_basesmuma_setup_ctl (sm_bcol_module, cs);
if (OMPI_SUCCESS != ret) {
return ret;
}
ret = base_bcol_basesmuma_setup_ctl_struct (sm_bcol_module, cs, &(sm_bcol_module->colls_no_user_data));
if( OMPI_SUCCESS != ret ) {
return ret;
}
ret = base_bcol_basesmuma_setup_ctl_struct (sm_bcol_module, cs, &(sm_bcol_module->colls_with_user_data));
if( OMPI_SUCCESS != ret ) {
return ret;
}
/* used for blocking recursive doubling barrier */
sm_bcol_module->index_blocking_barrier_memory_bank=0;
/* gather the offsets of the control structs relative to the base
* of the shared memory file, and fill in the table with the
     * addresses of all the control structures.
*/
ret = base_bcol_basesmuma_exchange_ctl_params(sm_bcol_module, cs,
&(sm_bcol_module->colls_no_user_data),sm_bcol_module->no_userdata_ctl);
if( OMPI_SUCCESS != ret ) {
return ret;
}
ret = base_bcol_basesmuma_exchange_ctl_params(sm_bcol_module, cs,
&(sm_bcol_module->colls_with_user_data),sm_bcol_module->userdata_ctl);
if( OMPI_SUCCESS != ret ) {
return ret;
}
return OMPI_SUCCESS;
}
OBJ_CLASS_INSTANCE(list_data_t,
opal_list_item_t, NULL, NULL);


@ -1,460 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <stdlib.h>
#include <stdio.h>
#include <fcntl.h>
#include <errno.h>
#ifdef HAVE_STRINGS_H
#include <strings.h>
#endif
#include "ompi/proc/proc.h"
#include "ompi/patterns/comm/coll_ops.h"
#include "opal/align.h"
#include "opal/dss/dss.h"
#include "opal/util/error.h"
#include "opal/util/output.h"
#include "opal/class/opal_list.h"
#include "opal/class/opal_hash_table.h"
#include "bcol_basesmuma.h"
#define SM_BACKING_FILE_NAME_MAX_LEN 256
static bcol_basesmuma_smcm_mmap_t * bcol_basesmuma_smcm_reg_mmap(void *in_ptr, int fd, size_t length,
size_t addr_offset, size_t alignment,
char *file_name);
struct file_info_t {
uint32_t vpid;
uint32_t jobid;
uint64_t file_size;
uint64_t size_ctl_structure;
uint64_t data_seg_alignment;
char file_name[SM_BACKING_FILE_NAME_MAX_LEN];
};
/* need to allocate space for the peer */
static void bcol_basesmuma_smcm_proc_item_t_construct (bcol_basesmuma_smcm_proc_item_t * item)
{
memset ((char *) item + sizeof (item->item), 0, sizeof (*item) - sizeof (item->item));
}
/* need to free the space for the peer */
static void bcol_basesmuma_smcm_proc_item_t_destruct (bcol_basesmuma_smcm_proc_item_t * item)
{
if (item->sm_mmap) {
OBJ_RELEASE(item->sm_mmap);
}
if (item->sm_file.file_name) {
free (item->sm_file.file_name);
item->sm_file.file_name = NULL;
}
}
OBJ_CLASS_INSTANCE(bcol_basesmuma_smcm_proc_item_t,
opal_list_item_t,
bcol_basesmuma_smcm_proc_item_t_construct,
bcol_basesmuma_smcm_proc_item_t_destruct);
static void bcol_basesmuma_smcm_mmap_construct (bcol_basesmuma_smcm_mmap_t *smcm_mmap)
{
memset ((char *) smcm_mmap + sizeof (smcm_mmap->super), 0, sizeof (*smcm_mmap) - sizeof (smcm_mmap->super));
}
static void bcol_basesmuma_smcm_mmap_destruct (bcol_basesmuma_smcm_mmap_t *smcm_mmap)
{
if (smcm_mmap->map_seg) {
munmap ((void *)smcm_mmap->map_seg, smcm_mmap->map_size);
smcm_mmap->map_seg = NULL;
}
if (smcm_mmap->map_path) {
free (smcm_mmap->map_path);
smcm_mmap->map_path = NULL;
}
}
OBJ_CLASS_INSTANCE(bcol_basesmuma_smcm_mmap_t, opal_list_item_t,
bcol_basesmuma_smcm_mmap_construct,
bcol_basesmuma_smcm_mmap_destruct);
/* smcm_allgather_connection:
This function is called when a shared memory subgroup wants to establish shared memory "connections" among
a group of processes.
   This function DOES NOT create any shared memory backing files; it only mmaps already existing files. Shared
memory files are created by the shared memory registration function
-----------------------------------------------------------------------------------------------------------
Input params:
- sbgp module The subgrouping module contains the list of ranks to wire up.
- peer_list An opal list containing a list of bcol_basesmuma_smcm_proc_item_t types. This
contains a list of peers whose shared memory files I have already mapped.
Upon completion of the allgather exchange with all members of the group and depending on the
value of "map_all", my peers' shared memory files are mapped into my local virtual memory
space, with all pertinent information being stored in an bcol_basesmuma_smcm_proc_item_t which is
subsequently appended onto the "peer_list".
- comm The ompi_communicator_t communicator.
- input A data struct that caches the information about my shared memory file.
- map_all Bool that determines whether or not to go ahead and map the files from all of the peers
defined in the sbgp-ing module. If map_all == true, then go ahead and mmap all of the files
obtained in the exchange and append the information to the "peer_list". If map_all == false
then make a check and only mmap those peers' files whose vpid/jobid/filename combination do
not already exist in the "peer_list". Once mapping is completed, append this peer's information
to the "peer_list".
-----------------------------------------------------------------------------------------------------------
*
*/
int bcol_basesmuma_smcm_allgather_connection(
mca_bcol_basesmuma_module_t *sm_bcol_module,
mca_sbgp_base_module_t *module,
opal_list_t *peer_list,
bcol_basesmuma_smcm_proc_item_t ***back_files,
ompi_communicator_t *comm,
bcol_basesmuma_smcm_file_t input,
char *base_fname,
bool map_all)
{
/* define local variables */
int rc, i, fd;
ptrdiff_t mem_offset;
ompi_proc_t *proc_temp, *my_id;
bcol_basesmuma_smcm_proc_item_t *temp;
bcol_basesmuma_smcm_proc_item_t *item_ptr;
bcol_basesmuma_smcm_proc_item_t **backing_files;
struct file_info_t local_file;
struct file_info_t *all_files=NULL;
/* sanity check */
if (strlen(input.file_name) > SM_BACKING_FILE_NAME_MAX_LEN-1) {
opal_output (ompi_bcol_base_framework.framework_output, "backing file name too long: %s len :: %d",
input.file_name, (int) strlen(input.file_name));
return OMPI_ERR_BAD_PARAM;
}
backing_files = (bcol_basesmuma_smcm_proc_item_t **)
calloc(module->group_size, sizeof(bcol_basesmuma_smcm_proc_item_t *));
if (!backing_files) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
/* FIXME *back_files might have been already allocated
* so free it in order to avoid a memory leak */
if (NULL != *back_files) {
free (*back_files);
}
*back_files = backing_files;
my_id = ompi_proc_local();
/* Phase One:
gather a list of processes that will participate in the allgather - I'm
preparing this list from the sbgp-ing module that was passed into the function */
/* fill in local file information */
local_file.vpid = ((orte_process_name_t*)&my_id->super.proc_name)->vpid;
local_file.jobid = ((orte_process_name_t*)&my_id->super.proc_name)->jobid;
local_file.file_size=input.size;
local_file.size_ctl_structure=input.size_ctl_structure;
local_file.data_seg_alignment=input.data_seg_alignment;
strcpy (local_file.file_name, input.file_name);
/* will exchange this data type as a string of characters -
* this routine is first called before MPI_init() completes
* and before error handling is setup, so can't use the
* MPI data types to send this data */
all_files = (struct file_info_t *) calloc(module->group_size,
sizeof (struct file_info_t));
if (!all_files) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
/* exchange data */
rc = comm_allgather_pml(&local_file,all_files,sizeof(struct file_info_t), MPI_CHAR,
sm_bcol_module->super.sbgp_partner_module->my_index,
sm_bcol_module->super.sbgp_partner_module->group_size,
sm_bcol_module->super.sbgp_partner_module->group_list,
sm_bcol_module->super.sbgp_partner_module->group_comm);
if( OMPI_SUCCESS != rc ) {
opal_output (ompi_bcol_base_framework.framework_output, "failed in comm_allgather_pml. Error code: %d", rc);
goto Error;
}
    /* Phase Four:
       loop through the receive buffer, unpack the data received from remote peers */
for (i = 0; i < module->group_size; i++) {
struct file_info_t *rem_file = all_files + i;
        /* check if this is my index or if the file is already mapped (set above). there
         * is no reason to look through the peer list again because no two members of
         * the group will have the same vpid/jobid pair. ignore this previously found
         * mapping if map_all was requested (NTH: not sure why exactly since we re-map
         * an already mapped file) */
if (sm_bcol_module->super.sbgp_partner_module->my_index == i) {
continue;
}
proc_temp = ompi_comm_peer_lookup(comm,module->group_list[i]);
OPAL_LIST_FOREACH(item_ptr, peer_list, bcol_basesmuma_smcm_proc_item_t) {
/* if the vpid/jobid/filename combination already exists in the list,
then do not map this peer's file --- because you already have */
if (0 == ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL,
OMPI_CAST_RTE_NAME(&proc_temp->super.proc_name),
&item_ptr->peer) &&
0 == strcmp (item_ptr->sm_file.file_name, rem_file->file_name)) {
++item_ptr->refcnt;
/* record file data */
backing_files[i] = item_ptr;
break;
}
}
if (!map_all && backing_files[i]) {
continue;
}
temp = OBJ_NEW(bcol_basesmuma_smcm_proc_item_t);
if (!temp) {
rc = OMPI_ERR_OUT_OF_RESOURCE;
goto Error;
}
temp->peer.vpid = rem_file->vpid;
temp->peer.jobid = rem_file->jobid;
temp->sm_file.file_name = strdup (rem_file->file_name);
if (!temp->sm_file.file_name) {
rc = OMPI_ERR_OUT_OF_RESOURCE;
OBJ_RELEASE(temp);
goto Error;
}
temp->sm_file.size = (size_t) rem_file->file_size;
temp->sm_file.mpool_size = (size_t) rem_file->file_size;
temp->sm_file.size_ctl_structure = (size_t) rem_file->size_ctl_structure;
temp->sm_file.data_seg_alignment = (size_t) rem_file->data_seg_alignment;
temp->refcnt = 1;
/* Phase Five:
If map_all == true, then we map every peer's file
else we check to see if I have already mapped this
vpid/jobid/filename combination and if I have, then
I do not mmap this peer's file.
*
*/
fd = open(temp->sm_file.file_name, O_RDWR, 0600);
if (0 > fd) {
opal_output (ompi_bcol_base_framework.framework_output, "SMCM Allgather failed to open sm backing file %s. errno = %d",
temp->sm_file.file_name, errno);
rc = OMPI_ERROR;
goto Error;
}
/* map the file */
temp->sm_mmap = bcol_basesmuma_smcm_reg_mmap (NULL, fd, temp->sm_file.size,
temp->sm_file.size_ctl_structure,
temp->sm_file.data_seg_alignment,
temp->sm_file.file_name);
close (fd);
if (NULL == temp->sm_mmap) {
opal_output (ompi_bcol_base_framework.framework_output, "mmapping failed to map remote peer's file");
OBJ_RELEASE(temp);
rc = OMPI_ERROR;
goto Error;
}
/* compute memory offset */
mem_offset = (ptrdiff_t) temp->sm_mmap->data_addr -
(ptrdiff_t) temp->sm_mmap->map_seg;
temp->sm_mmap->map_seg->seg_offset = mem_offset;
temp->sm_mmap->map_seg->seg_size = temp->sm_file.size - mem_offset;
/* more stuff to follow */
/* append this peer's info, including shared memory map addr, onto the
peer_list */
/* record file data */
backing_files[i] = (bcol_basesmuma_smcm_proc_item_t *) temp;
opal_list_append(peer_list, (opal_list_item_t*) temp);
}
rc = OMPI_SUCCESS;
Error:
/* error clean-up and return */
if (NULL != all_files) {
free(all_files);
}
return rc;
}
int bcol_basesmuma_smcm_release_connections (mca_bcol_basesmuma_module_t *sm_bcol_module,
mca_sbgp_base_module_t *sbgp_module, opal_list_t *peer_list,
bcol_basesmuma_smcm_proc_item_t ***back_files)
{
bcol_basesmuma_smcm_proc_item_t **smcm_procs = *back_files;
for (int i = 0 ; i < sbgp_module->group_size ; ++i) {
if (smcm_procs[i] && 0 == --smcm_procs[i]->refcnt) {
opal_list_remove_item (peer_list, (opal_list_item_t *) smcm_procs[i]);
OBJ_RELEASE(smcm_procs[i]);
}
}
free (smcm_procs);
*back_files = NULL;
return OMPI_SUCCESS;
}
/*
* mmap the specified file as a shared file. No information exchange with other
* processes takes place within this routine.
* This function assumes that the memory has already been allocated, and only the
* mmap needs to be done.
*/
bcol_basesmuma_smcm_mmap_t *bcol_basesmuma_smcm_mem_reg(void *in_ptr,
size_t length,
size_t alignment,
char* file_name)
{
/* local variables */
int fd = -1;
bcol_basesmuma_smcm_mmap_t *map = NULL;
int rc;
/* if pointer is not allocated - return error. We have no clue how the user will allocate or
* free this memory.
*/
/* open the shared memory backing file */
fd = open(file_name, O_CREAT|O_RDWR,0600);
if (fd < 0) {
opal_output (ompi_bcol_base_framework.framework_output, "basesmuma shared memory allocation open failed with errno: %d",
errno);
return NULL;
}
    /* ensure there is enough space for the backing store */
    rc = ftruncate (fd, length);
    if (0 != rc) {
        opal_output (ompi_bcol_base_framework.framework_output, "basesmuma shared memory allocation ftruncate failed with errno: %d",
                     errno);
        close (fd);
        return NULL;
    }
    map = bcol_basesmuma_smcm_reg_mmap (in_ptr, fd, length, 0, alignment, file_name);
    if (NULL == map) {
        close (fd);
        return NULL;
    }
/* no longer need this file descriptor. close it */
close (fd);
/* takes us to the top of the control structure */
return map;
}
static bcol_basesmuma_smcm_mmap_t * bcol_basesmuma_smcm_reg_mmap(void *in_ptr, int fd, size_t length,
size_t addr_offset, size_t alignment,
char *file_name)
{
/* local variables */
bcol_basesmuma_smcm_mmap_t *map;
bcol_basesmuma_smcm_file_header_t *seg;
unsigned char* myaddr = NULL;
int flags = MAP_SHARED;
/* set up the map object */
map = OBJ_NEW(bcol_basesmuma_smcm_mmap_t);
if (OPAL_UNLIKELY(NULL == map)) {
return NULL;
}
/* map the file and initialize the segment state */
if (NULL != in_ptr) {
flags |= MAP_FIXED;
}
seg = (bcol_basesmuma_smcm_file_header_t *)
mmap(in_ptr, length, PROT_READ|PROT_WRITE, flags, fd, 0);
if((void*)-1 == seg) {
OBJ_RELEASE(map);
return NULL;
}
map->map_path = strdup (file_name);
/* the first entry in the file is the control structure. the first entry
in the control structure is an mca_common_sm_file_header_t element */
map->map_seg = seg;
myaddr = (unsigned char *) seg + addr_offset;
    /* if we have a data segment (i.e. if 0 != data_seg_alignment) */
if (alignment) {
myaddr = OPAL_ALIGN_PTR(myaddr, alignment, unsigned char*);
/* is addr past the end of the file? */
if ((unsigned char *) seg+length < myaddr) {
opal_output (ompi_bcol_base_framework.framework_output, "mca_bcol_basesmuma_sm_alloc_mmap: memory region too small len %lu add %p",
(unsigned long) length, (void*)myaddr);
OBJ_RELEASE(map);
munmap ((void *)seg, length);
return NULL;
}
}
map->data_addr = (unsigned char*) myaddr;
map->map_addr = (unsigned char*) seg;
map->map_size = length;
return map;
}


@ -1,105 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef BCOL_BASESMUMA_SMCM_H
#define BCOL_BASESMUMA_SMCM_H
#include <sys/mman.h>
#include <stdio.h>
#include "ompi_config.h"
#include "ompi/proc/proc.h"
#include "opal/class/opal_object.h"
#include "opal/class/opal_list.h"
#include "opal/sys/atomic.h"
typedef struct bcol_basesmuma_smcm_file_header_t {
/* lock to control atomic access */
opal_atomic_lock_t seg_lock;
/* is the segment ready for use */
volatile int32_t seg_inited;
/* Offset to next available memory location available for allocation */
size_t seg_offset;
/* total size of the segment */
size_t seg_size;
} bcol_basesmuma_smcm_file_header_t;
typedef struct bcol_basesmuma_smcm_mmap_t {
/* double link list element */
opal_list_item_t super;
    /* pointer to header embedded in the shared memory file */
bcol_basesmuma_smcm_file_header_t *map_seg;
/* base address of the mmap'ed file */
unsigned char *map_addr;
/* base address of data segment */
unsigned char *data_addr;
/* How big it is (in bytes) */
size_t map_size;
/* Filename */
char *map_path;
} bcol_basesmuma_smcm_mmap_t;
OBJ_CLASS_DECLARATION(bcol_basesmuma_smcm_mmap_t);
/* Struct that characterizes a shared memory file */
struct bcol_basesmuma_smcm_file_t {
char *file_name;
size_t size;
size_t size_ctl_structure;
size_t data_seg_alignment;
size_t mpool_size;
};
typedef struct bcol_basesmuma_smcm_file_t bcol_basesmuma_smcm_file_t;
struct bcol_basesmuma_smcm_proc_item_t {
opal_list_item_t item; /* can put me on a free list */
int refcnt;
ompi_process_name_t peer;
bcol_basesmuma_smcm_file_t sm_file;
bcol_basesmuma_smcm_mmap_t *sm_mmap; /* Pointer to peer's sm file */
};
typedef struct bcol_basesmuma_smcm_proc_item_t bcol_basesmuma_smcm_proc_item_t;
OBJ_CLASS_DECLARATION(bcol_basesmuma_smcm_proc_item_t);
/* allocate shared memory file
* in_ptr - pointer to preallocated memory (if NULL, this will be mmaped)
* alignment - region memory alignment
* file name - fully qualified backing file name
*/
OMPI_DECLSPEC extern bcol_basesmuma_smcm_mmap_t *bcol_basesmuma_smcm_mem_reg(void *in_ptr,
size_t length,
size_t alignment,
char* file_name);
OMPI_DECLSPEC extern bcol_basesmuma_smcm_mmap_t* bcol_basesmuma_smcm_create_mmap(int fd,
size_t size, char *file_name,
size_t size_ctl_structure,
size_t data_seg_alignment);
#endif


@ -1,103 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "bcol_basesmuma_utils.h"
/*
 * Return the exponent of the largest power of K that is less than
 * or equal to the group size; the power itself is returned through pow_k.
*/
int pow_sm_k(int k, int number, int *pow_k)
{
int power = 0;
int n = 1;
if( 2 == k){
while(n <= number){
power++;
n <<= 1;
}
*pow_k = n >> 1;
} else {
while (n <= number) {
n *= k;
power++;
}
*pow_k = n/k;
}
return (power-1);
}
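/*
 * Illustrative sketch (not part of the original file): a couple of worked
 * values for pow_sm_k(). The helper below is hypothetical and never called;
 * it only documents the expected outputs.
 */
static inline void pow_sm_k_example(void)
{
    int pow_k, exponent;

    exponent = pow_sm_k(2, 5, &pow_k);   /* exponent == 2, pow_k == 4 */
    exponent = pow_sm_k(3, 10, &pow_k);  /* exponent == 2, pow_k == 9 */
    (void) exponent;
    (void) pow_k;
}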
int get_k_nomial_src_list(int group_size,
int radix, int my_index,
int *src_list) {
/* local variables */
int radix_power;
int offset;
int kount = 0;
int src_temp;
radix_power = 1;
offset = 1;
while(offset < group_size) {
if( offset % (radix * radix_power) ) {
src_temp = my_index - offset;
/* wrap around */
if ( src_temp < 0 ) {
src_temp += group_size;
}
/* don't probe ghost nodes */
if( src_temp < group_size ) {
src_list[kount] = src_temp;
kount++;
}
offset+=radix_power;
} else {
radix_power *= radix;
}
}
/* return the actual number of nodes to poll on */
return kount;
}
int get_k_nomial_dst_size(int group_size, int radix, int my_index)
{
int dst_count = 0;
int radix_mask;
int k;
radix_mask = 1;
while (radix_mask < group_size) {
if (0 != my_index % (radix * radix_mask)) {
/* I found my level in tree */
break;
}
radix_mask *= radix;
}
radix_mask /= radix;
while(radix_mask > 0) {
/* For each level of tree, do sends */
for (k = 1;
k < radix && my_index + radix_mask * k < group_size;
++k) {
dst_count += 1 ;
}
radix_mask /= radix;
}
return dst_count;
}
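/*
 * Illustrative sketch (not part of the original file): how the two helpers
 * above might be combined by a caller that polls for k-nomial fan-out
 * arrivals. The array size and all names here are hypothetical.
 */
static inline void k_nomial_usage_example(int group_size, int radix, int my_index)
{
    int src_list[64];   /* assumed large enough for radix * log_radix(group_size) entries */
    int n_src, n_dst;

    /* ranks from which data may arrive at this rank during the fan-out */
    n_src = get_k_nomial_src_list(group_size, radix, my_index, src_list);
    /* number of ranks this rank forwards data to during the fan-out */
    n_dst = get_k_nomial_dst_size(group_size, radix, my_index);
    (void) n_src;
    (void) n_dst;
}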


@ -1,64 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BCOL_BASESMUMA_UTILS_H
#define MCA_BCOL_BASESMUMA_UTILS_H
#include "ompi_config.h"
BEGIN_C_DECLS
#define BASESMUMA_K_NOMIAL_SEND_CHILDREN(radix_mask,radix,relative_index, \
my_group_index, group_size, ready_flag) \
do { \
int k, child; \
while(radix_mask > 0){ \
for(k = 1; k < radix && relative_index+radix_mask*k<group_size; \
k++) {\
child = my_group_index+radix_mask*k; \
if(child >= group_size) { \
child -= group_size; \
} \
/*fprintf(stderr,"I am %d sending to child %d\n",my_group_index,child);*/ \
child_ctl_pointer = data_buffs[child].ctl_struct; \
child_ctl_pointer->src = my_group_index; \
/* this can be improved to make better asynchronous progress, but it's
* fine for now.
*/ \
while(child_ctl_pointer->sequence_number != sequence_number ); \
child_ctl_pointer->flags[BCAST_FLAG][bcol_id] = ready_flag; \
} \
radix_mask = radix_mask/radix; \
} \
} while( 0 )
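/*
 * Illustrative sketch (not part of the original header): the child-enumeration
 * pattern used by the macro above, written out as a plain helper so the loop
 * structure is easier to follow. The function name is hypothetical.
 */
static inline int basesmuma_k_nomial_list_children(int radix, int radix_mask,
                                                   int relative_index, int my_group_index,
                                                   int group_size, int *children)
{
    int count = 0;

    while (radix_mask > 0) {
        for (int k = 1; k < radix && relative_index + radix_mask * k < group_size; ++k) {
            int child = my_group_index + radix_mask * k;
            if (child >= group_size) {
                child -= group_size;   /* wrap around, as in the macro above */
            }
            children[count++] = child;
        }
        radix_mask /= radix;
    }
    return count;
}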
/*
 * Return the exponent of the largest power of K that is less than or equal to the
 * group size; the power itself is returned through pow_k_group_size.
*/
int pow_sm_k(int radix_k, int group_size, int *pow_k_group_size);
/*
* Get list of possible sources from which data may arrive based on a K-nomial tree fan-out.
*/
int get_k_nomial_src_list(int group_size, int radix,
int my_index, int *src_list);
int get_k_nomial_dst_size(int group_size, int radix, int my_index);
END_C_DECLS
#endif


@ -1,7 +0,0 @@
#
# owner/status file
# owner: institution that is responsible for this package
# status: e.g. active, maintenance, unmaintained
#
owner: ORNL
status: unmaintained


@ -1,805 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BCOL_H
#define MCA_BCOL_H
#include "ompi_config.h"
#include "opal/class/opal_list.h"
#include "ompi/mca/mca.h"
#include "ompi/mca/coll/coll.h"
#include "opal/mca/mpool/mpool.h"
#include "ompi/mca/sbgp/sbgp.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/op/op.h"
#include "ompi/include/ompi/constants.h"
#include "ompi/patterns/net/netpatterns_knomial_tree.h"
#include "opal/util/show_help.h"
#include <limits.h>
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/* Forward declaration - please do not remove it */
struct ml_buffers_t;
struct mca_bcol_base_coll_fn_comm_attributes_t;
struct mca_bcol_base_coll_fn_invoke_attributes_t;
struct mca_bcol_base_coll_fn_desc_t;
#define NUM_MSG_RANGES 5
#define MSG_RANGE_INITIAL (1024)*12
#define MSG_RANGE_INC 10
#define BCOL_THRESHOLD_UNLIMITED (INT_MAX)
/* Maximum size of a bcol's header. This allows us to correctly calculate the message
* thresholds. If the header of any bcol exceeds this value then increase this one
* to match. */
#define BCOL_HEADER_MAX 96
#define BCOL_HEAD_ALIGN 32 /* will turn into an MCA parameter after debug */
/*
* Functions supported
*/
enum bcol_coll {
/* blocking functions */
BCOL_ALLGATHER,
BCOL_ALLGATHERV,
BCOL_ALLREDUCE,
BCOL_ALLTOALL,
BCOL_ALLTOALLV,
BCOL_ALLTOALLW,
BCOL_BARRIER,
BCOL_BCAST,
BCOL_EXSCAN,
BCOL_GATHER,
BCOL_GATHERV,
BCOL_REDUCE,
BCOL_REDUCE_SCATTER,
BCOL_SCAN,
BCOL_SCATTER,
BCOL_SCATTERV,
BCOL_FANIN,
BCOL_FANOUT,
/* nonblocking functions */
BCOL_IALLGATHER,
BCOL_IALLGATHERV,
BCOL_IALLREDUCE,
BCOL_IALLTOALL,
BCOL_IALLTOALLV,
BCOL_IALLTOALLW,
BCOL_IBARRIER,
BCOL_IBCAST,
BCOL_IEXSCAN,
BCOL_IGATHER,
BCOL_IGATHERV,
BCOL_IREDUCE,
BCOL_IREDUCE_SCATTER,
BCOL_ISCAN,
BCOL_ISCATTER,
BCOL_ISCATTERV,
BCOL_IFANIN,
BCOL_IFANOUT,
BCOL_SYNC,
/* New function - needed for intermediate steps */
BCOL_REDUCE_TO_LEADER,
BCOL_NUM_OF_FUNCTIONS
};
typedef enum bcol_coll bcol_coll;
typedef enum bcol_elem_type {
BCOL_SINGLE_ELEM_TYPE,
BCOL_MULTI_ELEM_TYPE,
BCOL_NUM_OF_ELEM_TYPES
} bcol_elem_type;
typedef int (*mca_bcol_base_module_coll_support_all_types_fn_t)(bcol_coll coll_name);
typedef int (*mca_bcol_base_module_coll_support_fn_t)(int op, int dtype, bcol_elem_type elem_num);
/*
* Collective function status
*/
enum {
BCOL_FN_NOT_STARTED = (OMPI_ERR_MAX - 1),
BCOL_FN_STARTED = (OMPI_ERR_MAX - 2),
BCOL_FN_COMPLETE = (OMPI_ERR_MAX - 3)
};
/**
* Collective component initialization
*
* Initialize the given collective component. This function should
 * initialize any component-level data. It will be called exactly
* once during MPI_INIT.
*
* @note The component framework is not lazily opened, so attempts
 * should be made to minimize the amount of memory allocated during
* this function.
*
* @param[in] enable_progress_threads True if the component needs to
* support progress threads
* @param[in] enable_mpi_threads True if the component needs to
* support MPI_THREAD_MULTIPLE
*
* @retval OMPI_SUCCESS Component successfully initialized
* @retval ORTE_ERROR An unspecified error occurred
*/
typedef int (*mca_bcol_base_component_init_query_fn_t)
(bool enable_progress_threads, bool enable_mpi_threads);
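/*
 * Illustrative sketch (not part of the original header): the shape of an
 * init-query hook for a hypothetical bcol component that needs no global
 * setup.
 */
static inline int mca_bcol_example_init_query(bool enable_progress_threads,
                                              bool enable_mpi_threads)
{
    (void) enable_progress_threads;
    (void) enable_mpi_threads;
    return OMPI_SUCCESS;
}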
/**
* Query whether a component is available for the given sub-group
*
* Query whether the component is available for the given
* sub-group. If the component is available, an array of pointers should be
* allocated and returned (with refcount at 1). The module will not
* be used for collective operations until module_enable() is called
* on the module, but may be destroyed (via OBJ_RELEASE) either before
* or after module_enable() is called. If the module needs to release
* resources obtained during query(), it should do so in the module
* destructor.
*
 * A component may return NULL from this function to indicate that it does not
 * wish to run, or it may return an error during module_enable().
*
* @note The communicator is available for point-to-point
* communication, but other functionality is not available during this
* phase of initialization.
*
* @param[in] sbgp Pointer to sub-group module.
* @param[out] priority Priority setting for component on
* this communicator
 * @param[out] num_modules Number of modules that were generated
* for the sub-group module.
*
 * @returns An array of pointers to initialized module structures if the component can
 * provide modules with the requested functionality, or NULL if the
 * component should not be used on the given communicator.
*/
typedef struct mca_bcol_base_module_t **(*mca_bcol_base_component_comm_query_fn_t)
(mca_sbgp_base_module_t *sbgp, int *num_modules);
typedef int (*mca_bcol_barrier_init_fn_t)(struct mca_bcol_base_module_t *bcol_module,
mca_sbgp_base_module_t *sbgp_module);
/*
 * Macro for use in modules that are of type bcol v2.0.0
*/
#define MCA_BCOL_BASE_VERSION_2_0_0 \
OMPI_MCA_BASE_VERSION_2_1_0("bcol", 2, 0, 0)
/* This is really an abstraction violation, but is the easiest way to get
* started. For memory management we need to know what bcol components
* have compatible memory management schemes. Such compatibility can
* be used to eliminate memory copies between levels in the collective
* operation hierarchy, by having the output buffer of one level be the
* input buffer to the next level
*/
enum {
BCOL_SHARED_MEMORY_UMA=0,
BCOL_SHARED_MEMORY_SOCKET,
BCOL_POINT_TO_POINT,
BCOL_IB_OFFLOAD,
BCOL_SIZE
};
OMPI_DECLSPEC extern int bcol_mpool_compatibility[BCOL_SIZE][BCOL_SIZE];
OMPI_DECLSPEC extern int bcol_mpool_index[BCOL_SIZE][BCOL_SIZE];
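/*
 * Illustrative sketch (not part of the original header): consulting the
 * compatibility matrix declared above to decide whether two hierarchy levels
 * can pass buffers to each other without an intermediate copy. The function
 * name and its use are hypothetical.
 */
static inline bool mca_bcol_base_levels_share_buffers(int upper_bcol_type,
                                                      int lower_bcol_type)
{
    /* a non-zero entry means the output buffer of one level may serve
     * directly as the input buffer of the next level */
    return 0 != bcol_mpool_compatibility[upper_bcol_type][lower_bcol_type];
}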
/* what are the input parameters ? too many void * pointers here */
typedef int (*bcol_register_mem_fn_t)(void *context_data, void *base,
size_t size, void **reg_desc);
/* deregistration function */
typedef int (*bcol_deregister_mem_fn_t)(void *context_data, void *reg_desc);
/* Bcol network context definition */
struct bcol_base_network_context_t {
opal_object_t super;
/* Context id - defined by upper layer, ML */
int context_id;
    /* Any context information that the bcol wants to use */
void *context_data;
/* registration function */
bcol_register_mem_fn_t register_memory_fn;
/* deregistration function */
bcol_deregister_mem_fn_t deregister_memory_fn;
};
typedef struct bcol_base_network_context_t bcol_base_network_context_t;
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(bcol_base_network_context_t);
/*
*primitive function types
*/
/* bcast */
enum {
/* small data function */
BCOL_BCAST_SMALL_DATA,
/* small data - dynamic decision making supported */
BCOL_BCAST_SMALL_DATA_DYNAMIC,
/* number of functions */
BCOL_NUM_BCAST_FUNCTIONS
};
/**
* BCOL instance.
*/
/* no limit on fragment size - this supports using user buffers rather
* than library buffers
*/
#define FRAG_SIZE_NO_LIMIT -1
/* forward declaration */
struct coll_bcol_collective_description_t;
struct mca_bcol_base_component_2_0_0_t {
/** Base component description */
mca_base_component_t bcol_version;
/** Component initialization function */
mca_bcol_base_component_init_query_fn_t collm_init_query;
/** Query whether component is useable for given communicator */
mca_bcol_base_component_comm_query_fn_t collm_comm_query;
/** If bcol supports all possible data types */
mca_bcol_base_module_coll_support_fn_t coll_support;
/** If bcol supports all possible data types for given collective operation */
mca_bcol_base_module_coll_support_all_types_fn_t coll_support_all_types;
    /** Use this flag to prevent multiple init_query calls
        in case we have the same bcol on more than a single level */
bool init_done;
/** If collective calls with bcols of this type need to be ordered */
bool need_ordering;
/** MCA parameter: Priority of this component */
int priority;
/** Bcast function pointers */
struct coll_bcol_collective_description_t *
bcast_functions[BCOL_NUM_BCAST_FUNCTIONS];
/** Number of network contexts - need this for resource management */
int n_net_contexts;
/** List of network contexts */
bcol_base_network_context_t **network_contexts;
/*
* Fragmentation support
*/
    /** Minimum fragment size */
int min_frag_size;
/** Maximum fragment size */
int max_frag_size;
/** Supports direct use of user-buffers */
bool can_use_user_buffers;
};
typedef struct mca_bcol_base_component_2_0_0_t mca_bcol_base_component_2_0_0_t;
typedef struct mca_bcol_base_component_2_0_0_t mca_bcol_base_component_t;
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_bcol_base_component_t);
/* forward declaration */
struct mca_coll_ml_descriptor_t;
struct mca_bcol_base_payload_buffer_desc_t;
struct mca_bcol_base_route_info_t;
typedef struct {
int order_num; /* Seq num of collective fragment */
    int bcols_started; /* How many bcols that need ordering have been started */
    int n_fns_need_ordering; /* The number of functions called for bcols that need ordering */
} mca_bcol_base_order_info_t;
/* structure that encapsulates information propagated amongst multiple
* fragments whereby completing the entire ensemble of fragments is
* necessary in order to complete the entire collective
*/
struct bcol_fragment_descriptor_t {
/* start iterator */
int head;
/* end iterator */
int tail;
/* current iteration */
int start_iter;
/* number of full iterations this frag */
int num_iter;
/* end iter */
int end_iter;
};
typedef struct bcol_fragment_descriptor_t bcol_fragment_descriptor_t;
struct bcol_function_args_t {
/* full message sequence number */
int64_t sequence_num;
/* full message descriptor - single copy of fragment invariant
* parameters */
    /* Pasha: We don't need this one for the new flow - remove it */
struct mca_coll_ml_descriptor_t *full_message_descriptor;
struct mca_bcol_base_route_info_t *root_route;
/* function status */
int function_status;
/* root, for rooted operations */
int root;
/* input buffer */
const void *sbuf;
void *rbuf;
const void *userbuf;
struct mca_bcol_base_payload_buffer_desc_t *src_desc;
struct mca_bcol_base_payload_buffer_desc_t *dst_desc;
/* ml buffer size */
uint32_t buffer_size;
/* index of buffer in ml payload cache */
int buffer_index;
int count;
struct ompi_datatype_t *dtype;
struct ompi_op_t *op;
int sbuf_offset;
int rbuf_offset;
/* for bcol opaque data */
void *bcol_opaque_data;
/* An output argument that will be used by BCOL function to tell ML that the result of the BCOL is in rbuf */
bool result_in_rbuf;
bool root_flag; /* True if the rank is root of operation */
bool need_dt_support; /* will trigger alternate code path for some colls */
int status; /* Used for non-blocking collective completion */
uint32_t frag_size; /* fragment size for large messages */
    int hier_factor; /* factor used when bcast is invoked as a service function back down
                      * the tree (in allgather, for example); the pacl_len is not the actual
                      * length of the data needing to be bcast
*/
mca_bcol_base_order_info_t order_info;
bcol_fragment_descriptor_t frag_info;
};
struct mca_bcol_base_route_info_t {
int level;
int rank;
};
typedef struct mca_bcol_base_route_info_t mca_bcol_base_route_info_t;
struct mca_bcol_base_lmngr_block_t {
opal_list_item_t super;
struct mca_coll_ml_lmngr_t *lmngr;
void* base_addr;
};
typedef struct mca_bcol_base_lmngr_block_t mca_bcol_base_lmngr_block_t;
OBJ_CLASS_DECLARATION(mca_bcol_base_lmngr_block_t);
struct mca_bcol_base_memory_block_desc_t {
/* memory block for payload buffers */
struct mca_bcol_base_lmngr_block_t *block;
/* Address offset in bytes -- Indicates free memory in the block */
uint64_t block_addr_offset;
/* size of the memory block */
size_t size_block;
/* number of memory banks */
uint32_t num_banks;
/* number of buffers per bank */
uint32_t num_buffers_per_bank;
/* size of a payload buffer */
uint32_t size_buffer;
/* pointer to buffer descriptors initialized */
struct mca_bcol_base_payload_buffer_desc_t *buffer_descs;
/* index of the next free buffer in the block */
uint64_t next_free_buffer;
uint32_t *bank_release_counters;
    /* Counter that defines which bank should be synchronized next.
     * Since collectives could be completed out of order, we have to make
     * sure that memory synchronization collectives are started in order! */
int memsync_counter;
    /* This array of flags is used to signal that the bank is ready for recycling */
bool *ready_for_memsync;
    /* This flag monitors whether the bank is open for usage. Usually we expect that the user
     * will do the check only on buffer-zero allocation */
bool *bank_is_busy;
};
/* convenience typedef */
typedef struct mca_bcol_base_memory_block_desc_t mca_bcol_base_memory_block_desc_t;
typedef void (*mca_bcol_base_release_buff_fn_t)(struct mca_bcol_base_memory_block_desc_t *ml_memblock, uint32_t buff_id);
struct mca_bcol_base_payload_buffer_desc_t {
void *base_data_addr; /* buffer address */
void *data_addr; /* buffer address + header offset */
uint64_t generation_number; /* my generation */
uint64_t bank_index; /* my bank */
uint64_t buffer_index; /* my buff index */
};
/* convenience typedef */
typedef struct mca_bcol_base_payload_buffer_desc_t mca_bcol_base_payload_buffer_desc_t;
typedef struct bcol_function_args_t bcol_function_args_t;
/* The collective operation is defined by a series of collective operations
* invoked through a function pointer. Each function may be different,
* so will store the arguments in a struct and pass a pointer to the struct,
* and use this as a way to hide the different function signatures.
*
* @param[in] input_args Structure with function arguments
 * @param[in] bcol_desc Component specific parameters
* @param[out] status return status of the function
* MCA_BCOL_COMPLETE - function completed
* MCA_BCOL_IN_PROGRESS - function incomplete
*
* @retval OMPI_SUCCESS successful completion
* @retval OMPI_ERROR function returned error
*/
/* forward declaration */
struct mca_bcol_base_module_t;
/* collective function prototype - all functions have the same interface
* so that we can call them via a function pointer */
struct mca_bcol_base_function_t;
typedef int (*mca_bcol_base_module_collective_fn_primitives_t)
(bcol_function_args_t *input_args, struct mca_bcol_base_function_t *const_args);
typedef int (*mca_bcol_base_module_collective_init_fn_primitives_t)
(struct mca_bcol_base_module_t *bcol_module);
/**
 * function to query for collective function attributes
*
* @param attribute (IN) the attribute of interest
* @param algorithm_parameters (OUT) the value of attribute for this
* function. If this attribute is not supported,
* OMPI_ERR_NOT_FOUND is returned.
*/
typedef int (*mca_bcol_get_collective_attributes)(int attribute,
void *algorithm_parameters);
/* data structure for tracking the relevant data needed for ml level
* algorithm construction (e.g., function selection), initialization, and
* usage.
*/
struct coll_bcol_collective_description_t {
    /* collective initiation function - first function called */
    mca_bcol_base_module_collective_fn_primitives_t coll_fn;
    /* collective progress function - called to advance an outstanding collective */
    mca_bcol_base_module_collective_fn_primitives_t progress_fn;
    /* attribute query function */
mca_bcol_get_collective_attributes get_attributes;
/* attributes supported - bit map */
uint64_t attribute;
};
typedef struct coll_bcol_collective_description_t
coll_bcol_collective_description_t;
/* collective operation attributes */
enum {
/* supports dynamic decisions - e.g., do not need to have the collective
* operation fully defined before it can be started
*/
BCOL_ATTRIBUTE_DYNAMIC,
/* number of attributes */
BCOL_NUM_ATTRIBUTES
};
/* For rooted collectives,
 * does the algorithm know its data source?
*/
enum {
DATA_SRC_KNOWN=0,
DATA_SRC_UNKNOWN,
DATA_SRC_TYPES
};
enum {
BLOCKING,
NON_BLOCKING
};
/* gvm For selection logic */
struct mca_bcol_base_coll_fn_comm_attributes_t {
int bcoll_type;
int comm_size_min;
int comm_size_max;
int data_src;
int waiting_semantics;
};
typedef struct mca_bcol_base_coll_fn_comm_attributes_t
mca_bcol_base_coll_fn_comm_attributes_t;
struct mca_bcol_base_coll_fn_invoke_attributes_t {
int bcol_msg_min;
int bcol_msg_max;
uint64_t datatype_bitmap; /* Max is OMPI_DATATYPE_MAX_PREDEFINED defined to be 45 */
uint32_t op_types_bitmap; /* bit map of optypes supported */
};
typedef struct mca_bcol_base_coll_fn_invoke_attributes_t
mca_bcol_base_coll_fn_invoke_attributes_t;
struct mca_bcol_base_coll_fn_desc_t {
opal_list_item_t super;
struct mca_bcol_base_coll_fn_comm_attributes_t *comm_attr;
struct mca_bcol_base_coll_fn_invoke_attributes_t *inv_attr;
mca_bcol_base_module_collective_fn_primitives_t coll_fn;
mca_bcol_base_module_collective_fn_primitives_t progress_fn;
};
typedef struct mca_bcol_base_coll_fn_desc_t mca_bcol_base_coll_fn_desc_t;
OBJ_CLASS_DECLARATION(mca_bcol_base_coll_fn_desc_t);
/* end selection logic */
typedef int (*mca_bcol_base_module_collective_init_fn_t)
(struct mca_bcol_base_module_t *bcol_module,
mca_sbgp_base_module_t *sbgp_module);
/* per communicator memory initialization function */
typedef int (*mca_bcol_module_mem_init)(struct ml_buffers_t *registered_buffers,
mca_bcol_base_component_t *module);
/* Initialize memory block - ml_memory_block initialization interface function
*
* Invoked at the ml level, used to pass bcol specific registration information
* for the "ml_memory_block"
*
* @param[in] ml_memory_block Pointer to the ml_memory_block. This struct
* contains bcol specific registration information and a call back function
* used for resource recycling.
*
* @param[in] reg_data bcol specific registration data.
*
* @returns On Success: OMPI_SUCCESS
* On Failure: OMPI_ERROR
*
*/
/*typedef int (*mca_bcol_base_init_memory_fn_t)
(struct mca_bcol_base_memory_block_desc_t *ml_block, void *reg_data);*/
typedef int (*mca_bcol_base_init_memory_fn_t)
(struct mca_bcol_base_memory_block_desc_t *payload_block,
uint32_t data_offset,
struct mca_bcol_base_module_t *bcol,
void *reg_data);
typedef int (*mca_common_allgather_init_fn_t)
(struct mca_bcol_base_module_t *bcol_module);
typedef void (*mca_bcol_base_set_thresholds_fn_t)
(struct mca_bcol_base_module_t *bcol_module);
enum {
MCA_BCOL_BASE_ZERO_COPY = 1,
MCA_BCOL_BASE_NO_ML_BUFFER_FOR_LARGE_MSG = 1 << 1,
MCA_BCOL_BASE_NO_ML_BUFFER_FOR_BARRIER = 1 << 2
};
/* base module */
struct mca_bcol_base_module_t {
/* base coll component */
opal_object_t super;
    /* bcol component (Pasha: Do we really need to cache the component?) */
mca_bcol_base_component_t *bcol_component;
/* network context that is used by this bcol
only one context per bcol is allowed */
bcol_base_network_context_t *network_context;
    /* We are going to use the context index a lot;
       in order to decrease the number of dereferences of
       bcol->network_context->index,
       we are caching the value on the bcol */
int context_index;
/* Set of flags that describe features supported by bcol */
uint64_t supported_mode;
/* per communicator memory initialization function */
mca_bcol_module_mem_init init_module;
/* sub-grouping module partner */
mca_sbgp_base_module_t *sbgp_partner_module;
/* size of subgroup - cache this, so can have access when
     * sbgp_partner_module no longer exists */
int size_of_subgroup;
/* sequence number offset - want to make sure that we start
* id'ing collectives with id 0, so we can have simple
* resource management.
*/
int64_t squence_number_offset;
/* number of times to poll for operation completion before
* breaking out of a non-blocking collective operation
*/
int n_poll_loops;
/* size of header that will go in data buff, should not include
* any info regarding alignment, let the ml level handle this
*/
uint32_t header_size;
/* Each bcol is assigned a unique value
* see if we can get away with 16-bit id
*/
int16_t bcol_id;
/*FIXME:
* Since mca_bcol_base_module_t is the only parameter which will be passed
* into the bcol_basesmuma_bcast_init(), add the flag to indicate whether
* the hdl-based algorithms will get enabled.
*/
bool use_hdl;
/*
* Collective function pointers
*/
/* changing function signature - will replace bcol_functions */
mca_bcol_base_module_collective_fn_primitives_t bcol_function_table[BCOL_NUM_OF_FUNCTIONS];
/* Tables hold pointers to functions */
mca_bcol_base_module_collective_init_fn_primitives_t bcol_function_init_table[BCOL_NUM_OF_FUNCTIONS];
opal_list_t bcol_fns_table[BCOL_NUM_OF_FUNCTIONS];
struct mca_bcol_base_coll_fn_desc_t*
filtered_fns_table[DATA_SRC_TYPES][2][BCOL_NUM_OF_FUNCTIONS][NUM_MSG_RANGES+1][OMPI_OP_NUM_OF_TYPES][OMPI_DATATYPE_MAX_PREDEFINED];
/*
* Bcol interface function to pass bcol specific
* info and memory recycling call back
*/
mca_bcol_base_init_memory_fn_t bcol_memory_init;
/*
* netpatterns interface function, would like to invoke this on
* on the ml level
*/
mca_common_allgather_init_fn_t k_nomial_tree;
/* Each bcol caches a list which describes how many ranks
* are "below" each rank in this bcol
*/
int *list_n_connected;
/* offsets for scatter/gather */
int hier_scather_offset;
/* Small message threshold for each collective */
int small_message_thresholds[BCOL_NUM_OF_FUNCTIONS];
/* Set small_message_thresholds array */
mca_bcol_base_set_thresholds_fn_t set_small_msg_thresholds;
/* Pointer to the order counter on the upper layer,
used if the bcol needs to be ordered */
int *next_inorder;
};
typedef struct mca_bcol_base_module_t mca_bcol_base_module_t;
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_bcol_base_module_t);
/* function description */
struct mca_bcol_base_function_t {
int fn_idx;
/* module */
struct mca_bcol_base_module_t *bcol_module;
/*
* The following two parameters are used for bcol modules
* that want to do some optimizations based on the fact that
* n functions from the same bcol module are called in a row.
* For example, in the iboffload case, on the first call one
* will want to initialize the MWR, and start to instantiate
* it, but only post it at the end of the last call.
* The index of this function in a sequence of consecutive
* functions from the same bcol
*/
int index_in_consecutive_same_bcol_calls;
/* number of times functions from this bcol are
* called in order
*/
int n_of_this_type_in_a_row;
/*
* number of times functions from this module are called in the
* collective operation.
*/
int n_of_this_type_in_collective;
int index_of_this_type_in_collective;
};
typedef struct mca_bcol_base_function_t mca_bcol_base_function_t;
struct mca_bcol_base_descriptor_t {
opal_free_list_item_t super;
/* Vasily: will be described in the future */
};
typedef struct mca_bcol_base_descriptor_t mca_bcol_base_descriptor_t;
static inline __opal_attribute_always_inline__ size_t
mca_bcol_base_get_buff_length(ompi_datatype_t *dtype, int count)
{
ptrdiff_t lb, extent;
ompi_datatype_get_extent(dtype, &lb, &extent);
return (size_t) (extent * count);
}
#define MCA_BCOL_CHECK_ORDER(module, bcol_function_args) \
do { \
if (*((module)->next_inorder) != \
(bcol_function_args)->order_info.order_num) { \
return BCOL_FN_NOT_STARTED; \
} \
} while (0);
#define MCA_BCOL_UPDATE_ORDER_COUNTER(module, order_info) \
do { \
(order_info)->bcols_started++; \
if ((order_info)->n_fns_need_ordering == \
(order_info)->bcols_started) { \
++(*((module)->next_inorder)); \
} \
} while (0);
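/*
 * Illustrative sketch (not part of the original header): how a hypothetical
 * bcol primitive with the standard signature might combine the two ordering
 * macros above when its component sets need_ordering.
 */
static inline int mca_bcol_base_example_ordered_fn(bcol_function_args_t *input_args,
                                                   struct mca_bcol_base_function_t *const_args)
{
    mca_bcol_base_module_t *module = const_args->bcol_module;

    /* do not start until it is this fragment's turn */
    MCA_BCOL_CHECK_ORDER(module, input_args);

    /* ... the actual collective work would be done here ... */

    /* record that this bcol function has started so the next one may proceed */
    MCA_BCOL_UPDATE_ORDER_COUNTER(module, &input_args->order_info);
    return BCOL_FN_COMPLETE;
}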
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif /* MCA_BCOL_H */



@ -1,66 +0,0 @@
#
# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
# Copyright (c) 2012-2015 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
AM_CPPFLAGS = $(bcol_iboffload_CPPFLAGS) $(btl_openib_CPPFLAGS)
sources = \
bcol_iboffload.h \
bcol_iboffload_device.h \
bcol_iboffload_module.c \
bcol_iboffload_mca.h \
bcol_iboffload_mca.c \
bcol_iboffload_endpoint.h \
bcol_iboffload_endpoint.c \
bcol_iboffload_frag.h \
bcol_iboffload_frag.c \
bcol_iboffload_collfrag.h \
bcol_iboffload_collfrag.c \
bcol_iboffload_task.h \
bcol_iboffload_task.c \
bcol_iboffload_component.c \
bcol_iboffload_barrier.c \
bcol_iboffload_bcast.h \
bcol_iboffload_bcast.c \
bcol_iboffload_allgather.c \
bcol_iboffload_collreq.h \
bcol_iboffload_collreq.c \
bcol_iboffload_qp_info.c \
bcol_iboffload_qp_info.h \
bcol_iboffload_fanin.c \
bcol_iboffload_fanout.c \
bcol_iboffload_allreduce.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
component_noinst =
component_install =
if MCA_BUILD_ompi_bcol_iboffload_DSO
component_install += mca_bcol_iboffload.la
else
component_noinst += libmca_bcol_iboffload.la
endif
# See ompi/mca/btl/sm/Makefile.am for an explanation of
# libmca_common_sm.la.
mcacomponentdir = $(ompilibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_bcol_iboffload_la_SOURCES = $(sources)
mca_bcol_iboffload_la_LDFLAGS = -module -avoid-version $(btl_openib_LDFLAGS) $(bcol_iboffload_LDFLAGS)
mca_bcol_iboffload_la_LIBADD = $(btl_openib_LIBS) $(bcol_iboffload_LIBS) \
$(OMPI_TOP_BUILDDIR)/ompi/mca/common/ofacm/libmca_common_ofacm.la \
$(OMPI_TOP_BUILDDIR)/ompi/mca/common/verbs/libmca_common_verbs.la
noinst_LTLIBRARIES = $(component_noinst)
libmca_bcol_iboffload_la_SOURCES =$(sources)
libmca_bcol_iboffload_la_LDFLAGS = -module -avoid-version $(btl_openib_LDFLAGS) $(bcol_iboffload_LDFLAGS)


@ -1,765 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BCOL_IBOFFLOAD_H
#define MCA_BCOL_IBOFFLOAD_H
#include "ompi_config.h"
#include <stdio.h>
#include <assert.h>
#include <infiniband/mqe.h>
#include <infiniband/verbs.h>
#include <infiniband/mverbs.h>
#include "ompi/mca/mca.h"
#include "ompi/op/op.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/datatype/ompi_datatype_internal.h"
#include "ompi/mca/bcol/bcol.h"
#include "ompi/mca/bcol/base/base.h"
#include "ompi/mca/sbgp/ibnet/sbgp_ibnet.h"
#include "opal/mca/mpool/mpool.h"
#include "ompi/request/request.h"
#include "ompi/mca/common/ofacm/connect.h"
#include "bcol_iboffload_qp_info.h"
BEGIN_C_DECLS
#define IMM_RDMA 1
#define INLINE 1
#define NO_INLINE 0
#define MCA_IBOFFLOAD_CALC_SIZE_EXT 8
#define MCA_IBOFFLOAD_IB_DRIVER_OPERAND_SIZE 8
#define MCA_IBOFFLOAD_CACHE_LINE_SIZE 128
#if OPAL_HAVE_IBOFFLOAD_CALC_RDMA
#define MCA_BCOL_IBOFFLOAD_SEND_CALC IBV_M_WR_CALC_SEND
#else
#define MCA_BCOL_IBOFFLOAD_SEND_CALC IBV_M_WR_CALC
#endif
/* 0 - barrier rdma info
1 - ML rdma info */
#define MAX_REMOTE_RDMA_INFO 2
/* forward declarations */
struct mca_bcol_iboffload_module_t;
struct mca_bcol_iboffload_collreq_t;
struct mca_bcol_iboffload_endpoint_t;
struct mca_bcol_iboffload_frag_t;
struct mca_bcol_iboffload_task_t;
struct mca_bcol_iboffload_qp_info_t;
struct mca_bcol_iboffload_collfrag_t;
struct mca_bcol_iboffload_algth_lst_t;
struct mca_bcol_iboffload_device_t;
typedef int (*mca_bcol_iboffload_coll_algth_fn_t) (
struct mca_bcol_iboffload_module_t *iboffload,
struct mca_bcol_iboffload_collreq_t *coll_request);
struct mca_bcol_iboffload_rdma_info_t {
uint64_t addr;
uint32_t rkey;
uint32_t lkey;
};
typedef struct mca_bcol_iboffload_rdma_info_t mca_bcol_iboffload_rdma_info_t;
struct mca_bcol_iboffload_rdma_buffer_desc_t {
void *data_addr; /* buffer address */
uint64_t generation_number; /* my generation */
uint64_t bank_index; /* my bank */
uint64_t buffer_index; /* my buff index */
};
typedef struct mca_bcol_iboffload_rdma_buffer_desc_t mca_bcol_iboffload_rdma_buffer_desc_t;
struct mca_bcol_iboffload_rdma_block_desc_t {
/* number of memory banks */
uint32_t num_banks;
/* number of buffers per bank */
uint32_t num_buffers_per_bank;
/* size of a payload buffer */
uint32_t size_buffer;
/* data offset from ML */
uint32_t data_offset;
/* pointer to buffer descriptors initialized */
mca_bcol_iboffload_rdma_buffer_desc_t *rdma_desc;
};
typedef struct mca_bcol_iboffload_rdma_block_desc_t mca_bcol_iboffload_rdma_block_desc_t;
/* Information that we need to keep in order to access remote
memory. For each remote peer (endpoint) we will keep this
structure */
struct mca_bcol_iboffload_rem_rdma_block_t {
/* IB related information first */
mca_bcol_iboffload_rdma_info_t ib_info;
mca_bcol_iboffload_rdma_buffer_desc_t *rdma_desc;
};
typedef struct mca_bcol_iboffload_rem_rdma_block_t mca_bcol_iboffload_rem_rdma_block_t;
enum {
MCA_BCOL_IBOFFLOAD_BK_COUNTER_INDEX = 0,
MCA_BCOL_IBOFFLOAD_BK_SYNC_INDEX,
MCA_BCOL_IBOFFLOAD_BK_LAST
};
/* Information that we need to keep in order to access and
track local memory that is used as source and destination
for RDMA operations */
struct mca_bcol_iboffload_local_rdma_block_t {
/* sync counter keeps next to start bank id */
int sync_counter;
/* Counter for released ml buffers */
int *bank_buffer_counter[MCA_BCOL_IBOFFLOAD_BK_LAST];
/* IB related information first */
struct mca_bcol_iboffload_rdma_info_t ib_info;
/* back pointer to original ML memory descriptor */
struct mca_bcol_base_memory_block_desc_t *ml_mem_desc;
/* Pasha: do we really need this one ?*/
/* caching ml memory descriptor configurations locally */
mca_bcol_iboffload_rdma_block_desc_t bdesc;
};
typedef struct mca_bcol_iboffload_local_rdma_block_t mca_bcol_iboffload_local_rdma_block_t;
struct mca_bcol_iboffload_recv_wr_manager {
opal_mutex_t lock;
/** Array of ready-to-use receive work requests.
 * It is a 2-dimensional array since for each
 * QP size we want to keep a separate recv WR */
struct ibv_recv_wr **recv_work_requests;
};
typedef struct mca_bcol_iboffload_recv_wr_manager mca_bcol_iboffload_recv_wr_manager;
/**
 * Structure to hold the basic iboffload bcol component. First it holds the
 * base bcol component, and then holds a bunch of
 * iboffload-component-specific stuff (e.g., current MCA param
* values).
*/
struct mca_bcol_iboffload_component_t {
/** Base coll component */
mca_bcol_base_component_2_0_0_t super;
/** Enable disable verbose mode */
int verbose;
int num_qps;
/** Whether we want a warning if non default GID prefix is not configured
on multiport setup */
bool warn_default_gid_prefix;
/** Whether we want a warning if the user specifies a non-existent
device and/or port via bcol_iboffload_if_[in|ex]clude MCA params */
bool warn_nonexistent_if;
/** initial size of free lists */
int free_list_num;
/** maximum size of free lists */
int free_list_max;
/** number of elements to alloc when growing free lists */
int free_list_inc;
/** name of ib memory pool */
char* mpool_name;
/** max outstanding CQE on the CQ */
int cq_size;
/** Max size of inline data */
unsigned int max_inline_data;
/** IB partition definition */
uint32_t pkey_val;
/** Outstanding atomic reads */
unsigned int qp_ous_rd_atom;
/** IB MTU */
int mtu;
/** Recv not ready timer */
int min_rnr_timer;
/** IB timeout */
int timeout;
/** IB retry count */
int retry_count;
/** Recv not ready retry count */
int rnr_retry;
/** IB maximum pending RDMA */
int max_rdma_dst_ops;
/** IB Service level (QOS) */
int service_level;
/** Preferred communication buffer alignment in Bytes (must be power of two) */
int buffer_alignment;
/** Max tasks number for MQ */
int max_mqe_tasks;
/** Max MQ size */
int max_mq_size;
/** HCA/Port include exclude list */
char *if_include;
char **if_include_list;
char *if_exclude;
char **if_exclude_list;
/** Dummy argv-style list; a copy of names from the
if_[in|ex]clude list that we use for error checking (to ensure
that they all exist) */
char **if_list;
/** Array of ibv devices */
struct ibv_device **ib_devs;
/** devices count */
int num_devs;
/** MCA param bcol_iboffload_receive_queues */
char *receive_queues;
/** Common info about all kinds of QPs on each iboffload module */
struct mca_bcol_iboffload_qp_info_t qp_infos[MCA_BCOL_IBOFFLOAD_QP_LAST];
/** Array of iboffload devices */
opal_pointer_array_t devices;
/** Free lists of collfrag descriptors */
ompi_free_list_t collfrags_free;
/** Free lists of outstanding collective operations */
ompi_free_list_t collreqs_free;
/** Free lists for free task operations */
ompi_free_list_t tasks_free;
/** Free lists for free calc task operations */
ompi_free_list_t calc_tasks_free;
/** Free list of empty frags, that do not keep any
registration information */
ompi_free_list_t ml_frags_free;
/** Recv work request manager */
mca_bcol_iboffload_recv_wr_manager recv_wrs;
/** We allocate some resources on the component
* with creating of the first iboffload module
* and set this flag to true */
bool init_done;
/** Maximal number of fragments of the same collective request that can be sent in parallel */
unsigned int max_pipeline_depth;
/** array mapping Open MPI reduction operators to MVerbs reduction operators */
enum ibv_m_wr_calc_op map_ompi_to_ib_calcs[OMPI_OP_NUM_OF_TYPES];
/** array mapping Open MPI data types to MVerbs data types */
enum ibv_m_wr_data_type map_ompi_to_ib_dt[OMPI_DATATYPE_MPI_MAX_PREDEFINED];
/** The order of the exchange tree */
int exchange_tree_order;
/** Knomial tree order */
int knomial_tree_order;
/** K-nomial radix */
int k_nomial_radix;
/** Maximum number of pulls for completion check */
int max_progress_pull;
/** Barrier function selector */
int barrier_mode;
/** MCA for selecting Bruck's alltoall algorithms */
int use_brucks_smsg_alltoall_rdma;
int use_brucks_smsg_alltoall_sr;
/** radix of small-data alltoall Bruck-like algorithm */
int k_alltoall_bruck_radix;
/** alltoall small data buffer alignment */
int tmp_buf_alignment;
};
/**
* Convenience typedef
*/
typedef struct mca_bcol_iboffload_component_t mca_bcol_iboffload_component_t;
/* List of all algorithms that we use */
enum {
FANIN_ALG,
FANOUT_ALG,
RECURSIVE_DOUBLING_BARRIER_ALG,
RECURSIVE_KNOMIAL_BARRIER_ALG,
RECURSIVE_DOUBLING_ALLREDUCE_ALG,
RECURSIVE_DOUBLING_REDUCE_ALG,
RECURSIVE_DOUBLING_TREE_BCAST,
ALL_ENDPOINTS, /* connected to all peers */
ALLGATHER_KNOMIAL_ALG,
ALLGATHER_NEIGHBOR_ALG,
REMOTE_EXCHANGE_ALG,
LAST_ALG
};
struct mca_bcol_iboffload_port_t {
int id; /** Port number on device: 1 or 2 */
int stat; /** Port status - Active,Init,etc.. */
enum ibv_mtu mtu; /** MTU on this port */
uint64_t subnet_id; /** Subnet id for the port */
uint16_t lid;
uint16_t lmc;
};
typedef struct mca_bcol_iboffload_port_t mca_bcol_iboffload_port_t;
enum {
COLL_MQ = 0,
SERVICE_MQ,
BCOL_IBOFFLOAD_MQ_NUM
};
struct mca_bcol_iboffload_module_t {
/* base structure */
mca_bcol_base_module_t super;
/* size */
int group_size;
int log_group_size;
/* size of each memory segment */
size_t segment_size;
/* collective tag */
long long collective_tag;
/* pointer to device */
struct mca_bcol_iboffload_device_t *device;
/* caching port number */
uint32_t port;
/* Connecting iboffload with ibnet module information */
/* pointer to sbgp ibnet */
mca_sbgp_ibnet_module_t *ibnet;
/* connection group index for the ibnet */
int cgroup_index;
/* array of endpoints */
struct mca_bcol_iboffload_endpoint_t **endpoints;
/* Size of the endpoints array */
int num_endpoints;
/* caching port subnet id and lid
* the same information we have on device */
uint64_t subnet_id;
uint16_t lid;
/* Pointer to management queue */
struct mqe_context *mq[BCOL_IBOFFLOAD_MQ_NUM];
int mq_credit[BCOL_IBOFFLOAD_MQ_NUM];
/* pending list of collfrags */
opal_list_t collfrag_pending;
/* recursive-doubling tree node */
netpatterns_pair_exchange_node_t recursive_doubling_tree;
/* N exchange tree */
netpatterns_pair_exchange_node_t n_exchange_tree;
/* Knomial exchange tree */
netpatterns_k_exchange_node_t knomial_exchange_tree;
/* Knomial allgather tree */
netpatterns_k_exchange_node_t knomial_allgather_tree;
/* The array will keep pre-calculated task consumption per
* algorithm
*/
uint32_t alg_task_consump[LAST_ALG];
/* Pointer to a func that implements a barrier algorithm */
mca_bcol_iboffload_coll_algth_fn_t barrier_algth;
/* Pointer to a func that implements a fanin algorithm */
mca_bcol_iboffload_coll_algth_fn_t fanin_algth;
/* Pointer to a func that implements a fanout algorithm */
mca_bcol_iboffload_coll_algth_fn_t fanout_algth;
/* Pointer to a func that implements an allreduce algorithm */
mca_bcol_iboffload_coll_algth_fn_t allreduce_algth;
/* Pointer to a func that implements a non-blocking memory synchronization algorithm */
mca_bcol_iboffload_coll_algth_fn_t memsync_algth;
/* rdma block memory information */
mca_bcol_iboffload_local_rdma_block_t rdma_block;
/* The largest exponent for which 1 << power_of_2
   is not larger than the group size */
int power_of_2;
/* The largest power of two number which is not larger than the group size */
int power_of_2_ranks;
/* Connection status array */
bool connection_status[LAST_ALG];
/* map from communicator ranks to ibsubnet */
int *comm_to_ibnet_map;
/* order preserving value */
int64_t prev_sequence_num;
/* Temp iovec to send the data fragments -- alltoall Brucks */
struct iovec *alltoall_iovec;
struct iovec *alltoall_recv_iovec;
/* tree radix for the knomial bruck small data alltoall */
int k_alltoall_bruck_radix;
/* Temp buffer alignment for knomial bruck small data alltoall */
int tmp_buf_alignment;
/* Free task list with sge's array */
ompi_free_list_t iovec_tasks_free;
};
typedef struct mca_bcol_iboffload_module_t mca_bcol_iboffload_module_t;
OBJ_CLASS_DECLARATION(mca_bcol_iboffload_module_t);
/**
* Global component instance
*/
OMPI_MODULE_DECLSPEC
extern mca_bcol_iboffload_component_t mca_bcol_iboffload_component;
static inline int mca_bcol_iboffload_err(const char* fmt, ...)
{
va_list list;
int ret;
va_start(list, fmt);
ret = vfprintf(stderr, fmt, list);
va_end(list);
return ret;
}
#define MCA_BCOL_IBOFFLOAD_ALLREDUCE_DO_CALC(ompi_op, c_type, l_operand, r_operand, result) \
do { \
switch (ompi_op) { \
case OMPI_OP_MAX: \
*((c_type *)&result) = ((*(c_type *)&(l_operand) > *(c_type *)&(r_operand)) ? \
*(c_type *)&(l_operand) : *(c_type *)&(r_operand)); \
break; \
case OMPI_OP_MIN: \
*((c_type *)&result) = ((*(c_type *)&(l_operand) < *(c_type *)&(r_operand)) ? \
*(c_type *)&(l_operand) : *(c_type *)&(r_operand)); \
break; \
case OMPI_OP_SUM: \
*((c_type *)&result) = (*((c_type *)&(l_operand)) + *((c_type *)&(r_operand))); \
break; \
default: \
break; \
} \
} while (0)
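/*
 * Illustrative sketch, not part of the original header: how the DO_CALC
 * macro above reduces one pair of operands on the host.  The helper below
 * is hypothetical; it applies OMPI_OP_SUM to two doubles and would return
 * 8.0 for inputs 3.0 and 5.0.
 */
static inline double
example_allreduce_host_calc_sum(double l_operand, double r_operand)
{
    double result = 0.0;

    /* expands to: *(double *)&result = *(double *)&l_operand + *(double *)&r_operand */
    MCA_BCOL_IBOFFLOAD_ALLREDUCE_DO_CALC(OMPI_OP_SUM, double,
                                         l_operand, r_operand, result);
    return result;
}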
#define MCA_BCOL_IBOFFLOAD_PKEY_MASK 0x7fff
#define MCA_BCOL_IBOFFLOAD_DEFAULT_GID_PREFIX 0xfe80000000000000ll
#define IBOFFLOAD_ERROR(args) \
do { \
mca_bcol_iboffload_err("[%s]%s[%s:%d:%s] IBOFFLOAD ", \
ompi_process_info.nodename, \
OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \
__FILE__, __LINE__, __func__); \
mca_bcol_iboffload_err args; \
mca_bcol_iboffload_err("\n"); \
} while(0)
#if OPAL_ENABLE_DEBUG
#define IBOFFLOAD_VERBOSE(level, args) \
do { \
if (mca_bcol_iboffload_component.verbose >= level) { \
mca_bcol_iboffload_err("[%s]%s[%s:%d:%s] IBOFFLOAD ", \
ompi_process_info.nodename, \
OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \
__FILE__, __LINE__, __func__); \
mca_bcol_iboffload_err args; \
mca_bcol_iboffload_err("\n"); \
} \
} while(0)
#else
#define IBOFFLOAD_VERBOSE(level, args)
#endif
#define MCA_BCOL_IBOFFLOAD_SET_COLL_REQ_LINKS(coll_req, coll_work_req) \
do { \
opal_list_append(&(coll_req)->work_requests, \
(opal_list_item_t*) (coll_work_req)); \
(coll_work_req)->coll_full_req = (coll_req); \
} while(0)
/* Vasily: will be removed soon */
#define APPEND_TO_TASKLIST(task_ptr_to_set, event, last_event_type) \
do { \
*task_ptr_to_set = &(event)->element; \
last_event_type = &(event)->element; \
task_ptr_to_set = &((event)->element.next); \
} while(0)
#define MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(task_ptr_to_set, task) \
do { \
*task_ptr_to_set = (task); \
task_ptr_to_set = &((task)->next_task); \
} while(0)
#define MCA_BCOL_IBOFFLOAD_APPEND_MQ_TASK_TO_LIST(task_ptr_to_set, event) \
do { \
*task_ptr_to_set = &(event)->element; \
task_ptr_to_set = &((event)->element.next); \
} while(0)
#define BCOL_IS_COMPLETED(req) (((req)->n_frag_mpi_complete == (req)->n_fragments) && \
((req)->n_fragments > 0))
#define BCOL_AND_NET_ARE_COMPLETED(req) (BCOL_IS_COMPLETED(req) && \
((req)->n_frag_net_complete == (req)->n_fragments))
/* Pasha: Need to add locks here */
#define BCOL_IBOFFLOAD_MQ_HAVE_CREDITS(module, mq_index, num_of_credits) \
(((module)->mq_credit[mq_index] -= (num_of_credits)) < 0 ? false : true)
/* Pasha: Need to add locks here */
#define BCOL_IBOFFLOAD_MQ_RETURN_CREDITS(module, mq_index, num_of_credits) \
((module)->mq_credit[mq_index] += (num_of_credits))
#define BCOL_IBOFFLOAD_IS_FIRST_CALL(args) (0 == (args)->index_in_consecutive_same_bcol_calls)
#define BCOL_IBOFFLOAD_IS_LAST_CALL(args) (((args)->n_of_this_type_in_collective - 1) == \
(args)->index_of_this_type_in_collective)
#define BCOL_IBOFFLOAD_READY_TO_POST(args) (((args)->n_of_this_type_in_a_row - 1) == \
(args)->index_in_consecutive_same_bcol_calls)
/*
* bcol module functions
*/
int mca_bcol_iboffload_rec_doubling_start_connections(struct mca_bcol_iboffload_module_t *iboffload);
/* RDMA addr exchange with rem proc */
int mca_bcol_iboffload_exchange_rem_addr(struct mca_bcol_iboffload_endpoint_t *ep);
/* Progress function */
int mca_bcol_iboffload_component_progress(void);
/* Register memory */
int mca_bcol_iboffload_register_mr(void *reg_data, void * base, size_t size,
mca_mpool_base_registration_t *reg);
/* Deregister memory */
int mca_bcol_iboffload_deregister_mr(void *reg_data, mca_mpool_base_registration_t *reg);
/*
 * The function is used to create a CQ in this module.
*/
int mca_bcol_iboffload_adjust_cq(struct mca_bcol_iboffload_device_t *device,
struct ibv_cq **ib_cq);
/*
* Query to see if the component is available for use,
* and can satisfy the thread and progress requirements
*/
int mca_bcol_iboffload_init_query(bool enable_progress_threads,
bool enable_mpi_threads);
/* Interface to setup the allgather tree */
int mca_bcol_iboffload_setup_knomial_tree(mca_bcol_base_module_t *super);
/*
* Query to see if the module is available for use on
 * the given communicator, and if so, what its priority is.
*/
mca_bcol_base_module_t **
mca_bcol_iboffload_comm_query(mca_sbgp_base_module_t *sbgp, int *num_modules);
int
mca_bcol_iboffload_free_tasks_frags_resources(
struct mca_bcol_iboffload_collfrag_t *collfrag,
ompi_free_list_t *frags_free);
/**
 * Small-message broadcast
*/
int mca_bcol_iboffload_small_msg_bcast_intra(bcol_function_args_t *fn_arguments,
struct mca_bcol_base_function_t
*const_args);
int mca_bcol_iboffload_barrier_intra_recursive_doubling_start(
struct mca_bcol_iboffload_module_t *iboffload,
struct mca_bcol_iboffload_collreq_t *coll_request);
int mca_bcol_iboffload_barrier_intra_recursive_knomial_start(
struct mca_bcol_iboffload_module_t *iboffload,
struct mca_bcol_iboffload_collreq_t *coll_request);
int mca_bcol_iboffload_barrier_intra_recursive_doubling(
mca_bcol_iboffload_module_t *iboffload,
struct mca_bcol_iboffload_collreq_t *coll_request);
int mca_bcol_iboffload_nb_memory_service_barrier_start(
struct mca_bcol_iboffload_module_t *iboffload,
struct mca_bcol_iboffload_collreq_t *coll_request);
int mca_bcol_iboffload_fanin_register(mca_bcol_base_module_t *super);
int mca_bcol_iboffload_fanout_register(mca_bcol_base_module_t *super);
int mca_bcol_iboffload_barrier_register(mca_bcol_base_module_t *super);
int mca_bcol_iboffload_memsync_register(mca_bcol_base_module_t *super);
int mca_bcol_iboffload_allreduce_register(mca_bcol_base_module_t *super);
int mca_bcol_iboffload_new_style_fanin_first_call(
mca_bcol_iboffload_module_t *iboffload,
struct mca_bcol_iboffload_collreq_t *coll_request);
int mca_bcol_iboffload_new_style_fanout_first_call(
mca_bcol_iboffload_module_t *iboffload,
struct mca_bcol_iboffload_collreq_t *coll_request);
int mca_bcol_iboffload_nb_memory_service_barrier_intra(bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args);
int mca_bcol_iboffload_coll_support_all_types(bcol_coll coll_name);
int mca_bcol_iboffload_coll_supported(int op, int dtype, bcol_elem_type elem_type);
static inline __opal_attribute_always_inline__ int
mca_bcol_iboffload_fls(int num)
{
int i = 1;
int j = 0;
if (0 == num) {
return 0;
}
while (i < num) {
i <<= 1;
j++;
}
if (i > num) {
j--;
}
return j;
}
#define BCOL_IBOFFLOAD_IS_EVEN(num) (!((num) & 1))
static inline __opal_attribute_always_inline__ int
mca_bcol_iboffload_ffs(int num)
{
int j = 0;
if (0 == num) {
return 0;
}
while (BCOL_IBOFFLOAD_IS_EVEN(num)) {
num >>= 1;
j++;
}
return j;
}
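/*
 * Illustrative examples, not part of the original header: the two helpers
 * above compute floor(log2(num)) and the index of the lowest set bit
 * respectively, both mapping 0 to 0.  The checks below are hypothetical.
 */
static inline void
example_bit_helpers(void)
{
    assert(3 == mca_bcol_iboffload_fls(8));    /* 2^3 == 8            */
    assert(2 == mca_bcol_iboffload_fls(5));    /* 2^2 <= 5 < 2^3      */
    assert(0 == mca_bcol_iboffload_fls(0));    /* special case        */

    assert(2 == mca_bcol_iboffload_ffs(12));   /* 12 == 0b1100        */
    assert(0 == mca_bcol_iboffload_ffs(5));    /* lowest bit already set */
}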
#if OPAL_ENABLE_DEBUG
/* Post task list MQ */
#define IS_IMM(a) (a & MQE_WR_FLAG_IMM_EXE)
#define IS_SIG(a) (a & MQE_WR_FLAG_SIGNAL)
#define IS_BLK(a) (a & MQE_WR_FLAG_BLOCK)
int task_to_rank(mca_bcol_iboffload_module_t *iboffload, struct mqe_task *task);
int wait_to_rank(mca_bcol_iboffload_module_t *iboffload, struct mqe_task *task);
#endif
/* MQ posting function */
static inline __opal_attribute_always_inline__ int
mca_bcol_iboffload_post_mqe_tasks(
mca_bcol_iboffload_module_t *iboffload,
struct mqe_task *head_mqe)
{
int rc;
struct mqe_task *bad_mqe = NULL;
#if OPAL_ENABLE_DEBUG /* debug code */
struct mqe_task *curr_mqe_task = NULL;
int send_count = 0, recv_count = 0, wait_count = 0;
curr_mqe_task = head_mqe;
IBOFFLOAD_VERBOSE(10, ("Processing MQE Head with addr %p <START>\n",
(uintptr_t) (void*) curr_mqe_task));
while (NULL != curr_mqe_task) {
switch(curr_mqe_task->opcode) {
case MQE_WR_SEND:
IBOFFLOAD_VERBOSE(10, ("Posting task %p id 0x%x: send on QP 0x%x\n"
"rank %d, sg_entry: addr %p LEN %d lkey %u, flag[%d-%d-%d]\n",
(void*) curr_mqe_task, (uintptr_t) curr_mqe_task->wr_id,
curr_mqe_task->post.qp->qp_num,
task_to_rank(iboffload, curr_mqe_task),
curr_mqe_task->post.send_wr->sg_list->addr,
curr_mqe_task->post.send_wr->sg_list->length,
curr_mqe_task->post.send_wr->sg_list->lkey,
IS_IMM(curr_mqe_task->flags), IS_SIG(curr_mqe_task->flags), IS_BLK(curr_mqe_task->flags)));
++send_count;
break;
case MQE_WR_RECV:
IBOFFLOAD_VERBOSE(10, ("Posting task %p id 0x%x: recv on QP 0x%x rank %d flag[%d-%d-%d]\n",
(void*) curr_mqe_task, (uintptr_t) curr_mqe_task->wr_id,
curr_mqe_task->post.qp->qp_num, task_to_rank(iboffload, curr_mqe_task),
IS_IMM(curr_mqe_task->flags), IS_SIG(curr_mqe_task->flags), IS_BLK(curr_mqe_task->flags)));
++recv_count;
break;
case MQE_WR_CQE_WAIT:
IBOFFLOAD_VERBOSE(10, ("Posting task %p id %x: wait on CQ %p for rank %d num of waits %d flag[%d-%d-%d]\n",
(void*) curr_mqe_task, (uintptr_t) curr_mqe_task->wr_id,
(void*) curr_mqe_task->wait.cq, wait_to_rank(iboffload, curr_mqe_task),
curr_mqe_task->wait.count,
IS_IMM(curr_mqe_task->flags), IS_SIG(curr_mqe_task->flags), IS_BLK(curr_mqe_task->flags)));
wait_count += curr_mqe_task->wait.count;
break;
default:
IBOFFLOAD_ERROR(("Fatal error, unknow packet type %d\n",
curr_mqe_task->opcode));
return OMPI_ERROR;
}
/* pointer to next task */
curr_mqe_task = curr_mqe_task->next;
}
IBOFFLOAD_VERBOSE(10, ("wait[%d] send[%d] recv[%d]\n",
wait_count, send_count, recv_count));
#endif
IBOFFLOAD_VERBOSE(10, ("Posting MQ %p <DONE>\n", (uintptr_t) head_mqe->wr_id));
rc = mqe_post_task(iboffload->mq[0], head_mqe, &bad_mqe);
if (OPAL_UNLIKELY(0 != rc)) {
IBOFFLOAD_ERROR(("ibv_post_mqe failed, errno says: %s,"
" the return code is [%d]\n",
strerror(errno), rc));
return OMPI_ERROR;
}
return OMPI_SUCCESS;
}
static inline __opal_attribute_always_inline__
int lognum(int n) {
int count = 1, lognum = 0;
while (count < n) {
count = count << 1;
lognum++;
}
return lognum;
}
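/*
 * Illustrative example, not part of the original file: lognum() above is
 * ceil(log2(n)) for n >= 1, i.e. the number of doubling rounds needed to
 * cover n ranks.  The checks below are hypothetical.
 */
static inline void
example_lognum(void)
{
    assert(0 == lognum(1));   /* a single rank needs no rounds  */
    assert(3 == lognum(5));   /* 5 ranks need 3 doubling rounds */
    assert(3 == lognum(8));   /* exact power of two             */
}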
END_C_DECLS
#endif /* MCA_BCOL_IBOFFLOAD_H */

Diff not shown because of its large size.

Diff not shown because of its large size.

@ -1,934 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <errno.h>
#include "bcol_iboffload.h"
#include "bcol_iboffload_frag.h"
#include "bcol_iboffload_task.h"
#include "bcol_iboffload_collreq.h"
#include "bcol_iboffload_collfrag.h"
#include "bcol_iboffload_endpoint.h"
#include "ompi/mca/coll/ml/coll_ml_allocation.h"
static int mca_bcol_iboffload_barrier_init(
bcol_function_args_t *input_args,
mca_bcol_iboffload_module_t *iboffload,
collective_message_completion_callback_function cb_fn,
struct mca_bcol_iboffload_collreq_t **coll_request);
/**
* Start barrier
*/
int mca_bcol_iboffload_barrier_intra_recursive_doubling(
mca_bcol_iboffload_module_t *iboffload,
struct mca_bcol_iboffload_collreq_t *coll_request)
{
/* local variables */
mca_bcol_iboffload_task_t *send_task = NULL,
*wait_task = NULL;
struct mqe_task **mqe_ptr_to_set = NULL;
mca_bcol_iboffload_collfrag_t *coll_fragment = NULL;
struct mqe_task *last_wait = NULL, /* we need to ask for completion on the last wait */
*last_send = NULL; /* if there is no wait, we ask for completion on the last send */
int rc, exchange, extra_rank, pair_rank;
mca_bcol_iboffload_frag_t *send_fragment = NULL,
*preposted_recv_frag = NULL;
netpatterns_pair_exchange_node_t *my_exchange_node =
&iboffload->recursive_doubling_tree;
IBOFFLOAD_VERBOSE(10, ("Calling for mca_bcol_iboffload_barrier_intra_recursive_doubling.\n"));
coll_fragment = (mca_bcol_iboffload_collfrag_t *)
opal_list_get_last(&coll_request->work_requests);
/* Set mq credits */
coll_fragment->mq_credits = iboffload->alg_task_consump[RECURSIVE_DOUBLING_BARRIER_ALG];
if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS(
iboffload, coll_fragment->mq_index, coll_fragment->mq_credits))) {
IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n"));
goto out_of_resources;
}
coll_fragment->alg = RECURSIVE_DOUBLING_BARRIER_ALG;
/*
* NOTE: need to generate template, if this will be a multiple fragment
* message. This way we can progress the collective w/o knowing its
* type - actually, this is not the case for barrier, but just a note
* to remind us that we need to generalize this.
*/
mqe_ptr_to_set = &coll_fragment->to_post;
/*
* Fill in the communication pattern
*/
/*
* If non power of 2, may need to wait for message from "extra" proc.
*/
if (0 < my_exchange_node->n_extra_sources) {
if (EXCHANGE_NODE == my_exchange_node->node_type) {
/* I will participate in the exchange (of the algorithm) -
* wait for signal from extra process */
extra_rank = my_exchange_node->rank_extra_source;
preposted_recv_frag = mca_bcol_iboffload_get_preposted_recv_frag(
iboffload, extra_rank, coll_request->qp_index);
if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) {
IBOFFLOAD_VERBOSE(10, ("Non power of 2 case: "
"Failing for getting prepost recv frag.\n"));
goto out_of_resources;
}
wait_task = mca_bcol_iboffload_get_wait_task(iboffload,
extra_rank, 1, preposted_recv_frag, coll_request->qp_index, NULL);
if (OPAL_UNLIKELY(NULL == wait_task)) {
IBOFFLOAD_VERBOSE(10, ("Non power of 2 case: "
"Failing for getting wait task.\n"));
goto out_of_resources;
}
APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait);
MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task);
} else {
/* I will not participate in the exchange - so just "register" as here */
extra_rank = my_exchange_node->rank_extra_source;
/* send - no need to send any data, in-order delivery */
send_fragment = mca_bcol_iboffload_get_send_frag(coll_request,
extra_rank, coll_request->qp_index, 0,
0, SBUF,MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY);
send_task = mca_bcol_iboffload_get_send_task(iboffload, extra_rank,
coll_request->qp_index, send_fragment, coll_fragment, INLINE);
if (OPAL_UNLIKELY(NULL == send_task)) {
IBOFFLOAD_VERBOSE(10, ("Non power of 2 case: "
"Failing for getting send task.\n"));
goto out_of_resources;
}
APPEND_TO_TASKLIST(mqe_ptr_to_set, send_task, last_send);
MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task);
}
}
/* loop over exchange send/recv pairs */
for (exchange = 0; exchange < my_exchange_node->n_exchanges; ++exchange) {
/* rank of exchange partner */
pair_rank = my_exchange_node->rank_exchanges[exchange];
/* post send */
send_fragment = mca_bcol_iboffload_get_send_frag(coll_request,
pair_rank, coll_request->qp_index, 0,
0, SBUF, MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY);
assert(NULL != send_fragment);
send_task = mca_bcol_iboffload_get_send_task(iboffload, pair_rank,
coll_request->qp_index,
send_fragment, coll_fragment, INLINE);
if (OPAL_UNLIKELY(NULL == send_task)) {
IBOFFLOAD_VERBOSE(10, ("Exchaging: "
"Failing for getting send task.\n"));
goto out_of_resources;
}
APPEND_TO_TASKLIST(mqe_ptr_to_set, send_task, last_send);
MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task);
/* post wait */
preposted_recv_frag = mca_bcol_iboffload_get_preposted_recv_frag(
iboffload, pair_rank, coll_request->qp_index);
if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) {
IBOFFLOAD_VERBOSE(10, ("Exchaging: "
"Failing for getting prepost recv frag.\n"));
goto out_of_resources;
}
wait_task = mca_bcol_iboffload_get_wait_task(iboffload, pair_rank, 1,
preposted_recv_frag,
coll_request->qp_index, NULL);
if (OPAL_UNLIKELY(NULL == wait_task)) {
IBOFFLOAD_VERBOSE(10, ("Exchaging: "
"Failing for getting wait task.\n"));
goto out_of_resources;
}
APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait);
MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task);
}
/* if non power of 2, may need to send message to "extra" proc */
if (0 < my_exchange_node->n_extra_sources) {
if (EXTRA_NODE == my_exchange_node->node_type) {
/* I will not participate in the exchange -
* wait for signal from exchange process */
extra_rank = my_exchange_node->rank_extra_source;
/* post wait */
preposted_recv_frag =
mca_bcol_iboffload_get_preposted_recv_frag(iboffload, extra_rank,
coll_request->qp_index);
if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) {
IBOFFLOAD_VERBOSE(10, ("Sending to 'extra' node: "
"Failing for getting prepost recv frag.\n"));
goto out_of_resources;
}
wait_task = mca_bcol_iboffload_get_wait_task(iboffload, extra_rank, 1,
preposted_recv_frag,
coll_request->qp_index, NULL);
if (OPAL_UNLIKELY(NULL == wait_task)) {
IBOFFLOAD_VERBOSE(10, ("Sending to 'extra' node: "
"Failing for getting wait task.\n"));
goto out_of_resources;
}
APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait);
MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task);
} else {
/* I will participate in the exchange -
* send signal to extra process */
extra_rank = my_exchange_node->rank_extra_source;
send_fragment = mca_bcol_iboffload_get_send_frag(coll_request,
extra_rank, coll_request->qp_index, 0,
0, SBUF, MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY);
send_task = mca_bcol_iboffload_get_send_task(
iboffload, extra_rank,
coll_request->qp_index,
send_fragment, coll_fragment, INLINE);
if (OPAL_UNLIKELY(NULL == send_task)) {
IBOFFLOAD_VERBOSE(10, ("Sending to 'extra' node: "
"Failing for getting send task.\n"));
goto out_of_resources;
}
APPEND_TO_TASKLIST(mqe_ptr_to_set, send_task, last_send);
MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task);
}
}
/* Fill in the rest of the coll_fragment */
IBOFFLOAD_VERBOSE(10, ("Fill in the rest of the coll_fragment.\n"));
/* end of list */
*mqe_ptr_to_set = NULL;
/* finish initializing full message descriptor */
coll_request->n_fragments = 1;
coll_request->n_frags_sent = 1;
coll_request->n_frag_mpi_complete = 0;
coll_request->n_frag_net_complete = 0;
coll_request->user_handle_freed = false;
last_wait->flags |= MQE_WR_FLAG_SIGNAL;
coll_fragment->signal_task_wr_id = last_wait->wr_id;
last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment;
/* post the mwr */
if (MCA_BCOL_IBOFFLOAD_QP_SYNC != coll_request->qp_index) {
rc = mca_bcol_iboffload_post_mqe_tasks(iboffload, coll_fragment->to_post);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n"));
/* Note: need to clean up */
return rc;
}
MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload->super, coll_request->order_info);
} else {
/* Special flow for the ML service barrier; only this function is supposed to
   post service requests */
struct mqe_task *bad_mqe = NULL;
assert (MCA_BCOL_IBOFFLOAD_QP_SYNC == coll_request->qp_index );
/* Post to special service MQ - 1 */
rc = mqe_post_task(iboffload->mq[1], coll_fragment->to_post, &bad_mqe);
if (OPAL_UNLIKELY(0 != rc)) {
IBOFFLOAD_ERROR(("ibv_post_mqe failed on device (%s), errno says: %s,"
" the return code is [%d]\n",
ibv_get_device_name(iboffload->device->dev.ib_dev),
strerror(errno), rc));
return OMPI_ERROR;
}
}
IBOFFLOAD_VERBOSE(10, ("Return success.\n"));
return OMPI_SUCCESS;
out_of_resources:
/* Release all resources */
IBOFFLOAD_VERBOSE(10, ("Barrier, adding collfrag to collfrag_pending.\n"));
return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload);
}
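/*
 * Illustrative sketch, not part of the original file: the communication
 * pattern the function above encodes as an offloaded task list, rewritten
 * with hypothetical blocking send_signal()/wait_signal() callbacks so the
 * recursive-doubling structure is easier to see.  Only
 * netpatterns_pair_exchange_node_t and its fields come from the real code.
 */
static inline void
example_recursive_doubling_barrier(netpatterns_pair_exchange_node_t *exchange_node,
                                   void (*send_signal)(int peer),
                                   void (*wait_signal)(int peer))
{
    int exchange;

    /* ranks beyond the largest power of two check in with their proxy first */
    if (0 < exchange_node->n_extra_sources) {
        if (EXCHANGE_NODE == exchange_node->node_type) {
            wait_signal(exchange_node->rank_extra_source);
        } else {
            send_signal(exchange_node->rank_extra_source);
        }
    }

    /* log2(P) pairwise exchanges; after round i every rank has heard,
     * directly or indirectly, from a group of size 2^(i+1) */
    for (exchange = 0; exchange < exchange_node->n_exchanges; ++exchange) {
        int pair_rank = exchange_node->rank_exchanges[exchange];

        send_signal(pair_rank);
        wait_signal(pair_rank);
    }

    /* finally the proxies release the extra ranks */
    if (0 < exchange_node->n_extra_sources) {
        if (EXTRA_NODE == exchange_node->node_type) {
            wait_signal(exchange_node->rank_extra_source);
        } else {
            send_signal(exchange_node->rank_extra_source);
        }
    }
}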
int mca_bcol_iboffload_barrier_intra_recursive_doubling_start(
struct mca_bcol_iboffload_module_t *iboffload,
struct mca_bcol_iboffload_collreq_t *coll_request)
{
int rc;
rc = mca_bcol_iboffload_rec_doubling_start_connections(iboffload);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
return rc;
}
iboffload->barrier_algth =
mca_bcol_iboffload_barrier_intra_recursive_doubling;
return
mca_bcol_iboffload_barrier_intra_recursive_doubling(iboffload, coll_request);
}
int mca_bcol_iboffload_nb_memory_service_barrier_start(
struct mca_bcol_iboffload_module_t *iboffload,
struct mca_bcol_iboffload_collreq_t *coll_request)
{
int rc;
rc = mca_bcol_iboffload_rec_doubling_start_connections(iboffload);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
return rc;
}
iboffload->memsync_algth =
mca_bcol_iboffload_barrier_intra_recursive_doubling;
return
mca_bcol_iboffload_barrier_intra_recursive_doubling
(iboffload, coll_request);
}
int mca_bcol_iboffload_nb_memory_service_barrier_intra(bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args)
{
/* local variables */
int rc;
mca_bcol_iboffload_collreq_t *coll_request;
mca_bcol_iboffload_module_t *iboffload =
(mca_bcol_iboffload_module_t *) const_args->bcol_module;
/*
* recursive doubling
*/
IBOFFLOAD_VERBOSE(10, ("Memory syncranization barrier was started\n"));
/* init barrier collective request */
rc = mca_bcol_iboffload_barrier_init(input_args, iboffload, NULL, &coll_request);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
IBOFFLOAD_ERROR(("Get error from mca_bcol_iboffload_barrier_init"));
return rc;
}
/* set the qp index to special qp that is used only for synchronization */
coll_request->qp_index = MCA_BCOL_IBOFFLOAD_QP_SYNC;
/* overwrite mq index to run over service setup */
coll_request->first_collfrag.mq_index = SERVICE_MQ;
/* start the barrier */
rc = iboffload->memsync_algth(iboffload, coll_request);
if (OPAL_UNLIKELY(OMPI_ERROR == rc)) {
return rc;
}
/* complete the barrier - progress releases full request descriptors */
IBOFFLOAD_VERBOSE(10, ("Memory syncranization barrier was started\n"));
/* done */
return BCOL_FN_STARTED;
}
/* Recursive K-ing */
static int recursive_knomial_start_connections(struct mca_bcol_iboffload_module_t *iboffload)
{
netpatterns_k_exchange_node_t *my_exchange_node =
&iboffload->knomial_exchange_tree;
int k, i, n_exchanges = my_exchange_node->n_exchanges,
**exchanges = my_exchange_node->rank_exchanges,
n_extra_src = my_exchange_node->n_extra_sources,
tree_order = my_exchange_node->tree_order - 1,
rank_extra_src;
mca_bcol_iboffload_endpoint_t *ep;
iboffload->alg_task_consump[RECURSIVE_KNOMIAL_BARRIER_ALG] += 0;
IBOFFLOAD_VERBOSE(10, ("\nMy sbgp rank (index) - %d, "
"num of endpoints = %d, iboffload module - %p"
" extra n %d, n_exchanges %d",
iboffload->ibnet->super.my_index, iboffload->num_endpoints, iboffload,
n_extra_src, n_exchanges));
if (0 < n_extra_src) {
for (k = 0; k < n_extra_src; k++) {
iboffload->alg_task_consump[RECURSIVE_KNOMIAL_BARRIER_ALG] += 2; /* One send task one wait */
rank_extra_src = my_exchange_node->rank_extra_sources_array[k];
ep = iboffload->endpoints[rank_extra_src];
if (iboffload->ibnet->super.my_index < ep->index) {
while(0 == (ep)->remote_zero_rdma_addr.addr) {
opal_progress();
}
} else {
IBOFFLOAD_VERBOSE(10, ("Trying to connect - %d", ep->index));
while (OMPI_SUCCESS !=
check_endpoint_state(ep, NULL, NULL)) {
opal_progress();
}
}
}
}
for (i = 0; i < n_exchanges; ++i) {
for (k = 0; k < tree_order; k++) {
iboffload->alg_task_consump[RECURSIVE_KNOMIAL_BARRIER_ALG] += 2; /* One send task one wait */
ep = iboffload->endpoints[exchanges[i][k]];
IBOFFLOAD_VERBOSE(10, ("Trying to connect - %d", ep->index));
if (iboffload->ibnet->super.my_index < ep->index) {
while(0 == (ep)->remote_zero_rdma_addr.addr) {
opal_progress();
}
} else {
while (OMPI_SUCCESS !=
check_endpoint_state(ep, NULL, NULL)) {
opal_progress();
}
}
}
}
return OMPI_SUCCESS;
}
static int mca_bcol_iboffload_barrier_intra_recursive_knomial(
mca_bcol_iboffload_module_t *iboffload,
struct mca_bcol_iboffload_collreq_t *coll_request)
{
/* local variables */
mca_bcol_iboffload_task_t *send_task = NULL,
*wait_task = NULL;
struct mqe_task **mqe_ptr_to_set = NULL;
mca_bcol_iboffload_collfrag_t *coll_fragment = NULL;
struct mqe_task *last_wait = NULL, /* we need to ask for completion on the last wait */
*last_send = NULL; /* if there is no wait, we ask for completion on the last send */
int rc, exchange, extra_rank, pair_rank, k;
mca_bcol_iboffload_frag_t *send_fragment = NULL,
*preposted_recv_frag = NULL;
netpatterns_k_exchange_node_t *my_exchange_node =
&iboffload->knomial_exchange_tree;
IBOFFLOAD_VERBOSE(10, ("Calling for mca_bcol_iboffload_barrier_intra_recursive_knomial. Node type %d\n", my_exchange_node->node_type));
coll_fragment = (mca_bcol_iboffload_collfrag_t *)
opal_list_get_last(&coll_request->work_requests);
/* Set mq credits */
coll_fragment->mq_credits = iboffload->alg_task_consump[RECURSIVE_KNOMIAL_BARRIER_ALG];
if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS(
iboffload, coll_fragment->mq_index, coll_fragment->mq_credits))) {
IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n"));
goto out_of_resources;
}
coll_fragment->alg = RECURSIVE_KNOMIAL_BARRIER_ALG;
/*
* NOTE: need to generate template, if this will be a multiple fragment
* message. This way we can progress the collective w/o knowing its
* type - actually, this is not the case for barrier, but just a note
* to remind us that we need to generalize this.
*/
mqe_ptr_to_set = &coll_fragment->to_post;
/*
* Fill in the communication pattern
*/
/*
* If non power of 2, may need to wait for message from "extra" proc.
*/
if (0 < my_exchange_node->n_extra_sources) {
if (EXCHANGE_NODE == my_exchange_node->node_type) {
/* I will participate in the exchange (of the algorithm) -
* wait for signal from extra process */
for (k = 0; k < my_exchange_node->n_extra_sources; k++) {
extra_rank = my_exchange_node->rank_extra_sources_array[k];
IBOFFLOAD_VERBOSE(10,("Exchange [ %d ] extra get %d", k, extra_rank));
preposted_recv_frag = mca_bcol_iboffload_get_preposted_recv_frag(
iboffload, extra_rank, coll_request->qp_index);
if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) {
IBOFFLOAD_VERBOSE(10, ("Non power of 2 case: "
"Failing for getting prepost recv frag.\n"));
goto out_of_resources;
}
wait_task = mca_bcol_iboffload_get_wait_task(iboffload,
extra_rank, 1, preposted_recv_frag, coll_request->qp_index, NULL);
if (OPAL_UNLIKELY(NULL == wait_task)) {
IBOFFLOAD_VERBOSE(10, ("Non power of 2 case: "
"Failing for getting wait task.\n"));
goto out_of_resources;
}
APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait);
MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task);
}
} else {
/* I will not participate in the exchange - so just "register" as here */
extra_rank = my_exchange_node->rank_extra_sources_array[0];
IBOFFLOAD_VERBOSE(10,("Send to proxy %d", extra_rank));
/* send - no need to send any data, in-order delivery */
send_fragment = mca_bcol_iboffload_get_send_frag(coll_request,
extra_rank, coll_request->qp_index, 0,
0, SBUF, MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY);
send_task = mca_bcol_iboffload_get_send_task(iboffload, extra_rank,
coll_request->qp_index, send_fragment, coll_fragment, INLINE);
if (OPAL_UNLIKELY(NULL == send_task)) {
IBOFFLOAD_VERBOSE(10, ("Non power of 2 case: "
"Failing for getting send task.\n"));
goto out_of_resources;
}
APPEND_TO_TASKLIST(mqe_ptr_to_set, send_task, last_send);
MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task);
}
}
/* loop over exchange send/recv pairs */
for (exchange = 0; exchange < my_exchange_node->n_exchanges; ++exchange) {
for (k = 0; k < my_exchange_node->tree_order - 1; k++) {
/* rank of exchange partner */
pair_rank = my_exchange_node->rank_exchanges[exchange][k];
IBOFFLOAD_VERBOSE(10,("Exchange [ %d ,%d ] send to %d", exchange, k, pair_rank));
/* post send */
send_fragment = mca_bcol_iboffload_get_send_frag(coll_request,
pair_rank, coll_request->qp_index, 0,
0, SBUF, MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY);
send_task = mca_bcol_iboffload_get_send_task(iboffload, pair_rank,
coll_request->qp_index,
send_fragment, coll_fragment, INLINE);
if (OPAL_UNLIKELY(NULL == send_task)) {
IBOFFLOAD_VERBOSE(10, ("Exchaging: "
"Failing for getting send task.\n"));
goto out_of_resources;
}
APPEND_TO_TASKLIST(mqe_ptr_to_set, send_task, last_send);
MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task);
}
for (k = 0; k < my_exchange_node->tree_order - 1; k++) {
pair_rank = my_exchange_node->rank_exchanges[exchange][k];
IBOFFLOAD_VERBOSE(10,("Exchange [ %d ,%d ] recv %d", exchange, k, pair_rank));
/* post wait */
preposted_recv_frag = mca_bcol_iboffload_get_preposted_recv_frag(
iboffload, pair_rank, coll_request->qp_index);
if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) {
IBOFFLOAD_VERBOSE(10, ("Exchaging: "
"Failing for getting prepost recv frag.\n"));
goto out_of_resources;
}
wait_task = mca_bcol_iboffload_get_wait_task(iboffload, pair_rank, 1,
preposted_recv_frag, coll_request->qp_index, NULL);
if (OPAL_UNLIKELY(NULL == wait_task)) {
IBOFFLOAD_VERBOSE(10, ("Exchaging: "
"Failing for getting wait task.\n"));
goto out_of_resources;
}
APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait);
MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task);
}
}
/* if non power of 2, may need to send message to "extra" proc */
if (0 < my_exchange_node->n_extra_sources) {
if (EXTRA_NODE == my_exchange_node->node_type) {
/* I will not participate in the exchange -
* wait for signal from exchange process */
extra_rank = my_exchange_node->rank_extra_sources_array[0];
IBOFFLOAD_VERBOSE(10,("Wait from proxy %d", extra_rank));
/* post wait */
preposted_recv_frag =
mca_bcol_iboffload_get_preposted_recv_frag(iboffload, extra_rank,
coll_request->qp_index);
if (OPAL_UNLIKELY(NULL == preposted_recv_frag)) {
IBOFFLOAD_VERBOSE(10, ("Sending to 'extra' node: "
"Failing for getting prepost recv frag.\n"));
goto out_of_resources;
}
wait_task = mca_bcol_iboffload_get_wait_task(iboffload, extra_rank, 1,
preposted_recv_frag,
coll_request->qp_index, NULL);
if (OPAL_UNLIKELY(NULL == wait_task)) {
IBOFFLOAD_VERBOSE(10, ("Sending to 'extra' node: "
"Failing for getting wait task.\n"));
goto out_of_resources;
}
APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait);
MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task);
} else {
/* I will participate in the exchange -
* send signal to extra process */
for (k = 0; k < my_exchange_node->n_extra_sources; k++) {
extra_rank = my_exchange_node->rank_extra_sources_array[k];
IBOFFLOAD_VERBOSE(10,("Exchange [ %d ] extra release %d", k, extra_rank));
send_fragment = mca_bcol_iboffload_get_send_frag(coll_request,
extra_rank, coll_request->qp_index, 0,
0, SBUF, MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY);
send_task = mca_bcol_iboffload_get_send_task(
iboffload, extra_rank,
coll_request->qp_index,
send_fragment, coll_fragment, INLINE);
if (OPAL_UNLIKELY(NULL == send_task)) {
IBOFFLOAD_VERBOSE(10, ("Sending to 'extra' node: "
"Failing for getting send task.\n"));
goto out_of_resources;
}
APPEND_TO_TASKLIST(mqe_ptr_to_set, send_task, last_send);
MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task);
}
}
}
/* Fill in the rest of the coll_fragment */
IBOFFLOAD_VERBOSE(10, ("Fill in the rest of the coll_fragment.\n"));
/* end of list */
*mqe_ptr_to_set = NULL;
/* finish initializing full message descriptor */
coll_request->n_fragments = 1;
coll_request->n_frags_sent = 1;
coll_request->n_frag_mpi_complete = 0;
coll_request->n_frag_net_complete = 0;
coll_request->user_handle_freed = false;
last_wait->flags |= MQE_WR_FLAG_SIGNAL;
coll_fragment->signal_task_wr_id = last_wait->wr_id;
last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment;
/* post the mwr */
if (MCA_BCOL_IBOFFLOAD_QP_SYNC != coll_request->qp_index) {
rc = mca_bcol_iboffload_post_mqe_tasks(iboffload, coll_fragment->to_post);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n"));
/* Note: need to clean up */
return rc;
}
MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload->super, coll_request->order_info);
} else {
/* Special flow for the ML service barrier; only this function is supposed to
   post service requests */
struct mqe_task *bad_mqe = NULL;
assert (MCA_BCOL_IBOFFLOAD_QP_SYNC == coll_request->qp_index );
/* Post to special service MQ - 1 */
rc = mqe_post_task(iboffload->mq[1], coll_fragment->to_post, &bad_mqe);
if (OPAL_UNLIKELY(0 != rc)) {
IBOFFLOAD_ERROR(("ibv_post_mqe failed on device (%s), errno says: %s,"
" the return code is [%d]\n",
ibv_get_device_name(iboffload->device->dev.ib_dev),
strerror(errno), rc));
return OMPI_ERROR;
}
}
IBOFFLOAD_VERBOSE(10, ("Return success.\n"));
return OMPI_SUCCESS;
out_of_resources:
/* Release all resources */
IBOFFLOAD_VERBOSE(10, ("Barrier, adding collfrag to collfrag_pending.\n"));
return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload);
}
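/*
 * Illustrative sketch, not part of the original file: the exchange rounds
 * of the k-nomial barrier above.  The only structural difference from the
 * recursive-doubling sketch earlier is that each round talks to
 * (tree_order - 1) partners, so roughly log_k(P) rounds are needed.
 * send_signal()/wait_signal() are hypothetical callbacks.
 */
static inline void
example_knomial_exchange_rounds(netpatterns_k_exchange_node_t *exchange_node,
                                void (*send_signal)(int peer),
                                void (*wait_signal)(int peer))
{
    int exchange, k;

    for (exchange = 0; exchange < exchange_node->n_exchanges; ++exchange) {
        /* post all sends of this round, then wait for all partners */
        for (k = 0; k < exchange_node->tree_order - 1; k++) {
            send_signal(exchange_node->rank_exchanges[exchange][k]);
        }
        for (k = 0; k < exchange_node->tree_order - 1; k++) {
            wait_signal(exchange_node->rank_exchanges[exchange][k]);
        }
    }
}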
int mca_bcol_iboffload_barrier_intra_recursive_knomial_start(
struct mca_bcol_iboffload_module_t *iboffload,
struct mca_bcol_iboffload_collreq_t *coll_request)
{
int rc;
rc = recursive_knomial_start_connections(iboffload);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
return rc;
}
iboffload->barrier_algth =
mca_bcol_iboffload_barrier_intra_recursive_knomial;
return
mca_bcol_iboffload_barrier_intra_recursive_knomial(iboffload, coll_request);
}
int mca_bcol_iboffload_rec_doubling_start_connections(mca_bcol_iboffload_module_t *iboffload)
{
netpatterns_pair_exchange_node_t *my_exchange_node =
&iboffload->recursive_doubling_tree;
int i, n_exchanges = my_exchange_node->n_exchanges,
*exchanges = my_exchange_node->rank_exchanges,
n_extra_src = my_exchange_node->n_extra_sources,
rank_extra_src = my_exchange_node->rank_extra_source;
mca_bcol_iboffload_endpoint_t *ep;
IBOFFLOAD_VERBOSE(10, ("\nMy sbgp rank (index) - %d, "
"num of endpoints = %d, iboffload module - %p\n",
iboffload->ibnet->super.my_index, iboffload->num_endpoints, iboffload));
if (0 < n_extra_src) {
iboffload->alg_task_consump[RECURSIVE_DOUBLING_BARRIER_ALG] += 2; /* One send task one wait */
ep = iboffload->endpoints[rank_extra_src];
if (iboffload->ibnet->super.my_index < ep->index) {
while(0 == (ep)->remote_zero_rdma_addr.addr) {
opal_progress();
}
} else {
IBOFFLOAD_VERBOSE(10, ("Trying to connect - %d", ep->index));
while (OMPI_SUCCESS !=
check_endpoint_state(ep, NULL, NULL)) {
opal_progress();
}
}
}
for (i = 0; i < n_exchanges; ++i) {
iboffload->alg_task_consump[RECURSIVE_DOUBLING_BARRIER_ALG] += 2; /* One send task one wait */
ep = iboffload->endpoints[exchanges[i]];
if (iboffload->ibnet->super.my_index < ep->index) {
while(0 == (ep)->remote_zero_rdma_addr.addr) {
opal_progress();
}
} else {
IBOFFLOAD_VERBOSE(10, ("Trying to connect - %d", ep->index));
while (OMPI_SUCCESS !=
check_endpoint_state(ep, NULL, NULL)) {
opal_progress();
}
}
}
return OMPI_SUCCESS;
}
static int mca_bcol_iboffload_barrier_init(
bcol_function_args_t *input_args,
mca_bcol_iboffload_module_t *iboffload,
collective_message_completion_callback_function cb_fn,
struct mca_bcol_iboffload_collreq_t **coll_request)
{
ompi_free_list_item_t *item;
mca_bcol_iboffload_collfrag_t *coll_fragment;
mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;
IBOFFLOAD_VERBOSE(10, ("Calling for mca_bcol_iboffload_barrier_init"));
OMPI_FREE_LIST_WAIT_MT(&cm->collreqs_free, item);
if (OPAL_UNLIKELY(NULL == item)) {
IBOFFLOAD_VERBOSE(10, ("Failing for coll request free list waiting.\n"));
return OMPI_ERR_OUT_OF_RESOURCE;
}
(*coll_request) = (mca_bcol_iboffload_collreq_t *) item;
(*coll_request)->progress_fn = iboffload->barrier_algth;
/*
* For usual barrier it is null. For memory
* service barrier we need some work to do
*/
(*coll_request)->completion_cb_fn = cb_fn;
(*coll_request)->order_info = &input_args->order_info;
(*coll_request)->module = iboffload;
(*coll_request)->ml_buffer_index = input_args->buffer_index;
(*coll_request)->buffer_info[SBUF].offset = 0;
(*coll_request)->buffer_info[RBUF].offset = 0;
(*coll_request)->qp_index = MCA_BCOL_IBOFFLOAD_QP_BARRIER;
input_args->bcol_opaque_data = (void *) (*coll_request);
/*
* setup collective work request
*/
/* get collective frag */
coll_fragment = &(*coll_request)->first_collfrag;
mca_bcol_iboffload_collfrag_init(coll_fragment);
coll_fragment->mq_index = COLL_MQ;
/* set pointers for (coll frag) <-> (coll full request) */
MCA_BCOL_IBOFFLOAD_SET_COLL_REQ_LINKS(*coll_request, coll_fragment);
return OMPI_SUCCESS;
}
/************************************************************************
************************ New style Barrier *****************************
***********************************************************************/
static int mca_bcol_iboffload_new_style_barrier_progress(
bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args)
{
mca_bcol_iboffload_collreq_t *coll_request =
(mca_bcol_iboffload_collreq_t *)
input_args->bcol_opaque_data;
if (BCOL_IS_COMPLETED(coll_request)) {
coll_request->user_handle_freed = true;
if (COLLREQ_IS_DONE(coll_request)) {
IBOFFLOAD_VERBOSE(10, ("Coll request already done.\n"));
RELEASE_COLLREQ(coll_request);
}
IBOFFLOAD_VERBOSE(10, ("Barrier already done.\n"));
return BCOL_FN_COMPLETE;
}
return BCOL_FN_STARTED;
}
static int mca_bcol_iboffload_new_style_barrier_intra(
bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args)
{
/* local variables */
int rc;
mca_bcol_iboffload_collreq_t *coll_request;
mca_bcol_iboffload_module_t *iboffload =
(mca_bcol_iboffload_module_t *) const_args->bcol_module;
/* check for ordering */
MCA_BCOL_CHECK_ORDER(const_args->bcol_module, input_args);
/*
* recursive doubling
*/
IBOFFLOAD_VERBOSE(10, ("Barrier starts.\n"));
/* init barrier collective request */
rc = mca_bcol_iboffload_barrier_init(input_args, iboffload, NULL, &coll_request);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
IBOFFLOAD_ERROR(("Get error from mca_bcol_iboffload_barrier_init"));
return rc;
}
/* start the barrier */
rc = iboffload->barrier_algth(iboffload, coll_request);
if (OPAL_UNLIKELY(OMPI_ERROR == rc)) {
return BCOL_FN_NOT_STARTED;
}
/* done */
return BCOL_FN_STARTED;
}
int mca_bcol_iboffload_barrier_register(mca_bcol_base_module_t *super)
{
mca_bcol_base_coll_fn_comm_attributes_t comm_attribs;
mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs;
IBOFFLOAD_VERBOSE(10, ("Register iboffload Barrier.\n"));
comm_attribs.bcoll_type = BCOL_BARRIER;
comm_attribs.comm_size_min = 0;
comm_attribs.comm_size_max = 1024 * 1024;
comm_attribs.waiting_semantics = NON_BLOCKING;
inv_attribs.bcol_msg_min = 0;
inv_attribs.bcol_msg_max = 20000; /* range 1 */
inv_attribs.datatype_bitmap = 0xffffffff;
inv_attribs.op_types_bitmap = 0xffffffff;
comm_attribs.data_src = DATA_SRC_KNOWN;
mca_bcol_base_set_attributes(super,
&comm_attribs, &inv_attribs,
mca_bcol_iboffload_new_style_barrier_intra,
mca_bcol_iboffload_new_style_barrier_progress);
return OMPI_SUCCESS;
}
int mca_bcol_iboffload_memsync_register(mca_bcol_base_module_t *super)
{
mca_bcol_base_coll_fn_comm_attributes_t comm_attribs;
mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs;
IBOFFLOAD_VERBOSE(10, ("Register sync function\n"));
comm_attribs.bcoll_type = BCOL_SYNC;
comm_attribs.comm_size_min = 0;
comm_attribs.comm_size_max = 1024 * 1024;
comm_attribs.waiting_semantics = NON_BLOCKING;
inv_attribs.bcol_msg_min = 0;
inv_attribs.bcol_msg_max = 20000; /* range 1 */
inv_attribs.datatype_bitmap = 0xffffffff;
inv_attribs.op_types_bitmap = 0xffffffff;
comm_attribs.data_src = DATA_SRC_KNOWN;
mca_bcol_base_set_attributes(super,
&comm_attribs, &inv_attribs,
mca_bcol_iboffload_nb_memory_service_barrier_intra,
mca_bcol_iboffload_new_style_barrier_progress);
return OMPI_SUCCESS;
}

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -1,606 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BCOL_IBOFFLOAD_BCAST_H
#define MCA_BCOL_IBOFFLOAD_BCAST_H
#include "ompi_config.h"
#include "bcol_iboffload.h"
#include "bcol_iboffload_frag.h"
#include "bcol_iboffload_task.h"
#include "bcol_iboffload_collreq.h"
#include "bcol_iboffload_collfrag.h"
#include "bcol_iboffload_endpoint.h"
#include "opal/include/opal/types.h"
BEGIN_C_DECLS
int mca_bcol_iboffload_small_msg_bcast_progress(
bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args);
int mca_bcol_iboffload_small_msg_bcast_extra_intra(bcol_function_args_t *fn_arguments,
struct mca_bcol_base_function_t *const_args);
int mca_bcol_iboffload_small_msg_bcast_intra(bcol_function_args_t *fn_arguments,
struct mca_bcol_base_function_t *const_args);
int mca_bcol_iboffload_bcast_scatter_allgather_intra(bcol_function_args_t *fn_arguments,
struct mca_bcol_base_function_t *const_args);
int mca_bcol_iboffload_zero_copy_progress(bcol_function_args_t *fn_arguments,
struct mca_bcol_base_function_t *const_args);
int mca_bcol_iboffload_bcast_scatter_allgather_extra_intra(bcol_function_args_t *fn_arguments,
struct mca_bcol_base_function_t *const_args);
int mca_bcol_iboffload_bcast_register(mca_bcol_base_module_t *super);
static inline __opal_attribute_always_inline__ int
mca_bcol_iboffload_recv_rtr_setup(
struct mqe_task **last_wait,
uint32_t dest_rank,
mca_bcol_iboffload_module_t *iboffload,
mca_bcol_iboffload_collfrag_t *coll_fragment)
{
mca_bcol_iboffload_task_t *task;
mca_bcol_iboffload_frag_t *fragment;
/* Wait for RTR message over credit QP */
fragment = mca_bcol_iboffload_get_preposted_recv_frag(
iboffload, dest_rank,
MCA_BCOL_IBOFFLOAD_QP_CREDIT);
if (OPAL_UNLIKELY(NULL == fragment)) {
IBOFFLOAD_VERBOSE(10, ("Failed to get recv frag.\n"));
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
}
task = mca_bcol_iboffload_get_wait_task(
iboffload, dest_rank, 1, fragment, MCA_BCOL_IBOFFLOAD_QP_CREDIT,
iboffload->endpoints[dest_rank]->qps[MCA_BCOL_IBOFFLOAD_QP_LARGE_BUFF].qp->lcl_qp);
if (OPAL_UNLIKELY(NULL == task)) {
IBOFFLOAD_VERBOSE(10, ("Failed to get wait task.\n"));
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
}
MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, task);
MCA_BCOL_IBOFFLOAD_APPEND_MQ_TASK_TO_LIST(coll_fragment->tail_next, task);
return OMPI_SUCCESS;
}
static inline __opal_attribute_always_inline__ int
mca_bcol_iboffload_send_small_buff_setup(
struct mqe_task **last_send,
size_t len, uint32_t dest_rank,
mca_bcol_iboffload_module_t *iboffload,
mca_bcol_iboffload_collfrag_t *coll_fragment)
{
mca_bcol_iboffload_task_t *task;
mca_bcol_iboffload_frag_t *fragment;
mca_bcol_iboffload_collreq_t *coll_request =
coll_fragment->coll_full_req;
IBOFFLOAD_VERBOSE(10,("Get ml frag that I will send dest rank %d, len %d, lkey %d",
dest_rank, len, iboffload->rdma_block.ib_info.lkey));
fragment = mca_bcol_iboffload_get_send_frag(coll_request, dest_rank,
coll_request->qp_index, len, 0,
SBUF, /* this could be problematic */
MCA_BCOL_IBOFFLOAD_SEND_FRAG_ML);
if (OPAL_UNLIKELY(NULL == fragment)) {
IBOFFLOAD_VERBOSE(10, ("Failed to get frag.\n"));
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
}
IBOFFLOAD_VERBOSE(10,("Get an rdma task for dest %d for packet size %d",
dest_rank,len));
task = mca_bcol_iboffload_get_rdma_task(
dest_rank, 0,
fragment, iboffload, coll_fragment);
if (OPAL_UNLIKELY(NULL == task)) {
IBOFFLOAD_VERBOSE(10, ("Failed to get send task.\n"));
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
}
*last_send = &task->element;
MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, task);
MCA_BCOL_IBOFFLOAD_APPEND_MQ_TASK_TO_LIST(coll_fragment->tail_next, task);
return OMPI_SUCCESS;
}
static inline __opal_attribute_always_inline__ int
mca_bcol_iboffload_send_large_buff_setup(
struct mqe_task **last_send,
int buf_index, int offset,
size_t len, uint32_t dest_rank,
mca_bcol_iboffload_module_t *iboffload,
mca_bcol_iboffload_collfrag_t *coll_fragment)
{
mca_bcol_iboffload_task_t *task;
mca_bcol_iboffload_frag_t *fragment;
mca_bcol_iboffload_collreq_t *coll_request =
coll_fragment->coll_full_req;
fragment = mca_bcol_iboffload_get_send_frag(coll_request, dest_rank,
MCA_BCOL_IBOFFLOAD_QP_LARGE_BUFF,
len,
offset, buf_index, MCA_BCOL_IBOFFLOAD_SEND_FRAG_ML);
if (OPAL_UNLIKELY(NULL == fragment)) {
IBOFFLOAD_VERBOSE(10, ("Failed to get frag.\n"));
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
}
task = mca_bcol_iboffload_get_send_task(
iboffload, dest_rank,
MCA_BCOL_IBOFFLOAD_QP_LARGE_BUFF,
fragment, coll_fragment, NO_INLINE);
if (OPAL_UNLIKELY(NULL == task)) {
IBOFFLOAD_VERBOSE(10, ("Failed to get send task.\n"));
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
}
*last_send = &task->element;
MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, task);
MCA_BCOL_IBOFFLOAD_APPEND_MQ_TASK_TO_LIST(coll_fragment->tail_next, task);
return OMPI_SUCCESS;
}
static inline __opal_attribute_always_inline__ int
mca_bcol_iboffload_send_rtr_setup(
struct mqe_task **last_send,
uint32_t dest_rank,
mca_bcol_iboffload_module_t *iboffload,
mca_bcol_iboffload_collfrag_t *coll_fragment)
{
mca_bcol_iboffload_task_t *task;
mca_bcol_iboffload_frag_t *fragment;
/* Recv is ready , Send RTR message */
fragment = mca_bcol_iboffload_get_send_frag(coll_fragment->coll_full_req,
dest_rank, MCA_BCOL_IBOFFLOAD_QP_CREDIT, 0,
0, RBUF, MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY);
if (OPAL_UNLIKELY(NULL == fragment)) {
IBOFFLOAD_VERBOSE(10, ("Failed to get frag.\n"));
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
}
task = mca_bcol_iboffload_get_send_task(iboffload, dest_rank,
MCA_BCOL_IBOFFLOAD_QP_CREDIT,
fragment, coll_fragment, INLINE);
if (OPAL_UNLIKELY(NULL == task)) {
IBOFFLOAD_VERBOSE(10, ("Failed to get send task.\n"));
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
}
IBOFFLOAD_VERBOSE(10, ("dest_rank - %d. qp index - %d.\n",
dest_rank, MCA_BCOL_IBOFFLOAD_QP_CREDIT));
*last_send = &task->element;
MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, task);
MCA_BCOL_IBOFFLOAD_APPEND_MQ_TASK_TO_LIST(coll_fragment->tail_next, task);
return OMPI_SUCCESS;
}
static inline __opal_attribute_always_inline__ int
mca_bcol_iboffload_recv_small_preposted_buff_setup(
struct mqe_task **last_wait,
size_t len, uint32_t dest_rank,
int qp_index,
int nwaits,
mca_bcol_iboffload_module_t *iboffload,
mca_bcol_iboffload_collfrag_t *coll_fragment)
{
mca_bcol_iboffload_task_t *task;
mca_bcol_iboffload_frag_t *fragment;
IBOFFLOAD_VERBOSE(10,("Get preposted recv from rank %d", dest_rank));
fragment = mca_bcol_iboffload_get_preposted_recv_frag(
iboffload, dest_rank,
qp_index);
if (OPAL_UNLIKELY(NULL == fragment)) {
IBOFFLOAD_VERBOSE(10, ("Failed to get recv frag.\n"));
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
}
task = mca_bcol_iboffload_get_wait_task(iboffload, dest_rank, nwaits,
fragment, qp_index, NULL);
if (OPAL_UNLIKELY(NULL == task)) {
IBOFFLOAD_VERBOSE(10, ("Failed to get wait task.\n"));
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
}
*last_wait = &task->element;
MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, task);
MCA_BCOL_IBOFFLOAD_APPEND_MQ_TASK_TO_LIST(coll_fragment->tail_next, task);
return OMPI_SUCCESS;
}
static inline __opal_attribute_always_inline__ int
mca_bcol_iboffload_recv_small_buff_setup(
struct mqe_task **last_wait,
size_t len, uint32_t dest_rank,
mca_bcol_iboffload_module_t *iboffload,
mca_bcol_iboffload_collfrag_t *coll_fragment)
{
mca_bcol_iboffload_task_t *task;
mca_bcol_iboffload_frag_t *fragment;
mca_bcol_iboffload_collreq_t *coll_request =
coll_fragment->coll_full_req;
IBOFFLOAD_VERBOSE(10, ("Get preposted recv from rank %d", dest_rank));
fragment = mca_bcol_iboffload_get_preposted_recv_frag(
iboffload, dest_rank,
coll_request->qp_index);
if (OPAL_UNLIKELY(NULL == fragment)) {
IBOFFLOAD_VERBOSE(10, ("Failed to get recv frag.\n"));
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
}
task = mca_bcol_iboffload_get_wait_task(iboffload, dest_rank, 1,
fragment, coll_request->qp_index, NULL);
if (OPAL_UNLIKELY(NULL == task)) {
IBOFFLOAD_VERBOSE(10, ("Failed to get wait task.\n"));
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
}
*last_wait = &task->element;
MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, task);
MCA_BCOL_IBOFFLOAD_APPEND_MQ_TASK_TO_LIST(coll_fragment->tail_next, task);
return OMPI_SUCCESS;
}
static inline __opal_attribute_always_inline__ int
mca_bcol_iboffload_recv_large_buff_setup(
struct mqe_task **last_wait,
int buf_index, int offset,
size_t len, uint32_t dest_rank,
mca_bcol_iboffload_module_t *iboffload,
mca_bcol_iboffload_collfrag_t *coll_fragment)
{
int num_preposted;
mca_bcol_iboffload_task_t *task;
mca_bcol_iboffload_frag_t *fragment;
mca_bcol_iboffload_collreq_t *coll_request = coll_fragment->coll_full_req;
/* Post message to recv queue for large messages */
fragment = mca_bcol_iboffload_get_ml_frag(
iboffload, MCA_BCOL_IBOFFLOAD_QP_LARGE_BUFF, len,
coll_request->buffer_info[buf_index].iboffload_reg->mr->lkey,
(uint64_t)((unsigned char *)coll_request->buffer_info[buf_index].buf + offset));
if (OPAL_UNLIKELY(NULL == fragment)) {
IBOFFLOAD_VERBOSE(10, ("Failed to get recv frag.\n"));
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
}
num_preposted = mca_bcol_iboffload_prepost_ml_recv_frag(
MCA_BCOL_IBOFFLOAD_QP_LARGE_BUFF,
dest_rank, fragment, iboffload);
if (0 >= num_preposted) {
IBOFFLOAD_ERROR(("Failed to prepost recv fragments "
"return code - %d; dest_rank - %d",
num_preposted, dest_rank));
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
}
task = mca_bcol_iboffload_get_wait_task(iboffload, dest_rank, 1,
fragment, MCA_BCOL_IBOFFLOAD_QP_LARGE_BUFF, NULL);
if (OPAL_UNLIKELY(NULL == task)) {
IBOFFLOAD_VERBOSE(10, ("Failed to get wait task.\n"));
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
}
*last_wait = &task->element;
MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, task);
MCA_BCOL_IBOFFLOAD_APPEND_MQ_TASK_TO_LIST(coll_fragment->tail_next, task);
return OMPI_SUCCESS;
}
static inline __opal_attribute_always_inline__
int bcol_iboffload_binomial_root_to_src(int group_root, int my_rank,
int pow2_size, int group_size, int *distance)
{
int root, relative_rank, src,
pow2_distance = 0, i;
if (group_root < pow2_size) {
root = group_root;
} else {
/* the source of the data is an extra node;
the real root is represented by some rank from
the pow2 group */
root = group_root - pow2_size;
/* shortcut for the case when my rank is root for the group */
if (my_rank == root) {
*distance = -1;
return group_root;
}
}
relative_rank = (my_rank - root) < 0 ? my_rank - root + pow2_size :
my_rank - root;
for (i = 1; i < pow2_size; i<<=1, pow2_distance++) {
if (relative_rank & i) {
src = my_rank ^ i;
if (src >= pow2_size)
src -= pow2_size;
*distance = pow2_distance;
IBOFFLOAD_VERBOSE(10, ("AAAAA d %d rel %d it %d root %d my %d", *distance, relative_rank, i, root, my_rank));
return src;
}
}
/* error case */
*distance = -1;
return -1;
}
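/*
* Worked example (illustrative note added for clarity, not part of the
* original code): with pow2_size = 8, group_root = 0 and my_rank = 5
* (binary 101), relative_rank is 5. The loop above stops at the lowest
* set bit, i = 1 (pow2_distance = 0), so src = 5 ^ 1 = 4 and *distance
* is set to 0: rank 5 receives its data from rank 4 in the first
* exchange step of the recursive-doubling tree.
*/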
static inline void bcol_iboffload_setup_binomial_connection(mca_bcol_iboffload_module_t *iboffload)
{
netpatterns_pair_exchange_node_t *my_exchange_node =
&iboffload->recursive_doubling_tree;
int i, n_exchanges = my_exchange_node->n_exchanges,
*exchanges = my_exchange_node->rank_exchanges,
n_extra_src = my_exchange_node->n_extra_sources,
my_rank = iboffload->ibnet->super.my_index,
rank_extra_src = my_exchange_node->rank_extra_source;
mca_bcol_iboffload_endpoint_t *ep;
IBOFFLOAD_VERBOSE(10, ("Open connections.\n"));
if (0 < n_extra_src) {
ep = iboffload->endpoints[rank_extra_src];
while (OMPI_SUCCESS !=
check_endpoint_state(ep, NULL, NULL)) {
opal_progress();
}
#if OPAL_ENABLE_DEBUG
{
int qp_index, num_qps = mca_bcol_iboffload_component.num_qps;
for (qp_index = 0; qp_index < num_qps; ++qp_index) {
assert(NULL != ep->qps[qp_index].qp->lcl_qp);
IBOFFLOAD_VERBOSE(10, ("Endpoint - %p, QP index - %d: qp num - %x.",
ep, qp_index, ep->qps[qp_index].qp->lcl_qp->qp_num));
}
}
#endif
/* Connect to all extra nodes */
if (EXTRA_NODE == my_exchange_node->node_type) {
for (i = iboffload->power_of_2_ranks;
i < iboffload->num_endpoints; ++i) {
if (i != my_rank) {
ep = iboffload->endpoints[i];
IBOFFLOAD_VERBOSE(10, ("subgroup rank %d: Connect to rank %d.\n", my_rank, i));
while (OMPI_SUCCESS !=
check_endpoint_state(ep, NULL, NULL)) {
opal_progress();
}
#if OPAL_ENABLE_DEBUG
{
int qp_index, num_qps = mca_bcol_iboffload_component.num_qps;
for (qp_index = 0; qp_index < num_qps; ++qp_index) {
assert(NULL != ep->qps[qp_index].qp->lcl_qp);
IBOFFLOAD_VERBOSE(10, ("Endpoint - %p, QP index - %d: qp num - %x.",
ep, qp_index, ep->qps[qp_index].qp->lcl_qp->qp_num));
}
}
#endif
}
}
}
}
for (i = 0; i < n_exchanges; ++i) {
ep = iboffload->endpoints[exchanges[i]];
while (OMPI_SUCCESS !=
check_endpoint_state(ep, NULL, NULL)) {
opal_progress();
}
#if OPAL_ENABLE_DEBUG
{
int qp_index, num_qps = mca_bcol_iboffload_component.num_qps;
for (qp_index = 0; qp_index < num_qps; ++qp_index) {
assert(NULL != ep->qps[qp_index].qp->lcl_qp);
IBOFFLOAD_VERBOSE(10, ("Endpoint - %p, QP index - %d: qp num - %x.",
ep, qp_index, ep->qps[qp_index].qp->lcl_qp->qp_num));
}
}
#endif
}
/* set the connection status to connected */
iboffload->connection_status[RECURSIVE_DOUBLING_TREE_BCAST] = true;
}
static inline __opal_attribute_always_inline__
int bcol_iboffload_bcast_binomial_gather(mca_bcol_iboffload_module_t *iboffload_module,
struct mqe_task **last_send, struct mqe_task **last_wait,
mca_bcol_iboffload_collfrag_t *coll_fragment,
int count, int base_block_size, int radix_mask_pow)
{
int rc;
int i;
int my_group_index = iboffload_module->ibnet->super.my_index;
int delta, rdelta;
IBOFFLOAD_VERBOSE(10, ("bcol_iboffload_bcast_binomial_gather %d %d",
radix_mask_pow, my_group_index));
/* we assume that iteration #iteration was already completed by the probe */
for (i = 0; i < iboffload_module->power_of_2; i++) {
int pow2 = 1 << i;
int peer_index = my_group_index ^ pow2;
int slen, rlen,
send_offset,
recv_offset;
if (i > radix_mask_pow) {
slen = rlen = pow2 * base_block_size;
send_offset = base_block_size * ((my_group_index) & ((~(int)0) << i));
recv_offset = base_block_size * ((peer_index) & ((~(int)0) << i));
rdelta = count - recv_offset;
if (rdelta > 0) {
IBOFFLOAD_VERBOSE(10, ("Recv1 [ pow2 %d, radix %d ] offset %d , len %d , dest %d",
pow2, 1 << iboffload_module->power_of_2,
recv_offset, rlen, peer_index));
rc = mca_bcol_iboffload_send_rtr_setup(last_send,
peer_index, iboffload_module,
coll_fragment);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
IBOFFLOAD_VERBOSE(10, ("Failed to setup send rtr"));
return OMPI_ERROR;
}
}
delta = count - send_offset;
if (delta > 0) {
if (delta < slen) {
/* send only the tail */
slen = delta;
}
IBOFFLOAD_VERBOSE(10, ("Send1 [ pow2 %d, radix %d ] offset %d , len %d , dest %d",
pow2, 1 << iboffload_module->power_of_2,
send_offset, slen, peer_index));
rc = mca_bcol_iboffload_recv_rtr_setup(last_wait, peer_index, iboffload_module, coll_fragment);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
IBOFFLOAD_VERBOSE(10, ("Failed to isend data"));
return OMPI_ERROR;
}
rc = mca_bcol_iboffload_send_large_buff_setup(last_send, SBUF, send_offset, slen, peer_index,
iboffload_module, coll_fragment);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
IBOFFLOAD_VERBOSE(10, ("Failed to isend data"));
return OMPI_ERROR;
}
}
if (rdelta > 0) {
if (rdelta < rlen) {
/* recv the tail */
rlen = rdelta;
}
rc = mca_bcol_iboffload_recv_large_buff_setup(last_wait,
SBUF, recv_offset, rlen, peer_index,
iboffload_module, coll_fragment);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
IBOFFLOAD_VERBOSE(10, ("Failed to setup data receive"));
return OMPI_ERROR;
}
}
} else if (i == radix_mask_pow) {
/* only receive data */
rlen = pow2 * base_block_size;
recv_offset = base_block_size * ((peer_index) & ((~(int)0) << i));
delta = count - recv_offset;
if (0 >= delta) {
/* we have nothing to receive, skip the iteration */
continue;
}
if (delta < rlen) {
/* recv the tail */
rlen = delta;
}
/* receive data from the peer */
IBOFFLOAD_VERBOSE(10, ("Recv2 [ pow2 %d, radix %d ] offset %d , len %d , dest %d",
pow2,
1 << iboffload_module->power_of_2,
recv_offset,
rlen, peer_index));
rc = mca_bcol_iboffload_send_rtr_setup(last_send,
peer_index, iboffload_module,
coll_fragment);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
IBOFFLOAD_VERBOSE(10, ("Failed to setup send rtr"));
return OMPI_ERROR;
}
rc = mca_bcol_iboffload_recv_large_buff_setup(last_wait,
SBUF, recv_offset, rlen, peer_index,
iboffload_module, coll_fragment);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
IBOFFLOAD_VERBOSE(10, ("Failed to setup data receive"));
return OMPI_ERROR;
}
} else if (i < radix_mask_pow) {
/* Only send data */
slen = pow2 * base_block_size;
send_offset = base_block_size * ((my_group_index) & ((~(int)0) << i));
delta = count - send_offset;
if (0 >= delta) {
/* we have nothing to send, skip the iteration */
continue;
}
if (delta < slen) {
slen = delta;
}
IBOFFLOAD_VERBOSE(10, ("Send2 [ pow2 %d, radix %d ] offset %d , len %d , dest %d",
pow2,
1 << iboffload_module->power_of_2,
send_offset,
slen,
peer_index));
rc = mca_bcol_iboffload_recv_rtr_setup(last_wait, peer_index, iboffload_module, coll_fragment);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
IBOFFLOAD_VERBOSE(10, ("Failed to isend data"));
return OMPI_ERROR;
}
rc = mca_bcol_iboffload_send_large_buff_setup(last_send, SBUF, send_offset, slen, peer_index,
iboffload_module, coll_fragment);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
IBOFFLOAD_VERBOSE(10, ("Failed to isend data"));
return OMPI_ERROR;
}
}
}
return OMPI_SUCCESS;
}
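/*
* Illustrative note (added for clarity, not part of the original code):
* the offsets above always align the exchanged region down to a multiple
* of 2^i base blocks. For example, with base_block_size = 1024 and
* my_group_index = 5, step i = 2 uses
* send_offset = 1024 * (5 & ~3) = 4096 and slen = 4 * 1024, i.e. the
* four-block chunk this rank has accumulated during the previous steps of
* the recursive-doubling gather, truncated by the delta checks whenever
* it would run past count.
*/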
END_C_DECLS
#endif

Просмотреть файл

@ -1,51 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <string.h>
#include "bcol_iboffload_collreq.h"
#include "bcol_iboffload_collfrag.h"
static void
collfrag_constructor(struct mca_bcol_iboffload_collfrag_t *collfrag)
{
collfrag->n_sends = 0;
collfrag->n_sends_completed = 0;
memset(collfrag->pre_posted_recvs, 0,
sizeof(struct mca_bcol_iboffload_task_t *) * MAX_MQE_TASKS);
collfrag->signal_task_wr_id = (uint64_t) 0;
collfrag->complete = false;
collfrag->seq_n = -1;
collfrag->coll_full_req = NULL;
collfrag->unpack_size = 0;
collfrag->tasks_posted = 0;
collfrag->to_post = NULL;
collfrag->task_next = NULL;
collfrag->tasks_to_release = NULL;
collfrag->in_pending_list = false;
}
static void
collfrag_destruct(struct mca_bcol_iboffload_collfrag_t *collfrag)
{
}
OBJ_CLASS_INSTANCE(mca_bcol_iboffload_collfrag_t,
ompi_free_list_item_t,
collfrag_constructor,
collfrag_destruct);

Просмотреть файл

@ -1,144 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BCOL_IBOFFLOAD_COLLFRAG_H
#define MCA_BCOL_IBOFFLOAD_COLLFRAG_H
#include "ompi_config.h"
#include <infiniband/mqe.h>
#include <infiniband/verbs.h>
#include <infiniband/mverbs.h>
#include "bcol_iboffload.h"
#include "opal/class/ompi_free_list.h"
BEGIN_C_DECLS
#define MAX_MQE_TASKS 128 /* Pasha - do we want to make it dynamic ?*/
struct mca_bcol_iboffload_task_t;
struct mca_bcol_iboffload_collreq_t;
/* collective fragment descriptor */
struct mca_bcol_iboffload_collfrag_t {
ompi_free_list_item_t super;
/* number of asynchronous sends scheduled */
uint32_t n_sends;
/* number of sends completed */
uint32_t n_sends_completed;
/* Algorithm ID that was used for this fragment */
int32_t alg;
/* pre-posted receive sources */
struct mca_bcol_iboffload_task_t *pre_posted_recvs[MAX_MQE_TASKS];
/* cache here pointer to signaled task */
uint64_t signal_task_wr_id;
/* mwr completion from the mcq */
volatile bool complete;
/* sequence number - we use it for
correct ordering of resources release */
uint32_t seq_n;
/* pointer to the full collective request descriptor */
struct mca_bcol_iboffload_collreq_t *coll_full_req;
size_t unpack_size;
bool in_pending_list;
/* Num of posted tasks */
int tasks_posted;
/* Pointer to head of not posted elements list */
struct mqe_task *to_post;
/* Pointer to tail next */
struct mqe_task **tail_next;
/* List of the all tasks of this coll frag */
struct mca_bcol_iboffload_task_t *tasks_to_release;
/* Pointer to the next elem in All tasks list */
struct mca_bcol_iboffload_task_t **task_next;
/* Num of needed mq credits */
int mq_credits;
/* MQ index that is used for this frag */
int mq_index;
/*
* Last wait sequence number; zero means
* there is no wait in the coll request
*/
int32_t last_wait_num;
/* fragment descriptor for non contiguous data */
bcol_fragment_descriptor_t *bcol_frag_info;
/* frag-len of ml buffer */
int frag_len;
};
typedef struct mca_bcol_iboffload_collfrag_t mca_bcol_iboffload_collfrag_t;
OBJ_CLASS_DECLARATION(mca_bcol_iboffload_collfrag_t);
static inline __opal_attribute_always_inline__
void mca_bcol_iboffload_collfrag_init(
mca_bcol_iboffload_collfrag_t *cf)
{
/* init the request */
cf->n_sends = 0;
cf->complete = false;
cf->n_sends_completed = 0;
cf->alg = -1;
cf->in_pending_list = false;
cf->tail_next = NULL;
cf->tasks_posted = 0;
cf->to_post = NULL;
cf->mq_credits = 0;
cf->mq_index = 0;
cf->tasks_to_release = NULL;
cf->task_next = &cf->tasks_to_release;
cf->last_wait_num = 0;
}
static inline __opal_attribute_always_inline__
struct mca_bcol_iboffload_collfrag_t *
mca_bcol_iboffload_get_collfrag(void)
{
ompi_free_list_item_t *item;
mca_bcol_iboffload_collfrag_t *cf;
mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;
/* blocking allocation for collectives fragment */
OMPI_FREE_LIST_GET_MT(&cm->collfrags_free, item);
if (OPAL_UNLIKELY(NULL == item)) {
IBOFFLOAD_ERROR(("Failed to allocated collfrag.\n"));
return NULL;
}
cf = (mca_bcol_iboffload_collfrag_t*) item;
mca_bcol_iboffload_collfrag_init(cf);
return cf;
}
END_C_DECLS
#endif

Просмотреть файл

@ -1,50 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "bcol_iboffload_collreq.h"
static void
collreq_construct(struct mca_bcol_iboffload_collreq_t *collreq)
{
int i;
collreq->n_fragments = 0;
collreq->n_frag_mpi_complete = 0;
collreq->n_frag_net_complete = 0;
collreq->user_handle_freed = false;
for (i = 0; i < BCOL_IBOFFLOAD_BUFFERS; i++) {
collreq->buffer_info[i].buf = NULL;
collreq->buffer_info[i].offset = 0;
collreq->buffer_info[i].iboffload_reg = NULL;
}
OBJ_CONSTRUCT(&collreq->work_requests, opal_list_t);
OBJ_CONSTRUCT(&collreq->first_collfrag, mca_bcol_iboffload_collfrag_t);
OBJ_CONSTRUCT(&collreq->send_convertor, opal_convertor_t);
OBJ_CONSTRUCT(&collreq->recv_convertor, opal_convertor_t);
}
static void
collreq_destruct(struct mca_bcol_iboffload_collreq_t *collreq)
{
OBJ_DESTRUCT(&collreq->work_requests);
OBJ_DESTRUCT(&collreq->first_collfrag);
OBJ_DESTRUCT(&collreq->send_convertor);
OBJ_DESTRUCT(&collreq->recv_convertor);
}
OBJ_CLASS_INSTANCE(mca_bcol_iboffload_collreq_t,
ompi_request_t,
collreq_construct,
collreq_destruct);

Просмотреть файл

@ -1,273 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BCOL_IBOFFLOAD_COLLREQ_H
#define MCA_BCOL_IBOFFLOAD_COLLREQ_H
#include "ompi_config.h"
#include <infiniband/mqe.h>
#include <infiniband/verbs.h>
#include <infiniband/mverbs.h>
#include "opal/class/ompi_free_list.h"
#include "bcol_iboffload.h"
#include "bcol_iboffload_device.h"
#include "bcol_iboffload_collfrag.h"
#define SBUF 0
#define RBUF 1
#define BCOL_IBOFFLOAD_BUFFERS 2
BEGIN_C_DECLS
struct mca_bcol_iboffload_reg_t;
/*
* collective progress function
*/
typedef int (*collective_message_progress_function)(
struct mca_bcol_iboffload_module_t *iboffload,
struct mca_bcol_iboffload_collreq_t *full_message_descriptor);
/*
* callback function to be called after the collective work request
* completes. This is invoked in user-space, and is typically where
* data may be copied out of library buffers, or when any other user-
* level protocol may be completed
*
* input:
* callback data: typically, this may be the work request just finished
*/
typedef int (*collective_message_completion_callback_function)(
void *callback_data);
struct mca_bcol_iboffload_buff_info {
void *buf;
size_t offset;
uint32_t lkey;
struct mca_bcol_iboffload_reg_t *iboffload_reg;
};
typedef struct mca_bcol_iboffload_buff_info mca_bcol_iboffload_buff_info;
/*
* Collective message descriptor
* the mca_bcol_iboffload_message_desc_t was replaced with mca_bcol_iboffload_collreq_t
* *************************************************************************************************
*
* Brief description of iboffload collective request dependencies:
*
* mca_bcol_iboffload_collreq_t <----<< Full coll request
* |
* --(0)-- mca_bcol_iboffload_collfrag_t <----<< Fragment of coll request ( for example
* | | 10MB Bcast maybe split to 2MB fragments )
* | |
* | --(0)-- mca_bcol_iboffload_task_t---mqe_task
* | | |
* | | ---mca_bcol_iboffload_frag_t---ibv_sge
* | --(1)-- mca_bcol_iboffload_task_t---mqe_task
* | | |
* | | ---mca_bcol_iboffload_frag_t---ibv_sge
* | ..(M)..
* |
* --(1)-- mca_bcol_iboffload_collfrag_t
* |
* ..(N)..
*
* *************************************************************************************************
*/
struct mca_bcol_iboffload_collreq_t {
ompi_request_t super;
/* op type */
struct ompi_op_t *op;
/* Sometimes the operation that should be performed
by the IB is different than the mpi_op and is then set
by the pack_data_for_calc function */
enum ibv_m_wr_calc_op actual_ib_op;
/* Sometimes the data type that should be used by the IB
to perform the calc is different from the mpi dtype,
and is then set by the pack_data_for_calc function */
enum ibv_m_wr_data_type actual_ib_dtype;
/* data type */
struct ompi_datatype_t *dtype;
/* convertor for send operation */
opal_convertor_t send_conv;
/* convertor for recv operation */
opal_convertor_t recv_conv;
/*
* count (in data type units)
*/
uint64_t count;
/*
* root of collective operation
*/
int root;
/* number of message fragments */
int n_fragments;
/* number of fragments sent - all resources for a fragment are allocated
* or none at all are
*/
int n_frags_sent;
/* number of fragments completed from the MPI perspective */
int n_frag_mpi_complete;
/* number of fragments completed from a network perspective */
int n_frag_net_complete;
/* collective is free and may be released - the message is complete from the
** MPI perspective, the network perspective, and the user is done
** with the message handle */
volatile bool user_handle_freed;
/* list of collective fragments - only 1 for now */
opal_list_t work_requests;
/* message progress function */
collective_message_progress_function progress_fn;
/* work request completion callback function */
collective_message_completion_callback_function completion_cb_fn;
/* index of the qp whose buffers are long enough for this collective */
int qp_index;
bool if_bcol_last;
/* The flag is used for the last bcol to indicate if the calculation should be done by the cpu */
bool do_calc_in_cpu;
/* in the Allreduce case, if (true == do_calc_in_cpu) =>
the final result will be calculated on the local CPU */
uint64_t l_operand;
uint64_t r_operand;
/* caching ML-rdma buffer descriptor */
mca_bcol_iboffload_rdma_buffer_desc_t *ml_rdma_desc;
/* ML buffer index code */
int ml_buffer_index;
/* In the current implementation the collrequest is connected to a single
iboffload module */
struct mca_bcol_iboffload_module_t *module;
mca_bcol_iboffload_collfrag_t first_collfrag;
/* Send/recv buffs info - user buffers registration if needed etc. */
mca_bcol_iboffload_buff_info buffer_info[BCOL_IBOFFLOAD_BUFFERS];
/* My binomial tree children in this collective */
int *bi_nominal_tree_children;
/* Convertors for send/recv if needed */
opal_convertor_t send_convertor;
opal_convertor_t recv_convertor;
/* Order info from upper layer */
mca_bcol_base_order_info_t *order_info;
};
typedef struct mca_bcol_iboffload_collreq_t mca_bcol_iboffload_collreq_t;
OBJ_CLASS_DECLARATION(mca_bcol_iboffload_collreq_t);
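/*
* A minimal traversal sketch (added for illustration only; this helper is
* hypothetical and was never part of the original file). It shows how the
* hierarchy pictured in the dependency comment above hangs together: each
* collreq keeps its fragments on the work_requests list, and each fragment
* chains the tasks it owns through tasks_to_release.
*/
static inline void
mca_bcol_iboffload_collreq_walk_sketch(mca_bcol_iboffload_collreq_t *coll_request)
{
opal_list_item_t *item;
for (item = opal_list_get_first(&coll_request->work_requests);
item != opal_list_get_end(&coll_request->work_requests);
item = opal_list_get_next(item)) {
mca_bcol_iboffload_collfrag_t *frag =
(mca_bcol_iboffload_collfrag_t *) item;
/* head of the per-fragment task chain built by
MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST */
struct mca_bcol_iboffload_task_t *task = frag->tasks_to_release;
(void) task; /* a real walker would release or inspect each task here */
}
}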
#define COLLREQ_IS_DONE(cr) (cr->user_handle_freed && \
(cr->n_frag_mpi_complete == cr->n_fragments) && \
(cr->n_frag_net_complete == cr->n_fragments))
#define RELEASE_COLLREQ(cr) \
do { \
(cr)->user_handle_freed = false; \
OMPI_FREE_LIST_RETURN_MT(&mca_bcol_iboffload_component.collreqs_free, \
(ompi_free_list_item_t *) (cr)); \
} while (0)
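/*
* Illustrative note (not part of the original code): the progress path of
* each collective checks these two macros together, e.g.
*
*   if (COLLREQ_IS_DONE(coll_request)) {
*       RELEASE_COLLREQ(coll_request);
*   }
*
* i.e. a request is recycled only once the user handle has been freed and
* all fragments are complete from both the MPI and the network perspective.
*/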
static inline __opal_attribute_always_inline__
int mca_bcol_iboffload_free_resources_and_move_to_pending(
mca_bcol_iboffload_collfrag_t *coll_fragment,
mca_bcol_iboffload_module_t *iboffload)
{
int rc = mca_bcol_iboffload_free_tasks_frags_resources(coll_fragment,
iboffload->device->frags_free);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
return rc;
}
IBOFFLOAD_VERBOSE(10, ("iboffload - %p, coll_fragment - %p, "
"coll frag in_pending_list ? - %d, pending_list size - %d.\n",
iboffload, coll_fragment, coll_fragment->in_pending_list,
opal_list_get_size(&iboffload->collfrag_pending)));
BCOL_IBOFFLOAD_MQ_RETURN_CREDITS(iboffload, coll_fragment->mq_index, coll_fragment->mq_credits);
/* Remove coll frag from coll request opal_list */
opal_list_remove_item(&coll_fragment->coll_full_req->work_requests,
(opal_list_item_t *) coll_fragment);
if (false == coll_fragment->in_pending_list) {
/* Put the collfrag on pending list */
coll_fragment->in_pending_list = true;
opal_list_append(&iboffload->collfrag_pending,
(opal_list_item_t *) coll_fragment);
} else {
/* The item is already on the pending list =>
insert it at the head so that the order of the
frags on the list is not broken */
opal_list_prepend(&iboffload->collfrag_pending,
(opal_list_item_t *) coll_fragment);
}
return OMPI_SUCCESS;
}
/* Forward declaration */
struct mca_bcol_iboffload_reg_t;
static inline __opal_attribute_always_inline__
int mca_bcol_iboffload_prepare_buffer(
void *buffer,
size_t size,
struct mca_bcol_iboffload_reg_t **registration_handler,
mca_bcol_iboffload_module_t *iboffload)
{
int rc;
mca_mpool_base_registration_t *reg = NULL;
assert(size > 0);
rc = iboffload->device->mpool->mpool_register(
iboffload->device->mpool,
buffer, size,
(uint32_t) 0 /* flags */,
&reg);
*registration_handler =
(struct mca_bcol_iboffload_reg_t *) reg;
return rc;
}
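/*
* Usage sketch (illustrative only; "payload" and "payload_size" are
* hypothetical caller-provided values, and error handling is elided):
*
*   struct mca_bcol_iboffload_reg_t *reg = NULL;
*   if (OMPI_SUCCESS == mca_bcol_iboffload_prepare_buffer(payload,
*                           payload_size, &reg, iboffload)) {
*       coll_request->buffer_info[SBUF].iboffload_reg = reg;
*   }
*
* The registration handle returned here is what later fills the
* buffer_info[].iboffload_reg slots of the collective request.
*/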
int mca_bcol_iboffload_coll_req_implement(
mca_bcol_iboffload_module_t *iboffload,
mca_bcol_iboffload_collreq_t *coll_request);
END_C_DECLS
#endif

The diff for this file is not shown because of its large size.

Просмотреть файл

@ -1,73 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BCOL_IBOFFLOAD_DEVICE_H
#define MCA_BCOL_IBOFFLOAD_DEVICE_H
#include "ompi_config.h"
#include <infiniband/mqe.h>
#include <infiniband/mverbs.h>
#include <infiniband/verbs.h>
#include "bcol_iboffload.h"
#include "bcol_iboffload_frag.h"
#define BCOL_IBOFFLOAD_DUMMY_MEM_SIZE 1
BEGIN_C_DECLS
/* Device OBJ */
struct mca_bcol_iboffload_device_t {
opal_list_item_t super;
bool activated;
struct ompi_common_ofacm_base_dev_desc_t dev;
struct ibv_pd *ib_pd;
struct ibv_device_attr ib_dev_attr;
int num_act_ports;
struct mca_bcol_iboffload_port_t *ports;
struct ibv_cq *ib_cq;
/* CQ for MQs of all iboffload modules on this device */
struct ibv_cq *ib_mq_cq;
/* The free list of registered buffers -
* since the registration depends on the PD, this is
* the most reasonable place to keep the frags */
ompi_free_list_t *frags_free;
mca_mpool_base_module_t *mpool;
/* network context */
bcol_base_network_context_t *net_context;
/* We keep dummy frags for all QPs on each device;
some of the QPs may not need them, but we distribute a dummy
for each anyway. All dummies point to the same byte of memory. */
mca_bcol_iboffload_frag_t dummy_frags[MCA_BCOL_IBOFFLOAD_QP_LAST];
/* Registered memory for the dummy frags */
char dummy_mem[BCOL_IBOFFLOAD_DUMMY_MEM_SIZE];
/* Registration info of the dummy memory */
mca_bcol_iboffload_reg_t dummy_reg;
};
typedef struct mca_bcol_iboffload_device_t mca_bcol_iboffload_device_t;
OBJ_CLASS_DECLARATION(mca_bcol_iboffload_device_t);
END_C_DECLS
#endif /* MCA_BCOL_IBOFFLOAD_DEVICE_H */

Просмотреть файл

@ -1,373 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <infiniband/mverbs.h>
#include "ompi/constants.h"
#include "ompi/mca/bcol/bcol.h"
#include "ompi/mca/bcol/base/base.h"
#include "ompi/mca/common/ofacm/connect.h"
#include "opal/threads/mutex.h"
#include "opal/class/opal_object.h"
#include "bcol_iboffload.h"
#include "bcol_iboffload_frag.h"
#include "bcol_iboffload_device.h"
#include "bcol_iboffload_endpoint.h"
static void mca_bcol_iboffload_endpoint_construct(mca_bcol_iboffload_endpoint_t *ep)
{
ep->iboffload_module = NULL;
ep->ibnet_proc = NULL;
ep->qps = (mca_bcol_iboffload_endpoint_qp_t *)
calloc(mca_bcol_iboffload_component.num_qps,
sizeof(mca_bcol_iboffload_endpoint_qp_t));
ep->index = 0;
OBJ_CONSTRUCT(&ep->endpoint_lock, opal_mutex_t);
OBJ_CONSTRUCT(&ep->pending_frags, opal_list_t);
memset(ep->recv_cq, 0, IBOFFLOAD_CQ_LAST * sizeof(ep->recv_cq[0]));
memset(&ep->qp_config, 0, sizeof(ompi_common_ofacm_base_qp_config_t));
ep->cpc_context = NULL;
memset(&ep->remote_zero_rdma_addr, 0, sizeof(mca_bcol_iboffload_rdma_info_t));
memset(&ep->remote_rdma_block, 0, sizeof(mca_bcol_iboffload_rem_rdma_block_t));
ep->need_toset_remote_rdma_info = false;
}
static void mca_bcol_iboffload_endpoint_destruct(mca_bcol_iboffload_endpoint_t *ep)
{
int qp_index, num_qps, i;
ompi_free_list_item_t *item;
mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;
num_qps = cm->num_qps;
IBOFFLOAD_VERBOSE(10, ("Destruct: ep - %p, ep->index - %d", ep, ep->index));
if (NULL != ep->qps) {
for (qp_index = 0; qp_index < num_qps; ++qp_index) {
do {
item = (ompi_free_list_item_t *)
opal_list_remove_first(&ep->qps[qp_index].preposted_frags);
if(OPAL_LIKELY(NULL != item)) {
OMPI_FREE_LIST_RETURN_MT(&ep->device->frags_free[qp_index], item);
}
} while (NULL != item);
OBJ_DESTRUCT(&ep->qps[qp_index].preposted_frags);
}
free(ep->qps);
}
OBJ_DESTRUCT(&ep->endpoint_lock);
OBJ_DESTRUCT(&ep->pending_frags);
/* If the CPC has an endpoint_finalize function, call it */
if (NULL != ep->endpoint_cpc->cbm_endpoint_finalize) {
ep->endpoint_cpc->cbm_endpoint_finalize(ep->cpc_context);
}
for (i = 0; i < IBOFFLOAD_CQ_LAST; i++) {
if (NULL != ep->recv_cq[i]) {
if (ibv_destroy_cq(ep->recv_cq[i])) {
IBOFFLOAD_ERROR(("Endpoint %x "
", failed to destroy CQ, errno says %s",
ep, strerror(errno)));
}
}
}
}
OBJ_CLASS_INSTANCE(mca_bcol_iboffload_endpoint_t,
opal_list_item_t,
mca_bcol_iboffload_endpoint_construct,
mca_bcol_iboffload_endpoint_destruct);
/* Pasha: Add some error message here */
/*
* Called when the CPC has established a connection on an endpoint
*/
static void mca_bcol_iboffload_endpoint_invoke_error(void *context)
{
mca_bcol_iboffload_endpoint_t *endpoint = (mca_bcol_iboffload_endpoint_t *) context;
IBOFFLOAD_ERROR(("Getting error on endpoint - %p!", endpoint));
}
/* Pasha: Need to add more logic here */
static void mca_bcol_iboffload_endpoint_cpc_complete(void *context)
{
mca_bcol_iboffload_endpoint_t *endpoint = (mca_bcol_iboffload_endpoint_t *) context;
IBOFFLOAD_VERBOSE(10, ("Endpoint - %p for comm rank %d: CPC complete.\n",
endpoint, endpoint->iboffload_module->ibnet->super.group_list[endpoint->index]));
if (OMPI_SUCCESS !=
mca_bcol_iboffload_exchange_rem_addr(endpoint)) {
IBOFFLOAD_ERROR(("endpoint - %p, "
"remote addr exchange error.\n", endpoint));
}
/* The connection is correctly setup. Now we can decrease the
event trigger. */
opal_progress_event_users_decrement();
}
/* Vasily: Need to add more logic here */
int mca_bcol_iboffload_endpoint_post_recvs(void *context)
{
int qp_index, rc, num_qps;
mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;
mca_bcol_iboffload_endpoint_t *endpoint =
(mca_bcol_iboffload_endpoint_t *) context;
IBOFFLOAD_VERBOSE(10, ("endpoint - %p, post of %d recvs !",
endpoint, cm->qp_infos[0].rd_num));
/* TODO Pasha - fix later */
num_qps = cm->num_qps;
for (qp_index = 0; qp_index < num_qps; ++qp_index) {
rc = mca_bcol_iboffload_prepost_recv(endpoint, qp_index,
cm->qp_infos[qp_index].rd_num);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
/* Pasha: Need to add more failure logic */
IBOFFLOAD_ERROR(("Failed to prepost recv fragments "
"on qp index %d, return code - %d",
qp_index, rc));
return OMPI_ERROR;
}
}
return OMPI_SUCCESS;
}
/* The function goes over each ibnet proc and creates an endpoint for each one */
int mca_bcol_iboffloads_create_endpoints(mca_sbgp_ibnet_connection_group_info_t *cgroup,
mca_bcol_iboffload_module_t *module) {
uint32_t i;
mca_bcol_iboffload_endpoint_t *ep;
if (NULL == cgroup || NULL == module) {
IBOFFLOAD_ERROR(("Bad parameters for create endpoints function."));
return OMPI_ERROR;
}
module->num_endpoints = cgroup->num_procs;
module->endpoints = (mca_bcol_iboffload_endpoint_t **)
calloc(module->num_endpoints,
sizeof(mca_bcol_iboffload_endpoint_t *));
if (NULL == module->endpoints) {
IBOFFLOAD_ERROR(("Error memory allocation for endpoints array"
", errno says %s", strerror(errno)));
return OMPI_ERROR;
}
IBOFFLOAD_VERBOSE(10, ("iboffload - %p, num of endpoints - %d.\n",
module, module->num_endpoints));
/* Ishai: No need to open so many endpoints. We are not talking with all procs */
for (i = 0; i < cgroup->num_procs; i++) {
ep = OBJ_NEW(mca_bcol_iboffload_endpoint_t);
/* check qp memory allocation */
if (NULL == ep->qps) {
IBOFFLOAD_ERROR(("Failed to allocate memory for qps"));
return OMPI_ERROR;
}
/* init new endpoint */
ep->index = i;
ep->iboffload_module = module;
/* saving the device for the destruction - the iboffload module may not exist then */
ep->device = ep->iboffload_module->device;
ep->ibnet_proc = (mca_sbgp_ibnet_proc_t *)
opal_pointer_array_get_item(cgroup->ibnet_procs, i);
if (NULL == ep->ibnet_proc) {
IBOFFLOAD_ERROR(("Failed to get proc pointer, for index %d", i));
return OMPI_ERROR;
}
if (OMPI_SUCCESS !=
mca_bcol_iboffload_endpoint_init(ep)) {
IBOFFLOAD_ERROR(("Failed to init endpoint - %p", ep));
return OMPI_ERROR;
}
IBOFFLOAD_VERBOSE(10, ("Endpoint - %p, ep index - %d, iboffload - %p, "
"cpc contex - %p.\n", ep, ep->index,
ep->iboffload_module, ep->cpc_context));
/* Add the new endpoint to array of endpoints */
module->endpoints[i] = ep;
}
/* Pasha: Need to add better clean-up here */
return OMPI_SUCCESS;
}
static int config_qps(mca_bcol_iboffload_endpoint_t *ep)
{
int qp_index;
int ret = OMPI_SUCCESS;
ompi_common_ofacm_base_qp_config_t *qp_config = &ep->qp_config;
mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;
qp_config->num_srqs = 0;
qp_config->srq_num = NULL;
qp_config->num_qps = cm->num_qps;
qp_config->init_attr = (struct ibv_qp_init_attr *)
calloc(qp_config->num_qps, sizeof(struct ibv_qp_init_attr));
if (NULL == qp_config->init_attr) {
IBOFFLOAD_ERROR(("Failed allocate memory for qp init attributes"));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto config_qps_exit;
}
qp_config->attr = (struct ibv_qp_attr *)
calloc(qp_config->num_qps, sizeof(struct ibv_qp_attr));
if (OPAL_UNLIKELY(NULL == qp_config->attr)) {
IBOFFLOAD_ERROR(("Failed allocate memory for qp attributes"));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto config_qps_exit;
}
/* we must specify that the qps are special */
qp_config->init_attr_mask = (uint32_t *)
calloc(qp_config->num_qps, sizeof(uint32_t));
if (OPAL_UNLIKELY(NULL == qp_config->init_attr_mask)) {
IBOFFLOAD_ERROR(("Failed allocate memory for qp mask."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto config_qps_exit;
}
/* qp_config->rtr_attr_mask = qp_config->rts_attr_mask = NULL; */
qp_config->rtr_attr_mask = (uint32_t *)
calloc(qp_config->num_qps, sizeof(uint32_t));
if (OPAL_UNLIKELY(NULL == qp_config->rtr_attr_mask)) {
IBOFFLOAD_ERROR(("Failled allocate memory for qp rtr attributes mask."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto config_qps_exit;
}
qp_config->rts_attr_mask = (uint32_t *)
calloc(qp_config->num_qps, sizeof(uint32_t));
if (OPAL_UNLIKELY(NULL == qp_config->rts_attr_mask)) {
IBOFFLOAD_ERROR(("Failled allocate memory for qp rts attributes mask."));
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto config_qps_exit;
}
for (qp_index = 0; qp_index < qp_config->num_qps; ++qp_index) {
mca_bcol_iboffload_config_qps_fn_t config_qp =
cm->qp_infos[qp_index].config_qp;
if (NULL != config_qp) {
config_qp(qp_index, ep, qp_config);
}
}
config_qps_exit:
return ret;
}
/* The function is called for endpoints
* with MCA_COMMON_OFACM_USER_CUSTOM state only;
* we need an OPAL_THREAD_LOCK before calling this function */
int mca_bcol_iboffload_endpoint_init(mca_bcol_iboffload_endpoint_t *ep)
{
int qp_index, cq_index, num_qps;
ompi_common_ofacm_base_module_t *cpc;
mca_bcol_iboffload_device_t *device = ep->iboffload_module->device;
mca_sbgp_ibnet_connection_group_info_t *cgroup =
&ep->iboffload_module->ibnet->cgroups[ep->iboffload_module->cgroup_index];
for (cq_index = 0; cq_index < IBOFFLOAD_CQ_LAST; cq_index++) {
if (OMPI_SUCCESS !=
mca_bcol_iboffload_adjust_cq(device, &ep->recv_cq[cq_index])) {
IBOFFLOAD_ERROR(("Error creating CQ for %s errno says %s",
ibv_get_device_name(device->dev.ib_dev), strerror(errno)));
/* OBJ_RELEASE(ep); */ /* Vasily: What must we do in this case ??? */
return OMPI_ERROR;
}
}
if (OPAL_UNLIKELY(OMPI_SUCCESS != config_qps(ep))) {
IBOFFLOAD_ERROR(("Error configure QPs for endpoint %x errno says %s",
ep, strerror(errno)));
return OMPI_ERROR;
}
/* Adding here one more redirection in critical path. Need to think
* what is the best way to prevent it */
IBOFFLOAD_VERBOSE(10, ("Endpoint - %p, rem port - %d", ep,
ep->ibnet_proc->remote_ports_info[BCOL_IBOFFLOAD_ENDPOINT_PORT_IDX(cgroup, ep)].id));
cpc = ep->ibnet_proc->remote_ports_info[BCOL_IBOFFLOAD_ENDPOINT_PORT_IDX(cgroup, ep)].local_cpc;
ep->endpoint_cpc = cpc; /* caching pointer to cpc */
if (NULL != cpc->cbm_endpoint_init) {
ep->cpc_context = cpc->cbm_endpoint_init(
ep->ibnet_proc->ompi_proc,
&ep->qp_config,
device->ib_pd,
ep->iboffload_module->subnet_id,
ep->iboffload_module->ibnet->group_id,
ep->iboffload_module->lid,
/* Remote lid of target module */
ep->ibnet_proc->remote_ports_info[BCOL_IBOFFLOAD_ENDPOINT_PORT_IDX(cgroup, ep)].lid,
ep->index, /* user context index */
(void *) ep, /* user context */
cpc,
mca_bcol_iboffload_endpoint_cpc_complete,
mca_bcol_iboffload_endpoint_invoke_error,
mca_bcol_iboffload_endpoint_post_recvs);
if (OPAL_UNLIKELY(NULL == ep->cpc_context)) {
IBOFFLOAD_ERROR(("Endpoint - %p, failed to init context", ep));
/* OBJ_RELEASE(ep); */ /* Vasily: What must we do in this case ??? */
return OMPI_ERROR;
}
/* Updating remote port info */
num_qps = mca_bcol_iboffload_component.num_qps;
ep->remote_info = &ep->cpc_context->remote_info;
for (qp_index = 0; qp_index < num_qps; ++qp_index) {
ep->qps[qp_index].qp = &ep->cpc_context->qps[qp_index];
}
}
return OMPI_SUCCESS;
}

Просмотреть файл

@ -1,328 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BCOL_IBOFFLOAD_ENDPOINT_H
#define MCA_BCOL_IBOFFLOAD_ENDPOINT_H
#include "ompi_config.h"
#include "bcol_iboffload.h"
#include "bcol_iboffload_frag.h"
#include "ompi/mca/sbgp/ibnet/sbgp_ibnet.h"
#define BCOL_IBOFFLOAD_ENDPOINT_PORT(cgroup, ep) (ep)->ibnet_proc->use_port[(cgroup)->index]
#define BCOL_IBOFFLOAD_ENDPOINT_PORT_IDX(cgroup, ep) (BCOL_IBOFFLOAD_ENDPOINT_PORT(cgroup, ep) - 1)
BEGIN_C_DECLS
struct mca_bcol_iboffload_endpoint_qp_t {
struct ompi_common_ofacm_base_qp_t *qp;
size_t ib_inline_max;
int32_t sd_wqe; /* Number of available send wqe entries */
int32_t rd_wqe; /* Number of available recv wqe entries */
opal_list_t preposted_frags; /* List of preposted frags */
/* opal_mutex_t lock; */ /* Do I need lock here ? */
};
typedef struct mca_bcol_iboffload_endpoint_qp_t mca_bcol_iboffload_endpoint_qp_t;
enum {
IBOFFLOAD_CQ_SMALL_MESSAGES = 0,
IBOFFLOAD_CQ_SYNC,
IBOFFLOAD_CQ_LARGE_MESSAGES,
IBOFFLOAD_CQ_LAST
};
/* Endpoint object */
struct mca_bcol_iboffload_endpoint_t {
opal_list_item_t super;
/** BTL module that created this connection */
mca_bcol_iboffload_module_t *iboffload_module;
/** proc structure corresponding to endpoint */
mca_sbgp_ibnet_proc_t *ibnet_proc;
/** lock for concurrent access to endpoint state */
opal_mutex_t endpoint_lock;
/** Pending frag list */
opal_list_t pending_frags;
/** QPs information */
mca_bcol_iboffload_endpoint_qp_t *qps;
/** endpoint index on array */
int32_t index;
/** CQ for receive queues on this endpoint */
struct ibv_cq *recv_cq[IBOFFLOAD_CQ_LAST];
/** QP configuration information */
ompi_common_ofacm_base_qp_config_t qp_config;
/** cpc context */
ompi_common_ofacm_base_local_connection_context_t *cpc_context;
/** caching pointer to remote info */
ompi_common_ofacm_base_remote_connection_context_t *remote_info;
/** caching pointer to cpc */
ompi_common_ofacm_base_module_t *endpoint_cpc;
/** The struct is used for zero RDMA with immediate
in some collectives, in barrier for example. */
mca_bcol_iboffload_rdma_info_t remote_zero_rdma_addr;
mca_bcol_iboffload_rem_rdma_block_t remote_rdma_block;
/** The pointer to device - In the destruction function
the iboffload module may not exist any more - caching the device */
struct mca_bcol_iboffload_device_t *device;
bool need_toset_remote_rdma_info;
mca_bcol_iboffload_rdma_info_t remote_rdma_info[MAX_REMOTE_RDMA_INFO];
};
typedef struct mca_bcol_iboffload_endpoint_t mca_bcol_iboffload_endpoint_t;
OBJ_CLASS_DECLARATION(mca_bcol_iboffload_endpoint_t);
/* Function declaration */
int mca_bcol_iboffload_endpoint_init(mca_bcol_iboffload_endpoint_t *ep);
static inline __opal_attribute_always_inline__
int check_endpoint_state(mca_bcol_iboffload_endpoint_t *ep,
mca_bcol_base_descriptor_t *des,
opal_list_t *pending_list)
{
int rc = OMPI_ERR_RESOURCE_BUSY;
OPAL_THREAD_LOCK(&ep->cpc_context->context_lock);
/* Adding here one more redirection in critical path. Need to think
* what is the best way to prevent it */
switch(ep->cpc_context->state) {
case MCA_COMMON_OFACM_CLOSED:
rc = ep->endpoint_cpc->cbm_start_connect(ep->cpc_context);
if (OMPI_SUCCESS == rc) {
rc = OMPI_ERR_RESOURCE_BUSY;
}
/*
* As long as we expect a message from the peer (in order
* to set up the connection) let the event engine poll the
* OOB events. Note: we increment it once per active peer
* connection.
*/
opal_progress_event_users_increment();
/* fall through */
default:
/* opal_list_append(pending_list, (opal_list_item_t *)des); */ /* Vasily: will be uncommented later */
break;
case MCA_COMMON_OFACM_FAILED:
rc = OMPI_ERR_UNREACH;
break;
case MCA_COMMON_OFACM_CONNECTED:
rc = OMPI_SUCCESS;
break;
}
OPAL_THREAD_UNLOCK(&ep->cpc_context->context_lock);
return rc;
}
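/*
* Typical caller pattern (already used by the connection setup code in this
* component; repeated here only as an illustration): spin on the state
* check and drive progress until the CPC reports the endpoint as connected.
*
*   while (OMPI_SUCCESS != check_endpoint_state(ep, NULL, NULL)) {
*       opal_progress();
*   }
*/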
int mca_bcol_iboffloads_create_endpoints(mca_sbgp_ibnet_connection_group_info_t *cgroup,
mca_bcol_iboffload_module_t *module);
int mca_bcol_iboffload_endpoint_post_recvs(void *context);
static inline __opal_attribute_always_inline__ int
mca_bcol_iboffload_prepost_recv(
mca_bcol_iboffload_endpoint_t *endpoint,
int qp_index, int num_to_prepost)
{
mca_bcol_iboffload_prepost_qps_fn_t prepost_recv =
mca_bcol_iboffload_component.qp_infos[qp_index].prepost_recv;
if (NULL != prepost_recv) {
return prepost_recv(endpoint, qp_index, num_to_prepost);
}
return OMPI_SUCCESS;
}
static inline __opal_attribute_always_inline__ int
mca_bcol_iboffload_post_ml_scatter_recv_frag(
int qp_index, uint32_t dest_rank,
int nitems, struct iovec *buff_iovec,
uint32_t lkey,
struct ibv_sge *sg_entries,
mca_bcol_iboffload_frag_t *frag,
mca_bcol_iboffload_module_t *iboffload)
{
int ret, start_wr_index;
struct ibv_recv_wr *recv_wr, *recv_bad;
int i;
mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;
mca_bcol_iboffload_endpoint_t *endpoint = iboffload->endpoints[dest_rank];
mca_bcol_iboffload_recv_wr_manager *recv_wrs = &cm->recv_wrs;
mca_bcol_iboffload_device_t *device = endpoint->iboffload_module->device;
IBOFFLOAD_VERBOSE(10, ("Recv prepost call: endpoint %p, qp_index %d",
(void *) endpoint, qp_index));
/* make sure that we do not overrun number of rd_wqe */
if (0 >= endpoint->qps[qp_index].rd_wqe) {
IBOFFLOAD_VERBOSE(10, ("There are no rd_wqe - %d",
endpoint->qps[qp_index].rd_wqe));
return 0;
}
OPAL_THREAD_LOCK(&recv_wrs->lock);
/* Calculate start index in array
* of pre-allocated work requests */
start_wr_index = cm->qp_infos[qp_index].rd_num - 1;
recv_wr = &recv_wrs->recv_work_requests[qp_index][start_wr_index];
IBOFFLOAD_VERBOSE(10, ("Endpoint %p, qp_index - %d, "
"start index of WRs - %d", (void *) endpoint,
qp_index, start_wr_index));
for (i = 0; i < nitems; i++) {
sg_entries[i].length = buff_iovec[i].iov_len;
sg_entries[i].addr = (uint64_t)buff_iovec[i].iov_base;
sg_entries[i].lkey = lkey;
IBOFFLOAD_VERBOSE(10, ("Recv SGE List item %d , length %d , address %p",
i, sg_entries[i].length, sg_entries[i].addr));
IBOFFLOAD_VERBOSE(10, ("Recv SGE List item %d , iovec length %d",
i, buff_iovec[i].iov_len));
}
recv_wr->num_sge = nitems;
recv_wr->sg_list = sg_entries;
/* Set the tail */
recv_wr->next = NULL;
/* post the list of recvs */
ret = ibv_post_recv(endpoint->qps[qp_index].qp->lcl_qp, recv_wr, &recv_bad);
if (OPAL_UNLIKELY(0 != ret)) {
IBOFFLOAD_ERROR(("ibv_post_recv failed (%s), error: %s [%d], "
"qp_index - %d.\n",
ibv_get_device_name(device->dev.ib_dev),
strerror(errno), ret, qp_index));
return -1;
}
/* decreasing the number of free recv wqe */
--endpoint->qps[qp_index].rd_wqe;
OPAL_THREAD_UNLOCK(&recv_wrs->lock);
IBOFFLOAD_VERBOSE(10, ("Return success: "
"endpoint %p, qp_index %d, dest_rank %d",
endpoint, qp_index, dest_rank));
return 1;
}
static inline __opal_attribute_always_inline__ int
mca_bcol_iboffload_prepost_ml_recv_frag(
int qp_index, uint32_t dest_rank,
mca_bcol_iboffload_frag_t *frag,
mca_bcol_iboffload_module_t *iboffload)
{
int ret, start_wr_index;
struct ibv_recv_wr *recv_wr, *recv_bad;
mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;
mca_bcol_iboffload_endpoint_t *endpoint = iboffload->endpoints[dest_rank];
mca_bcol_iboffload_recv_wr_manager *recv_wrs = &cm->recv_wrs;
mca_bcol_iboffload_device_t *device = endpoint->iboffload_module->device;
IBOFFLOAD_VERBOSE(10, ("Recv prepost call: endpoint %p, qp_index %d",
(void *) endpoint, qp_index));
/* make sure that we do not overrun number of rd_wqe */
if (0 >= endpoint->qps[qp_index].rd_wqe) {
IBOFFLOAD_VERBOSE(10, ("There are no rd_wqe - %d",
endpoint->qps[qp_index].rd_wqe));
return 0;
}
OPAL_THREAD_LOCK(&recv_wrs->lock);
/* Calculate start index in array
* of pre-allocated work requests */
start_wr_index = cm->qp_infos[qp_index].rd_num - 1;
recv_wr = &recv_wrs->recv_work_requests[qp_index][start_wr_index];
IBOFFLOAD_VERBOSE(10, ("Endpoint %p, qp_index - %d, "
"start index of WRs - %d", (void *) endpoint,
qp_index, start_wr_index));
recv_wr->sg_list = &frag->sg_entry;
/* Set the tail */
recv_wr->next = NULL;
/* post the list of recvs */
ret = ibv_post_recv(endpoint->qps[qp_index].qp->lcl_qp, recv_wr, &recv_bad);
if (OPAL_UNLIKELY(0 != ret)) {
IBOFFLOAD_ERROR(("ibv_post_recv failed (%s), error: %s [%d], "
"qp_index - %d.\n",
ibv_get_device_name(device->dev.ib_dev),
strerror(errno), ret, qp_index));
return -1;
}
/* decreasing the number of free recv wqe */
--endpoint->qps[qp_index].rd_wqe;
OPAL_THREAD_UNLOCK(&recv_wrs->lock);
IBOFFLOAD_VERBOSE(10, ("Return success: "
"endpoint %p, qp_index %d, dest_rank %d",
endpoint, qp_index, dest_rank));
return 1;
}
static inline __opal_attribute_always_inline__
mca_bcol_iboffload_frag_t* mca_bcol_iboffload_get_preposted_recv_frag(
mca_bcol_iboffload_module_t *iboffload,
int source, int qp_index)
{
mca_bcol_iboffload_frag_t *frag;
mca_bcol_iboffload_endpoint_t *endpoint = iboffload->endpoints[source];
frag = mca_bcol_iboffload_component.qp_infos[qp_index].get_preposted_recv(endpoint, qp_index);
/* do we want to run prepost */
if (OPAL_LIKELY(NULL != frag)) {
IBOFFLOAD_VERBOSE(10, ("source - %d, qp_index - %d; "
"allocating preposted addr %p.\n",
source, qp_index, (void *) frag->sg_entry.addr));
frag->next = NULL;
}
return frag;
}
END_C_DECLS
#endif /* MCA_BCOL_IBOFFLOAD_ENDPOINT_H */

Просмотреть файл

@ -1,350 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <errno.h>
#include "bcol_iboffload.h"
#include "bcol_iboffload_frag.h"
#include "bcol_iboffload_task.h"
#include "bcol_iboffload_collfrag.h"
#include "bcol_iboffload_endpoint.h"
static int mca_bcol_iboffload_fanin_leader_progress(
mca_bcol_iboffload_module_t *iboffload,
struct mca_bcol_iboffload_collreq_t *coll_request)
{
int rc = OMPI_SUCCESS, leader_rank = 0, rank,
sbgp_size = iboffload->ibnet->super.group_size;
struct mqe_task *last_wait = NULL;
mca_bcol_iboffload_task_t *wait_task = NULL;
mca_bcol_iboffload_frag_t *preposted_recv_frag = NULL;
struct mqe_task **mqe_ptr_to_set;
mca_bcol_iboffload_collfrag_t *coll_fragment;
coll_fragment = (mca_bcol_iboffload_collfrag_t *)
opal_list_get_last(&coll_request->work_requests);
mqe_ptr_to_set = &coll_fragment->to_post;
if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS(
iboffload, coll_fragment->mq_index, coll_fragment->mq_credits))) {
IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n"));
goto out_of_resources;
}
for (rank = leader_rank + 1; rank < sbgp_size; ++rank) {
/* post wait */
preposted_recv_frag = mca_bcol_iboffload_get_preposted_recv_frag(
iboffload, rank, coll_request->qp_index);
if(NULL == preposted_recv_frag) {
IBOFFLOAD_VERBOSE(10, ("Failing for getting prepost recv frag.\n"));
goto out_of_resources;
}
wait_task = mca_bcol_iboffload_get_wait_task(iboffload, rank, 1,
preposted_recv_frag, coll_request->qp_index, NULL);
if(NULL == wait_task) {
IBOFFLOAD_VERBOSE(10, ("Failing for getting wait task.\n"));
goto out_of_resources;
}
APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait);
MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task);
}
/* end of list */
*mqe_ptr_to_set = NULL;
last_wait->flags |= MQE_WR_FLAG_SIGNAL;
coll_fragment->signal_task_wr_id = last_wait->wr_id;
last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment;
/* post the mwr */
rc = mca_bcol_iboffload_post_mqe_tasks(iboffload, coll_fragment->to_post);
if(OMPI_SUCCESS != rc) {
IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n"));
/* Note: need to clean up */
return rc;
}
MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload->super, coll_request->order_info);
return OMPI_SUCCESS;
out_of_resources:
/* Release all resources */
IBOFFLOAD_VERBOSE(10, ("Fan-in, adding collfrag to collfrag_pending"));
return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload);
}
static int mca_bcol_iboffload_fanin_proxy_progress(
mca_bcol_iboffload_module_t *iboffload,
struct mca_bcol_iboffload_collreq_t *coll_request)
{
int rc = OMPI_SUCCESS, leader_rank = 0;
struct mqe_task *last_send = NULL;
mca_bcol_iboffload_task_t *send_task = NULL;
mca_bcol_iboffload_frag_t *send_fragment = NULL;
struct mqe_task **mqe_ptr_to_set;
mca_bcol_iboffload_collfrag_t *coll_fragment;
coll_fragment = (mca_bcol_iboffload_collfrag_t *)
opal_list_get_last(&coll_request->work_requests);
mqe_ptr_to_set = &coll_fragment->to_post;
if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS(
iboffload, coll_fragment->mq_index, coll_fragment->mq_credits))) {
IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n"));
goto out_of_resources;
}
/* post send */
send_fragment = mca_bcol_iboffload_get_send_frag(coll_request,
leader_rank, coll_request->qp_index, 0,
0, SBUF, MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY);
if(NULL == send_fragment) {
IBOFFLOAD_VERBOSE(10, ("Failing for getting and packing send frag.\n"));
goto out_of_resources;
}
send_task = mca_bcol_iboffload_get_send_task(iboffload, leader_rank, MCA_BCOL_IBOFFLOAD_QP_BARRIER,
send_fragment, coll_fragment, INLINE);
if(NULL == send_task) {
IBOFFLOAD_VERBOSE(10, ("Failing for getting send task.\n"));
goto out_of_resources;
}
APPEND_TO_TASKLIST(mqe_ptr_to_set, send_task, last_send);
MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task);
/* end of list */
*mqe_ptr_to_set = NULL;
assert(NULL != last_send);
last_send->flags |= MQE_WR_FLAG_SIGNAL;
coll_fragment->signal_task_wr_id = last_send->wr_id;
last_send->wr_id = (uint64_t) (uintptr_t) coll_fragment;
/* post the mwr */
rc = mca_bcol_iboffload_post_mqe_tasks(iboffload, coll_fragment->to_post);
if(OMPI_SUCCESS != rc) {
IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n"));
/* Note: need to clean up */
return rc;
}
MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload->super, coll_request->order_info);
return OMPI_SUCCESS;
out_of_resources:
/* Release all resources */
IBOFFLOAD_VERBOSE(10, ("Fan-in, adding collfrag to collfrag_pending"));
return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload);
}
static int mca_bcol_iboffload_fanin_init(
bcol_function_args_t *input_args,
mca_bcol_iboffload_module_t *iboffload,
struct mca_bcol_iboffload_collreq_t **coll_request)
{
ompi_free_list_item_t *item = NULL;
mca_bcol_iboffload_collfrag_t *coll_fragment = NULL;
mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;
IBOFFLOAD_VERBOSE(10, ("Calling for mca_bcol_iboffload_barrier_init"));
OMPI_FREE_LIST_WAIT_MT(&cm->collreqs_free, item);
if(OPAL_UNLIKELY(NULL == item)) {
IBOFFLOAD_VERBOSE(10, ("Failing for coll request free list waiting.\n"));
return OMPI_ERR_OUT_OF_RESOURCE;
}
(*coll_request) = (mca_bcol_iboffload_collreq_t *) item;
(*coll_request)->progress_fn = iboffload->fanin_algth;
(*coll_request)->completion_cb_fn = NULL;
(*coll_request)->order_info = &input_args->order_info;
(*coll_request)->module = iboffload;
(*coll_request)->ml_buffer_index = input_args->buffer_index;
(*coll_request)->buffer_info[SBUF].offset = 0;
(*coll_request)->buffer_info[RBUF].offset = 0;
(*coll_request)->qp_index = MCA_BCOL_IBOFFLOAD_QP_BARRIER;
input_args->bcol_opaque_data = (void *) (*coll_request);
/* finish initializing full message descriptor */
(*coll_request)->n_fragments = 1;
(*coll_request)->n_frags_sent = 1;
(*coll_request)->n_frag_mpi_complete = 0;
(*coll_request)->n_frag_net_complete = 0;
(*coll_request)->user_handle_freed = false;
/*
* setup collective work request
*/
/* get collective frag */
coll_fragment = &(*coll_request)->first_collfrag;
mca_bcol_iboffload_collfrag_init(coll_fragment);
coll_fragment->alg = FANIN_ALG;
coll_fragment->mq_index = COLL_MQ;
/* Set mq credits */
coll_fragment->mq_credits = iboffload->alg_task_consump[FANIN_ALG];
/* set pointers for (coll frag) <-> (coll full request) */
MCA_BCOL_IBOFFLOAD_SET_COLL_REQ_LINKS(*coll_request, coll_fragment);
return OMPI_SUCCESS;
}
/************************************************************************
************************ New style Fan-In ******************************
***********************************************************************/
static int mca_bcol_iboffload_new_style_fanin_progress(
bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args)
{
mca_bcol_iboffload_collreq_t *coll_request =
(mca_bcol_iboffload_collreq_t *)
input_args->bcol_opaque_data;
if (BCOL_IS_COMPLETED(coll_request)) {
coll_request->user_handle_freed = true;
if (COLLREQ_IS_DONE(coll_request)) {
IBOFFLOAD_VERBOSE(10, ("Coll request already done.\n"));
RELEASE_COLLREQ(coll_request);
}
IBOFFLOAD_VERBOSE(10, ("Fan-In already done.\n"));
return BCOL_FN_COMPLETE;
}
return BCOL_FN_STARTED;
}
int mca_bcol_iboffload_new_style_fanin_first_call(
mca_bcol_iboffload_module_t *iboffload,
struct mca_bcol_iboffload_collreq_t *coll_request)
{
int i = 0, leader_rank = 0, /* we always assume the lowest index is the leader */
my_rank = iboffload->ibnet->super.my_index,
sbgp_size = iboffload->ibnet->super.group_size;
mca_bcol_iboffload_endpoint_t *ep = NULL;
mca_sbgp_ibnet_proc_t *my_ibnet_proc = iboffload->endpoints[my_rank]->ibnet_proc;
assert(NULL != my_ibnet_proc);
if (MCA_SBGP_IBNET_NODE_LEADER == my_ibnet_proc->duty) {
iboffload->fanin_algth = mca_bcol_iboffload_fanin_leader_progress;
iboffload->alg_task_consump[FANIN_ALG] += sbgp_size;
for (i = leader_rank + 1; i < sbgp_size; ++i) {
ep = iboffload->endpoints[i];
while (OMPI_SUCCESS !=
check_endpoint_state(ep, NULL, NULL)) {
opal_progress();
}
}
} else {
iboffload->fanin_algth = mca_bcol_iboffload_fanin_proxy_progress;
iboffload->alg_task_consump[FANIN_ALG] += 1;
ep = iboffload->endpoints[leader_rank];
while(OMPI_SUCCESS !=
check_endpoint_state(ep, NULL, NULL)) {
opal_progress();
}
}
return iboffload->fanin_algth(iboffload, coll_request);
}
static int mca_bcol_iboffload_new_style_fanin_intra(
bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args)
{
int rc = OMPI_SUCCESS;
struct mca_bcol_iboffload_collreq_t *coll_request = NULL;
mca_bcol_iboffload_module_t *iboffload =
(mca_bcol_iboffload_module_t *) const_args->bcol_module;
assert(NULL != iboffload);
MCA_BCOL_CHECK_ORDER(const_args->bcol_module, input_args);
/* Init the Fan-In collective request */
rc = mca_bcol_iboffload_fanin_init(input_args, iboffload, &coll_request);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
IBOFFLOAD_VERBOSE(10, ("Error from mca_bcol_iboffload_fanin_init.\n"));
return BCOL_FN_NOT_STARTED;
}
rc = iboffload->fanin_algth(iboffload, coll_request);
if (OPAL_UNLIKELY(OMPI_ERROR == rc)) {
return BCOL_FN_NOT_STARTED;
}
return BCOL_FN_STARTED;
}
int mca_bcol_iboffload_fanin_register(mca_bcol_base_module_t *super)
{
mca_bcol_base_coll_fn_comm_attributes_t comm_attribs;
mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs;
IBOFFLOAD_VERBOSE(10, ("Register iboffload Fan-In.\n"));
comm_attribs.bcoll_type = BCOL_FANIN;
comm_attribs.comm_size_min = 0;
comm_attribs.comm_size_max = 1024 * 1024;
comm_attribs.waiting_semantics = NON_BLOCKING;
inv_attribs.bcol_msg_min = 0;
inv_attribs.bcol_msg_max = 20000; /* range 1 */
inv_attribs.datatype_bitmap = 0xffffffff;
inv_attribs.op_types_bitmap = 0xffffffff;
comm_attribs.data_src = DATA_SRC_KNOWN;
mca_bcol_base_set_attributes(super,
&comm_attribs, &inv_attribs,
mca_bcol_iboffload_new_style_fanin_intra,
mca_bcol_iboffload_new_style_fanin_progress);
return OMPI_SUCCESS;
}

View file

@ -1,349 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <errno.h>
#include "bcol_iboffload.h"
#include "bcol_iboffload_frag.h"
#include "bcol_iboffload_task.h"
#include "bcol_iboffload_collfrag.h"
#include "bcol_iboffload_endpoint.h"
static int mca_bcol_iboffload_fanout_leader_progress(
mca_bcol_iboffload_module_t *iboffload,
struct mca_bcol_iboffload_collreq_t *coll_request)
{
int rc = OMPI_SUCCESS, leader_rank = 0, rank,
sbgp_size = iboffload->ibnet->super.group_size;
struct mqe_task *last_send = NULL;
mca_bcol_iboffload_task_t *send_task = NULL;
mca_bcol_iboffload_frag_t *send_fragment = NULL;
struct mqe_task **mqe_ptr_to_set;
mca_bcol_iboffload_collfrag_t *coll_fragment;
coll_fragment = (mca_bcol_iboffload_collfrag_t *)
opal_list_get_last(&coll_request->work_requests);
mqe_ptr_to_set = &coll_fragment->to_post;
if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS(
iboffload, coll_fragment->mq_index, coll_fragment->mq_credits))) {
IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n"));
goto out_of_resources;
}
for (rank = leader_rank + 1; rank < sbgp_size; ++rank) {
/* post send */
send_fragment = mca_bcol_iboffload_get_send_frag(coll_request,
rank, coll_request->qp_index, 0,
0, SBUF, MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY);
if(NULL == send_fragment) {
IBOFFLOAD_VERBOSE(10, ("Failing for getting and packing send frag.\n"));
goto out_of_resources;
}
send_task = mca_bcol_iboffload_get_send_task(iboffload, rank, MCA_BCOL_IBOFFLOAD_QP_BARRIER,
send_fragment, coll_fragment, INLINE);
if(NULL == send_task) {
IBOFFLOAD_VERBOSE(10, ("Failing for getting send task.\n"));
goto out_of_resources;
}
APPEND_TO_TASKLIST(mqe_ptr_to_set, send_task, last_send);
MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task);
}
/* end of list */
*mqe_ptr_to_set = NULL;
assert(NULL != last_send);
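/* Only the last send is signaled: its original wr_id is saved in
* signal_task_wr_id and then replaced with the coll_fragment pointer, so
* that the completion for this chain of MQE tasks can be mapped back to
* the collective fragment. */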
last_send->flags |= MQE_WR_FLAG_SIGNAL;
coll_fragment->signal_task_wr_id = last_send->wr_id;
last_send->wr_id = (uint64_t) (uintptr_t) coll_fragment;
/* post the mwr */
rc = mca_bcol_iboffload_post_mqe_tasks(iboffload, coll_fragment->to_post);
if(OMPI_SUCCESS != rc) {
IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n"));
/* Note: need to clean up */
return rc;
}
MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload->super, coll_request->order_info);
return OMPI_SUCCESS;
out_of_resources:
/* Release all resources */
IBOFFLOAD_VERBOSE(10, ("Fan-in, adding collfrag to collfrag_pending"));
return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload);
}
static int mca_bcol_iboffload_fanout_proxy_progress(
mca_bcol_iboffload_module_t *iboffload,
struct mca_bcol_iboffload_collreq_t *coll_request)
{
int rc = OMPI_SUCCESS, leader_rank = 0;
struct mqe_task *last_wait = NULL;
mca_bcol_iboffload_task_t *wait_task = NULL;
mca_bcol_iboffload_frag_t *preposted_recv_frag = NULL;
struct mqe_task **mqe_ptr_to_set;
mca_bcol_iboffload_collfrag_t *coll_fragment;
coll_fragment = (mca_bcol_iboffload_collfrag_t *)
opal_list_get_last(&coll_request->work_requests);
mqe_ptr_to_set = &coll_fragment->to_post;
if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS(
iboffload, coll_fragment->mq_index, coll_fragment->mq_credits))) {
IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n"));
goto out_of_resources;
}
/* post wait */
preposted_recv_frag = mca_bcol_iboffload_get_preposted_recv_frag(
iboffload, leader_rank, coll_request->qp_index);
if(NULL == preposted_recv_frag) {
IBOFFLOAD_VERBOSE(10, ("Failing for getting prepost recv frag.\n"));
goto out_of_resources;
}
wait_task = mca_bcol_iboffload_get_wait_task(iboffload, leader_rank, 1,
preposted_recv_frag, coll_request->qp_index, NULL);
if(NULL == wait_task) {
IBOFFLOAD_VERBOSE(10, ("Failing for getting wait task.\n"));
goto out_of_resources;
}
APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait);
MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task);
/* end of list */
*mqe_ptr_to_set = NULL;
last_wait->flags |= MQE_WR_FLAG_SIGNAL;
coll_fragment->signal_task_wr_id = last_wait->wr_id;
last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment;
/* post the mwr */
rc = mca_bcol_iboffload_post_mqe_tasks(iboffload, coll_fragment->to_post);
if(OMPI_SUCCESS != rc) {
IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n"));
/* Note: need to clean up */
return rc;
}
MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload->super, coll_request->order_info);
return OMPI_SUCCESS;
out_of_resources:
/* Release all resources */
IBOFFLOAD_VERBOSE(10, ("Fan-in, adding collfrag to collfrag_pending"));
return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload);
}
static int mca_bcol_iboffload_fanout_init(
bcol_function_args_t *input_args,
mca_bcol_iboffload_module_t *iboffload,
struct mca_bcol_iboffload_collreq_t **coll_request)
{
ompi_free_list_item_t *item = NULL;
mca_bcol_iboffload_collfrag_t *coll_fragment = NULL;
mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;
IBOFFLOAD_VERBOSE(10, ("Calling for mca_bcol_iboffload_barrier_init"));
OMPI_FREE_LIST_WAIT_MT(&cm->collreqs_free, item);
if(NULL == item) {
IBOFFLOAD_VERBOSE(10, ("Failing for coll request free list waiting.\n"));
return OMPI_ERR_OUT_OF_RESOURCE;
}
(*coll_request) = (mca_bcol_iboffload_collreq_t *) item;
(*coll_request)->progress_fn = iboffload->fanout_algth;
(*coll_request)->completion_cb_fn = NULL;
(*coll_request)->order_info = &input_args->order_info;
(*coll_request)->module = iboffload;
(*coll_request)->ml_buffer_index = input_args->buffer_index;
(*coll_request)->buffer_info[SBUF].offset = 0;
(*coll_request)->buffer_info[RBUF].offset = 0;
(*coll_request)->qp_index = MCA_BCOL_IBOFFLOAD_QP_BARRIER;
/* finish initializing full message descriptor */
(*coll_request)->n_fragments = 1;
(*coll_request)->n_frags_sent = 1;
(*coll_request)->n_frag_mpi_complete = 0;
(*coll_request)->n_frag_net_complete = 0;
(*coll_request)->user_handle_freed = false;
input_args->bcol_opaque_data = (void *) (*coll_request);
/*
* setup collective work request
*/
/* get collective frag */
coll_fragment = &(*coll_request)->first_collfrag;
mca_bcol_iboffload_collfrag_init(coll_fragment);
coll_fragment->alg = FANOUT_ALG;
coll_fragment->mq_index = COLL_MQ;
/* Set mq credits */
coll_fragment->mq_credits = iboffload->alg_task_consump[FANOUT_ALG];
/* set pointers for (coll frag) <-> (coll full request) */
MCA_BCOL_IBOFFLOAD_SET_COLL_REQ_LINKS(*coll_request, coll_fragment);
return OMPI_SUCCESS;
}
/************************************************************************
************************ New style Fan-Out *****************************
***********************************************************************/
static int mca_bcol_iboffload_new_style_fanout_progress(
bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args)
{
mca_bcol_iboffload_collreq_t *coll_request =
(mca_bcol_iboffload_collreq_t *)
input_args->bcol_opaque_data;
if (BCOL_IS_COMPLETED(coll_request)) {
coll_request->user_handle_freed = true;
if (COLLREQ_IS_DONE(coll_request)) {
IBOFFLOAD_VERBOSE(10, ("Coll request already done.\n"));
RELEASE_COLLREQ(coll_request);
}
IBOFFLOAD_VERBOSE(10, ("Fan-Out already done.\n"));
return BCOL_FN_COMPLETE;
}
return BCOL_FN_STARTED;
}
int mca_bcol_iboffload_new_style_fanout_first_call(
mca_bcol_iboffload_module_t *iboffload,
struct mca_bcol_iboffload_collreq_t *coll_request)
{
int i = 0, leader_rank = 0, /* We always assume the lowest index is the leader */
my_rank = iboffload->ibnet->super.my_index,
sbgp_size = iboffload->ibnet->super.group_size;
mca_bcol_iboffload_endpoint_t *ep = NULL;
mca_sbgp_ibnet_proc_t *my_ibnet_proc = iboffload->endpoints[my_rank]->ibnet_proc;
assert(NULL != my_ibnet_proc);
if (MCA_SBGP_IBNET_NODE_LEADER == my_ibnet_proc->duty) {
iboffload->fanout_algth = mca_bcol_iboffload_fanout_leader_progress;
iboffload->alg_task_consump[FANOUT_ALG] += sbgp_size;
for (i = leader_rank + 1; i < sbgp_size; ++i) {
ep = iboffload->endpoints[i];
while (OMPI_SUCCESS !=
check_endpoint_state(ep, NULL, NULL)) {
opal_progress();
}
}
} else {
iboffload->fanout_algth = mca_bcol_iboffload_fanout_proxy_progress;
iboffload->alg_task_consump[FANOUT_ALG] += 1;
ep = iboffload->endpoints[leader_rank];
while(OMPI_SUCCESS !=
check_endpoint_state(ep, NULL, NULL)) {
opal_progress();
}
}
return iboffload->fanout_algth(iboffload, coll_request);
}
static int mca_bcol_iboffload_new_style_fanout_intra(
bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args)
{
int rc = OMPI_SUCCESS;
struct mca_bcol_iboffload_collreq_t *coll_request = NULL;
mca_bcol_iboffload_module_t *iboffload =
(mca_bcol_iboffload_module_t *) const_args->bcol_module;
assert(NULL != iboffload);
MCA_BCOL_CHECK_ORDER(const_args->bcol_module, input_args);
/* Init Fan-Out collective request */
rc = mca_bcol_iboffload_fanout_init(input_args, iboffload, &coll_request);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
IBOFFLOAD_VERBOSE(10, ("Error from mca_bcol_iboffload_fanin_init.\n"));
return BCOL_FN_NOT_STARTED;
}
rc = iboffload->fanout_algth(iboffload, coll_request);
if (OPAL_UNLIKELY(OMPI_ERROR == rc)) {
return BCOL_FN_NOT_STARTED;
}
return BCOL_FN_STARTED;
}
int mca_bcol_iboffload_fanout_register(mca_bcol_base_module_t *super)
{
mca_bcol_base_coll_fn_comm_attributes_t comm_attribs;
mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs;
IBOFFLOAD_VERBOSE(10, ("Register iboffload Fan-In.\n"));
comm_attribs.bcoll_type = BCOL_FANOUT;
comm_attribs.comm_size_min = 0;
comm_attribs.comm_size_max = 1024 * 1024;
comm_attribs.waiting_semantics = NON_BLOCKING;
inv_attribs.bcol_msg_min = 0;
inv_attribs.bcol_msg_max = 20000; /* range 1 */
inv_attribs.datatype_bitmap = 0xffffffff;
inv_attribs.op_types_bitmap = 0xffffffff;
comm_attribs.data_src = DATA_SRC_KNOWN;
mca_bcol_base_set_attributes(super,
&comm_attribs, &inv_attribs,
mca_bcol_iboffload_new_style_fanout_intra,
mca_bcol_iboffload_new_style_fanout_progress);
return OMPI_SUCCESS;
}

View file

@ -1,272 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "opal/include/opal/types.h"
#include "opal/datatype/opal_convertor.h"
#include "bcol_iboffload.h"
#include "bcol_iboffload_frag.h"
#include "bcol_iboffload_device.h"
#include "bcol_iboffload_collreq.h"
#include "bcol_iboffload_endpoint.h"
static void frag_constructor(mca_bcol_iboffload_frag_t *frag)
{
mca_bcol_iboffload_reg_t* reg =
(mca_bcol_iboffload_reg_t*) frag->super.registration;
memset(&frag->sg_entry, 0, sizeof(struct ibv_sge));
frag->sg_entry.addr = (uint64_t) (uintptr_t) frag->super.ptr;
frag->registration = reg;
if (NULL != reg) {
frag->sg_entry.lkey = reg->mr->lkey;
}
frag->next = NULL;
frag->type = MCA_BCOL_IBOFFLOAD_NONE_OWNER;
frag->ref_counter = 0;
frag->qp_index = -1;
}
OBJ_CLASS_INSTANCE(
mca_bcol_iboffload_frag_t,
ompi_free_list_item_t,
frag_constructor,
NULL);
static mca_bcol_iboffload_frag_t*
mca_bcol_iboffload_get_ml_frag_calc(mca_bcol_iboffload_module_t *iboffload,
mca_bcol_iboffload_collreq_t *coll_request,
size_t len, size_t src_offset)
{
int rc;
mca_bcol_iboffload_frag_t *fragment;
mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;
uint64_t sbuff = (uint64_t) (uintptr_t) coll_request->buffer_info[SBUF].buf +
src_offset;
/* The buffer was allocated on ML level,
no need to allocate local buffer */
rc = pack_data_for_calc(iboffload->device->dev.ib_dev_context,
cm->map_ompi_to_ib_calcs[coll_request->op->op_type],
cm->map_ompi_to_ib_dt[coll_request->dtype->id],
false /* host order */,
(void *) sbuff, 0,
&coll_request->actual_ib_op,
&coll_request->actual_ib_dtype,
(void *) sbuff);
if (OPAL_UNLIKELY(0 != rc)) {
IBOFFLOAD_VERBOSE(10, ("pack_data_for_calc failed, op: %s, type: %s\n",
coll_request->op->o_name, coll_request->dtype->name));
return NULL;
}
fragment = mca_bcol_iboffload_get_ml_frag(
iboffload, coll_request->qp_index, len,
coll_request->buffer_info[SBUF].lkey,
sbuff);
return fragment;
}
static mca_bcol_iboffload_frag_t *
mca_bcol_iboffload_get_packed_frag(mca_bcol_iboffload_module_t *iboffload,
uint32_t destination, int qp_index, size_t len,
struct opal_convertor_t *convertor)
{
/* local variables */
int rc;
uint32_t out_size;
size_t max_size = 0;
struct iovec payload_iovec;
ompi_free_list_item_t *item;
mca_bcol_iboffload_frag_t *frag;
mca_bcol_iboffload_device_t *device = iboffload->device;
/* Get frag from free list */
OMPI_FREE_LIST_GET_MT(&device->frags_free[qp_index], item);
if (OPAL_UNLIKELY(NULL == item)) {
return NULL;
}
frag = (mca_bcol_iboffload_frag_t *) item;
/* Pack data into the buffer */
out_size = 1;
payload_iovec.iov_len = len;
payload_iovec.iov_base = (void *) (uintptr_t) frag->sg_entry.addr;
rc = opal_convertor_pack(convertor, &(payload_iovec),
&out_size, &max_size);
if (OPAL_UNLIKELY(rc < 0)) {
/* Error: put the fragment back */
OMPI_FREE_LIST_RETURN_MT(&device->frags_free[qp_index], item);
return NULL;
}
return frag;
}
static mca_bcol_iboffload_frag_t *
mca_bcol_iboffload_get_calc_frag(mca_bcol_iboffload_module_t *iboffload, int qp_index,
struct mca_bcol_iboffload_collreq_t *coll_request)
{
int rc;
ompi_free_list_item_t *item;
mca_bcol_iboffload_frag_t *frag;
mca_bcol_iboffload_device_t *device = iboffload->device;
mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;
IBOFFLOAD_VERBOSE(10, ("Start to pack frag.\n"));
/* Get frag from free list */
OMPI_FREE_LIST_GET_MT(&device->frags_free[qp_index], item);
if (OPAL_UNLIKELY(NULL == item)) {
return NULL;
}
frag = (mca_bcol_iboffload_frag_t *) item;
/* Pack data into the buffer */
rc = pack_data_for_calc(device->dev.ib_dev_context,
cm->map_ompi_to_ib_calcs[coll_request->op->op_type],
cm->map_ompi_to_ib_dt[coll_request->dtype->id], false,
coll_request->buffer_info[SBUF].buf, 0,
&coll_request->actual_ib_op,
&coll_request->actual_ib_dtype,
(void *) (uintptr_t) frag->sg_entry.addr);
if (OPAL_UNLIKELY(0 != rc)) {
IBOFFLOAD_ERROR(("pack_data_for_calc failed, op: %s, type: %s\n",
coll_request->op->o_name, coll_request->dtype->name));
return NULL;
}
return frag;
}
mca_bcol_iboffload_frag_t*
mca_bcol_iboffload_get_send_frag(mca_bcol_iboffload_collreq_t *coll_request,
uint32_t destination, int qp_index, size_t len,
size_t src_offset, int buf_index, int send_frag_type)
{
/* local variables */
mca_bcol_iboffload_frag_t *frag;
mca_bcol_iboffload_module_t *iboffload = coll_request->module;
mca_bcol_iboffload_endpoint_t *endpoint =
iboffload->endpoints[destination];
IBOFFLOAD_VERBOSE(10, ("Calling mca_bcol_iboffload_get_send_frag qp_index %d",
qp_index));
if ((endpoint->qps[qp_index].sd_wqe) <= 0) {
IBOFFLOAD_VERBOSE(10, ("No send wqe %d",
endpoint->qps[qp_index].sd_wqe));
return NULL;
}
--endpoint->qps[qp_index].sd_wqe;
IBOFFLOAD_VERBOSE(10, ("Endpoint %p: qp_index %d, destination %d, sd_wqe %d",
endpoint, qp_index, destination, endpoint->qps[qp_index].sd_wqe));
switch (send_frag_type) {
case MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY:
IBOFFLOAD_VERBOSE(10, ("Getting MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY"));
assert(NULL != &iboffload->device->dummy_frags[qp_index]);
return &iboffload->device->dummy_frags[qp_index];
case MCA_BCOL_IBOFFLOAD_SEND_FRAG:
{
ompi_free_list_item_t *item;
IBOFFLOAD_VERBOSE(10, ("Getting MCA_BCOL_IBOFFLOAD_SEND_FRAG"));
/* Get frag from free list */
OMPI_FREE_LIST_GET_MT(&iboffload->device->frags_free[qp_index], item);
frag = (mca_bcol_iboffload_frag_t *) item;
}
break;
case MCA_BCOL_IBOFFLOAD_SEND_FRAG_CONVERT:
IBOFFLOAD_VERBOSE(10, ("Getting MCA_BCOL_IBOFFLOAD_SEND_FRAG_CONVERT"));
frag = mca_bcol_iboffload_get_packed_frag(iboffload, destination,
qp_index, len, &coll_request->send_convertor);
break;
case MCA_BCOL_IBOFFLOAD_SEND_FRAG_CALC:
IBOFFLOAD_VERBOSE(10, ("Getting MCA_BCOL_IBOFFLOAD_SEND_FRAG_CALC"));
frag = mca_bcol_iboffload_get_calc_frag(iboffload, qp_index, coll_request);
break;
case MCA_BCOL_IBOFFLOAD_SEND_FRAG_ML:
IBOFFLOAD_VERBOSE(10, ("Getting MCA_BCOL_IBOFFLOAD_SEND_FRAG_ML"));
frag = mca_bcol_iboffload_get_ml_frag(
iboffload, qp_index, len, coll_request->buffer_info[buf_index].lkey,
(uint64_t)(uintptr_t) coll_request->buffer_info[buf_index].buf + src_offset);
break;
case MCA_BCOL_IBOFFLOAD_SEND_FRAG_ML_CALC:
frag = mca_bcol_iboffload_get_ml_frag_calc(iboffload, coll_request, len, src_offset);
IBOFFLOAD_VERBOSE(10, ("Getting MCA_BCOL_IBOFFLOAD_SEND_FRAG_ML_CALC"));
break;
default:
IBOFFLOAD_VERBOSE(10, ("Getting default"));
frag = NULL;
IBOFFLOAD_ERROR(("Unknown send frag type %d for QP index %d",
send_frag_type, qp_index));
}
if (OPAL_UNLIKELY(NULL == frag)) {
IBOFFLOAD_VERBOSE(10, ("Getting NULL"));
return NULL;
}
frag->sg_entry.length = len;
frag->next = NULL;
return frag;
}
void
mca_bcol_iboffload_frag_init(ompi_free_list_item_t* item, void* ctx)
{
int qp_index = *(int *) ctx;
mca_bcol_iboffload_frag_t *frag = (mca_bcol_iboffload_frag_t *) item;
frag->qp_index = qp_index;
frag->type = MCA_BCOL_IBOFFLOAD_BCOL_OWNER;
}
void
mca_bcol_iboffload_ml_frag_init(ompi_free_list_item_t* item, void* ctx)
{
mca_bcol_iboffload_frag_t *frag = (mca_bcol_iboffload_frag_t *) item;
frag->qp_index = -1;
frag->type = MCA_BCOL_IBOFFLOAD_ML_OWNER;
}

View file

@ -1,154 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BCOL_IBOFFLOAD_FRAG_H
#define MCA_BCOL_IBOFFLOAD_FRAG_H
#include "ompi_config.h"
#include <infiniband/verbs.h>
#include "opal/datatype/opal_convertor.h"
#include "opal/mca/mpool/mpool.h"
#include "opal/class/ompi_free_list.h"
#include "bcol_iboffload.h"
BEGIN_C_DECLS
/* forward declarations */
struct mca_bcol_iboffload_collreq_t;
struct mca_bcol_iboffload_reg_t {
mca_mpool_base_registration_t base;
struct ibv_mr *mr;
};
typedef struct mca_bcol_iboffload_reg_t mca_bcol_iboffload_reg_t;
typedef enum {
MCA_BCOL_IBOFFLOAD_NONE_OWNER = -1,
MCA_BCOL_IBOFFLOAD_DUMMY_OWNER,
MCA_BCOL_IBOFFLOAD_BCOL_OWNER,
MCA_BCOL_IBOFFLOAD_ML_OWNER
} frag_type;
typedef enum {
MCA_BCOL_IBOFFLOAD_SEND_FRAG,
MCA_BCOL_IBOFFLOAD_SEND_FRAG_ML,
MCA_BCOL_IBOFFLOAD_SEND_FRAG_ML_CALC,
MCA_BCOL_IBOFFLOAD_SEND_FRAG_CONVERT,
MCA_BCOL_IBOFFLOAD_SEND_FRAG_CALC,
MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY
} send_frag_type;
struct mca_bcol_iboffload_frag_t {
ompi_free_list_item_t super;
struct mca_bcol_iboffload_frag_t *next;
struct mca_bcol_iboffload_reg_t *registration;
struct ibv_sge sg_entry;
frag_type type;
int ref_counter;
int qp_index;
};
typedef struct mca_bcol_iboffload_frag_t mca_bcol_iboffload_frag_t;
OBJ_CLASS_DECLARATION(mca_bcol_iboffload_frag_t);
/* The same fragment may be shared by multiple tasks.
* In order to manage the allocation and release flow correctly,
* each fragment carries a reference counter; the following wrapper
* macros hide that counter. */
#define IBOFFLOAD_SET_SINGLE_FRAG_ON_TASK(fragment, task) \
do { \
++((fragment)->ref_counter); \
(task)->frag = (fragment); \
} while(0)
#define IBOFFLOAD_SET_FRAGS_ON_TASK(fragment, task) \
do { \
struct mca_bcol_iboffload_frag_t *temp_frag = fragment; \
while (NULL != temp_frag) { \
++(temp_frag->ref_counter); \
temp_frag = temp_frag->next; \
} \
(task)->frag = fragment; \
} while(0)
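/*
* A minimal usage sketch (the 'frag', 'frag_list_head' and 'task' variables
* below are assumed to have been taken from their free lists elsewhere):
*
*   IBOFFLOAD_SET_SINGLE_FRAG_ON_TASK(frag, task);
*
* attaches a single fragment to a task, while
*
*   IBOFFLOAD_SET_FRAGS_ON_TASK(frag_list_head, task);
*
* walks a chain of fragments linked through frag->next. In both cases every
* referenced fragment has its ref_counter incremented, so it can only be
* released once the last task that references it has completed.
*/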
/* function declarations */
mca_bcol_iboffload_frag_t *
mca_bcol_iboffload_get_send_frag(struct mca_bcol_iboffload_collreq_t *coll_request,
uint32_t destination, int qp_index, size_t len,
size_t src_offset, int buff_index, int send_frag_type);
void
mca_bcol_iboffload_frag_init(ompi_free_list_item_t* item, void* ctx);
void
mca_bcol_iboffload_ml_frag_init(ompi_free_list_item_t* item, void* ctx);
static inline __opal_attribute_always_inline__
mca_bcol_iboffload_frag_t* mca_bcol_iboffload_get_ml_empty_frag(
mca_bcol_iboffload_module_t *iboffload,
int qp_index)
{
ompi_free_list_item_t *item;
mca_bcol_iboffload_frag_t *frag;
mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;
/* Get frag from free list */
OMPI_FREE_LIST_GET_MT(&cm->ml_frags_free, item);
if (OPAL_UNLIKELY(NULL == item)) {
return NULL;
}
frag = (mca_bcol_iboffload_frag_t *) item;
frag->qp_index = qp_index;
frag->next = NULL;
return frag;
}
static inline __opal_attribute_always_inline__
mca_bcol_iboffload_frag_t* mca_bcol_iboffload_get_ml_frag(
mca_bcol_iboffload_module_t *iboffload,
int qp_index, size_t len, uint32_t lkey, uint64_t addr)
{
/* local variables */
mca_bcol_iboffload_frag_t *frag;
IBOFFLOAD_VERBOSE(10, ("Call for get ML frag - addr 0x%x", addr));
frag = mca_bcol_iboffload_get_ml_empty_frag(iboffload, qp_index);
frag->sg_entry.addr = addr;
frag->sg_entry.lkey = lkey;
frag->sg_entry.length = len;
IBOFFLOAD_VERBOSE(10, ("Setting ml frag lkey %u, "
"addr %p, qp_index %d, send value - %lf",
frag->sg_entry.lkey, frag->sg_entry.addr,
qp_index, *(double *) frag->sg_entry.addr));
return frag;
}
END_C_DECLS
#endif

View file

@ -1,451 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/types.h>
#include "bcol_iboffload.h"
#include "bcol_iboffload_mca.h"
#include "ompi/constants.h"
#include "ompi/mca/common/ofacm/base.h"
#include "ompi/communicator/communicator.h"
#include "opal/util/show_help.h"
/*
* Local flags
*/
enum {
REGINT_NEG_ONE_OK = 0x01,
REGINT_GE_ZERO = 0x02,
REGINT_GE_ONE = 0x04,
REGINT_NONZERO = 0x08,
REGINT_MAX = 0x88
};
enum {
REGSTR_EMPTY_OK = 0x01,
REGSTR_MAX = 0x88
};
mca_base_var_enum_value_t mtu_values[] = {
{IBV_MTU_256, "256B"},
{IBV_MTU_512, "512B"},
{IBV_MTU_1024, "1k"},
{IBV_MTU_2048, "2k"},
{IBV_MTU_4096, "4k"},
{0, NULL}
};
/*
* utility routine for string parameter registration
*/
static int reg_string(const char* param_name,
const char* deprecated_param_name,
const char* param_desc,
const char* default_value, char **storage,
int flags)
{
int index;
/* the MCA variable system will not attempt to modify this value */
*storage = (char *) default_value;
index = mca_base_component_var_register(&mca_bcol_iboffload_component.super.bcol_version,
param_name, param_desc, MCA_BASE_VAR_TYPE_STRING,
NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, storage);
if (NULL != deprecated_param_name) {
(void) mca_base_var_register_synonym(index, "ompi", "bcol", "iboffload", deprecated_param_name,
MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
}
if (0 != (flags & REGSTR_EMPTY_OK) && 0 == strlen(*storage)) {
opal_output(0, "Bad parameter value for parameter \"%s\"",
param_name);
return OMPI_ERR_BAD_PARAM;
}
return OMPI_SUCCESS;
}
/*
* utility routine for integer parameter registration
*/
static int reg_int(const char* param_name,
const char* deprecated_param_name,
const char* param_desc,
int default_value, int *storage, int flags)
{
int index;
*storage = default_value;
index = mca_base_component_var_register(&mca_bcol_iboffload_component.super.bcol_version,
param_name, param_desc, MCA_BASE_VAR_TYPE_INT,
NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, storage);
if (NULL != deprecated_param_name) {
(void) mca_base_var_register_synonym(index, "ompi", "bcol", "iboffload", deprecated_param_name,
MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
}
if (0 != (flags & REGINT_NEG_ONE_OK) && -1 == *storage) {
return OMPI_SUCCESS;
}
if ((0 != (flags & REGINT_GE_ZERO) && *storage < 0) ||
(0 != (flags & REGINT_GE_ONE) && *storage < 1) ||
(0 != (flags & REGINT_NONZERO) && 0 == *storage)) {
opal_output(0, "Bad parameter value for parameter \"%s\"",
param_name);
return OMPI_ERR_BAD_PARAM;
}
return OMPI_SUCCESS;
}
/*
* utility routine for integer parameter registration
*/
static int reg_bool(const char* param_name,
const char* deprecated_param_name,
const char* param_desc,
bool default_value, bool *storage)
{
int index;
*storage = default_value;
index = mca_base_component_var_register(&mca_bcol_iboffload_component.super.bcol_version,
param_name, param_desc, MCA_BASE_VAR_TYPE_BOOL,
NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, storage);
if (NULL != deprecated_param_name) {
(void) mca_base_var_register_synonym(index, "ompi", "bcol", "iboffload", deprecated_param_name,
MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
}
return OMPI_SUCCESS;
}
int mca_bcol_iboffload_verify_params(void)
{
if (mca_bcol_iboffload_component.min_rnr_timer > 31) {
opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
true, "bcol_iboffload_ib_min_rnr_timer > 31",
"bcol_iboffload_ib_min_rnr_timer reset to 31");
mca_bcol_iboffload_component.min_rnr_timer = 31;
} else if (mca_bcol_iboffload_component.min_rnr_timer < 0){
opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
true, "bcol_iboffload_ib_min_rnr_timer < 0",
"bcol_iboffload_ib_min_rnr_timer reset to 0");
mca_bcol_iboffload_component.min_rnr_timer = 0;
}
if (mca_bcol_iboffload_component.timeout > 31) {
opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
true, "bcol_iboffload_ib_timeout > 31",
"bcol_iboffload_ib_timeout reset to 31");
mca_bcol_iboffload_component.timeout = 31;
} else if (mca_bcol_iboffload_component.timeout < 0) {
opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
true, "bcol_iboffload_ib_timeout < 0",
"bcol_iboffload_ib_timeout reset to 0");
mca_bcol_iboffload_component.timeout = 0;
}
if (mca_bcol_iboffload_component.retry_count > 7) {
opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
true, "bcol_iboffload_ib_retry_count > 7",
"bcol_iboffload_ib_retry_count reset to 7");
mca_bcol_iboffload_component.retry_count = 7;
} else if (mca_bcol_iboffload_component.retry_count < 0) {
opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
true, "bcol_iboffload_ib_retry_count < 0",
"bcol_iboffload_ib_retry_count reset to 0");
mca_bcol_iboffload_component.retry_count = 0;
}
if (mca_bcol_iboffload_component.max_rdma_dst_ops > 7) {
opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
true, "bcol_iboffload_ib_rnr_retry > 7",
"bcol_iboffload_ib_rnr_retry reset to 7");
mca_bcol_iboffload_component.max_rdma_dst_ops = 7;
} else if (mca_bcol_iboffload_component.max_rdma_dst_ops < 0) {
opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
true, "bcol_iboffload_ib_rnr_retry < 0",
"bcol_iboffload_ib_rnr_retry reset to 0");
mca_bcol_iboffload_component.max_rdma_dst_ops = 0;
}
if (mca_bcol_iboffload_component.service_level > 15) {
opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
true, "bcol_iboffload_ib_service_level > 15",
"bcol_iboffload_ib_service_level reset to 15");
mca_bcol_iboffload_component.service_level = 15;
} else if (mca_bcol_iboffload_component.service_level < 0) {
opal_show_help("help-mpi-btl-openib.txt", "invalid mca param value",
true, "bcol_iboffload_ib_service_level < 0",
"bcol_iboffload_ib_service_level reset to 0");
mca_bcol_iboffload_component.service_level = 0;
}
if(mca_bcol_iboffload_component.buffer_alignment <= 1 ||
(mca_bcol_iboffload_component.buffer_alignment & (mca_bcol_iboffload_component.buffer_alignment - 1))) {
opal_show_help("help-mpi-btl-openib.txt", "wrong buffer alignment",
true, mca_bcol_iboffload_component.buffer_alignment, ompi_process_info.nodename, 64);
mca_bcol_iboffload_component.buffer_alignment = 64;
}
return OMPI_SUCCESS;
}
int mca_bcol_iboffload_register_params(void)
{
mca_base_var_enum_t *new_enum;
char *msg;
int ret = OMPI_SUCCESS, tmp;
#define CHECK(expr) do { \
tmp = (expr); \
if (OMPI_SUCCESS != tmp) ret = tmp; \
} while (0)
/* register openib component parameters */
CHECK(reg_int("k_nomial_radix", NULL,
"The radix of the K-nomial tree for scatther-gather type algorithms"
"(starts from 2)", 2, &mca_bcol_iboffload_component.k_nomial_radix,
REGINT_GE_ONE));
CHECK(reg_int("priority", NULL,
"IB offload component priority"
"(from 0(low) to 90 (high))", 90,
&mca_bcol_iboffload_component.super.priority, 0));
CHECK(reg_int("verbose", NULL,
"Output some verbose IB offload BTL information "
"(0 = no output, nonzero = output)", 0,
&mca_bcol_iboffload_component.verbose, 0));
CHECK(reg_bool("warn_default_gid_prefix", NULL,
"Warn when there is more than one active ports and at least one of them connected to the network with only default GID prefix configured (0 = do not warn; any other value = warn)",
true, &mca_bcol_iboffload_component.warn_default_gid_prefix));
CHECK(reg_bool("warn_nonexistent_if", NULL,
"Warn if non-existent devices and/or ports are specified in the bcol_iboffla_if_[in|ex]clude MCA parameters (0 = do not warn; any other value = warn)",
true, &mca_bcol_iboffload_component.warn_nonexistent_if));
CHECK(reg_int("max_pipeline_depth", NULL,
"The maximal number of fragments of the same collective request that can be transferred in parallel", 3,
(int *) &mca_bcol_iboffload_component.max_pipeline_depth, 0));
CHECK(reg_int("max_mqe_tasks", NULL,
"Maximum number of MQEs for each iboffload module",
1024, &mca_bcol_iboffload_component.max_mqe_tasks, 0));
CHECK(reg_int("max_mq_size", NULL,
"Maximum size of each MQ for each iboffload module",
1024, &mca_bcol_iboffload_component.max_mq_size, 0));
CHECK(reg_int("free_list_num", NULL,
"Intial size of free lists (must be >= 1)",
256, &mca_bcol_iboffload_component.free_list_num,
REGINT_GE_ONE));
CHECK(reg_int("free_list_max", NULL,
"Maximum size of free lists "
"(-1 = infinite, otherwise must be >= 0)",
-1, &mca_bcol_iboffload_component.free_list_max,
REGINT_NEG_ONE_OK | REGINT_GE_ONE));
CHECK(reg_int("free_list_inc", NULL,
"Increment size of free lists (must be >= 1)",
32, &mca_bcol_iboffload_component.free_list_inc,
REGINT_GE_ONE));
/* rdma mpool no longer exists - must use the grdma mpool component, should resolve errors in
* mtt testing
*/
/*
CHECK(reg_string("mpool", NULL,
"Name of the memory pool to be used (it is unlikely that you will ever want to change this",
"rdma", &mca_bcol_iboffload_component.mpool_name,
0));
*/
CHECK(reg_string("mpool", NULL,
"Name of the memory pool to be used (it is unlikely that you will ever want to change this",
"grdma", &mca_bcol_iboffload_component.mpool_name,
0));
CHECK(reg_int("cq_size", "cq_size",
"Size of the OpenFabrics completion "
"queue (will automatically be set to a minimum of "
"(2 * number_of_peers * bcol_iboffload_rd_num))",
1024, &mca_bcol_iboffload_component.cq_size, REGINT_GE_ONE));
CHECK(reg_int("exchange_tree_order", NULL,
"The order of the exchange tree. "
"Must be power of two.",
2, &mca_bcol_iboffload_component.exchange_tree_order, REGINT_GE_ONE));
CHECK(reg_int("knomial_tree_order", NULL,
"The order of the knomial exchange tree. ",
3, &mca_bcol_iboffload_component.knomial_tree_order, REGINT_GE_ONE));
CHECK(reg_int("max_inline_data", "max_inline_data",
"Maximum size of inline data segment "
"(-1 = run-time probe to discover max value, "
"otherwise must be >= 0). "
"If not explicitly set, use max_inline_data from "
"the INI file containing device-specific parameters",
128, (int *) &mca_bcol_iboffload_component.max_inline_data,
REGINT_NEG_ONE_OK | REGINT_GE_ZERO));
#if 0
CHECK(reg_string("pkey", "ib_pkey_val",
"OpenFabrics partition key (pkey) value. "
"Unsigned integer decimal or hex values are allowed (e.g., \"3\" or \"0x3f\") and will be masked against the maximum allowable IB paritition key value (0x7fff)",
"0", &pkey, 0));
/* Pasha
mca_bcol_iboffload_component.pkey_val =
ompi_btl_openib_ini_intify(pkey) & MCA_BTL_IB_PKEY_MASK;
free(pkey);
*/
#endif
CHECK(reg_string("receive_queues", NULL,
"Colon-delimited, comma delimited list of receive queues: P,4096,8,6,4:P,32768,8,6,4",
"P,512,256,192,128", &mca_bcol_iboffload_component.receive_queues,
0));
CHECK(reg_int("qp_ous_rd_atom", NULL,
"InfiniBand outstanding atomic reads (must be >= 0)", 4,
(int *) &mca_bcol_iboffload_component.qp_ous_rd_atom, REGINT_GE_ZERO));
asprintf(&msg, "OpenFabrics MTU, in bytes (if not specified in INI files). Valid values are: %d=256 bytes, %d=512 bytes, %d=1024 bytes, %d=2048 bytes, %d=4096 bytes",
IBV_MTU_256,
IBV_MTU_512,
IBV_MTU_1024,
IBV_MTU_2048,
IBV_MTU_4096);
if (NULL == msg) {
/* Don't try to recover from this */
return OMPI_ERR_OUT_OF_RESOURCE;
}
CHECK(mca_base_var_enum_create("infiniband mtu", mtu_values, &new_enum));
mca_bcol_iboffload_component.mtu = IBV_MTU_1024;
tmp = mca_base_component_var_register(&mca_bcol_iboffload_component.super.bcol_version,
"mtu", msg, MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&mca_bcol_iboffload_component.mtu);
OBJ_RELEASE(new_enum);
free(msg);
if (0 > tmp) ret = tmp;
tmp = mca_base_var_register_synonym(tmp, "ompi", "bcol", "iboffload", "ib_mtu",
MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
if (0 > tmp) ret = tmp;
CHECK(reg_int("ib_min_rnr_timer", NULL, "InfiniBand minimum "
"\"receiver not ready\" timer, in seconds "
"(must be >= 0 and <= 31)",
1 , &mca_bcol_iboffload_component.min_rnr_timer, 0));
CHECK(reg_int("ib_timeout", NULL, "InfiniBand transmit timeout, plugged into formula: 4.096 microseconds * "
"(2^bcol_iboffload_ib_timeout) (must be >= 0 and <= 31)",
20, &mca_bcol_iboffload_component.timeout, 0));
CHECK(reg_int("ib_retry_count", NULL, "InfiniBand transmit retry count "
"(must be >= 0 and <= 7)",
7, &mca_bcol_iboffload_component.retry_count, 0));
CHECK(reg_int("ib_rnr_retry", NULL, "InfiniBand \"receiver not ready\" "
"retry count; applies *only* to SRQ/XRC queues. PP queues "
"use RNR retry values of 0 because Open MPI performs "
"software flow control to guarantee that RNRs never occur "
"(must be >= 0 and <= 7; 7 = \"infinite\")",
7, &mca_bcol_iboffload_component.rnr_retry, 0));
CHECK(reg_int("ib_max_rdma_dst_ops", NULL, "InfiniBand maximum pending RDMA "
"destination operations "
"(must be >= 0)",
4, &mca_bcol_iboffload_component.max_rdma_dst_ops, REGINT_GE_ZERO));
CHECK(reg_int("ib_service_level", NULL, "InfiniBand service level "
"(must be >= 0 and <= 15)",
0, &mca_bcol_iboffload_component.service_level, 0));
CHECK(reg_int("buffer_alignment", NULL,
"Prefered communication buffer alignment, in bytes "
"(must be > 0 and power of two)",
64, &mca_bcol_iboffload_component.buffer_alignment, REGINT_GE_ZERO));
/* register parameters controlling message fragmentation */
CHECK(reg_int("min_frag_size", NULL,
"Minimum fragment size",
getpagesize(), &mca_bcol_iboffload_component.super.min_frag_size,
REGINT_GE_ONE));
CHECK(reg_int("max_frag_size", NULL,
"Maximum fragment size",
FRAG_SIZE_NO_LIMIT, &mca_bcol_iboffload_component.super.max_frag_size,
REGINT_NONZERO));
CHECK(reg_bool("can_use_user_buffers", NULL,
"User memory can be used by the collective algorithms",
true, &mca_bcol_iboffload_component.super.can_use_user_buffers));
CHECK(reg_int("barrier_mode", NULL,
"Barrier mode: 0 - Recursive doubling; 1 - Recursive K-ing",
0, &mca_bcol_iboffload_component.barrier_mode, REGINT_GE_ZERO));
CHECK(reg_int("max_progress_pull", NULL,
"Max number of progress pull checks",
8, &mca_bcol_iboffload_component.max_progress_pull, REGINT_GE_ZERO));
CHECK(reg_int("use_brucks_smsg_alltoall_rdma", NULL,
"Use brucks algorithm for smsg alltoall and RDMA semantics 1 = No Temp buffer recycling"
"1 = Alg with no Temp Buffer Recycling (faster), 2 = Alg with temp Buffer Recycling (slower)",
0, &mca_bcol_iboffload_component.use_brucks_smsg_alltoall_rdma, 0));
CHECK(reg_int("use_brucks_smsg_alltoall_sr", NULL,
"Use brucks algorithm for smsg alltoall and Send/Recv semantics "
"1 = Alg with RTR (faster), 2 = Alg with RNR (slower)",
0, &mca_bcol_iboffload_component.use_brucks_smsg_alltoall_sr, 0));
CHECK(reg_int("alltoall_bruck_radix", NULL,
"Radix for Bruck algorithm for smsg alltoall",
3, &mca_bcol_iboffload_component.k_alltoall_bruck_radix, 0));
CHECK(reg_int("k_alltoall_bruck_radix", NULL,
"Temp Buffer alignment for Bruck algorithm for smsg alltoall",
64, &mca_bcol_iboffload_component.tmp_buf_alignment, 0));
/*
CHECK(reg_string("if_include", NULL,
"Comma-delimited list of devices/ports to be used (e.g. \"mthca0,mthca1:2\"; empty value means to use all ports found). Mutually exclusive with bcol_iboffload_if_exclude.",
NULL, &mca_bcol_iboffload_component.if_include,
0));
CHECK(reg_string("if_exclude", NULL,
"Comma-delimited list of device/ports to be excluded (empty value means to not exclude any ports). Mutually exclusive with bcol_iboffload_if_include.",
NULL, &mca_bcol_iboffload_component.if_exclude,
0));
*/
CHECK(mca_bcol_iboffload_verify_params());
/* Register any MCA params for the connect pseudo-components */
if (OMPI_SUCCESS == ret) {
ret = ompi_common_ofacm_base_register(&mca_bcol_iboffload_component.super.bcol_version);
}
return ret;
}

View file

@ -1,20 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file */
#ifndef MCA_BCOL_IBOFFLOAD_MCA_H
#define MCA_BCOL_IBOFFLOAD_MCA_H
#include "ompi_config.h"
int mca_bcol_iboffload_register_params(void);
int mca_bcol_iboffload_verify_params(void);
#endif

The diff for this file is not shown because of its large size.

View file

@ -1,452 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <infiniband/mqe.h>
#include <infiniband/verbs.h>
#include <infiniband/mverbs.h>
#include "bcol_iboffload_frag.h"
#include "bcol_iboffload_device.h"
#include "bcol_iboffload_qp_info.h"
#include "bcol_iboffload_collreq.h"
#include "bcol_iboffload_endpoint.h"
static int mca_bcol_iboffload_dummy_frag_qp_prepost(
mca_bcol_iboffload_endpoint_t *endpoint,
int qp_index, int num_to_prepost)
{
struct ibv_recv_wr *recv_wr, *recv_bad;
int ret, num_preposted = 0, start_wr_index;
mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;
mca_bcol_iboffload_recv_wr_manager *recv_wrs = &cm->recv_wrs;
IBOFFLOAD_VERBOSE(10, ("Recv prepost call: endpoint %p, to prepost %d",
(void *) endpoint, num_to_prepost));
if (OPAL_UNLIKELY(0 == num_to_prepost)) {
IBOFFLOAD_VERBOSE(10, ("num_to_prepost = 0, return immediate"));
return OMPI_SUCCESS;
}
/* make sure that we do not overrun number of rd_wqe */
if (num_to_prepost > endpoint->qps[qp_index].rd_wqe) {
IBOFFLOAD_VERBOSE(10, ("Reset num_to_prepost = %d, to rd_wqe = %d",
num_to_prepost, endpoint->qps[qp_index].rd_wqe));
num_to_prepost = endpoint->qps[qp_index].rd_wqe;
}
OPAL_THREAD_LOCK(&recv_wrs->lock);
/* calculate start index in array
* of pre-allocated work requests */
start_wr_index = cm->qp_infos[qp_index].rd_num - num_to_prepost;
recv_wr = &recv_wrs->recv_work_requests[qp_index][start_wr_index];
IBOFFLOAD_VERBOSE(10, ("Endpoint %p, qp_index - %d, to_porepost %d, "
"start index of WRs - %d, rd_wqe - %d",
(void *) endpoint, qp_index, num_to_prepost,
start_wr_index, endpoint->qps[qp_index].rd_wqe));
while (num_preposted < num_to_prepost) {
/* prepost the special barrier frag to recv queue */
struct ibv_sge *dummy_sg_entry =
&endpoint->iboffload_module->device->dummy_frags[qp_index].sg_entry;
recv_wr[num_preposted].sg_list = dummy_sg_entry;
++num_preposted;
}
if (OPAL_LIKELY(num_preposted > 0)) {
/* Set the tail */
recv_wr[num_preposted - 1].next = NULL;
/* post the list of recvs */
ret = ibv_post_recv(endpoint->qps[qp_index].qp->lcl_qp, recv_wr, &recv_bad);
if (OPAL_UNLIKELY(0 != ret)) {
IBOFFLOAD_ERROR(("ibv_post_recv failed, error: %s [%d], "
"qp_index - %d.\n", strerror(errno), ret, qp_index));
return OMPI_ERROR;
}
/* recover last recv_wr if needed */
if (OPAL_UNLIKELY(num_to_prepost != num_preposted)) {
recv_wr[num_preposted - 1].next = &recv_wr[num_preposted];
}
/* decreasing the number of free recv WQEs */
endpoint->qps[qp_index].rd_wqe -= num_preposted;
}
OPAL_THREAD_UNLOCK(&recv_wrs->lock);
IBOFFLOAD_VERBOSE(10, ("Endpoint %p, to_porepost %d, num preposted - %d, qp_index - %d",
(void *) endpoint, num_to_prepost, num_preposted, qp_index));
return OMPI_SUCCESS;
}
/*
* Receive prepost for regular fragments:
* returns OMPI_SUCCESS after preposting as many receives as the available
* credits and free fragments allow (possibly none), or OMPI_ERROR on a
* fatal ibv_post_recv failure.
*/
static int mca_bcol_iboffload_frag_reg_qp_prepost(
mca_bcol_iboffload_endpoint_t *endpoint,
int qp_index, int num_to_prepost)
{
ompi_free_list_item_t *item;
mca_bcol_iboffload_frag_t *frag;
struct ibv_recv_wr *recv_wr, *recv_bad;
int i, ret, num_preposted = 0, start_wr_index;
mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;
mca_bcol_iboffload_device_t *device = endpoint->iboffload_module->device;
opal_list_t *preposted = &(endpoint->qps[qp_index].preposted_frags);
mca_bcol_iboffload_recv_wr_manager *recv_wrs = &cm->recv_wrs;
IBOFFLOAD_VERBOSE(10, ("Recv prepost call: endpoint %p, to prepost %d",
(void *) endpoint, num_to_prepost));
if (OPAL_UNLIKELY(0 == num_to_prepost)) {
IBOFFLOAD_VERBOSE(10, ("num_to_prepost = 0, return immediate"));
return OMPI_SUCCESS;
}
/* make sure that we do not overrun number of rd_wqe */
if (num_to_prepost > endpoint->qps[qp_index].rd_wqe) {
IBOFFLOAD_VERBOSE(10, ("Reset num_to_prepost = %d, to rd_wqe = %d",
num_to_prepost, endpoint->qps[qp_index].rd_wqe));
num_to_prepost = endpoint->qps[qp_index].rd_wqe;
}
OPAL_THREAD_LOCK(&recv_wrs->lock);
/* calculate start index in array
* of pre-allocated work requests */
start_wr_index = cm->qp_infos[qp_index].rd_num - num_to_prepost;
recv_wr = &recv_wrs->recv_work_requests[qp_index][start_wr_index];
IBOFFLOAD_VERBOSE(10, ("Endpoint %p, qp_index - %d, to_porepost %d, "
"start index of WRs - %d, rd_wqe - %d",
(void *) endpoint, qp_index, num_to_prepost,
start_wr_index, endpoint->qps[qp_index].rd_wqe));
while (num_preposted < num_to_prepost) {
/* put the item on list of preposted */
OMPI_FREE_LIST_GET_MT(&device->frags_free[qp_index], item);
if (OPAL_UNLIKELY(NULL == item)) {
break;
}
frag = (mca_bcol_iboffload_frag_t *) item;
opal_list_append(preposted, (opal_list_item_t *) item);
recv_wr[num_preposted].sg_list = &frag->sg_entry;
/* TODO (Pasha): fix later */ /* Vasily: is this the right place to take the size value? */
frag->sg_entry.length = cm->qp_infos[qp_index].size;
++num_preposted;
}
if (OPAL_LIKELY(num_preposted > 0)) {
/* Set the tail */
recv_wr[num_preposted - 1].next = NULL;
/* post the list of recvs */
ret = ibv_post_recv(endpoint->qps[qp_index].qp->lcl_qp, recv_wr, &recv_bad);
if (OPAL_UNLIKELY(0 != ret)) {
IBOFFLOAD_ERROR(("ibv_post_recv failed (%s), error: %s [%d], "
"qp_index - %d.\n",
ibv_get_device_name(device->dev.ib_dev),
strerror(errno), ret, qp_index));
/* Return allocated frags */
for (i = 0; i < num_preposted; i++) {
OMPI_FREE_LIST_RETURN_MT(&device->frags_free[qp_index],
(ompi_free_list_item_t *)
opal_list_remove_last(preposted));
}
return OMPI_ERROR;
}
/* recover last recv_wr if needed */
if (OPAL_UNLIKELY(num_to_prepost != num_preposted)) {
recv_wr[num_preposted - 1].next = &recv_wr[num_preposted];
}
/* decreasing the number of free recv WQEs */
endpoint->qps[qp_index].rd_wqe -= num_preposted;
}
OPAL_THREAD_UNLOCK(&recv_wrs->lock);
IBOFFLOAD_VERBOSE(10, ("Endpoint %p, to_porepost %d, num preposted - %d",
(void *) endpoint, num_to_prepost, num_preposted));
return OMPI_SUCCESS;
}
static void mca_bcol_iboffload_fillin_qp_attr(int qp_index,
mca_bcol_iboffload_endpoint_t *ep,
ompi_common_ofacm_base_qp_config_t *qp_config)
{
uint32_t max_sge, *init_attr_mask =
&qp_config->init_attr_mask[qp_index];
struct ibv_qp_attr *attr = &qp_config->attr[qp_index];
struct ibv_qp_init_attr *init_attr = &qp_config->init_attr[qp_index];
mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;
/* Set special init attributes mask */
*init_attr_mask = IBV_M_QP_EXT_CLASS_1 |
IBV_M_QP_EXT_CLASS_2 |
IBV_M_QP_EXT_IGNORE_RQ_OVERFLOW;
/* Set init attributes */
init_attr->qp_type = IBV_QPT_RC;
/* Vasily: ??????
init_attr->cap.max_inline_data =
max_inline_size(qp, iboffload_module->device);
*/
/* Pasha: we can not leave max_inline empty !
Todo: copy max_inline_size() from ofacm to
common area.
*/
init_attr->cap.max_inline_data = (int32_t) cm->max_inline_data;
/* We allocate SG list for some algorithms (Bruck's alltoall) */
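/* i.e. max_sge = ceil(group_size / 2) */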
max_sge = ep->iboffload_module->group_size / 2 +
ep->iboffload_module->group_size % 2;
/* max send sge should be less than device maximums */
if (max_sge > (uint32_t)
ep->iboffload_module->device->ib_dev_attr.max_sge) {
max_sge = (uint32_t) ep->iboffload_module->device->ib_dev_attr.max_sge;
}
init_attr->cap.max_send_sge = max_sge;
init_attr->cap.max_recv_sge = max_sge;
/* Vasily: the value will be changed later */
/* TODO Pasha: this is real crap */
init_attr->cap.max_recv_wr = (uint32_t) cm->cq_size;
init_attr->cap.max_send_wr = (uint32_t) cm->cq_size;
/* Set attributes */
/* attr->pkey_index = 0; */ /* Vasily: ????? */
attr->port_num = ep->iboffload_module->port;
/* Vasily: the value will be changed later */
attr->path_mtu = (uint32_t)cm->mtu;
attr->max_dest_rd_atomic = cm->max_rdma_dst_ops;
attr->min_rnr_timer = (uint32_t)cm->min_rnr_timer;
attr->ah_attr.is_global = 0;
attr->ah_attr.sl = (uint32_t)cm->service_level;
/* Vasily: from struct mca_bcol_iboffload_port_t ????? */
/*
attr->ah_attr.src_path_bits = iboffload_module->src_path_bits;
*/
attr->ah_attr.port_num = ep->iboffload_module->port;
/* JMS to be filled in later dynamically */
attr->ah_attr.static_rate = 0;
/* RTS params */
attr->timeout = (uint32_t)cm->timeout;
attr->retry_cnt = (uint32_t)cm->retry_count;
attr->rnr_retry = (uint32_t)cm->rnr_retry;
attr->max_rd_atomic = (uint32_t)cm->max_rdma_dst_ops;
/* Init for local mca_bcol_iboffload_endpoint_qp_t qps structure
* that caches the qp information on endpoint */
OBJ_CONSTRUCT(&ep->qps[qp_index].preposted_frags, opal_list_t);
/* Pasha: Need to add function that will */
ep->qps[qp_index].ib_inline_max = cm->max_inline_data;
/* TODO Pasha - this is crap too... we do not have info for service qps. Fix it later */
ep->qps[qp_index].sd_wqe = cm->qp_infos[qp_index].rd_num;
ep->qps[qp_index].rd_wqe = cm->qp_infos[qp_index].rd_num;
IBOFFLOAD_VERBOSE(10, ("ep - %p, qp index - %d, num of rd_wqe - %d.",
ep, qp_index, ep->qps[qp_index].rd_wqe));
}
static int mca_bcol_iboffload_alloc_reg_qp_resource(int qp_index, mca_bcol_iboffload_device_t *device)
{
int length;
mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;
ompi_free_list_t *frags_free = &device->frags_free[qp_index];
OBJ_CONSTRUCT(frags_free, ompi_free_list_t);
length = cm->qp_infos[qp_index].size;
IBOFFLOAD_VERBOSE(10, ("free list len %d\n", length));
if (OMPI_SUCCESS != ompi_free_list_init_ex_new(frags_free,
sizeof(mca_bcol_iboffload_frag_t), MCA_IBOFFLOAD_CACHE_LINE_SIZE,
OBJ_CLASS(mca_bcol_iboffload_frag_t),
length, cm->buffer_alignment,
cm->free_list_num,
cm->free_list_max,
cm->free_list_inc,
device->mpool,
mca_bcol_iboffload_frag_init,
(void *) &cm->qp_infos[qp_index].qp_index)) {
IBOFFLOAD_ERROR(("Failed to allocate frags_free"));
return OMPI_ERROR;
}
return OMPI_SUCCESS;
}
static int mca_bcol_iboffload_dealloc_reg_qp_resource(int qp_index, mca_bcol_iboffload_device_t *device)
{
OBJ_DESTRUCT(&device->frags_free[qp_index]);
return OMPI_SUCCESS;
}
static mca_bcol_iboffload_frag_t *mca_bcol_iboffload_get_dummy_frag(
mca_bcol_iboffload_endpoint_t *ep, int qp_index)
{
return &ep->iboffload_module->device->dummy_frags[qp_index];
}
static mca_bcol_iboffload_frag_t *mca_bcol_iboffload_endpoint_get_preposted_frag(
mca_bcol_iboffload_endpoint_t *ep, int qp_index)
{
return (mca_bcol_iboffload_frag_t *)
opal_list_remove_first(&ep->qps[qp_index].preposted_frags);
}
static void mca_bcol_iboffload_regular_qp_attr(int qp_index,
mca_bcol_iboffload_endpoint_t *ep,
ompi_common_ofacm_base_qp_config_t *qp_config)
{
struct ibv_qp_init_attr *init_attr = &qp_config->init_attr[qp_index];
mca_bcol_iboffload_fillin_qp_attr(qp_index, ep, qp_config);
init_attr->send_cq = ep->iboffload_module->device->ib_cq;
init_attr->recv_cq = ep->recv_cq[IBOFFLOAD_CQ_SMALL_MESSAGES];
}
static void mca_bcol_iboffload_large_buff_qp_attr(int qp_index,
mca_bcol_iboffload_endpoint_t *ep,
ompi_common_ofacm_base_qp_config_t *qp_config)
{
struct ibv_qp_init_attr *init_attr = &qp_config->init_attr[qp_index];
mca_bcol_iboffload_fillin_qp_attr(qp_index, ep, qp_config);
init_attr->send_cq = ep->iboffload_module->device->ib_cq;
init_attr->recv_cq = ep->recv_cq[IBOFFLOAD_CQ_LARGE_MESSAGES];
}
static void mca_bcol_iboffload_sync_qp_attr(int qp_index,
mca_bcol_iboffload_endpoint_t *ep,
ompi_common_ofacm_base_qp_config_t *qp_config)
{
struct ibv_qp_init_attr *init_attr = &qp_config->init_attr[qp_index];
mca_bcol_iboffload_fillin_qp_attr(qp_index, ep, qp_config);
init_attr->send_cq = ep->iboffload_module->device->ib_cq;
init_attr->recv_cq = ep->recv_cq[IBOFFLOAD_CQ_SYNC];
}
static int mca_bcol_iboffload_setup_barrier_qp(mca_bcol_iboffload_qp_info_t* qp_info)
{
qp_info->config_qp = mca_bcol_iboffload_regular_qp_attr;
qp_info->prepost_recv = mca_bcol_iboffload_dummy_frag_qp_prepost;
qp_info->alloc_resource = NULL;
qp_info->dealloc_resource = NULL;
qp_info->get_preposted_recv = mca_bcol_iboffload_get_dummy_frag;
return OMPI_SUCCESS;
}
static int mca_bcol_iboffload_setup_regular_qp(mca_bcol_iboffload_qp_info_t* qp_info)
{
qp_info->config_qp = mca_bcol_iboffload_regular_qp_attr;
qp_info->prepost_recv = mca_bcol_iboffload_frag_reg_qp_prepost;
qp_info->alloc_resource = mca_bcol_iboffload_alloc_reg_qp_resource;
qp_info->dealloc_resource = mca_bcol_iboffload_dealloc_reg_qp_resource;
qp_info->get_preposted_recv = mca_bcol_iboffload_endpoint_get_preposted_frag;
return OMPI_SUCCESS;
}
static int mca_bcol_iboffload_setup_large_buff_qp(mca_bcol_iboffload_qp_info_t* qp_info)
{
qp_info->config_qp = mca_bcol_iboffload_large_buff_qp_attr;
qp_info->prepost_recv = NULL; /* We use "manual" ML frag preposting for this QP */
qp_info->alloc_resource = NULL;
qp_info->dealloc_resource = NULL;
qp_info->get_preposted_recv = NULL;
return OMPI_SUCCESS;
}
static int mca_bcol_iboffload_setup_credit_qp(mca_bcol_iboffload_qp_info_t* qp_info)
{
qp_info->config_qp = mca_bcol_iboffload_large_buff_qp_attr;
qp_info->prepost_recv = mca_bcol_iboffload_dummy_frag_qp_prepost;
qp_info->alloc_resource = NULL;
qp_info->dealloc_resource = NULL;
qp_info->get_preposted_recv = mca_bcol_iboffload_get_dummy_frag;
return OMPI_SUCCESS;
}
static int mca_bcol_iboffload_setup_sync_qp(mca_bcol_iboffload_qp_info_t* qp_info)
{
qp_info->config_qp = mca_bcol_iboffload_sync_qp_attr;
qp_info->prepost_recv = mca_bcol_iboffload_dummy_frag_qp_prepost;
qp_info->alloc_resource = NULL;
qp_info->dealloc_resource = NULL;
qp_info->get_preposted_recv = mca_bcol_iboffload_get_dummy_frag;
return OMPI_SUCCESS;
}
mca_bcol_iboffload_setup_qps_fn_t setup_qps_fn[MCA_BCOL_IBOFFLOAD_QP_LAST] = {
mca_bcol_iboffload_setup_barrier_qp, /* MCA_BCOL_IBOFFLOAD_QP_BARRIER */
mca_bcol_iboffload_setup_regular_qp, /* MCA_BCOL_IBOFFLOAD_QP_REGULAR */
mca_bcol_iboffload_setup_sync_qp, /* MCA_BCOL_IBOFFLOAD_QP_SYNC */
mca_bcol_iboffload_setup_credit_qp, /* MCA_BCOL_IBOFFLOAD_QP_CREDIT */
mca_bcol_iboffload_setup_large_buff_qp, /* MCA_BCOL_IBOFFLOAD_QP_LARGE_BUFF */
/* MCA_BCOL_IBOFFLOAD_QP_LAST */
};

View file

@ -1,127 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/*
* In order to add a new QP you need to do the following steps:
*
* 1) Add a new index, e.g. MCA_BCOL_IBOFFLOAD_QP_NEW_QP,
* to the enum listing all the QPs.
*
* 2) In the setup_qps_fn array, initialize the MCA_BCOL_IBOFFLOAD_QP_NEW_QP
* index with your init function for this QP.
*
* 3) In the init function you added, initialize the following function pointers:
* a) config_qp - fill in the ibv_qp_init_attr structure
* that will be used to create this QP.
*
* b) prepost_recv - specify this pointer if you want receive
* preposting to be executed automatically for your new QP.
*
* c) alloc_resource - called during device activation; if you need any
* device resource (a list of frags, for example) for your new QP,
* this is the right place to allocate it.
*
* d) dealloc_resource - if any resource was allocated dynamically
* by the alloc_resource function, destruct it here.
*
* e) get_preposted_recv - returns a preposted receive for a 'wait task'.
*
* If you don't need any of these functions, initialize the corresponding
* pointer to NULL.
*/
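/*
* A minimal sketch of step 3 above, assuming a hypothetical new index
* MCA_BCOL_IBOFFLOAD_QP_NEW_QP that simply reuses the existing regular-QP
* helpers (the function name below is illustrative only):
*
*   static int mca_bcol_iboffload_setup_new_qp(mca_bcol_iboffload_qp_info_t *qp_info)
*   {
*       qp_info->config_qp = mca_bcol_iboffload_regular_qp_attr;
*       qp_info->prepost_recv = mca_bcol_iboffload_frag_reg_qp_prepost;
*       qp_info->alloc_resource = mca_bcol_iboffload_alloc_reg_qp_resource;
*       qp_info->dealloc_resource = mca_bcol_iboffload_dealloc_reg_qp_resource;
*       qp_info->get_preposted_recv = mca_bcol_iboffload_endpoint_get_preposted_frag;
*       return OMPI_SUCCESS;
*   }
*
* The MCA_BCOL_IBOFFLOAD_QP_NEW_QP entry of setup_qps_fn (see
* bcol_iboffload_qp_info.c) would then point at this function.
*/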
#ifndef MCA_BCOL_IBOFFLOAD_QP_INFO_H
#define MCA_BCOL_IBOFFLOAD_QP_INFO_H
#include "ompi_config.h"
BEGIN_C_DECLS
/* forward declarations */
struct mca_bcol_iboffload_device_t;
struct mca_bcol_iboffload_collreq_t;
struct mca_bcol_iboffload_qp_info_t;
struct mca_bcol_iboffload_endpoint_t;
/* The list of the all required QPs */
enum {
MCA_BCOL_IBOFFLOAD_QP_BARRIER,
MCA_BCOL_IBOFFLOAD_QP_REGULAR,
MCA_BCOL_IBOFFLOAD_QP_SYNC,
MCA_BCOL_IBOFFLOAD_QP_CREDIT,
MCA_BCOL_IBOFFLOAD_QP_LARGE_BUFF,
MCA_BCOL_IBOFFLOAD_QP_LAST
};
typedef enum {
MCA_BCOL_IBOFFLOAD_PP_QP,
MCA_BCOL_IBOFFLOAD_SRQ_QP,
MCA_BCOL_IBOFFLOAD_XRC_QP
} mca_bcol_iboffload_qp_type_t;
struct mca_bcol_iboffload_pp_qp_info_t {
int32_t rd_win;
int32_t rd_rsv;
}; typedef struct mca_bcol_iboffload_pp_qp_info_t mca_bcol_iboffload_pp_qp_info_t;
struct mca_bcol_iboffload_srq_qp_info_t {
int32_t sd_max;
}; typedef struct mca_bcol_iboffload_srq_qp_info_t mca_bcol_iboffload_srq_qp_info_t;
typedef int (*mca_bcol_iboffload_setup_qps_fn_t) (struct mca_bcol_iboffload_qp_info_t*);
typedef int (*mca_bcol_iboffload_prepost_qps_fn_t)
(struct mca_bcol_iboffload_endpoint_t *endpoint,
int qp_index, int num_to_prepost);
typedef void (*mca_bcol_iboffload_config_qps_fn_t)
(int qp_index,
struct mca_bcol_iboffload_endpoint_t *ep,
ompi_common_ofacm_base_qp_config_t *qp_config);
typedef int (*mca_bcol_iboffload_alloc_qps_resource_fn_t)
(int qp_index,
struct mca_bcol_iboffload_device_t *device);
typedef int (*mca_bcol_iboffload_dealloc_qps_resource_fn_t)
(int qp_index,
struct mca_bcol_iboffload_device_t *device);
typedef struct mca_bcol_iboffload_frag_t* (*mca_bcol_iboffload_get_preposted_recv_fn_t)
(struct mca_bcol_iboffload_endpoint_t *ep, int qp_index);
struct mca_bcol_iboffload_qp_info_t {
size_t size;
int32_t rd_num;
int32_t rd_low;
int32_t rd_pp_win; /* prepost window = rd_num - rd_low */
int qp_index;
mca_bcol_iboffload_qp_type_t type;
mca_bcol_iboffload_config_qps_fn_t config_qp;
mca_bcol_iboffload_prepost_qps_fn_t prepost_recv;
mca_bcol_iboffload_alloc_qps_resource_fn_t alloc_resource;
mca_bcol_iboffload_dealloc_qps_resource_fn_t dealloc_resource;
mca_bcol_iboffload_get_preposted_recv_fn_t get_preposted_recv;
union {
mca_bcol_iboffload_pp_qp_info_t pp_qp;
mca_bcol_iboffload_srq_qp_info_t srq_qp;
} u;
}; typedef struct mca_bcol_iboffload_qp_info_t mca_bcol_iboffload_qp_info_t;
extern mca_bcol_iboffload_setup_qps_fn_t setup_qps_fn[MCA_BCOL_IBOFFLOAD_QP_LAST];
END_C_DECLS
#endif /* MCA_BCOL_IBOFFLOAD_QP_INFO_H */

View file

@ -1,81 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "bcol_iboffload.h"
#include "bcol_iboffload_frag.h"
#include "bcol_iboffload_task.h"
static void task_constructor(mca_bcol_iboffload_task_t *task)
{
task->frag = NULL;
task->collfrag = NULL;
task->endpoint = NULL;
task->next_task = NULL;
task->sg_entries = NULL;
task->sg_entries_num = 0;
task->task_list = NULL;
memset(&task->wr, 0, sizeof(task->wr));
memset(&task->element, 0, sizeof(struct mqe_task));
memset(&task->task_mqe_qp_entry, 0, sizeof(struct mqe_qp_entry));
}
static void task_destructor(mca_bcol_iboffload_task_t *task)
{
if (NULL != task->sg_entries) {
free(task->sg_entries);
}
}
OBJ_CLASS_INSTANCE(
mca_bcol_iboffload_task_t,
ompi_free_list_item_t,
task_constructor,
task_destructor);
void
mca_bcol_iboffload_calc_task_init(ompi_free_list_item_t* item, void* ctx)
{
mca_bcol_iboffload_task_t *calc_task =
(mca_bcol_iboffload_task_t *) item;
calc_task->task_list = (ompi_free_list_t *) ctx;
calc_task->sg_entries_num = 2;
calc_task->sg_entries = (struct ibv_sge *) malloc (2 * sizeof(struct ibv_sge));
}
void
mca_bcol_iboffload_iovec_task_init(ompi_free_list_item_t* item, void* ctx)
{
mca_bcol_iboffload_task_t *iovec_task =
(mca_bcol_iboffload_task_t *) item;
mca_bcol_iboffload_module_t *iboffload_module =
(mca_bcol_iboffload_module_t *) ctx;
int nitems, group_size = iboffload_module->group_size;
nitems = group_size / 2 + group_size % 2;
if (nitems > iboffload_module->device->ib_dev_attr.max_sge) {
nitems = iboffload_module->device->ib_dev_attr.max_sge;
}
iovec_task->sg_entries_num = nitems;
iovec_task->task_list = &iboffload_module->iovec_tasks_free;
iovec_task->sg_entries = (struct ibv_sge *)
malloc(nitems * sizeof(struct ibv_sge));
}
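The sizing rule in mca_bcol_iboffload_iovec_task_init above (half the group size, rounded up, but never more than the device's max_sge) can be checked in isolation. The standalone sketch below simply restates that arithmetic with a few assumed group sizes; it is not part of the original file.

#include <assert.h>

/* ceil(group_size / 2), capped at the device's max_sge (same rule as above) */
static int iovec_sge_count(int group_size, int max_sge)
{
    int nitems = group_size / 2 + group_size % 2;
    return (nitems > max_sge) ? max_sge : nitems;
}

int main(void)
{
    assert(4  == iovec_sge_count(7,  32));   /* odd group size rounds up      */
    assert(8  == iovec_sge_count(16, 32));
    assert(30 == iovec_sge_count(100, 30));  /* clamped by the device maximum */
    return 0;
}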

View file

@ -1,613 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BCOL_IBOFFLOAD_TASK_H
#define MCA_BCOL_IBOFFLOAD_TASK_H
#include "ompi_config.h"
#include <infiniband/verbs.h>
#include <infiniband/mverbs.h>
#include <infiniband/mqe.h>
#include "bcol_iboffload.h"
#include "bcol_iboffload_frag.h"
#include "bcol_iboffload_collreq.h"
#include "bcol_iboffload_endpoint.h"
#include "bcol_iboffload_collfrag.h"
#define SENDWR(task) ((task)->element.post.send_wr)
BEGIN_C_DECLS
/* the mca_bcol_ibv_mwr_task_t name was replaced with mca_bcol_iboffload_task_t */
struct mca_bcol_iboffload_task_t {
ompi_free_list_item_t super;
/* pointer to the memory descriptor associated with the task */
mca_bcol_iboffload_frag_t *frag;
/* pointer to the bcol descriptor,
* we need it for send tasks only because we complete them in an async manner
*/
mca_bcol_iboffload_collfrag_t *collfrag;
/* task to be posted */
struct mqe_task element;
/* allocate ibv_sge structs array - in a CALC case
* for example it will have two entries.
*/
struct ibv_sge *sg_entries;
/* sg_entries array length */
int sg_entries_num;
/* Each task is a member of some free list,
if the pointer is NULL => we assume the task
is a member of the common task list (tasks_free) */
ompi_free_list_t *task_list;
/* Pointer to the next task */
struct mca_bcol_iboffload_task_t *next_task;
/* Pasha: this is a crude workaround for the driver interface;
* the send_wr and recv_wr should be part of mqe_task and not pointers!
*/
union {
struct ibv_m_send_wr send_wr;
struct ibv_recv_wr recv_wr;
} wr;
/* If we'll decide to post a task to a different qp */
struct mqe_qp_entry task_mqe_qp_entry;
/* Pointer to endpoint for this task */
mca_bcol_iboffload_endpoint_t *endpoint;
};
typedef struct mca_bcol_iboffload_task_t mca_bcol_iboffload_task_t;
OBJ_CLASS_DECLARATION(mca_bcol_iboffload_task_t);
/* calc_tasks_free free list init function */
void
mca_bcol_iboffload_calc_task_init(ompi_free_list_item_t* item, void* ctx);
/* iovec_tasks_free free list init function */
void
mca_bcol_iboffload_iovec_task_init(ompi_free_list_item_t* item, void* ctx);
static inline __opal_attribute_always_inline__ void
mca_bcol_iboffload_return_frag_tolist(
mca_bcol_iboffload_frag_t *frag,
ompi_free_list_t *list)
{
if (NULL != frag) {
mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;
assert(MCA_BCOL_IBOFFLOAD_NONE_OWNER != frag->type);
if (MCA_BCOL_IBOFFLOAD_DUMMY_OWNER != frag->type &&
0 == frag->ref_counter) {
if (MCA_BCOL_IBOFFLOAD_BCOL_OWNER == frag->type) {
OMPI_FREE_LIST_RETURN_MT((&(list[frag->qp_index])),
(ompi_free_list_item_t*) frag);
} else if (MCA_BCOL_IBOFFLOAD_ML_OWNER == frag->type) {
OMPI_FREE_LIST_RETURN_MT((&(cm->ml_frags_free)),
(ompi_free_list_item_t*) frag);
}
}
}
}
static inline __opal_attribute_always_inline__ void
mca_bcol_iboffload_return_recv_frags_toendpoint(
mca_bcol_iboffload_frag_t *frags,
mca_bcol_iboffload_endpoint_t *ep,
int qp_index)
{
mca_bcol_iboffload_frag_t *recv_frag = frags;
mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;
while (NULL != recv_frag) {
assert(MCA_BCOL_IBOFFLOAD_NONE_OWNER != recv_frag->type);
if (MCA_BCOL_IBOFFLOAD_ML_OWNER != recv_frag->type) {
opal_list_prepend(&ep->qps[qp_index].preposted_frags,
(opal_list_item_t *) recv_frag);
} else {
OMPI_FREE_LIST_RETURN_MT((&(cm->ml_frags_free)),
(ompi_free_list_item_t*) recv_frag);
}
recv_frag = recv_frag->next;
}
}
/* Wait task allocation and initialization */
static inline __opal_attribute_always_inline__ mca_bcol_iboffload_task_t*
mca_bcol_iboffload_get_wait_task(mca_bcol_iboffload_module_t *iboffload,
uint32_t source, int num_waits,
mca_bcol_iboffload_frag_t *frags,
int qp_index, struct ibv_qp *qp)
{
ompi_free_list_item_t *item;
mca_bcol_iboffload_task_t *task;
mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;
mca_bcol_iboffload_endpoint_t *endpoint = iboffload->endpoints[source];
/* blocking allocation for send fragment */
OMPI_FREE_LIST_GET_MT(&cm->tasks_free, item);
if (OPAL_UNLIKELY(NULL == item)) {
mca_bcol_iboffload_return_recv_frags_toendpoint(frags, endpoint, qp_index);
return NULL;
}
task = (mca_bcol_iboffload_task_t *) item;
/* set pointer to corresponding recv fragment */
IBOFFLOAD_SET_FRAGS_ON_TASK(frags, task);
task->next_task = NULL;
task->endpoint = endpoint;
/* set opcode */
task->element.opcode = MQE_WR_CQE_WAIT;
task->element.flags = 0; /* any flag could go here; the driver ignores it anyway */
/* set task id */
task->element.wr_id = (uint64_t) (uintptr_t) task;
/* set CQ */
task->element.wait.cq = endpoint->qp_config.init_attr[qp_index].recv_cq;
/* set the number of completions to wait for */
task->element.wait.count = num_waits;
/* set pointer to QP */
if (NULL == qp) { /* NULL means use MQ's QP */
task->element.wait.mqe_qp = NULL;
} else { /* Post wait to the SQ of this QP */
task->task_mqe_qp_entry.next = NULL;
task->task_mqe_qp_entry.qp = qp;
task->element.wait.mqe_qp = &task->task_mqe_qp_entry;
}
IBOFFLOAD_VERBOSE(10, ("Allocating task %p, cq: %p, num waits: %d, qp_index - %d, "
"destination %d for comm rank: %d.\n",
(void *) task, (void *) task->element.wait.cq,
task->element.wait.count, qp_index, source,
endpoint->iboffload_module->ibnet->super.group_list[endpoint->index]));
return task;
}
static inline __opal_attribute_always_inline__ mca_bcol_iboffload_task_t*
mca_bcol_iboffload_prepare_send_task(
mca_bcol_iboffload_module_t *iboffload,
mca_bcol_iboffload_endpoint_t *endpoint,
int qp_index, ompi_free_list_t *task_list,
mca_bcol_iboffload_collfrag_t *collfrag)
{
ompi_free_list_item_t *item;
mca_bcol_iboffload_task_t *task;
IBOFFLOAD_VERBOSE(10, ("Destination rank - %d, QP index - %d, "
"for comm rank - %d\n", endpoint->index, qp_index,
endpoint->iboffload_module->ibnet->super.group_list[endpoint->index]));
/* get item from free list */
OMPI_FREE_LIST_GET_MT(task_list, item);
if (OPAL_UNLIKELY(NULL == item)) {
return NULL;
}
task = (mca_bcol_iboffload_task_t*) item;
task->endpoint = endpoint;
++(collfrag->n_sends);
task->collfrag = collfrag;
task->next_task = NULL;
task->element.wr_id = (uint64_t) (uintptr_t) task;
task->element.post.qp = endpoint->qps[qp_index].qp->lcl_qp;
task->element.opcode = MQE_WR_SEND;
/* define send work request */
SENDWR(task) = &(task->wr.send_wr);
SENDWR(task)->next = NULL;
SENDWR(task)->wr_id = (uint64_t) (uintptr_t) collfrag;
IBOFFLOAD_VERBOSE(10, ("coll_frag - %p.\n", collfrag));
/* Always send IMM on sends! */
task->element.flags = MQE_WR_FLAG_IMM_EXE;
/* Always signal completion */
SENDWR(task)->send_flags = IBV_SEND_SIGNALED;
return task;
}
static inline __opal_attribute_always_inline__ mca_bcol_iboffload_task_t*
mca_bcol_iboffload_get_send_task(
mca_bcol_iboffload_module_t *iboffload,
uint32_t destination, int qp_index,
mca_bcol_iboffload_frag_t *frag,
mca_bcol_iboffload_collfrag_t *collfrag,
bool enable_inline)
{
mca_bcol_iboffload_task_t *task;
mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;
mca_bcol_iboffload_endpoint_t *endpoint = iboffload->endpoints[destination];
IBOFFLOAD_VERBOSE(10, ("mca_bcol_iboffload_get_send_task qp_index %d\n",
qp_index));
task = mca_bcol_iboffload_prepare_send_task(iboffload, endpoint, qp_index,
&cm->tasks_free,
collfrag);
if (OPAL_UNLIKELY(NULL == task)) {
mca_bcol_iboffload_return_frag_tolist(frag, iboffload->device->frags_free);
return NULL;
}
/* no support for multiple frags */
IBOFFLOAD_SET_SINGLE_FRAG_ON_TASK(frag, task);
/* We cannot post a 0-byte send, but we can do a zero-byte RDMA write with immediate */
if (0 == frag->sg_entry.length) {
SENDWR(task)->imm_data = 0;
SENDWR(task)->opcode = IBV_WR_RDMA_WRITE_WITH_IMM;
SENDWR(task)->wr.rdma.rkey = endpoint->remote_zero_rdma_addr.rkey;
SENDWR(task)->wr.rdma.remote_addr = endpoint->remote_zero_rdma_addr.addr;
} else {
SENDWR(task)->opcode = IBV_WR_SEND;
}
/* single sge */
SENDWR(task)->num_sge = 1;
SENDWR(task)->sg_list = &(frag->sg_entry);
/* Use inline send when it is possible */
if (enable_inline &&
frag->sg_entry.length < cm->max_inline_data) {
IBOFFLOAD_VERBOSE(10, ("Setting inline for len %d\n", frag->sg_entry.length));
SENDWR(task)->send_flags |= IBV_SEND_INLINE;
}
return task;
}
static inline __opal_attribute_always_inline__ mca_bcol_iboffload_task_t*
mca_bcol_iboffload_get_send_vec_task(
mca_bcol_iboffload_module_t *iboffload,
uint32_t destination, int qp_index,
size_t nitems,
struct iovec *buff_iovec,
uint32_t lkey,
mca_bcol_iboffload_frag_t *frag,
mca_bcol_iboffload_collfrag_t *collfrag,
bool enable_inline)
{
mca_bcol_iboffload_task_t *task;
int i;
mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;
mca_bcol_iboffload_endpoint_t *endpoint = iboffload->endpoints[destination];
IBOFFLOAD_VERBOSE(10, ("mca_bcol_iboffload_get_send_task qp_index %d\n",
qp_index));
task = mca_bcol_iboffload_prepare_send_task(iboffload, endpoint, qp_index,
&iboffload->iovec_tasks_free,
collfrag);
if (OPAL_UNLIKELY(NULL == task)) {
mca_bcol_iboffload_return_frag_tolist(frag, iboffload->device->frags_free);
return NULL;
}
/* no support for multiple frags */
IBOFFLOAD_SET_SINGLE_FRAG_ON_TASK(frag, task);
/* We cannot post a 0-byte send, but we can do a zero-byte RDMA write with immediate */
SENDWR(task)->opcode = IBV_WR_SEND;
assert (task->sg_entries != NULL);
for (i = 0; (size_t) i < nitems; ++i){
task->sg_entries[i].length = buff_iovec[i].iov_len;
task->sg_entries[i].addr = (uint64_t) buff_iovec[i].iov_base;
task->sg_entries[i].lkey = lkey;
}
/* multiple sge */
SENDWR(task)->num_sge = nitems;
SENDWR(task)->sg_list = (task->sg_entries);
/* Use inline send when it is possible */
if (enable_inline &&
frag->sg_entry.length < cm->max_inline_data) {
IBOFFLOAD_VERBOSE(10, ("Setting inline for len %d\n", frag->sg_entry.length));
SENDWR(task)->send_flags |= IBV_SEND_INLINE;
}
return task;
}
static inline __opal_attribute_always_inline__ mca_bcol_iboffload_task_t*
mca_bcol_iboffload_get_rdma_vec_task(
uint32_t destination, size_t offset, size_t nitems,
mca_bcol_iboffload_frag_t *frag,
mca_bcol_iboffload_module_t *iboffload,
struct iovec *buff_iovec, uint32_t lkey,
mca_bcol_iboffload_collfrag_t *collfrag)
{
int i;
mca_bcol_iboffload_collreq_t *coll_request = collfrag->coll_full_req;
mca_bcol_iboffload_task_t *task;
mca_bcol_iboffload_endpoint_t *endpoint =
iboffload->endpoints[destination];
task = mca_bcol_iboffload_prepare_send_task(iboffload, endpoint,
coll_request->qp_index,
&iboffload->iovec_tasks_free,
collfrag);
if (OPAL_UNLIKELY(NULL == task)) {
mca_bcol_iboffload_return_frag_tolist(frag, iboffload->device->frags_free);
return NULL;
}
/* no support for multiple frags */
IBOFFLOAD_SET_SINGLE_FRAG_ON_TASK(frag, task);
SENDWR(task)->imm_data = 0;
SENDWR(task)->opcode = IBV_WR_RDMA_WRITE_WITH_IMM;
SENDWR(task)->wr.rdma.rkey = endpoint->remote_rdma_block.ib_info.rkey;
SENDWR(task)->wr.rdma.remote_addr = (uint64_t) (uintptr_t)
((unsigned char *) endpoint->remote_rdma_block.rdma_desc[coll_request->ml_buffer_index].data_addr + offset);
for (i = 0; (size_t) i < nitems; ++i){
task->sg_entries[i].length = buff_iovec[i].iov_len;
task->sg_entries[i].addr = (uint64_t) buff_iovec[i].iov_base;
task->sg_entries[i].lkey = lkey;
}
/* multiple sge */
SENDWR(task)->num_sge = nitems;
SENDWR(task)->sg_list = (task->sg_entries);
IBOFFLOAD_VERBOSE(10, ("The remote offset %ld \n", offset));
return task;
}
static inline __opal_attribute_always_inline__ mca_bcol_iboffload_task_t*
mca_bcol_iboffload_get_rdma_task(
uint32_t destination, size_t offset,
mca_bcol_iboffload_frag_t *frag,
mca_bcol_iboffload_module_t *iboffload,
mca_bcol_iboffload_collfrag_t *collfrag)
{
mca_bcol_iboffload_collreq_t *coll_request = collfrag->coll_full_req;
mca_bcol_iboffload_task_t *task;
mca_bcol_iboffload_endpoint_t *endpoint =
iboffload->endpoints[destination];
mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;
task = mca_bcol_iboffload_prepare_send_task(iboffload, endpoint,
coll_request->qp_index,
&cm->tasks_free, collfrag);
if (OPAL_UNLIKELY(NULL == task)) {
mca_bcol_iboffload_return_frag_tolist(frag, iboffload->device->frags_free);
return NULL;
}
/* no support for multiple frags */
IBOFFLOAD_SET_SINGLE_FRAG_ON_TASK(frag, task);
SENDWR(task)->imm_data = 0;
SENDWR(task)->opcode = IBV_WR_RDMA_WRITE_WITH_IMM;
SENDWR(task)->wr.rdma.rkey = endpoint->remote_rdma_block.ib_info.rkey;
/* Pasha: I am really not happy with the way we calculate remote addresses.
Why don't we use rbuf + offset? */
SENDWR(task)->wr.rdma.remote_addr = (uint64_t) (uintptr_t)
((unsigned char *) endpoint->remote_rdma_block.rdma_desc[coll_request->ml_buffer_index].data_addr + offset);
/* single sge */
SENDWR(task)->num_sge = 1;
SENDWR(task)->sg_list = &(frag->sg_entry);
IBOFFLOAD_VERBOSE(10, ("The remote offset %ld \n", offset));
return task;
}
/* Pasha: hacking version of calc operation */
static inline __opal_attribute_always_inline__ mca_bcol_iboffload_task_t*
mca_bcol_iboffload_get_calc_task(mca_bcol_iboffload_module_t *iboffload,
uint32_t destination, int qp_index, mca_bcol_iboffload_frag_t *frag,
struct ibv_sge *l_operand, struct ibv_sge *r_operand,
mca_bcol_iboffload_collreq_t *coll_request,
bool enable_inline)
/* Some specifications for this function:
* 1) We assume that the lengths of the two operands (ibv_sge structs) are the same.
* 2) We may use the results (ibv_sge structs) from previous
* calc operations, so the frag pointer may be NULL.
*/
{
mca_bcol_iboffload_task_t *task;
mca_bcol_iboffload_endpoint_t *endpoint =
iboffload->endpoints[destination];
mca_bcol_iboffload_collfrag_t *collfrag =
(mca_bcol_iboffload_collfrag_t *)
opal_list_get_last(&coll_request->work_requests);
mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;
task = mca_bcol_iboffload_prepare_send_task(iboffload, endpoint, qp_index,
&cm->calc_tasks_free, collfrag);
if (OPAL_UNLIKELY(NULL == task)) {
mca_bcol_iboffload_return_frag_tolist(frag, iboffload->device->frags_free);
return NULL;
}
if (NULL != frag) {
IBOFFLOAD_SET_SINGLE_FRAG_ON_TASK(frag, task);
} else {
task->frag = NULL;
}
task->sg_entries[0] = *l_operand;
task->sg_entries[1] = *r_operand;
SENDWR(task)->num_sge = 2;
SENDWR(task)->sg_list = task->sg_entries;
SENDWR(task)->opcode = MCA_BCOL_IBOFFLOAD_SEND_CALC;
#if OPAL_HAVE_IBOFFLOAD_CALC_RDMA
SENDWR(task)->wr.calc_send.data_type = coll_request->actual_ib_dtype;
SENDWR(task)->wr.calc_send.calc_op = coll_request->actual_ib_op;
#else
SENDWR(task)->wr.calc.data_type = coll_request->actual_ib_dtype;
SENDWR(task)->wr.calc.calc_op = coll_request->actual_ib_op;
#endif
return task;
}
static inline __opal_attribute_always_inline__ mca_bcol_iboffload_task_t*
mca_bcol_iboffload_get_rdma_calc_task(mca_bcol_iboffload_module_t *iboffload,
uint32_t destination, int qp_index, mca_bcol_iboffload_frag_t *frag,
struct ibv_sge *l_operand, struct ibv_sge *r_operand,
mca_bcol_iboffload_collreq_t *coll_request,
size_t offset)
/* Some specifications for this function:
* 1) We assume that the lengths of the two operands (ibv_sge structs) are the same.
* 2) We may use the results (ibv_sge structs) from previous
* calc operations, so the frag pointer may be NULL.
*/
{
mca_bcol_iboffload_task_t *task;
mca_bcol_iboffload_endpoint_t *endpoint =
iboffload->endpoints[destination];
mca_bcol_iboffload_collfrag_t *collfrag =
(mca_bcol_iboffload_collfrag_t *)
opal_list_get_last(&coll_request->work_requests);
mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;
task = mca_bcol_iboffload_prepare_send_task(iboffload, endpoint, qp_index,
&cm->calc_tasks_free, collfrag);
if (OPAL_UNLIKELY(NULL == task)) {
mca_bcol_iboffload_return_frag_tolist(frag, iboffload->device->frags_free);
return NULL;
}
if (NULL != frag) {
IBOFFLOAD_SET_SINGLE_FRAG_ON_TASK(frag, task);
} else {
task->frag = NULL;
}
task->sg_entries[0] = *l_operand;
/* Hack - we don't really use it.
task->sg_entries[1] = *r_operand;
*/
/* We use only single entry
SENDWR(task)->num_sge = 2;
*/
SENDWR(task)->num_sge = 1;
SENDWR(task)->sg_list = task->sg_entries;
#if OPAL_HAVE_IBOFFLOAD_CALC_RDMA
SENDWR(task)->opcode = IBV_M_WR_CALC_RDMA_WRITE_WITH_IMM;
SENDWR(task)->wr.calc_rdma.data_type = coll_request->actual_ib_dtype;
SENDWR(task)->wr.calc_rdma.calc_op = coll_request->actual_ib_op;
SENDWR(task)->wr.calc_rdma.rkey = endpoint->remote_rdma_block.ib_info.rkey;
SENDWR(task)->wr.calc_rdma.remote_addr = (uint64_t) (uintptr_t)
((unsigned char *) endpoint->remote_rdma_block.rdma_desc[coll_request->ml_buffer_index].data_addr + offset);
#else
IBOFFLOAD_ERROR(("Fatal error: RDMA CALC was called, but the driver does not support this operation"));
return NULL;
#endif
return task;
}
static inline __opal_attribute_always_inline__
int release_frags_on_task(mca_bcol_iboffload_task_t *task,
ompi_free_list_t *list)
{
int rc, qp_index;
mca_bcol_iboffload_frag_t *temp_frag = task->frag;
mca_bcol_iboffload_endpoint_t *endpoint = task->endpoint;
mca_bcol_iboffload_component_t *cm =
&mca_bcol_iboffload_component;
IBOFFLOAD_VERBOSE(10, ("\nCalling release_frags_on_task"));
while (NULL != temp_frag) {
qp_index = temp_frag->qp_index;
--(temp_frag->ref_counter);
/* Return credits */
if (MQE_WR_CQE_WAIT == task->element.opcode) {
++(endpoint->qps[qp_index].rd_wqe);
IBOFFLOAD_VERBOSE(10, ("Return rd_wqe %d pp_win %d",
endpoint->qps[qp_index].rd_wqe,
cm->qp_infos[qp_index].rd_pp_win));
/* Call for recv prepost */
if (endpoint->qps[qp_index].rd_wqe >=
cm->qp_infos[qp_index].rd_pp_win) {
IBOFFLOAD_VERBOSE(10, ("Prepost to endpoint->index - %d, qp_index - %d", endpoint->index, qp_index));
rc = mca_bcol_iboffload_prepost_recv(endpoint, qp_index,
endpoint->qps[qp_index].rd_wqe);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
IBOFFLOAD_ERROR(("QP %d: failed to prepost.\n", qp_index));
return OMPI_ERROR;
}
/* What happens if we cannot prepost? */
}
} else if (MQE_WR_SEND == task->element.opcode) {
++(endpoint->qps[qp_index].sd_wqe);
assert(endpoint->qps[qp_index].sd_wqe <= cm->qp_infos[qp_index].rd_num);
IBOFFLOAD_VERBOSE(10, ("Return sd_wqe %d, qp_index - %d, endpoint - %p",
endpoint->qps[qp_index].sd_wqe, qp_index, endpoint));
} else {
/* We should not arrive to this case */
IBOFFLOAD_ERROR(("Unsupporeted operation"));
return OMPI_ERROR;
}
mca_bcol_iboffload_return_frag_tolist(temp_frag, list);
temp_frag = temp_frag->next;
}
return OMPI_SUCCESS;
}
END_C_DECLS
#endif

View file

@ -1,40 +0,0 @@
# -*- shell-script -*-
#
# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
# Copyright (c) 2015 Research Organization for Information Science
# and Technology (RIST). All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_ompi_bcol_iboffload_CONFIG([should_build])
# ------------------------------------------
# AC_DEFUN([MCA_ompi_bcol_iboffload_POST_CONFIG], [
# ])
# MCA_ompi_bcol_iboffload_CONFIG([action-if-can-compile],
# [action-if-cant-compile])
# ------------------------------------------------
AC_DEFUN([MCA_ompi_bcol_iboffload_CONFIG],[
AC_CONFIG_FILES([ompi/mca/bcol/iboffload/Makefile])
bcol_ofa_happy="no"
bcol_mlnx_ofed_happy="no"
OPAL_CHECK_OPENFABRICS([bcol_iboffload], [bcol_ofa_happy="yes"])
OPAL_CHECK_MLNX_OPENFABRICS([bcol_iboffload], [bcol_mlnx_ofed_happy="yes"])
AS_IF([test "$bcol_ofa_happy" = "yes" && test "$bcol_mlnx_ofed_happy" = "yes"],
[$1],
[$2])
# substitute in the things needed to build iboffload
AC_SUBST([bcol_iboffload_CFLAGS])
AC_SUBST([bcol_iboffload_CPPFLAGS])
AC_SUBST([bcol_iboffload_LDFLAGS])
AC_SUBST([bcol_iboffload_LIBS])
])dnl

View file

@ -1,7 +0,0 @@
#
# owner/status file
# owner: institution that is responsible for this package
# status: e.g. active, maintenance, unmaintained
#
owner: ORNL
status: unmaintained

View file

@ -1,57 +0,0 @@
#
# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
# Copyright (c) 2009-2013 Mellanox Technologies. All rights reserved.
# Copyright (c) 2013 Los Alamos National Security, LLC. All rights
# reserved.
# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
sources = \
bcol_ptpcoll.h \
bcol_ptpcoll_utils.h \
bcol_ptpcoll_utils.c \
bcol_ptpcoll_mca.h \
bcol_ptpcoll_mca.c \
bcol_ptpcoll_barrier.c \
bcol_ptpcoll_bcast.c \
bcol_ptpcoll_bcast.h \
bcol_ptpcoll_component.c \
bcol_ptpcoll_fanin.c \
bcol_ptpcoll_fanout.c \
bcol_ptpcoll_module.c \
bcol_ptpcoll_allreduce.h \
bcol_ptpcoll_allreduce.c \
bcol_ptpcoll_reduce.h \
bcol_ptpcoll_reduce.c \
bcol_ptpcoll_allgather.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
component_noinst =
component_install =
if MCA_BUILD_ompi_bcol_ptpcoll_DSO
component_install += mca_bcol_ptpcoll.la
else
component_noinst += libmca_bcol_ptpcoll.la
endif
# See ompi/mca/btl/sm/Makefile.am for an explanation of
# libmca_common_sm.la.
mcacomponentdir = $(ompilibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_bcol_ptpcoll_la_SOURCES = $(sources)
mca_bcol_ptpcoll_la_LDFLAGS = -module -avoid-version
mca_bcol_ptpcoll_la_LIBADD =
noinst_LTLIBRARIES = $(component_noinst)
libmca_bcol_ptpcoll_la_SOURCES =$(sources)
libmca_bcol_ptpcoll_la_LDFLAGS = -module -avoid-version

View file

@ -1,474 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BCOL_PTPCOLL_EXPORT_H
#define MCA_BCOL_PTPCOLL_EXPORT_H
#include "ompi_config.h"
#include "mpi.h"
#include "ompi/mca/mca.h"
#include "ompi/mca/bcol/bcol.h"
#include "ompi/mca/bcol/base/base.h"
#include "opal/mca/mpool/mpool.h"
#include "ompi/request/request.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/patterns/net/netpatterns.h"
BEGIN_C_DECLS
#ifdef HAVE_SCHED_YIELD
# include <sched.h>
# define SPIN sched_yield()
#else /* no switch available */
# define SPIN
#endif
/**
* Structure to hold the basic ptpcoll component. First it holds the
* base bcol component, and then holds a bunch of
* ptpcoll-component-specific data (e.g., current MCA parameter
* values).
*/
struct mca_bcol_ptpcoll_component_t {
/** Base coll component */
mca_bcol_base_component_2_0_0_t super;
/** Verbosity level, used only in debug enabled builds */
int verbose;
/** The radix of the K-nomial tree, initialized by an MCA parameter */
int k_nomial_radix;
/** The radix of the Narray tree, initialized by an MCA parameter */
int narray_radix;
/** The radix used for Narray scatter and k-nomial gather for
large-message bcast **/
int narray_knomial_radix;
/** Number of times to poll for specific tag/src */
int num_to_probe;
/*
* bcast small messages algorithm
* 1 - Knomial bcast
* 2 - Narray bcast
*/
int bcast_small_messages_known_root_alg;
/*
* bcast large messages algorithm
* 1 - binomial scatter-gather
* 2 - Narray scatter, k-nomial gather
*/
int bcast_large_messages_known_root_alg;
/*
* barrier algorithm
* 1 - recursive doubling
* 2 - recursive K-ing
*/
int barrier_alg;
int use_brucks_smsg_alltoall_rdma;
};
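/*
 * Usage sketch (not from the original source): the algorithm selectors above
 * are exposed as MCA parameters, presumably registered in bcol_ptpcoll_mca.c.
 * Assuming they follow the usual <framework>_<component>_<name> naming
 * convention, the recursive K-ing barrier and the Narray small-message
 * broadcast could be selected at run time with something like:
 *
 *   mpirun --mca bcol_ptpcoll_barrier_alg 2 \
 *          --mca bcol_ptpcoll_bcast_small_messages_known_root_alg 2 ...
 *
 * The exact parameter names are an assumption, not confirmed by this file.
 */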
struct mca_bcol_ptpcoll_collreq_t {
opal_free_list_item_t super;
int tag;
int num_reqs;
int exchange;
int need_toserv_extra;
int extra_partner_rank;
ompi_request_t **requests;
};
typedef struct mca_bcol_ptpcoll_collreq_t mca_bcol_ptpcoll_collreq_t;
OBJ_CLASS_DECLARATION(mca_bcol_ptpcoll_collreq_t);
/**
* Convenience typedef
*/
typedef struct mca_bcol_ptpcoll_component_t mca_bcol_ptpcoll_component_t;
/* Bcast small messages,
known root algorithm */
enum {
PTPCOLL_KNOMIAL = 1,
PTPCOLL_NARRAY
};
/* Bcast large messages,
known root algorithm */
enum {
PTPCOLL_BINOMIAL_SG = 1, /* Binomial scatter-gather */
PTPCOLL_NARRAY_KNOMIAL_SG /* Narray-Knomial scatter-gather */
};
/*
* Implemented function index list
*/
/* barrier */
enum{
FANIN_FAN_OUT_BARRIER_FN,
RECURSIVE_DOUBLING_BARRIER_FN,
N_BARRIER_FNS
};
/* reduce */
enum{
FANIN_REDUCE_FN,
REDUCE_SCATTER_GATHER_FN,
N_REDUCE_FNS
};
enum{
SHORT_DATA_FN_REDUCE,
LONG_DATA_FN_REDUCE,
N_REDUCE_FNS_USED
};
/* all-reduce */
enum{
FANIN_FANOUT_ALLREDUCE_FN,
REDUCE_SCATTER_ALLGATHER_FN,
N_ALLREDUCE_FNS
};
enum{
SHORT_DATA_FN_ALLREDUCE,
LONG_DATA_FN_ALLREDUCE,
N_ALLREDUCE_FNS_USED
};
/*
* N-order tree node description
*/
struct tree_node_t {
/* my rank within the group */
int my_rank;
/* my node type - root, leaf, or interior */
int my_node_type;
/* number of nodes in the tree */
int tree_size;
/* number of parents (0/1) */
int n_parents;
/* number of children */
int n_children;
/* parent rank within the group */
int parent_rank;
/* children ranks within the group */
int *children_ranks;
};
typedef struct tree_node_t tree_node_t;
struct pair_exchange_node_t {
/* number of nodes this node will exchange data with */
int n_exchanges;
/* ranks of nodes involved in data exchange */
int *rank_exchanges;
/* number of extra sources of data - outside largest power of 2 in
* this group */
int n_extra_sources;
/* rank of the extra source */
int rank_extra_source;
/* number of tags needed per stripe */
int n_tags;
/* log 2 of largest full power of 2 for this node set */
int log_2;
/* largest power of 2 that fits in this group */
int n_largest_pow_2;
/* node type */
int node_type;
};
typedef struct pair_exchange_node_t pair_exchange_node_t;
/*
* Barrier request objects
*/
/* enum for phase at which the nb barrier is in */
enum{
NB_BARRIER_INACTIVE,
NB_BARRIER_FAN_IN,
NB_BARRIER_FAN_OUT,
/* done and not started are the same for all practical
* purposes, as the init function always sets this flag
*/
NB_BARRIER_DONE
};
typedef enum {
PTPCOLL_NOT_STARTED = 1,
PTPCOLL_WAITING_FOR_DATA = 1 << 1,
PTPCOLL_SCATTER_STARTED = 1 << 2,
PTPCOLL_GATHER_STARTED = 1 << 3,
PTPCOLL_EXTRA_SEND_STARTED = 1 << 4,
PTPCOLL_ROOT_SEND_STARTED = 1 << 5
} ptpcoll_op_status;
struct mca_bcol_ptpcoll_ml_buffer_desc_t {
void *data_addr; /* buffer address */
uint64_t bank_index; /* my bank */
uint64_t buffer_index; /* my buff index */
int active_requests; /* keep number of active requests */
ompi_request_t **requests; /* caching pointers to requests */
int data_src; /* used for bcast to cache internal data */
int radix_mask; /* used for bcast to cache internal data */
int radix_mask_pow; /* used for bcast to cache internal data */
int iteration; /* buffer iteration in knomial, binomial, etc. algorithms */
int tag; /* tag number that is attached to this operation */
int status; /* operation status */
/* Fixme: Probably we can get rid of these fields by redesigning
* the reduce implementation
*/
int reduction_status; /* used for reduction to cache internal
reduction status */
bool reduce_init_called;
};
typedef struct mca_bcol_ptpcoll_ml_buffer_desc_t mca_bcol_ptpcoll_ml_buffer_desc_t;
/*
* Information that we need to keep in order to access and
* track local ML memory that is used as source and destination
* for collective operations
*/
struct mca_bcol_ptpcoll_local_mlmem_desc_t {
/* Bank index to release */
uint32_t bank_index_for_release;
/* number of memory banks */
uint32_t num_banks;
/* number of buffers per bank */
uint32_t num_buffers_per_bank;
/* size of a payload buffer */
uint32_t size_buffer;
/* pointer to buffer descriptors initialized */
mca_bcol_ptpcoll_ml_buffer_desc_t *ml_buf_desc;
};
typedef struct mca_bcol_ptpcoll_local_mlmem_desc_t mca_bcol_ptpcoll_local_mlmem_desc_t;
typedef enum {
PTPCOLL_PROXY = 1,
PTPCOLL_IN_GROUP = 1 << 1,
PTPCOLL_EXTRA = 1 << 2,
PTPCOLL_KN_PROXY = 1 << 3,
PTPCOLL_KN_IN_GROUP = 1 << 4,
PTPCOLL_KN_EXTRA = 1 << 5
} node_type_pow2;
struct mca_bcol_ptpcoll_module_t {
/* base structure */
mca_bcol_base_module_t super;
/* size */
int group_size;
/* size of each memory segment */
size_t segment_size;
/* k_nomial radix */
int k_nomial_radix;
/* caching power of K, for K-nomial operations */
int pow_k;
/* caching power of K number that is smaller or equal to size of group */
int pow_knum;
/* caching power of 2, it is special case for some algorithms */
int pow_2;
/* caching power of 2 number that is closest to size of group */
int pow_2num;
/* type of this node in group of power 2 */
int pow_2type;
/* type of this node in group of K-nomial tree */
int pow_ktype;
/* type of this node in group of narray tree */
int narray_type;
/* size of full narray tree */
int full_narray_tree_size;
/* num leafs on last level */
int full_narray_tree_num_leafs;
/* Nary tree info */
netpatterns_tree_node_t *narray_node;
/* if the rank is in the group, it keeps the extra peer;
if the rank is extra, it keeps the proxy peer.
*/
int proxy_extra_index; /* pow2 algorithm */
int *kn_proxy_extra_index; /* K-nomial algorithm */
int kn_proxy_extra_num; /* number of extra peers, maximum k - 1 */
/* collective tag */
long long collective_tag;
/* tag mask - the pml has a limit on tag size, so need
* to wrap around
*/
uint64_t tag_mask;
/* Caching information about local ml memory.
* Since ptpcoll does not support RDMA operations over pml,
* we don't need to keep any information about remote buffers
*/
mca_bcol_ptpcoll_local_mlmem_desc_t ml_mem;
/* Narray-Knomial scatter-gather */
/* list of extra indexes */
int *narray_knomial_proxy_extra_index;
/* number of extra peers , maximum k - 1*/
int narray_knomial_proxy_num;
/* Narray-Knomial node information array */
netpatterns_narray_knomial_tree_node_t *narray_knomial_node;
/* Knomial exchange tree */
netpatterns_k_exchange_node_t knomial_exchange_tree;
/* knomial allgather tree --- do not remove, we need both:
the different algorithms define recursive k-ing differently
*/
netpatterns_k_exchange_node_t knomial_allgather_tree;
/* Knomial allgather offsets */
int **allgather_offsets;
/* Free lists of outstanding collective operations */
opal_free_list_t collreqs_free;
int log_group_size;
struct iovec *alltoall_iovec;
};
typedef struct mca_bcol_ptpcoll_module_t mca_bcol_ptpcoll_module_t;
OBJ_CLASS_DECLARATION(mca_bcol_ptpcoll_module_t);
/**
* Global component instance
*/
OMPI_MODULE_DECLSPEC extern mca_bcol_ptpcoll_component_t
mca_bcol_ptpcoll_component;
/*
* coll module functions
*/
/* query to see if the component is available for use, and can
* satisfy the thread and progress requirements
*/
int mca_bcol_ptpcoll_init_query(bool enable_progress_threads,
bool enable_mpi_threads);
/* query to see if the module is available for use on the given
* communicator, and if so, what its priority is.
*/
mca_bcol_base_module_t **
mca_bcol_ptpcoll_comm_query(mca_sbgp_base_module_t *sbgp, int *num_modules);
/* interface function to setup recursive k-ing tree */
int mca_bcol_ptpcoll_setup_knomial_tree(mca_bcol_base_module_t *super);
/* barrier routines */
int bcol_ptpcoll_barrier_recurs_dbl(bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args);
int bcol_ptpcoll_barrier_recurs_knomial(bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args);
int bcol_ptpcoll_barrier_init(mca_bcol_base_module_t *super);
int mca_bcol_ptpcoll_memsync_init(mca_bcol_base_module_t *super);
void * bcol_ptpcoll_allocate_memory(size_t length, size_t alignment,
struct mca_bcol_base_module_t *bcol_module);
int bcol_ptpcoll_register_memory(void * in_ptr, size_t length, size_t alignment,
struct mca_bcol_base_module_t *bcol_module);
int bcol_ptpcoll_deregister_memory( void * in_ptr,
struct mca_bcol_base_module_t *bcol_module);
int bcol_ptpcoll_free_memory(void *ptr,
struct mca_bcol_base_module_t *bcol_module);
int bcol_ptpcoll_fanin( bcol_function_args_t *input_args,
struct mca_bcol_base_module_t *module);
int bcol_ptpcoll_fanout( bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args);
/* allgather routine */
int bcol_ptpcoll_k_nomial_allgather_init(bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args);
/* allgather progress */
int bcol_ptpcoll_k_nomial_allgather_progress(bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args);
/* allgather register */
int bcol_ptpcoll_allgather_init(mca_bcol_base_module_t *super);
static inline __opal_attribute_always_inline__
int mca_bcol_ptpcoll_test_for_match(ompi_request_t **request , int *rc)
{
int matched = 0;
int i;
mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component;
*rc = OMPI_SUCCESS;
for (i = 0; i < cm->num_to_probe &&
0 == matched && OMPI_SUCCESS == *rc ; i++) {
*rc = ompi_request_test(request, &matched, MPI_STATUS_IGNORE);
}
return matched;
}
static inline __opal_attribute_always_inline__
int mca_bcol_ptpcoll_test_all_for_match(int *n_requests, ompi_request_t **requests , int *rc)
{
int matched = 0;
int i;
mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component;
*rc = OMPI_SUCCESS;
assert(*n_requests >= 0);
if (0 == *n_requests) {
return 1;
}
for (i = 0; i < cm->num_to_probe &&
0 == matched && OMPI_SUCCESS == *rc; i++) {
*rc = ompi_request_test_all
(*n_requests, requests, &matched, MPI_STATUS_IGNORE);
}
if (matched) {
*n_requests = 0;
}
return matched;
}
/* Some negative tags are already used by OMPI, so make sure we take a safe offset */
#define PTPCOLL_TAG_OFFSET 100
#define PTPCOLL_TAG_FACTOR 2
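/*
 * Worked example (illustrative only): with the offset and factor above, a
 * collective whose sequence number is 7 computes 100 + 7 * 2 = 114, masks
 * it with the module's tag_mask, and then negates it, so the operation runs
 * on tag -114, outside the non-negative tag range used by application-level
 * point-to-point traffic.
 */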
static inline int lognum(int n){
int count = 1, lognum = 0;
while (count < n) {
count = count << 1;
lognum++;
}
return lognum;
}
END_C_DECLS
#endif /* MCA_BCOL_PTPCOLL_EXPORT_H */
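The lognum() helper above returns the smallest exponent e with 2^e >= n (that is, ceil(log2 n) for n >= 1). The standalone check below is a sketch with assumed sample values, not part of the original file.

#include <assert.h>

/* same definition as in the header above */
static int lognum(int n)
{
    int count = 1, lognum = 0;
    while (count < n) {
        count = count << 1;
        lognum++;
    }
    return lognum;
}

int main(void)
{
    assert(0 == lognum(1));
    assert(3 == lognum(8));   /* exact power of two   */
    assert(4 == lognum(9));   /* rounds up to 2^4 = 16 */
    return 0;
}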

View file

@ -1,605 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/include/ompi/constants.h"
#include "ompi/mca/bcol/bcol.h"
#include "bcol_ptpcoll_allreduce.h"
/*
* Recursive K-ing allgather
*/
/*
*
* Recursive k-ing algorithm
* Example k=3 n=9
*
*
* Number of exchange steps = log_k(n)
* Number of steps in each exchange step = k (radix)
*
*/
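/*
 * Worked numbers for the example above (a sketch, not from the original
 * source): with k = 3 and n = 9 there are log_3(9) = 2 exchange steps, and
 * within each step the loops below walk over the tree_order - 1 = 2 exchange
 * partners recorded in rank_exchanges[i][j], posting one send and one
 * receive per partner.
 */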
int bcol_ptpcoll_k_nomial_allgather_init(bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args)
{
/* local variables */
mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list;
netpatterns_k_exchange_node_t *exchange_node = &ptpcoll_module->knomial_allgather_tree;
int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index;
int group_size = ptpcoll_module->group_size;
int *list_connected = ptpcoll_module->super.list_n_connected; /* critical for hierarchical colls */
int tag;
int i, j;
int knt;
int comm_src, comm_dst, src, dst;
int recv_offset, recv_len;
int send_offset, send_len;
uint32_t buffer_index = input_args->buffer_index;
int pow_k, tree_order;
int rc = OMPI_SUCCESS;
ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm;
ompi_request_t **requests =
ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests;
int *active_requests =
&(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests);
int completed = 0; /* initialized */
void *data_buffer = (void*)(
(unsigned char *) input_args->sbuf +
(size_t) input_args->sbuf_offset);
int pack_len = input_args->count * input_args->dtype->super.size;
#if 0
fprintf(stderr,"entering p2p allgather pack_len %d. exchange node: %p\n",pack_len, exchange_node);
#endif
/* initialize the iteration counter */
int *iteration = &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration;
*iteration = 0;
/* reset active request counter */
*active_requests = 0;
/* keep tag within the limit supported by the pml */
tag = (PTPCOLL_TAG_OFFSET + input_args->sequence_num * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask);
/* mark this as a collective tag, to avoid conflict with user-level flags */
tag = -tag;
/* k-nomial parameters */
tree_order = exchange_node->tree_order;
pow_k = exchange_node->log_tree_order;
/* let's begin the collective, starting with extra ranks and their
* respective proxies
*/
if( EXTRA_NODE == exchange_node->node_type ) {
/* then I will send to my proxy rank*/
dst = exchange_node->rank_extra_sources_array[0];
/* find rank in the communicator */
comm_dst = group_list[dst];
/* now I need to calculate my own offset */
knt = 0;
for (i = 0 ; i < my_group_index; i++){
knt += list_connected[i];
}
/* send the data to my proxy */
rc = MCA_PML_CALL(isend((void *) ( (unsigned char *) data_buffer +
knt*pack_len),
pack_len * list_connected[my_group_index],
MPI_BYTE,
comm_dst, tag,
MCA_PML_BASE_SEND_STANDARD, comm,
&(requests[*active_requests])));
if( OMPI_SUCCESS != rc ) {
PTPCOLL_VERBOSE(10,("Failed to isend data"));
return OMPI_ERROR;
}
++(*active_requests);
/* now I go ahead and post the receive from my proxy */
comm_src = comm_dst;
knt = 0;
for( i =0; i < group_size; i++){
knt += list_connected[i];
}
rc = MCA_PML_CALL(irecv(data_buffer,
knt * pack_len,
MPI_BYTE,
comm_src,
tag , comm, &(requests[*active_requests])));
if( OMPI_SUCCESS != rc ) {
PTPCOLL_VERBOSE(10, ("Failed to post ireceive "));
return OMPI_ERROR;
}
++(*active_requests);
/* poll for completion */
/* this polls internally */
completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc);
if(completed){
/* go to buffer release */
goto FINISHED;
}else{
/* save state and hop out
* nothing to save here
*/
return ((OMPI_SUCCESS != rc) ? OMPI_ERROR : BCOL_FN_STARTED);
}
}else if ( 0 < exchange_node->n_extra_sources ) {
/* I am a proxy for someone */
src = exchange_node->rank_extra_sources_array[0];
/* find the rank in the communicator */
comm_src = group_list[src];
knt = 0;
for(i = 0; i < src; i++){
knt += list_connected[i];
}
/* post the receive */
rc = MCA_PML_CALL(irecv((void *) ( (unsigned char *) data_buffer
+ knt*pack_len),
pack_len * list_connected[src],
MPI_BYTE,
comm_src,
tag , comm, &(requests[*active_requests])));
if( OMPI_SUCCESS != rc ) {
PTPCOLL_VERBOSE(10, ("Failed to post ireceive "));
return OMPI_ERROR;
}
++(*active_requests);
/* poll for completion */
/* this routine polls internally */
completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc);
if(!completed){
/* save state and hop out
* We really do need to block here so set
* the iteration to -1 indicating we need to
* finish this part first
*/
*iteration = -1;
return ((OMPI_SUCCESS != rc )? OMPI_ERROR : BCOL_FN_STARTED);
}
}
/* we start the recursive k - ing phase */
/* fprintf(stderr,"tree order %d pow_k %d \n",tree_order,pow_k);*/
for( i = 0; i < pow_k; i++) {
for(j = 0; j < (tree_order - 1); j++) {
/* send phase */
dst = exchange_node->rank_exchanges[i][j];
if( dst < 0 ){
continue;
}
comm_dst = group_list[dst];
send_offset = exchange_node->payload_info[i][j].s_offset * pack_len;
send_len = exchange_node->payload_info[i][j].s_len * pack_len;
/* debug print */
/* fprintf(stderr,"sending %d bytes to rank %d at offset %d\n",send_len, */
/* comm_dst,send_offset); */
rc = MCA_PML_CALL(isend((void*)((unsigned char *) data_buffer +
send_offset),
send_len,
MPI_BYTE,
comm_dst, tag,
MCA_PML_BASE_SEND_STANDARD, comm,
&(requests[*active_requests])));
if( OMPI_SUCCESS != rc ) {
PTPCOLL_VERBOSE(10,("Failed to isend data"));
return OMPI_ERROR;
}
++(*active_requests);
/* sends are posted */
}
/* Now post the recv's */
for( j = 0; j < (tree_order - 1); j++ ) {
/* recv phase */
src = exchange_node->rank_exchanges[i][j];
if( src < 0 ) {
continue;
}
comm_src = group_list[src];
recv_offset = exchange_node->payload_info[i][j].r_offset * pack_len;
recv_len = exchange_node->payload_info[i][j].r_len * pack_len;
/* debug print */
/* fprintf(stderr,"recving %d bytes to rank %d at offset %d\n",recv_len, */
/* comm_src,recv_offset); */
/* post the receive */
rc = MCA_PML_CALL(irecv((void *) ((unsigned char *) data_buffer +
recv_offset),
recv_len,
MPI_BYTE,
comm_src,
tag, comm, &(requests[*active_requests])));
if( OMPI_SUCCESS != rc ) {
PTPCOLL_VERBOSE(10, ("Failed to post ireceive "));
return OMPI_ERROR;
}
++(*active_requests);
}
/* finished all send/recv's now poll for completion before
* continuing to next iteration
*/
completed = 0;
/* polling internally on 2*(k - 1) requests */
completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc);
if(!completed){
/* save state and hop out
* only the iteration needs to be tracked
*/
*iteration = i; /* need to pick up here */
return ((OMPI_SUCCESS != rc) ? OMPI_ERROR : BCOL_FN_STARTED);
}
}
/* finish off the last piece, send the data back to the extra */
if( 0 < exchange_node->n_extra_sources ) {
dst = exchange_node->rank_extra_sources_array[0];
comm_dst = group_list[dst];
knt = 0;
for( i = 0; i < group_size; i++){
knt += list_connected[i];
}
/* debug print */
/*
fprintf(stderr,"sending %d bytes to extra %d \n",pack_len*knt,comm_dst);
*/
rc = MCA_PML_CALL(isend(data_buffer,
pack_len * knt,
MPI_BYTE,
comm_dst, tag,
MCA_PML_BASE_SEND_STANDARD, comm,
&(requests[*active_requests])));
if( OMPI_SUCCESS != rc ) {
PTPCOLL_VERBOSE(10,("Failed to isend data"));
return OMPI_ERROR;
}
++(*active_requests);
/* probe for send completion */
completed = 0;
/* polling internally */
completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc);
if(!completed){
/* save state and hop out
* We really do need to block here so set
* the iteration to pow_k +1 indicating we need to
* finish progressing the last part
*/
*iteration = pow_k + 1;
return (OMPI_SUCCESS != rc ? OMPI_ERROR : BCOL_FN_STARTED);
}
}
FINISHED:
/* recycle buffer if need be */
return BCOL_FN_COMPLETE;
}
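/*
 * Resume-state sketch (summarizing the sentinels used above and picked up by
 * the progress function below): iteration == -1 means a proxy is still
 * waiting for its extra rank's data, 0 <= iteration < pow_k names the next
 * recursive k-ing step to re-enter, and iteration == pow_k + 1 means only
 * the final send back to the extra rank is still outstanding.
 */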
/* allgather progress function */
int bcol_ptpcoll_k_nomial_allgather_progress(bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args)
{
/* local variables */
mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list;
netpatterns_k_exchange_node_t *exchange_node = &ptpcoll_module->knomial_allgather_tree;
int group_size = ptpcoll_module->group_size;
int *list_connected = ptpcoll_module->super.list_n_connected; /* critical for hierarchical colls */
int tag;
int i, j;
int knt;
int comm_src, comm_dst, src, dst;
int recv_offset, recv_len;
int send_offset, send_len;
uint32_t buffer_index = input_args->buffer_index;
int pow_k, tree_order;
int rc = OMPI_SUCCESS;
ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm;
ompi_request_t **requests =
ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests;
int *active_requests =
&(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests);
int completed = 0; /* initialized */
void *data_buffer = (void*)(
(unsigned char *) input_args->sbuf +
(size_t) input_args->sbuf_offset);
int pack_len = input_args->count * input_args->dtype->super.size;
/* initialize the counter */
int *iteration = &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration;
#if 0
fprintf(stderr,"%d: entering p2p allgather progress AR: %d iter: %d\n",my_group_index,*active_requests,
*iteration);
#endif
/* keep tag within the limit supported by the pml */
tag = (PTPCOLL_TAG_OFFSET + input_args->sequence_num * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask);
/* mark this as a collective tag, to avoid conflict with user-level flags */
tag = -tag;
/* k-nomial tree parameters */
tree_order = exchange_node->tree_order;
pow_k = exchange_node->log_tree_order;
/* let's begin the collective, starting with extra ranks and their
* respective proxies
*/
if( EXTRA_NODE == exchange_node->node_type ) {
/* debug print */
/*fprintf(stderr,"666 \n");*/
/* simply poll for completion */
completed = 0;
/* polling internally */
completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc);
if(completed){
/* go to buffer release */
goto FINISHED;
}else{
/* save state and hop out
* nothing to save here
*/
return ((OMPI_SUCCESS != rc) ? OMPI_ERROR : BCOL_FN_STARTED);
}
}else if ( 0 < exchange_node->n_extra_sources && (-1 == *iteration)) {
/* I am a proxy for someone */
/* Simply poll for completion */
completed = 0;
/* polling internally */
assert( 1 == *active_requests);
completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc);
if(!completed){
/* save state and hop out
* We really do need to block here so set
* the iteration to -1 indicating we need to
* finish this part first
*/
(*iteration) = -1;
return ((OMPI_SUCCESS != rc) ? OMPI_ERROR : BCOL_FN_STARTED);
}
/* I may now proceed to the recursive k - ing phase */
*iteration = 0;
}
/* the ordering here between the extra rank and progress active requests
* is critical
*/
/* extra rank */
if( (pow_k + 1) == *iteration ){
/* finish off the last one */
goto PROGRESS_EXTRA;
}
/* active requests must be completed before continuing on to
* recursive k -ing step
* CAREFUL HERE, IS THIS REALLY WHAT YOU WANT??
*/
if( 0 < (*active_requests) ) {
/* then we have something to progress from last step */
/* debug print */
/*
fprintf(stderr,"%d: entering progress AR: %d iter: %d\n",my_group_index,*active_requests,
*iteration);
*/
completed = 0;
completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc);
if(!completed){
/* save state and hop out
* state hasn't changed
*/
return ((OMPI_SUCCESS != rc) ? OMPI_ERROR : BCOL_FN_STARTED);
}
++(*iteration);
}
/* we start the recursive k - ing phase */
for( i = *iteration; i < pow_k; i++) {
/* nothing changes here */
for(j = 0; j < (tree_order - 1); j++) {
/* send phase */
dst = exchange_node->rank_exchanges[i][j];
if( dst < 0 ){
continue;
}
comm_dst = group_list[dst];
send_offset = exchange_node->payload_info[i][j].s_offset * pack_len;
send_len = exchange_node->payload_info[i][j].s_len * pack_len;
rc = MCA_PML_CALL(isend((void*)((unsigned char *) data_buffer +
send_offset),
send_len,
MPI_BYTE,
comm_dst, tag,
MCA_PML_BASE_SEND_STANDARD, comm,
&(requests[*active_requests])));
if( OMPI_SUCCESS != rc ) {
PTPCOLL_VERBOSE(10,("Failed to isend data"));
return OMPI_ERROR;
}
++(*active_requests);
/* sends are posted */
}
/* Now post the recv's */
for( j = 0; j < (tree_order - 1); j++ ) {
/* recv phase */
src = exchange_node->rank_exchanges[i][j];
if( src < 0 ) {
continue;
}
comm_src = group_list[src];
recv_offset = exchange_node->payload_info[i][j].r_offset * pack_len;
recv_len = exchange_node->payload_info[i][j].r_len * pack_len;
/* post the receive */
rc = MCA_PML_CALL(irecv((void *) ((unsigned char *) data_buffer +
recv_offset),
recv_len,
MPI_BYTE,
comm_src,
tag, comm, &(requests[*active_requests])));
if( OMPI_SUCCESS != rc ) {
PTPCOLL_VERBOSE(10, ("Failed to post ireceive "));
return OMPI_ERROR;
}
++(*active_requests);
}
/* finished all send/recv's now poll for completion before
* continuing to next iteration
*/
completed = 0;
/* make this non-blocking */
completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc);
if(!completed){
/* save state and hop out
* We really do need to block here so set
* the iteration to -1 indicating we need to
* finish this part first
*/
*iteration = i; /* need to pick up here */
return ((OMPI_SUCCESS != rc) ? OMPI_ERROR : BCOL_FN_STARTED);
}
}
/* finish off the last piece, send the data back to the extra */
if( 0 < exchange_node->n_extra_sources ) {
dst = exchange_node->rank_extra_sources_array[0];
comm_dst = group_list[dst];
knt = 0;
for( i = 0; i < group_size; i++){
knt += list_connected[i];
}
rc = MCA_PML_CALL(isend(data_buffer,
pack_len * knt,
MPI_BYTE,
comm_dst, tag,
MCA_PML_BASE_SEND_STANDARD, comm,
&(requests[*active_requests])));
if( OMPI_SUCCESS != rc ) {
PTPCOLL_VERBOSE(10,("Failed to isend data"));
return OMPI_ERROR;
}
++(*active_requests);
/* probe for send completion */
completed = 0;
/* make this non-blocking */
completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc);
if(!completed){
/* save state and hop out
* We really do need to block here so set
* the iteration to pow_k +1 indicating we need to
* finish progressing the last part
*/
*iteration = pow_k + 1;
return ((OMPI_SUCCESS != rc) ? OMPI_ERROR : BCOL_FN_STARTED);
}
}
/* folks need to skip this unless they really are the proxy
* reentering with the intent of progressing the final send
*/
goto FINISHED;
PROGRESS_EXTRA:
/* probe for send completion */
completed = 0;
/* make this non-blocking */
completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc);
if(!completed){
/* save state and hop out
* We really do need to block here so set
* the iteration to pow_k +1 indicating we need to
* finish progressing the last part
*/
return ((OMPI_SUCCESS != rc) ? OMPI_ERROR : BCOL_FN_STARTED);
}
FINISHED:
/* recycle buffer if need be */
return BCOL_FN_COMPLETE;
}
/*
* Register allgather functions to the BCOL function table,
* so they can be selected
*/
int bcol_ptpcoll_allgather_init(mca_bcol_base_module_t *super)
{
mca_bcol_base_coll_fn_comm_attributes_t comm_attribs;
mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs;
comm_attribs.bcoll_type = BCOL_ALLGATHER;
comm_attribs.comm_size_min = 0;
comm_attribs.comm_size_max = 1024 * 1024;
comm_attribs.waiting_semantics = NON_BLOCKING;
inv_attribs.bcol_msg_min = 0;
inv_attribs.bcol_msg_max = 20000; /* range 1 */
inv_attribs.datatype_bitmap = 0xffffffff;
inv_attribs.op_types_bitmap = 0xffffffff;
comm_attribs.data_src = DATA_SRC_KNOWN;
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
bcol_ptpcoll_k_nomial_allgather_init,
bcol_ptpcoll_k_nomial_allgather_progress);
comm_attribs.data_src = DATA_SRC_KNOWN;
inv_attribs.bcol_msg_min = 10000000;
inv_attribs.bcol_msg_max = 10485760; /* range 4 */
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
bcol_ptpcoll_k_nomial_allgather_init,
bcol_ptpcoll_k_nomial_allgather_progress);
return OMPI_SUCCESS;
}
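The registration above binds the same init/progress pair to two message-size ranges. A third range could be added alongside those calls with the same pattern; the sketch below is illustrative only, and the 20001 to 9999999 byte boundaries are assumed values, not something the original code registered.

    /* hypothetical extra range, same registration pattern as above */
    inv_attribs.bcol_msg_min = 20001;
    inv_attribs.bcol_msg_max = 9999999;
    mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
        bcol_ptpcoll_k_nomial_allgather_init,
        bcol_ptpcoll_k_nomial_allgather_progress);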

Diff for this file is not shown because of its large size. Load diff

View file

@ -1,95 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BCOL_PTPCOLL_ALLREDUCE_H
#define MCA_BCOL_PTPCOLL_ALLREDUCE_H
#include "ompi_config.h"
#include "ompi/op/op.h"
#include "ompi/datatype/ompi_datatype.h"
#include "bcol_ptpcoll.h"
#include "bcol_ptpcoll_utils.h"
enum {
BLOCK_OFFSET = 0,
LOCAL_REDUCE_SEG_OFFSET,
BLOCK_COUNT,
SEG_SIZE,
NOFFSETS
};
BEGIN_C_DECLS
int bcol_ptpcoll_allreduce_narraying(mca_bcol_ptpcoll_module_t *ptpcoll_module,
const int buffer_index, void *data_buffer,
struct ompi_op_t *op,
const int count, struct ompi_datatype_t *dtype, const int
buffer_size, const int relative_group_index);
int bcol_ptpcoll_allreduce_narraying_init(bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args);
int bcol_ptpcoll_allreduce_recursivek_scatter_reduce(mca_bcol_ptpcoll_module_t *ptpcoll_module,
const int buffer_index, void *sbuf,
void *rbuf,
struct ompi_op_t *op,
const int count, struct ompi_datatype_t *dtype,
const int relative_group_index,
const int padded_start_byte);
int bcol_ptpcoll_allreduce_knomial_allgather(mca_bcol_ptpcoll_module_t *ptpcoll_module,
const int buffer_index,
void *sbuf,void *rbuf, int count, struct
ompi_datatype_t *dtype,
const int relative_group_index,
const int padded_start_byte);
int bcol_ptpcoll_allreduce_recursivek_scatter_reduce_allgather_init(bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args);
int compute_knomial_allgather_offsets(int group_index, int count, struct
ompi_datatype_t *dtype,int k_radix,int n_exchanges,
int **offsets);
int bcol_ptpcoll_allreduce_recursivek_scatter_reduce_extra(mca_bcol_ptpcoll_module_t *ptpcoll_module,
int buffer_index,
void *sbuf,
void *rbuf,
struct ompi_op_t *op,
const int count, struct ompi_datatype_t *dtype);
int bcol_ptpcoll_allreduce_knomial_allgather_extra(mca_bcol_ptpcoll_module_t *ptpcoll_module,
int buffer_index,
void *sbuf,
void *rbuf,
const int count, struct ompi_datatype_t *dtype);
int bcol_ptpcoll_allreduce_recursivek_scatter_reduce_allgather_extra_init(bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args);
int bcol_ptpcoll_allreduce_init(mca_bcol_base_module_t *super);
#if 0
int knomial_reduce_scatter_offsets(int group_index,int count, struct ompi_datatype_t *dtype, int k_radix,
int n_exchanges, int nth_exchange, size_t *recv_offset, size_t
*block_offset, size_t *block_count, size_t *block_size, size_t
*seg_size);
int allgather_offsets(int group_index,int count, struct ompi_datatype_t *dtype, int k_radix,
int n_exchanges, int nth_exchange, size_t *send_offset, size_t
*block_offset, size_t *block_count, size_t *block_size, size_t
*seg_size);
#endif
END_C_DECLS
#endif

View file

@ -1,933 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/include/ompi/constants.h"
#include "bcol_ptpcoll.h"
#include "bcol_ptpcoll_utils.h"
/*
* Fanin routines - no user data
*/
/********************************************* New Barrier *********************************************/
/*******************************************************************************************************/
/*******************************************************************************************************/
/*************************************** K-nomial ***************************************/
/*****************************************************************************************/
static int bcol_ptpcoll_barrier_recurs_knomial_new(
bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args)
{
/* local variable */
uint64_t sequence_number;
mca_bcol_ptpcoll_module_t *ptpcoll_module =
(mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
netpatterns_k_exchange_node_t *my_exchange_node =
&ptpcoll_module->knomial_exchange_tree;
int rc, k, pair_comm_rank, exchange, completed,
tree_order = my_exchange_node->tree_order, tag,
n_extra_sources = my_exchange_node->n_extra_sources,
n_exchange = my_exchange_node->n_exchanges, num_reqs;
ompi_communicator_t *comm =
ptpcoll_module->super.sbgp_partner_module->group_comm;
int *extra_sources_array = NULL,
**rank_exchanges = my_exchange_node->rank_exchanges;
ompi_request_t **requests;
opal_free_list_item_t *item;
mca_bcol_ptpcoll_collreq_t *collreq;
item = opal_free_list_wait (&ptpcoll_module->collreqs_free);
if (OPAL_UNLIKELY(NULL == item)) {
PTPCOLL_ERROR(("Free list waiting failed."));
return OMPI_ERR_OUT_OF_RESOURCE;
}
collreq = (mca_bcol_ptpcoll_collreq_t *) item;
input_args->bcol_opaque_data = (void *) collreq;
requests = collreq->requests;
/* TAG Calculation */
sequence_number = input_args->sequence_num;
/* Keep tag within the limit supported by the pml */
tag = (PTPCOLL_TAG_OFFSET + sequence_number * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask);
/* Mark this as a collective tag, to avoid conflict with user-level flags */
tag = -tag;
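/*
 * Worked example (values assumed for illustration only, they are not taken
 * from the original headers): with PTPCOLL_TAG_OFFSET == 100,
 * PTPCOLL_TAG_FACTOR == 10 and tag_mask == 0x3fff, sequence numbers
 * 0, 1, 2 map to tags -100, -110, -120.  The mask keeps the magnitude
 * within the range the PML supports, and the negation keeps collective
 * traffic out of the non-negative tag space used by application code.
 */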
if (0 < n_extra_sources) { /* EXCHANGE_NODE case */
collreq->need_toserv_extra = 1;
extra_sources_array = my_exchange_node->rank_extra_sources_array;
/* I will participate in the exchange (of the algorithm) -
* wait for signal from extra process */
for (k = 0; k < n_extra_sources; ++k) {
pair_comm_rank =
ptpcoll_module->super.sbgp_partner_module->group_list[extra_sources_array[k]];
rc = MCA_PML_CALL(irecv(
NULL, 0, MPI_INT,
pair_comm_rank, tag,
comm, &(requests[k])));
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("IRecv failed."));
return rc;
}
}
num_reqs = n_extra_sources;
/* Test for completion */
completed =
mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("Test for all failed."));
return rc;
}
if (!completed) {
collreq->tag = tag;
collreq->num_reqs = num_reqs;
collreq->exchange = 0;
return BCOL_FN_STARTED;
}
} else {
collreq->need_toserv_extra = 0;
}
/* loop over exchange send/recv pairs */
for (exchange = 0; exchange < n_exchange; ++exchange) {
for (k = 0; k < tree_order - 1; ++k) {
/* rank of exchange partner within the group */
pair_comm_rank =
ptpcoll_module->super.sbgp_partner_module->group_list[rank_exchanges[exchange][k]];
assert(2 * ptpcoll_module->k_nomial_radix > (k * 2 + 1));
/* send to partner - we will wait for completion, as send
* completion is at the MPI level, and will not
* incur network level completion costs
*/
rc = MCA_PML_CALL(isend(
NULL, 0, MPI_INT,
pair_comm_rank, tag,
MCA_PML_BASE_SEND_STANDARD,
comm, &(requests[k * 2 + 1])));
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("ISend failed."));
return rc;
}
PTPCOLL_VERBOSE(10, ("Ex %d, K %d send to %d[%d]", exchange, k,
pair_comm_rank, rank_exchanges[exchange][k]));
/* receive from partner */
rc = MCA_PML_CALL(irecv(
NULL, 0, MPI_INT,
pair_comm_rank, tag,
comm, &(requests[k * 2])));
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("IRecv failed."));
return rc;
}
PTPCOLL_VERBOSE(10, ("Ex %d, K %d irecv from %d[%d]", exchange, k,
pair_comm_rank, rank_exchanges[exchange][k]));
}
num_reqs = 2 * (tree_order - 1);
/* Test for completion */
completed =
mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("Test for all failed."));
return rc;
}
if (!completed) {
collreq->tag = tag;
collreq->num_reqs = num_reqs;
collreq->exchange = exchange + 1;
return BCOL_FN_STARTED;
}
}
/* If non power of 2, may need to send message to "extra" proc */
if (0 < n_extra_sources) { /* EXCHANGE_NODE case */
for (k = 0; k < n_extra_sources; ++k) {
pair_comm_rank =
ptpcoll_module->super.sbgp_partner_module->group_list[extra_sources_array[k]];
rc = MCA_PML_CALL(isend(
NULL, 0, MPI_INT,
pair_comm_rank, tag,
MCA_PML_BASE_SEND_STANDARD,
comm, &(requests[k])));
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("ISend failed."));
return rc;
}
}
num_reqs = n_extra_sources;
/* Test for completion */
completed =
mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("Test for all failed."));
return rc;
}
if (!completed) {
collreq->tag = tag;
collreq->num_reqs = num_reqs;
collreq->exchange = n_exchange;
collreq->need_toserv_extra = 0;
return BCOL_FN_STARTED;
}
}
opal_free_list_return (&ptpcoll_module->collreqs_free, (opal_free_list_item_t *) collreq);
return BCOL_FN_COMPLETE;
}
static int bcol_ptpcoll_barrier_recurs_knomial_new_progress(
bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args)
{
/* local variable */
mca_bcol_ptpcoll_module_t *ptpcoll_module =
(mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
netpatterns_k_exchange_node_t *my_exchange_node =
&ptpcoll_module->knomial_exchange_tree;
int rc, k, tag, pair_comm_rank, exchange,
tree_order = my_exchange_node->tree_order, num_reqs,
n_exchange = my_exchange_node->n_exchanges, completed,
n_extra_sources = my_exchange_node->n_extra_sources;
ompi_communicator_t *comm =
ptpcoll_module->super.sbgp_partner_module->group_comm;
int *extra_sources_array,
**rank_exchanges = my_exchange_node->rank_exchanges;
mca_bcol_ptpcoll_collreq_t *collreq =
(mca_bcol_ptpcoll_collreq_t *) input_args->bcol_opaque_data;
ompi_request_t **requests = collreq->requests;
num_reqs = collreq->num_reqs;
/* Test for completion */
completed =
mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("Test for all failed."));
return rc;
}
if (!completed) {
return BCOL_FN_STARTED;
}
/* Continue loop over exchange send/recv pairs */
tag = collreq->tag;
for (exchange = collreq->exchange; exchange < n_exchange; ++exchange) {
for (k = 0; k < tree_order - 1; ++k) {
/* rank of exchange partner within the group */
pair_comm_rank =
ptpcoll_module->super.sbgp_partner_module->group_list[rank_exchanges[exchange][k]];
assert(2 * ptpcoll_module->k_nomial_radix > (k * 2 + 1));
/* send to partner - we will wait for completion, as send
* completion is at the MPI level, and will not
* incur network level completion costs
*/
rc = MCA_PML_CALL(isend(
NULL, 0, MPI_INT,
pair_comm_rank, tag,
MCA_PML_BASE_SEND_STANDARD,
comm, &(requests[k * 2 + 1])));
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("ISend failed."));
return rc;
}
PTPCOLL_VERBOSE(10, ("Ex %d, K %d send to %d[%d]", exchange, k,
pair_comm_rank, rank_exchanges[exchange][k]));
/* receive from partner */
rc = MCA_PML_CALL(irecv(
NULL, 0, MPI_INT,
pair_comm_rank, tag,
comm, &(requests[k * 2])));
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("IRecv failed."));
return rc;
}
PTPCOLL_VERBOSE(10, ("Ex %d, K %d irecv from %d[%d]", exchange, k,
pair_comm_rank, rank_exchanges[exchange][k]));
}
num_reqs = 2 * (tree_order - 1);
/* Test for completion */
completed =
mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("Test for all failed."));
return rc;
}
if (!completed) {
collreq->num_reqs = num_reqs;
collreq->exchange = exchange + 1;
return BCOL_FN_STARTED;
}
}
/* If non power of 2, may need to send message to "extra" proc */
if (collreq->need_toserv_extra) { /* EXCHANGE_NODE case */
extra_sources_array = my_exchange_node->rank_extra_sources_array;
for (k = 0; k < n_extra_sources; ++k) {
pair_comm_rank =
ptpcoll_module->super.sbgp_partner_module->group_list[extra_sources_array[k]];
rc = MCA_PML_CALL(isend(
NULL, 0, MPI_INT,
pair_comm_rank, tag,
MCA_PML_BASE_SEND_STANDARD,
comm, &(requests[k])));
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("ISend failed."));
return rc;
}
}
num_reqs = n_extra_sources;
/* Test for completion */
completed =
mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("Test for all failed."));
return rc;
}
if (!completed) {
collreq->num_reqs = num_reqs;
collreq->exchange = n_exchange;
collreq->need_toserv_extra = 0;
return BCOL_FN_STARTED;
}
}
return BCOL_FN_COMPLETE;
}
/****************************************** Extra node Barrier ******************************************/
static int bcol_ptpcoll_barrier_recurs_knomial_extra_new(
bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args)
{
/* local variable */
uint64_t sequence_number;
int rc, tag, pair_comm_rank,
completed, num_reqs = 2;
mca_bcol_ptpcoll_module_t *ptpcoll_module =
(mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
netpatterns_k_exchange_node_t *my_exchange_node =
&ptpcoll_module->knomial_exchange_tree;
ompi_communicator_t *comm =
ptpcoll_module->super.sbgp_partner_module->group_comm;
int *extra_sources_array = my_exchange_node->rank_extra_sources_array;
ompi_request_t **requests;
opal_free_list_item_t *item;
mca_bcol_ptpcoll_collreq_t *collreq;
item = opal_free_list_wait (&ptpcoll_module->collreqs_free);
if (OPAL_UNLIKELY(NULL == item)) {
PTPCOLL_ERROR(("Free list waiting failed."));
return OMPI_ERR_OUT_OF_RESOURCE;
}
collreq = (mca_bcol_ptpcoll_collreq_t *) item;
input_args->bcol_opaque_data = (void *) collreq;
requests = collreq->requests;
/* TAG Calculation */
sequence_number = input_args->sequence_num;
/* Keep the tag within the limit supported by the PML */
tag = (PTPCOLL_TAG_OFFSET + sequence_number * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask);
/* Mark this as a collective tag, to avoid conflict with user-level flags */
tag = -tag;
pair_comm_rank =
ptpcoll_module->super.sbgp_partner_module->group_list[extra_sources_array[0]];
rc = MCA_PML_CALL(isend(
NULL, 0, MPI_INT,
pair_comm_rank, tag,
MCA_PML_BASE_SEND_STANDARD,
comm, &(requests[0])));
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("ISend failed."));
return rc;
}
rc = MCA_PML_CALL(irecv(
NULL, 0, MPI_INT,
pair_comm_rank, tag,
comm, &(requests[1])));
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("IRecv failed."));
return rc;
}
/* Test for completion */
completed =
mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("Test for all failed."));
return rc;
}
if (!completed) {
return BCOL_FN_STARTED;
}
opal_free_list_return (&ptpcoll_module->collreqs_free, (opal_free_list_item_t *) collreq);
return BCOL_FN_COMPLETE;
}
/*************************************** Recursive-Doubling ***************************************/
/**************************************************************************************************/
static int bcol_ptpcoll_barrier_recurs_dbl_new(
bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args)
{
/* local variable */
uint64_t sequence_number;
mca_bcol_ptpcoll_module_t *ptp_module =
(mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
ompi_communicator_t *comm = ptp_module->super.sbgp_partner_module->group_comm;
int rc, my_extra_partner_comm_rank = 0, exchange, completed,
pair_comm_rank, pair_rank, delta, tag, num_reqs = 0,
my_rank = ptp_module->super.sbgp_partner_module->my_index,
n_exchange = ptp_module->super.sbgp_partner_module->n_levels_pow2;
ompi_request_t **requests;
opal_free_list_item_t *item;
mca_bcol_ptpcoll_collreq_t *collreq;
item = opal_free_list_wait (&ptp_module->collreqs_free);
if (OPAL_UNLIKELY(NULL == item)) {
PTPCOLL_ERROR(("Free list waiting failed."));
return OMPI_ERR_OUT_OF_RESOURCE;
}
collreq = (mca_bcol_ptpcoll_collreq_t *) item;
input_args->bcol_opaque_data = (void *) collreq;
assert(PTPCOLL_EXTRA != ptp_module->pow_2type);
requests = collreq->requests;
/* TAG Calculation */
sequence_number = input_args->sequence_num;
/* keep the tag within the limit supported by the PML */
tag = (PTPCOLL_TAG_OFFSET + sequence_number * PTPCOLL_TAG_FACTOR) & (ptp_module->tag_mask);
/* mark this as a collective tag, to avoid conflict with user-level flags */
tag = -tag;
if (PTPCOLL_PROXY == ptp_module->pow_2type) {
/* I will participate in the exchange - wait for signal from extra
** process */
/*
* recv from extra rank - my_extra_partner_comm_rank
* can use blocking recv, as no other communications
* need to take place.
*/
my_extra_partner_comm_rank =
ptp_module->super.sbgp_partner_module->group_list[ptp_module->proxy_extra_index];
collreq->need_toserv_extra = 1;
collreq->extra_partner_rank = my_extra_partner_comm_rank;
rc = MCA_PML_CALL(irecv(NULL, 0, MPI_INT,
my_extra_partner_comm_rank, tag, comm,
&(requests[0])));
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("IRecv failed."));
return rc;
}
completed = mca_bcol_ptpcoll_test_for_match(&requests[0], &rc);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("Test for irecv failed."));
return rc;
}
if (!completed) {
collreq->tag = tag;
collreq->num_reqs = 1;
collreq->exchange = 0;
return BCOL_FN_STARTED;
}
} else {
collreq->need_toserv_extra = 0;
}
/* Loop over exchange send/recv pairs */
delta = 1;
for (exchange = 0; exchange < n_exchange; ++exchange) {
/* rank of exchange partner within the group */
pair_rank = my_rank ^ delta;
/* rank within the communicator */
pair_comm_rank =
ptp_module->super.sbgp_partner_module->group_list[pair_rank];
/* send to partner - we will wait for completion, as send
* completion is at the MPI level, and will not
* incur network level completion costs
*/
rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT,
pair_comm_rank, tag,
MCA_PML_BASE_SEND_STANDARD, comm,
&(requests[0])));
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("ISend failed."));
return rc;
}
++num_reqs;
/* receive from partner */
rc = MCA_PML_CALL(irecv(NULL, 0, MPI_INT,
pair_comm_rank, tag, comm,
&(requests[1])));
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("IRecv failed."));
return rc;
}
++num_reqs;
PTPCOLL_VERBOSE(5, ("exchange - %d, pair_rank - %d, pair_comm_rank - %d",
exchange, pair_rank, pair_comm_rank));
/* test for completion */
completed =
mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("Test for all failed."));
return rc;
}
if (!completed) {
collreq->tag = tag;
collreq->num_reqs = num_reqs;
collreq->exchange = exchange + 1;
assert(collreq->exchange >= 0);
return BCOL_FN_STARTED;
}
delta <<= 1; /* delta *= 2 */
}
if (PTPCOLL_PROXY == ptp_module->pow_2type) {
/* send - let the extra rank know that we are done */
rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT,
my_extra_partner_comm_rank, tag,
MCA_PML_BASE_SEND_STANDARD, comm,
&(requests[0])));
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("ISend failed."));
return rc;
}
completed = mca_bcol_ptpcoll_test_for_match(&requests[0], &rc);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("Test for isend failed."));
return rc;
}
if (!completed) {
collreq->tag = tag;
collreq->num_reqs = 1;
collreq->need_toserv_extra = 0;
collreq->exchange = n_exchange;
return BCOL_FN_STARTED;
}
}
opal_free_list_return (&ptp_module->collreqs_free, (opal_free_list_item_t *) collreq);
return BCOL_FN_COMPLETE;
}
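/*
 * Illustrative sketch of the pairing used above (example only, not part of
 * the original file): at round r each rank exchanges a zero-byte message
 * with (my_rank XOR 2^r).  With 8 ranks, rank 3 talks to 2, then 1, then 7;
 * after log2(8) = 3 rounds every rank has transitively synchronized with
 * every other rank, which is the barrier condition.
 */
#if 0 /* example only - never compiled */
static void recurs_dbl_pairing_example(int my_rank, int group_size)
{
    /* assumes group_size is an exact power of two */
    for (int delta = 1; delta < group_size; delta <<= 1) {
        int pair_rank = my_rank ^ delta; /* partner for this round */
        PTPCOLL_VERBOSE(10, ("delta %d: rank %d pairs with rank %d",
                             delta, my_rank, pair_rank));
    }
}
#endif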
static int bcol_ptpcoll_barrier_recurs_dbl_new_progress(
bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args)
{
/* local variable */
mca_bcol_ptpcoll_module_t *ptp_module =
(mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
ompi_communicator_t *comm = ptp_module->super.sbgp_partner_module->group_comm;
int rc, exchange, pair_comm_rank, tag,
pair_rank, delta, num_reqs, completed,
my_rank = ptp_module->super.sbgp_partner_module->my_index,
n_exchange = ptp_module->super.sbgp_partner_module->n_levels_pow2;
ompi_request_t **requests;
mca_bcol_ptpcoll_collreq_t *collreq =
(mca_bcol_ptpcoll_collreq_t *) input_args->bcol_opaque_data;
num_reqs = collreq->num_reqs;
requests = collreq->requests;
/* test for completion */
completed =
mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("Test for all failed."));
return rc;
}
if (!completed) {
return BCOL_FN_STARTED;
}
assert(PTPCOLL_EXTRA != ptp_module->pow_2type);
/* Continue loop over exchange send/recv pairs */
num_reqs = 0;
tag = collreq->tag;
exchange = collreq->exchange;
assert(exchange >= 0);
delta = 1 << exchange;
for (; exchange < n_exchange; ++exchange) {
/* rank of exchange partner within the group */
pair_rank = my_rank ^ delta;
/* rank within the communicator */
pair_comm_rank =
ptp_module->super.sbgp_partner_module->group_list[pair_rank];
/* send to partner - we will wait for completion, as send
* completion is at the MPI level, and will not
* incur network level completion costs
*/
rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT,
pair_comm_rank, tag,
MCA_PML_BASE_SEND_STANDARD, comm,
&(requests[0])));
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("ISend failed."));
return rc;
}
++num_reqs;
/* receive from partner */
rc = MCA_PML_CALL(irecv(NULL, 0, MPI_INT,
pair_comm_rank, tag, comm,
&(requests[1])));
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("IRecv failed."));
return rc;
}
++num_reqs;
PTPCOLL_VERBOSE(5, ("exchange - %d, pair_rank - %d, pair_comm_rank - %d",
exchange, pair_rank, pair_comm_rank));
/* test for completion */
completed =
mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("Test for all failed."));
return rc;
}
if (!completed) {
collreq->num_reqs = num_reqs;
collreq->exchange = exchange + 1;
assert(collreq->exchange >= 0);
return BCOL_FN_STARTED;
}
delta <<= 1; /* delta *= 2 */
}
/* if non power of 2, may need to send message to "extra" proc */
if (collreq->need_toserv_extra) {
/* send - let the extra rank know that we are done */
rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT,
collreq->extra_partner_rank, tag,
MCA_PML_BASE_SEND_STANDARD, comm,
&(requests[0])));
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("ISend failed."));
return rc;
}
completed = mca_bcol_ptpcoll_test_for_match(&requests[0], &rc);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("Test for isend failed."));
return rc;
}
if (!completed) {
collreq->num_reqs = 1;
collreq->need_toserv_extra = 0;
collreq->exchange = n_exchange;
return BCOL_FN_STARTED;
}
}
return BCOL_FN_COMPLETE;
}
/****************************************** Extra node Barrier ******************************************/
static int bcol_ptpcoll_barrier_recurs_dbl_extra_new(
bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args)
{
/* local variable */
uint64_t sequence_number;
int rc, completed, num_reqs = 2,
tag, my_extra_partner_comm_rank;
ompi_request_t **requests;
opal_free_list_item_t *item;
mca_bcol_ptpcoll_collreq_t *collreq;
mca_bcol_ptpcoll_module_t *ptp_module =
(mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
ompi_communicator_t *comm = ptp_module->super.sbgp_partner_module->group_comm;
item = opal_free_list_wait (&ptp_module->collreqs_free);
if (OPAL_UNLIKELY(NULL == item)) {
PTPCOLL_ERROR(("Free list waiting failed."));
return OMPI_ERR_OUT_OF_RESOURCE;
}
collreq = (mca_bcol_ptpcoll_collreq_t *) item;
input_args->bcol_opaque_data = (void *) collreq;
requests = collreq->requests;
/* TAG Calculation */
sequence_number = input_args->sequence_num;
/* Keep the tag within the limit supported by the PML */
tag = (PTPCOLL_TAG_OFFSET + sequence_number * PTPCOLL_TAG_FACTOR) & (ptp_module->tag_mask);
/* mark this as a collective tag, to avoid conflict with user-level flags */
tag = -tag;
/* I will not participate in the exchange - just "register" here by
 * signaling my proxy rank that I have arrived */
my_extra_partner_comm_rank =
ptp_module->super.sbgp_partner_module->group_list[ptp_module->proxy_extra_index];
rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT,
my_extra_partner_comm_rank, tag,
MCA_PML_BASE_SEND_STANDARD, comm,
&(requests[0])));
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("Send failed."));
return rc;
}
/* Recv signal that the rest are done - my_extra_partner_comm_rank */
rc = MCA_PML_CALL(irecv(NULL, 0, MPI_INT,
my_extra_partner_comm_rank, tag, comm,
&(requests[1])));
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("IRecv failed."));
return rc;
}
/* Test for completion */
completed =
mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("Test for all failed."));
return rc;
}
if (!completed) {
return BCOL_FN_STARTED;
}
opal_free_list_return (&ptp_module->collreqs_free, (opal_free_list_item_t *) collreq);
return BCOL_FN_COMPLETE;
}
/* We use the same progress function for both cases (recursive doubling and K-nomial) */
static int bcol_ptpcoll_barrier_extra_node_progress(
bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args)
{
/* local variable */
ompi_request_t **requests;
int rc, completed, num_reqs = 2;
mca_bcol_ptpcoll_collreq_t *collreq =
(mca_bcol_ptpcoll_collreq_t *) input_args->bcol_opaque_data;
requests = collreq->requests;
/* test for completion */
completed =
mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
PTPCOLL_ERROR(("Test for all failed."));
return rc;
}
if (!completed) {
return BCOL_FN_STARTED;
}
return BCOL_FN_COMPLETE;
}
static int mca_bcol_ptpcoll_barrier_setup(mca_bcol_base_module_t *super, int bcoll_type)
{
netpatterns_k_exchange_node_t *my_exchange_node;
mca_bcol_ptpcoll_module_t * ptpcoll_module =
(mca_bcol_ptpcoll_module_t *) super;
mca_bcol_base_coll_fn_comm_attributes_t comm_attribs;
mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs;
comm_attribs.bcoll_type = bcoll_type;
comm_attribs.comm_size_min = 0;
comm_attribs.comm_size_max = 1024 * 1024;
comm_attribs.waiting_semantics = NON_BLOCKING;
inv_attribs.bcol_msg_min = 0;
inv_attribs.bcol_msg_max = 20000; /* range 1 */
inv_attribs.datatype_bitmap = 0xffffffff;
inv_attribs.op_types_bitmap = 0xffffffff;
comm_attribs.data_src = DATA_SRC_KNOWN;
switch(mca_bcol_ptpcoll_component.barrier_alg) {
case 1:
if (PTPCOLL_EXTRA == ptpcoll_module->pow_2type) {
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
bcol_ptpcoll_barrier_recurs_dbl_extra_new,
bcol_ptpcoll_barrier_extra_node_progress);
break;
}
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
bcol_ptpcoll_barrier_recurs_dbl_new,
bcol_ptpcoll_barrier_recurs_dbl_new_progress);
break;
case 2:
my_exchange_node = &ptpcoll_module->knomial_exchange_tree;
if (my_exchange_node->n_extra_sources > 0 &&
EXTRA_NODE == my_exchange_node->node_type) {
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
bcol_ptpcoll_barrier_recurs_knomial_extra_new,
bcol_ptpcoll_barrier_extra_node_progress);
break;
}
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
bcol_ptpcoll_barrier_recurs_knomial_new,
bcol_ptpcoll_barrier_recurs_knomial_new_progress);
break;
default:
PTPCOLL_ERROR(("Wrong barrier_alg flag value."));
}
return OMPI_SUCCESS;
}
int mca_bcol_ptpcoll_memsync_init(mca_bcol_base_module_t *super)
{
return mca_bcol_ptpcoll_barrier_setup(super, BCOL_SYNC);
}
int bcol_ptpcoll_barrier_init(mca_bcol_base_module_t *super)
{
return mca_bcol_ptpcoll_barrier_setup(super, BCOL_BARRIER);
}

The diff for this file is not shown because it is too large.

View file

@ -1,868 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BCOL_PTPCOLL_BCAST_H
#define MCA_BCOL_PTPCOLL_BCAST_H
#include "ompi_config.h"
#include "bcol_ptpcoll.h"
#include "bcol_ptpcoll_utils.h"
BEGIN_C_DECLS
int bcol_ptpcoll_bcast_init(mca_bcol_base_module_t *super);
int bcol_ptpcoll_bcast_k_nomial_anyroot (bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args);
int bcol_ptpcoll_bcast_k_nomial_anyroot_progress(bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args);
int bcol_ptpcoll_bcast_k_nomial_known_root(bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args);
int bcol_ptpcoll_bcast_k_nomial_known_root_progress(bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args);
int bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot(bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args);
int bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot_progress(bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args);
int bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot_extra(bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args);
int bcol_ptpcoll_bcast_binomial_scatter_gatther_anyroot_extra_progress(bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args);
int bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root(bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args);
int bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root_progress(bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args);
int bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root_extra(bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args);
int bcol_ptpcoll_bcast_binomial_scatter_gatther_known_root_extra_progress(bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args);
/* macros */
#define K_NOMIAL_ROOT_BCAST_NB_BINOMIAL_SCATTER( \
radix_mask_pow, \
my_group_index, group_size, group_list, \
data_buffer, segment_size, count, tag, \
comm, send_requests, num_pending_sends) \
do { \
int rc = OMPI_SUCCESS; \
int dst; \
int comm_dst; \
int send_size; \
int send_offset; \
int delta; \
int dst_boundary_rank; \
int radix_mask = radix_mask_pow >= 0 ? 1 << radix_mask_pow : 0; \
\
while(radix_mask_pow >= 0) { \
/* For each level of tree, do sends */ \
dst = my_group_index ^ radix_mask; \
comm_dst = group_list[dst]; \
\
dst_boundary_rank = dst & ((~(int)0) << (radix_mask_pow)); \
\
send_offset = segment_size * dst_boundary_rank; \
/* Pasha: make sure that we handle the corner cases */ \
delta = count - send_offset; \
if (delta <= 0) { \
send_size = 0; /* we still have to send something, otherwise the peer will hang */ \
} else { \
/* the tail case */ \
send_size = (int) \
(delta - (int)segment_size * radix_mask) < 0 ? delta : \
(int)segment_size * radix_mask; \
} \
\
/* Non blocking send .... */ \
PTPCOLL_VERBOSE(9 , \
("Bcast p2s, Isend to %d[%d],count %d,tag %d,addr %p [%p] send_size %d,send_offset %d, radix %d %d",\
dst, comm_dst, count, tag, \
data_buffer, (void *)((unsigned char *)data_buffer + (size_t)send_offset), \
send_size, \
send_offset, \
radix_mask, \
radix_mask_pow \
)); \
rc = MCA_PML_CALL(isend((void *)((unsigned char *)data_buffer + (size_t)send_offset), \
send_size, MPI_BYTE, \
comm_dst, tag, \
MCA_PML_BASE_SEND_STANDARD, comm, \
&(send_requests[*num_pending_sends]))); \
PTPCOLL_VERBOSE(10, ("send request addr is %p", send_requests[*num_pending_sends])); \
if( OMPI_SUCCESS != rc ) { \
PTPCOLL_VERBOSE(10, ("Failed to isend data")); \
return OMPI_ERROR; \
} \
++(*num_pending_sends); \
radix_mask >>= 1; \
radix_mask_pow--; \
} \
} while(0)
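/*
 * Worked example for the scatter above (numbers assumed for illustration):
 * a group of 8 ranks, my_group_index == 0, radix_mask_pow == 2 on entry
 * (radix_mask == 4), segment size S.  The loop then performs:
 *   radix_mask 4: dst = 0^4 = 4, dst_boundary_rank = 4, send segments 4..7
 *                 (offset 4*S, up to 4*S bytes)
 *   radix_mask 2: dst = 0^2 = 2, dst_boundary_rank = 2, send segments 2..3
 *                 (offset 2*S, up to 2*S bytes)
 *   radix_mask 1: dst = 0^1 = 1, dst_boundary_rank = 1, send segment 1
 *                 (offset S, up to S bytes)
 * The delta check trims (or zeroes) the last block when count does not
 * reach the segment boundary.
 */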
#define NARRAY_SCATTER_NB(narray_node, process_shift, group_size, \
data_buffer, base_block_size, count, tag, comm, send_requests, \
num_pending_sends) \
do { \
int n, rc = OMPI_SUCCESS; \
int dst; \
int comm_dst; \
int offset; \
int size_count = count; \
\
/* Send out data to all relevant children */ \
for (n = 0; n < narray_node->n_children && size_count > 0; n++) { \
\
dst = narray_node->children_ranks[n] + process_shift; \
if (dst >= group_size) { \
dst -= group_size; \
} \
\
comm_dst = group_list[dst]; \
offset = n * base_block_size; \
size_count -= base_block_size; \
if (OPAL_UNLIKELY(size_count < 0)) { \
count = base_block_size + size_count; \
} else { \
count = base_block_size; \
} \
\
/* Non blocking send .... */ \
PTPCOLL_VERBOSE(9 , ("Bcast, Isend data to %d[%d], count %d, tag %d, addr %p", \
dst, comm_dst, count, tag, \
data_buffer)); \
rc = MCA_PML_CALL(isend((void *)((char *)data_buffer + (size_t)offset), count, MPI_BYTE,\
comm_dst, tag, \
MCA_PML_BASE_SEND_STANDARD, comm, \
&(send_requests[*num_pending_sends]))); \
if( OMPI_SUCCESS != rc ) { \
PTPCOLL_VERBOSE(10, ("Failed to isend data")); \
return OMPI_ERROR; \
} \
++(*num_pending_sends); \
} \
} while(0)
#define NARRAY_SCATTER_B(narray_node, process_shift, group_size, \
data_buffer, base_block_size, count, tag, comm, send_requests, \
num_pending_sends, completed) \
do { \
NARRAY_SCATTER_NB(narray_node, process_shift, group_size, \
data_buffer, base_block_size, count, tag, comm, send_requests, \
num_pending_sends); \
if (*num_pending_sends > 0) { \
completed = mca_bcol_ptpcoll_test_all_for_match(num_pending_sends, send_requests, &rc); \
if (OMPI_SUCCESS != rc) { \
return OMPI_ERROR; \
} \
} else { \
completed = 1; \
} \
} while (0)
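/*
 * Worked example for NARRAY_SCATTER_NB/_B (numbers assumed for
 * illustration): with count == 10, base_block_size == 4 and a narray_node
 * that has 3 children, child 0 receives bytes [0,4), child 1 receives
 * [4,8) and child 2 receives the 2-byte remainder [8,10); size_count
 * going negative is what triggers the short final send.
 */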
#define CHECK_IF_ROOT_OR_VROOT(module, i) \
(module->pow_2 == module->ml_mem.ml_buf_desc[i].radix_mask_pow)
/* inline functions */
static inline __opal_attribute_always_inline__
int bcol_ptpcoll_bcast_binomial_scatter_gatther_send_extra(
mca_bcol_ptpcoll_module_t *ptpcoll_module,
void *data_buffer, int count, int tag,
int extra_peer, ompi_communicator_t *comm,
int *active_requests, ompi_request_t **requests)
{
int rc = OMPI_SUCCESS;
int completed = 0;
int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list;
/* tag is -1 already */
/* send all of the data to the extra peer */
PTPCOLL_VERBOSE(10, ("bcol_ptpcoll_bcast_binomial_scatter_gatther_send_extra to %d tag %d",
extra_peer, tag));
rc = MCA_PML_CALL(isend(data_buffer, count, MPI_BYTE,
group_list[extra_peer], tag,
MCA_PML_BASE_SEND_STANDARD, comm,
&(requests[*active_requests])));
if( OMPI_SUCCESS != rc ) {
PTPCOLL_VERBOSE(10, ("Failed to send data"));
return OMPI_ERROR;
}
++(*active_requests);
completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc);
if (0 == completed) {
PTPCOLL_VERBOSE(10, ("PR Extra send was not completed"));
/* we have to store the iteration number somewhere */
return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED;
}
return BCOL_FN_COMPLETE;
}
static inline __opal_attribute_always_inline__
int bcol_ptpcoll_send_n_extra(mca_bcol_ptpcoll_module_t *ptpcoll_module,
void *data_buffer, int count, int tag,
int *extra_peers, int num_peers, int skip,
ompi_communicator_t *comm,
int *active_requests, ompi_request_t **requests)
{
int rc = OMPI_SUCCESS;
int completed = 0;
int i;
int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list;
/* send all of the data to each extra peer */
for (i = 0; i < num_peers; i++) {
PTPCOLL_VERBOSE(10, ("send_n_extra to %d tag %d",
extra_peers[i], tag));
if (extra_peers[i] == skip) {
PTPCOLL_VERBOSE(10, ("SKIP"));
continue;
}
rc = MCA_PML_CALL(isend(data_buffer, count, MPI_BYTE,
group_list[extra_peers[i]], tag,
MCA_PML_BASE_SEND_STANDARD, comm,
&(requests[*active_requests])));
if( OMPI_SUCCESS != rc ) {
PTPCOLL_VERBOSE(10, ("Failed to send data"));
return OMPI_ERROR;
}
++(*active_requests);
}
completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc);
if (0 == completed) {
PTPCOLL_VERBOSE(10, ("PR Extra send was not completed"));
/* we have to store the iteration number somewhere */
return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED;
}
return BCOL_FN_COMPLETE;
}
static inline __opal_attribute_always_inline__
int bcol_ptpcoll_bcast_binomial_gather_anyroot(mca_bcol_ptpcoll_module_t *ptpcoll_module,
int buffer_index, void *data_buffer, int count, int base_block_size)
{
int rc;
int completed = 0; /* not completed */
int *active_requests =
&(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests);
int i;
int *iteration =
&ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration;
ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm;
ompi_request_t **requests =
ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests;
int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index;
void *curr_data_sbuffer = NULL,
*curr_data_rbuffer = NULL;
int radix_mask_pow = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask_pow;
int delta;
int tag = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag - 1;
int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list;
PTPCOLL_VERBOSE(10, ("bcol_ptpcoll_bcast_binomial_gather_anyroot %d %d %d",
ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration,
ptpcoll_module->pow_2,
1 << ptpcoll_module->pow_2));
/* we assume that iteration #iteration has already been completed by the probe */
for (i = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration;
i < ptpcoll_module->pow_2; i++) {
int pow2 = 1 << i;
int peer_index = my_group_index ^ pow2;
int comm_rank = group_list[peer_index];
int slen, rlen,
send_offset,
recv_offset;
if (i > radix_mask_pow) {
/* *active_requests = 0; */
/* send - receive data from the peer */
slen = rlen = pow2 * base_block_size;
send_offset = base_block_size * ((my_group_index) & ((~(int)0) << i));
recv_offset = base_block_size * ((peer_index) & ((~(int)0) << i));
curr_data_sbuffer = (void *)((unsigned char *)data_buffer + send_offset);
curr_data_rbuffer = (void *)((unsigned char *)data_buffer + recv_offset);
delta = count - recv_offset;
if (delta > 0) {
if (delta < rlen) {
/* recv the tail */
rlen = delta;
}
PTPCOLL_VERBOSE(10, ("[ pow2 %d, radix %d ] recv data %p (offset %d) , len %d , dest %d",
pow2,
1 << ptpcoll_module->pow_2,
curr_data_rbuffer,
recv_offset,
rlen,
comm_rank));
rc = MCA_PML_CALL(irecv(curr_data_rbuffer, rlen, MPI_BYTE,
comm_rank, tag, comm, &requests[*active_requests]));
if( OMPI_SUCCESS != rc ) {
PTPCOLL_VERBOSE(10, ("Failed to receive data"));
return OMPI_ERROR;
}
++(*active_requests);
}
delta = count - send_offset;
if (delta > 0) {
if (delta < slen) {
/* send only the tail */
slen = delta;
}
PTPCOLL_VERBOSE(10, ("[ pow2 %d, radix %d ] sending data %p (offset %d) , len %d , dest %d",
pow2,
1 << ptpcoll_module->pow_2,
curr_data_sbuffer,
send_offset,
slen,
comm_rank));
rc = MCA_PML_CALL(isend(curr_data_sbuffer, slen, MPI_BYTE,
comm_rank, tag,
MCA_PML_BASE_SEND_STANDARD, comm,
&(requests[*active_requests])));
if( OMPI_SUCCESS != rc ) {
PTPCOLL_VERBOSE(10, ("Failed to send data"));
return OMPI_ERROR;
}
++(*active_requests);
}
if (*active_requests > 0) {
completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc);
if (0 == completed) {
*iteration = i;
/* we have to store the iteration number somewhere */
return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED;
}
}
} else if (i == radix_mask_pow) {
/* only receive data */
rlen = pow2 * base_block_size;
recv_offset = base_block_size * ((peer_index) & ((~(int)0) << i));
curr_data_rbuffer = (void *)((unsigned char *)data_buffer + recv_offset);
delta = count - recv_offset;
if (0 >= delta) {
/* we have nothing to receive, skip the iteration */
continue;
}
if (delta < rlen) {
/* recv the tail */
rlen = delta;
}
/* receive data from the peer */
PTPCOLL_VERBOSE(10, ("[ pow2 %d, radix %d ] recv data %p (offset %d) , len %d , dest %d",
pow2,
1 << ptpcoll_module->pow_2,
curr_data_rbuffer,
recv_offset,
rlen,
comm_rank));
rc = MCA_PML_CALL(irecv(curr_data_rbuffer, rlen, MPI_BYTE,
comm_rank, tag, comm, &(requests[*active_requests])));
if( OMPI_SUCCESS != rc ) {
PTPCOLL_VERBOSE(10, ("Failed to receive data"));
return OMPI_ERROR;
}
++(*active_requests);
completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc);
if (0 == completed) {
*iteration = i;
PTPCOLL_VERBOSE(10, ("Recv was not completed"));
/* we have to store the iteration number somewhere */
return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED;
}
PTPCOLL_VERBOSE(10, ("Recv was completed"));
} else if (i < radix_mask_pow) {
/* Only send data */
slen = pow2 * base_block_size;
send_offset = base_block_size * ((my_group_index) & ((~(int)0) << i));
curr_data_sbuffer = (void *)((unsigned char *)data_buffer + send_offset);
delta = count - send_offset;
if (0 >= delta) {
/* we have nothing to send, skip the iteration */
continue;
}
if (delta < slen) {
slen = delta;
}
PTPCOLL_VERBOSE(10, ("[ pow2 %d, radix %d ] sending data %p (offset %d) , len %d , dest %d",
pow2,
1 << ptpcoll_module->pow_2,
curr_data_sbuffer,
send_offset,
slen,
comm_rank));
rc = MCA_PML_CALL(isend(curr_data_sbuffer, slen, MPI_BYTE,
comm_rank, tag, MCA_PML_BASE_SEND_STANDARD, comm,
&(requests[*active_requests])));
if( OMPI_SUCCESS != rc ) {
PTPCOLL_VERBOSE(10, ("Failed to send data"));
return OMPI_ERROR;
}
++(*active_requests);
completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc);
if (0 == completed) {
*iteration = i;
/* we have to store the iteration number somewhere */
return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED;
}
}
}
return BCOL_FN_COMPLETE;
}
static inline __opal_attribute_always_inline__
int bcol_ptpcoll_bcast_binomial_probe_and_scatter_anyroot(mca_bcol_ptpcoll_module_t *ptpcoll_module,
int buffer_index, void *data_buffer, int count, int base_block_size)
{
mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component;
int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list;
int rc;
int completed = 0; /* not completed */
int comm_root;
int i;
int *radix_mask_pow =
&(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask_pow);
int *active_requests =
&(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests);
ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm;
ompi_status_public_t status;
ompi_request_t **requests =
ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests;
int pow2_group_size = ptpcoll_module->pow_2num;
int pow2_distance;
int my_left_boundary_rank;
int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index;
int group_root_index = 0;
void *curr_data_buffer = NULL;
int tag =
ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag;
int recv_count = 0;
int *coll_status =
&ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].status;
assert(0 == *active_requests);
PTPCOLL_VERBOSE(10, ("Running bcol_ptpcoll_bcast_binomial_probe_and_scatter_anyroot"));
for (i = 0; i < cm->num_to_probe &&
0 == completed; i++) {
MCA_PML_CALL(iprobe(MPI_ANY_SOURCE, tag,
comm, &completed, &status));
PTPCOLL_VERBOSE(10, ("Bcast, iprobe tag %d",
tag));
}
/* the function always returns OMPI_SUCCESS, so we don't check return code */
if (0 == completed) {
PTPCOLL_VERBOSE(10, ("IPROBE was not matched"));
/* No data was received, return no match error */
return BCOL_FN_NOT_STARTED;
}
comm_root = status.MPI_SOURCE;
PTPCOLL_VERBOSE(9, ("IPROBE was matched, root of the data on communicator is %d", comm_root));
/* For proxy we have to check if we got something from extra node */
if (PTPCOLL_PROXY & ptpcoll_module->pow_2type) {
if (group_list[ptpcoll_module->proxy_extra_index] == comm_root) {
PTPCOLL_VERBOSE(9, ("IPROBE was matched, root of the data on communicator is extra node %d",
comm_root));
/* scatter the data among other peer in the pow2 group */
*radix_mask_pow = ptpcoll_module->pow_2;
pow2_distance = ptpcoll_module->pow_2 - 1;
curr_data_buffer = data_buffer;
recv_count = count;
goto PR_SCATTHER;
}
}
/* Find group index for communicator root of the data */
group_root_index = get_group_index_and_distance_for_binomial
(my_group_index, comm_root, pow2_group_size, group_list, &pow2_distance);
if (OPAL_UNLIKELY(group_root_index < 0)) {
PTPCOLL_ERROR(("Fatal error, no group root index found, my id %d, pow2_g_size %d comm_root %d",
my_group_index, pow2_group_size, comm_root));
return OMPI_ERROR;
}
PTPCOLL_VERBOSE(10, ("Group root index is %d distance is %d",
group_root_index, pow2_distance));
/* Use group_root_index to calculate the */
/* Post receive that will fetch the data */
/* Pasha: Who is packing data ?
Should I assume that we get contiguous buffer ?
Or should I pack by myself
===================================================================================================
=== On this stage I assume that data is contiguous. So I use MPI_BYTE datatype and COUNT = size ===
===================================================================================================
*/
recv_count = base_block_size * (1 << pow2_distance); /* we may receive larger data */
my_left_boundary_rank = my_group_index & ((~(int)0) << pow2_distance );
curr_data_buffer = (void *)((unsigned char *)data_buffer +
(size_t) base_block_size * my_left_boundary_rank);
*radix_mask_pow = pow2_distance;
pow2_distance--;
PR_SCATTHER:
PTPCOLL_VERBOSE(10, ("Bcast, receive data from %d[%d], "
"recv_count %d, tag %d, addr %p, offset %d, pow2_distace %d",
comm_root, group_root_index, recv_count,
tag, curr_data_buffer,
my_group_index * base_block_size, pow2_distance));
rc = MCA_PML_CALL(recv(curr_data_buffer, recv_count, MPI_BYTE,
comm_root, tag, comm, MPI_STATUS_IGNORE));
if( OMPI_SUCCESS != rc ) {
PTPCOLL_VERBOSE(10, ("Failed to receive data"));
return OMPI_ERROR;
}
PTPCOLL_VERBOSE(10, ("Bcast, Data was received"));
/* Sending forward the data over K-nomial tree */
*coll_status = PTPCOLL_SCATTER_STARTED;
K_NOMIAL_ROOT_BCAST_NB_BINOMIAL_SCATTER(
pow2_distance,
my_group_index, group_size, group_list,
data_buffer, base_block_size,
count, tag, comm, requests,
active_requests);
/* Since the next step (gather) does not really require
completion on scatter , we may return complete */
return BCOL_FN_COMPLETE;
}
static inline __opal_attribute_always_inline__
int bcol_ptpcoll_binomial_root_to_src(int group_root, int my_rank,
int pow2_size, int group_size, int *distance)
{
int root, relative_rank, src,
pow2_distance = 0, i;
if (group_root < pow2_size) {
root = group_root;
} else {
/* the source of the data is an extra node;
   the real root is represented by some rank from
   the pow2 group */
root = group_root - pow2_size;
/* shortcut for the case when my rank is root for the group */
if (my_rank == root) {
*distance = -1;
return group_root;
}
}
relative_rank = (my_rank - root) < 0 ? my_rank - root + pow2_size :
my_rank - root;
for (i = 1; i < pow2_size; i<<=1, pow2_distance++) {
if (relative_rank & i) {
src = my_rank ^ i;
if (src >= pow2_size)
src -= pow2_size;
*distance = pow2_distance;
return src;
}
}
/* error case */
*distance = -1;
return -1;
}
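/*
 * Illustrative example (assumed values): with pow2_size == 8 and
 * group_root == 0, relative_rank equals my_rank and the loop clears the
 * lowest set bit to find the source: rank 5 (101b) receives from rank 4
 * at distance 0, rank 6 (110b) from rank 4 at distance 1, and rank 4
 * (100b) from the root at distance 2.  A group_root >= pow2_size means
 * the data originated at an extra node and is proxied by rank
 * (group_root - pow2_size).
 */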
static inline __opal_attribute_always_inline__
int bcol_ptpcoll_bcast_binomial_test_and_scatter_known_root(mca_bcol_ptpcoll_module_t *ptpcoll_module,
int buffer_index, void *data_buffer, int count, int base_block_size)
{
int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list;
int rc;
int *active_requests =
&(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests);
ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm;
ompi_request_t **requests =
ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests;
int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index;
int tmp_radix_mask_pow =
ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask_pow - 1;
int tag =
ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag;
int *status =
&ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].status;
PTPCOLL_VERBOSE(10, ("Running bcol_ptpcoll_bcast_binomial_probe_and_scatter_anyroot"));
if (0 == mca_bcol_ptpcoll_test_all_for_match(active_requests,
requests, &rc)) {
PTPCOLL_VERBOSE(10, ("Test was not matched - %d", rc));
return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED;
}
PTPCOLL_VERBOSE(10, ("Bcast, Data was received"));
/* Forward the data over the binomial tree */
*status = PTPCOLL_SCATTER_STARTED;
K_NOMIAL_ROOT_BCAST_NB_BINOMIAL_SCATTER(
tmp_radix_mask_pow,
my_group_index, group_size, group_list,
data_buffer, base_block_size,
count, tag, comm, requests,
active_requests);
return BCOL_FN_COMPLETE;
}
#define NARRAY_BLOCK_SIZE(size, module, level_size) \
((size + (module)->full_narray_tree_num_leafs - 1) / \
(module)->full_narray_tree_num_leafs) * \
((module)->full_narray_tree_num_leafs / \
((0 == level_size) ? \
mca_bcol_ptpcoll_component.narray_knomial_radix : \
level_size))
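/*
 * Worked example (assumed values): with count == 1000,
 * full_narray_tree_num_leafs == 8 and level_size == 2 the macro yields
 * ceil(1000/8) * (8/2) == 125 * 4 == 500 bytes per block at that level;
 * a level_size of 0 falls back to the configured narray_knomial_radix.
 */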
static inline __opal_attribute_always_inline__
int bcol_ptpcoll_bcast_narray_test_and_scatter_known_root(mca_bcol_ptpcoll_module_t *ptpcoll_module,
int buffer_index, void *data_buffer, int count, int process_shift,
int relative_group_index)
{
int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list;
int rc;
int *active_requests =
&(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests);
ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm;
ompi_request_t **requests =
ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests;
int tag = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag;
int *status =
&ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].status;
int scatter_count = 0;
int offset = 0;
int base_block_size = 0;
void *curr_data_buffer = NULL;
PTPCOLL_VERBOSE(10, ("Running bcol_ptpcoll_bcast_narray_test_and_scatter_known_root"));
if (0 == mca_bcol_ptpcoll_test_all_for_match(active_requests,
requests, &rc)) {
PTPCOLL_VERBOSE(10, ("Test was not matched - %d", rc));
return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED;
}
/* Forward the data over the N-array tree */
*status = PTPCOLL_SCATTER_STARTED;
if(0 == relative_group_index) {
scatter_count = count;
} else {
scatter_count = NARRAY_BLOCK_SIZE(count, ptpcoll_module,
ptpcoll_module->narray_knomial_node[relative_group_index].level_size);
}
offset = scatter_count *
ptpcoll_module->narray_knomial_node[relative_group_index].rank_on_level;
/* make sure that we do not overrun memory */
if (OPAL_UNLIKELY(offset + scatter_count > count)) {
scatter_count = count - offset;
}
PTPCOLL_VERBOSE(10, ("Bcast, Data was received %d %d %d",
scatter_count,
ptpcoll_module->narray_knomial_node[relative_group_index].level_size,
ptpcoll_module->narray_knomial_node[relative_group_index].rank_on_level));
curr_data_buffer = (void *)((unsigned char *)data_buffer + (size_t)offset);
/* calculating scatter block size for next level of tree */
base_block_size = NARRAY_BLOCK_SIZE(count, ptpcoll_module,
ptpcoll_module->narray_knomial_node[relative_group_index].level_size *
mca_bcol_ptpcoll_component.narray_knomial_radix);
PTPCOLL_VERBOSE(10, ("scatter_known_rootaaa %d %d %d %d %d",scatter_count, offset, base_block_size,
ptpcoll_module->narray_knomial_node[relative_group_index].level_size /mca_bcol_ptpcoll_component.narray_knomial_radix,
ptpcoll_module->full_narray_tree_num_leafs));
NARRAY_SCATTER_NB((&ptpcoll_module->narray_knomial_node[relative_group_index]),
process_shift, ptpcoll_module->full_narray_tree_size,
curr_data_buffer, base_block_size, scatter_count, tag, comm,
requests, active_requests);
/* Bummer, I tried to prevent this, special case for virtual root */
if(0 == relative_group_index) {
if (0 == mca_bcol_ptpcoll_test_all_for_match(active_requests,
requests, &rc)) {
PTPCOLL_VERBOSE(10, ("Test was not matched - %d", rc));
*status = PTPCOLL_ROOT_SEND_STARTED;
return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED;
}
}
return BCOL_FN_COMPLETE;
}
static inline __opal_attribute_always_inline__
int bcol_ptpcoll_bcast_narray_knomial_gather(mca_bcol_ptpcoll_module_t *ptpcoll_module,
const int buffer_index, void *data_buffer, const int count,
const int relative_group_index)
{
int completed = 0; /* not completed */
int *active_requests =
&(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests);
int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index;
int blocks_in_step =
ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask;
int tag = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].tag - 1;
int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list;
int group_size = ptpcoll_module->full_narray_tree_size;
int i, k,
rc,
len, slen, rlen,
peer, group_peer;
size_t s_offset,
r_offset;
ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm;
ompi_request_t **requests =
ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests;
netpatterns_narray_knomial_tree_node_t *narray_node =
&ptpcoll_module->narray_knomial_node[relative_group_index];
netpatterns_k_exchange_node_t *k_node =
&narray_node->k_node;
mca_bcol_ptpcoll_component_t *cm =
&mca_bcol_ptpcoll_component;
size_t base_block_size =
NARRAY_BLOCK_SIZE(count, ptpcoll_module, narray_node->level_size);
PTPCOLL_VERBOSE(10, ("bcol_ptpcoll_bcast_narray_knomial_gather %d %d %d %d %d %d %d",
ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration,
base_block_size, count, narray_node->level_size,
relative_group_index, k_node->n_exchanges, tag));
/* we assume that iteration #iteration has already been completed by the probe */
for (i = ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration;
i < k_node->n_exchanges; i++, blocks_in_step *= cm->narray_knomial_radix) {
len = base_block_size * blocks_in_step;
for (k = 0; k < cm->narray_knomial_radix - 1; k++) {
group_peer = my_group_index +
(k_node->rank_exchanges[i][k] - narray_node->rank_on_level);
if (group_peer >= group_size) {
group_peer -= group_size;
} else if (group_peer < 0) {
group_peer += group_size;
}
peer = group_list[group_peer];
r_offset = (size_t)k_node->rank_exchanges[i][k] / blocks_in_step *
len;
/* check that we do not run past the message boundary */
if (OPAL_UNLIKELY(r_offset + len > (size_t)count)) {
rlen = count - r_offset;
if (OPAL_UNLIKELY(rlen <= 0)) {
continue;
}
} else {
rlen = len;
}
PTPCOLL_VERBOSE(10, ("Recv data from %d, addr %p offset %d len %d %d %d tag %d",
peer, data_buffer, r_offset, rlen, len, blocks_in_step, tag));
rc = MCA_PML_CALL(irecv((void *)((unsigned char *)data_buffer + r_offset),
rlen, MPI_BYTE,
peer, tag, comm, &requests[*active_requests]));
if( OMPI_SUCCESS != rc ) {
PTPCOLL_VERBOSE(10, ("Failed to receive data"));
return OMPI_ERROR;
}
++(*active_requests);
}
for (k = 0; k < cm->narray_knomial_radix - 1; k++) {
group_peer = my_group_index +
(k_node->rank_exchanges[i][k] - narray_node->rank_on_level);
if (group_peer >= group_size) {
group_peer -= group_size;
} else if (group_peer < 0) {
group_peer += group_size;
}
peer = group_list[group_peer];
s_offset = (size_t)narray_node->rank_on_level / blocks_in_step *
len;
/* check that we do not run past the message boundary */
if (OPAL_UNLIKELY(s_offset + len > (size_t)count)) {
slen = count - s_offset;
if (OPAL_UNLIKELY(slen <= 0)) {
continue;
}
} else {
slen = len;
}
PTPCOLL_VERBOSE(10, ("Send data from %d, addr %p offset %d len %d %d %d tag %d",
peer, data_buffer, s_offset, slen, len, blocks_in_step, tag));
rc = MCA_PML_CALL(isend((void *)((unsigned char *)data_buffer + s_offset),
slen, MPI_BYTE,
peer, tag, MCA_PML_BASE_SEND_STANDARD, comm,
&(requests[*active_requests])));
if( OMPI_SUCCESS != rc ) {
PTPCOLL_VERBOSE(10, ("Failed to send data"));
return OMPI_ERROR;
}
++(*active_requests);
}
completed = mca_bcol_ptpcoll_test_all_for_match(active_requests, requests, &rc);
if (0 == completed) {
/* cache data for next iteration */
ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].iteration =
i; /* why not to store step for next iteration ?! */
ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].radix_mask =
blocks_in_step * cm->narray_knomial_radix;
return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED;
}
}
return BCOL_FN_COMPLETE;
}
END_C_DECLS
#endif

View file

@ -1,174 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
*/
#include "ompi_config.h"
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <fcntl.h>
#include "ompi/constants.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/bcol/bcol.h"
#include "bcol_ptpcoll.h"
#include "ompi/mca/bcol/base/base.h"
#include "bcol_ptpcoll_mca.h"
#include "bcol_ptpcoll_utils.h"
/*
* Public string showing the bcol ptpcoll V2 component version number
*/
const char *mca_bcol_ptpcoll_component_version_string =
"Open MPI bcol - ptpcoll collective MCA component version " OMPI_VERSION;
/*
* Local functions
*/
static int ptpcoll_open(void);
static int ptpcoll_close(void);
/*
* Instantiate the public struct with all of our public information
* and pointers to our public functions in it
*/
mca_bcol_ptpcoll_component_t mca_bcol_ptpcoll_component = {
/* First, fill in the super */
{
/* First, the mca_component_t struct containing meta
information about the component itself */
.bcol_version = {
MCA_BCOL_BASE_VERSION_2_0_0,
/* Component name and version */
.mca_component_name = "ptpcoll",
MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
OMPI_RELEASE_VERSION),
/* Component open and close functions */
.mca_open_component = ptpcoll_open,
.mca_close_component = ptpcoll_close,
.mca_register_component_params = mca_bcol_ptpcoll_register_mca_params,
},
/* Initialization / querying functions */
.collm_init_query = mca_bcol_ptpcoll_init_query,
.collm_comm_query = mca_bcol_ptpcoll_comm_query,
.init_done = false,
.need_ordering = false,
},
/* component specific */
};
static void
collreq_construct(mca_bcol_ptpcoll_collreq_t *collreq)
{
collreq->requests = NULL;
}
static void
collreq_destruct(mca_bcol_ptpcoll_collreq_t *collreq)
{
if (NULL != collreq->requests) {
free(collreq->requests);
}
}
OBJ_CLASS_INSTANCE(mca_bcol_ptpcoll_collreq_t,
opal_free_list_item_t,
collreq_construct,
collreq_destruct);
/*
* Open the component
*/
static int ptpcoll_open(void)
{
return OMPI_SUCCESS;
}
/*
* Close the component
*/
static int ptpcoll_close(void)
{
return OMPI_SUCCESS;
}
/* query to see if the component is available for use, and can
* satisfy the thread and progress requirements
*/
int mca_bcol_ptpcoll_init_query(bool enable_progress_threads,
bool enable_mpi_threads)
{
/* at this stage there is no reason to disqualify this component */
/* done */
return OMPI_SUCCESS;
}
/* memory management routines */
/* allocate memory - this is a no-op function intended to work with
* mpool2, which will use malloc for allocation, if no other allocator
* is available.
*/
void * bcol_ptpcoll_allocate_memory(size_t length, size_t alignment,
struct mca_bcol_base_module_t *bcol_module)
{
/* do nothing */
return NULL;
}
/*
* register memory - nothing to do
*/
int bcol_ptpcoll_register_memory(void * in_ptr, size_t length, size_t alignment,
struct mca_bcol_base_module_t *bcol_module)
{
/* nothing to do */
return OMPI_SUCCESS;
}
/* deregister memory - nothing to do
*/
int bcol_ptpcoll_deregister_memory( void * in_ptr,
struct mca_bcol_base_module_t *bcol_module)
{
/* nothing to do */
return OMPI_SUCCESS;
}
/* free memory - since we don't allocate, we also don't free */
int bcol_ptpcoll_free_memory(void *ptr,
struct mca_bcol_base_module_t *bcol_module)
{
/* nothing to do */
return OMPI_SUCCESS;
}

View file

@ -1,28 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi/include/ompi/constants.h"
#include "ompi/mca/bcol/ptpcoll/bcol_ptpcoll.h"
/*
* Fanin routines - no user data
*/
int bcol_ptpcoll_fanin( bcol_function_args_t *input_args,
struct mca_bcol_base_module_t *module)
{
/* local variable */
int ret=OMPI_SUCCESS;
/* mca_bcol_ptpcoll_module_t *ptp_module=(mca_bcol_ptpcoll_module_t *) module; */
/* done */
return ret;
}

View file

@ -1,30 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi/include/ompi/constants.h"
#include "ompi/mca/bcol/ptpcoll/bcol_ptpcoll.h"
/*
* Fanin routines - no user data
*/
int bcol_ptpcoll_fanout( bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args)
{
/* local variable */
int ret = OMPI_SUCCESS;
/* TBD:
mca_bcol_ptpcoll_module_t *ptp_module=(mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
*/
/* done */
return ret;
}

View file

@ -1,197 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <fcntl.h>
#include "bcol_ptpcoll_mca.h"
#include "bcol_ptpcoll.h"
/*
* Local flags
*/
enum {
REGINT_NEG_ONE_OK = 0x01,
REGINT_GE_ZERO = 0x02,
REGINT_GE_ONE = 0x04,
REGINT_NONZERO = 0x08,
REGINT_MAX = 0x88
};
enum {
REGSTR_EMPTY_OK = 0x01,
REGSTR_MAX = 0x88
};
#if 0 /* Pasha: we will be need this function in future */
/*
* utility routine for string parameter registration
*/
static int reg_string(const char* param_name,
const char* deprecated_param_name,
const char* param_desc,
const char* default_value, char **storage,
int flags)
{
int index;
*storage = default_value;
index = mca_base_component_var_register(&mca_bcol_ptpcoll_component.super.bcol_version,
param_name, param_desc, MCA_BASE_VAR_TYPE_STRING,
NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, storage);
if (NULL != deprecated_param_name) {
(void) mca_base_var_register_synonym(index, "ompi", "bcol", "ptpcoll",
deprecated_param_name,
MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
}
if (0 != (flags & REGSTR_EMPTY_OK) && (NULL == *storage || 0 == strlen(*storage))) {
opal_output(0, "Bad parameter value for parameter \"%s\"",
param_name);
return OMPI_ERR_BAD_PARAM;
}
return OMPI_SUCCESS;
}
#endif
/*
* utility routine for integer parameter registration
*/
static int reg_int(const char* param_name,
const char* deprecated_param_name,
const char* param_desc,
int default_value, int *storage, int flags)
{
int index;
*storage = default_value;
index = mca_base_component_var_register(&mca_bcol_ptpcoll_component.super.bcol_version,
param_name, param_desc, MCA_BASE_VAR_TYPE_INT,
NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, storage);
if (NULL != deprecated_param_name) {
(void) mca_base_var_register_synonym(index, "ompi", "bcol", "ptpcoll",
deprecated_param_name,
MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
}
if (0 != (flags & REGINT_NEG_ONE_OK) && -1 == *storage) {
return OMPI_SUCCESS;
}
if ((0 != (flags & REGINT_GE_ZERO) && *storage < 0) ||
(0 != (flags & REGINT_GE_ONE) && *storage < 1) ||
(0 != (flags & REGINT_NONZERO) && 0 == *storage)) {
opal_output(0, "Bad parameter value for parameter \"%s\"",
param_name);
return OMPI_ERR_BAD_PARAM;
}
return OMPI_SUCCESS;
}
static int reg_bool(const char* param_name,
const char* deprecated_param_name,
const char* param_desc,
bool default_value, bool *storage)
{
int index;
*storage = default_value;
index = mca_base_component_var_register(&mca_bcol_ptpcoll_component.super.bcol_version,
param_name, param_desc, MCA_BASE_VAR_TYPE_BOOL,
NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, storage);
if (0 > index) {
return index;
}
if (NULL != deprecated_param_name) {
(void) mca_base_var_register_synonym(index, "ompi", "bcol", "ptpcoll",
deprecated_param_name,
MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
}
return OMPI_SUCCESS;
}
int mca_bcol_ptpcoll_register_mca_params(void)
{
int ret, tmp;
mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component;
ret = OMPI_SUCCESS;
#define CHECK(expr) do {\
tmp = (expr); \
if (OMPI_SUCCESS != tmp) ret = tmp; \
} while (0)
CHECK(reg_int("priority", NULL,
"PTPCOLL component priority"
"(from 0(low) to 90 (high))", 90, &cm->super.priority, 0));
CHECK(reg_int("verbose", NULL,
"Output some verbose PTPCOLL information "
"(0 = no output, nonzero = output)", 0, &cm->verbose, REGINT_GE_ZERO));
CHECK(reg_int("k_nomial_radix", NULL,
"The radix of K-Nomial Tree "
"(starts from 2)", 2, &cm->k_nomial_radix, REGINT_GE_ONE));
CHECK(reg_int("narray_radix", NULL,
"The radix of Narray Tree "
"(starts from 2)", 2, &cm->narray_radix, REGINT_GE_ONE));
CHECK(reg_int("narray_knomial_radix", NULL,
"The radix of Narray/Knomial Tree for scatther-gather type algorithms"
"(starts from 2)", 2, &cm->narray_knomial_radix, REGINT_GE_ONE));
CHECK(reg_int("num_to_probe", NULL,
"Number of probe operation in single source data check"
"(starts from 8)", 8, &cm->num_to_probe, REGINT_GE_ONE));
CHECK(reg_int("bcast_small_msg_known_root_alg", NULL,
"Algorithm selection for bcast small messages known root"
"(1 - K-nomial, 2 - N-array)", 1, &cm->bcast_small_messages_known_root_alg,
REGINT_GE_ZERO));
CHECK(reg_int("bcast_large_msg_known_root_alg", NULL,
"Algorithm selection for bcast large messages known root"
"(1 - Binomial scatther-gather, 2 - N-array scather, K-nomial gather)",
1, &cm->bcast_large_messages_known_root_alg, REGINT_GE_ZERO));
CHECK(reg_int("barrier_alg", NULL,
"Algorithm selection for Barrier"
"(1 - Recursive doubling, 2 - Recursive K-ing)",
1, &cm->barrier_alg, REGINT_GE_ZERO));
/* register parameters controlling message fragmentation */
CHECK(reg_int("min_frag_size", NULL,
"Minimum fragment size",
getpagesize(), &cm->super.min_frag_size, REGINT_GE_ONE));
CHECK(reg_int("max_frag_size", NULL,
"Maximum fragment size",
FRAG_SIZE_NO_LIMIT, &cm->super.max_frag_size, REGINT_NONZERO));
CHECK(reg_bool("can_use_user_buffers", NULL,
"User memory can be used by the collective algorithms",
1, &cm->super.can_use_user_buffers));
return ret;
}
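/*
 * Editor's sketch (not part of the removed component): the REGINT_* flags
 * above encode simple range constraints that reg_int() enforces after
 * registering a parameter. The standalone helper below mirrors those checks
 * so the flag semantics are easier to see; all names are illustrative only.
 */
static int ptpcoll_sketch_int_value_ok(int value, int flags)
{
    if (0 != (flags & REGINT_NEG_ONE_OK) && -1 == value) {
        return 1;                      /* -1 is accepted early, meaning "no limit" */
    }
    if ((0 != (flags & REGINT_GE_ZERO) && value < 0) ||
        (0 != (flags & REGINT_GE_ONE)  && value < 1) ||
        (0 != (flags & REGINT_NONZERO) && 0 == value)) {
        return 0;                      /* violates the requested constraint */
    }
    return 1;
}
/* Example: a radix of 0 fails REGINT_GE_ONE, while -1 with
 * REGINT_NEG_ONE_OK set is accepted even if REGINT_GE_ONE is also given. */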

View file

@ -1,20 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BCOL_PTPCOLL_MCA_H
#define MCA_BCOL_PTPCOLL_MCA_H
#include "ompi_config.h"
BEGIN_C_DECLS
int mca_bcol_ptpcoll_register_mca_params(void);
END_C_DECLS
#endif

View file

@ -1,760 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
*/
#include "ompi_config.h"
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <errno.h>
#include "ompi/constants.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/bcol/bcol.h"
#include "opal/util/show_help.h"
#include "ompi/mca/bcol/base/base.h"
#include "ompi/mca/pml/pml.h" /* need this for the max tag size */
#include "bcol_ptpcoll.h"
#include "bcol_ptpcoll_utils.h"
#include "bcol_ptpcoll_bcast.h"
#include "bcol_ptpcoll_allreduce.h"
#include "bcol_ptpcoll_reduce.h"
#define BCOL_PTP_CACHE_LINE_SIZE 128
/*
* Local functions
*/
static int alloc_allreduce_offsets_array(mca_bcol_ptpcoll_module_t *ptpcoll_module)
{
int rc = OMPI_SUCCESS, i = 0;
netpatterns_k_exchange_node_t *k_node = &ptpcoll_module->knomial_exchange_tree;
int n_exchanges = k_node->n_exchanges;
/* Precalculate the allreduce offsets */
if (0 < k_node->n_exchanges) {
ptpcoll_module->allgather_offsets = (int **) calloc (n_exchanges, sizeof(int *));
if (!ptpcoll_module->allgather_offsets) {
return OMPI_ERROR;
}
for (i = 0; i < n_exchanges ; i++) {
ptpcoll_module->allgather_offsets[i] = (int *) calloc (NOFFSETS, sizeof(int));
if (!ptpcoll_module->allgather_offsets[i]){
return OMPI_ERROR;
}
}
}
return rc;
}
static int free_allreduce_offsets_array(mca_bcol_ptpcoll_module_t *ptpcoll_module)
{
int rc = OMPI_SUCCESS, i = 0;
netpatterns_k_exchange_node_t *k_node = &ptpcoll_module->knomial_exchange_tree;
int n_exchanges = k_node->n_exchanges;
if (ptpcoll_module->allgather_offsets) {
for (i=0; i < n_exchanges; i++) {
free (ptpcoll_module->allgather_offsets[i]);
}
}
free(ptpcoll_module->allgather_offsets);
ptpcoll_module->allgather_offsets = NULL;
return rc;
}
static void
mca_bcol_ptpcoll_module_construct(mca_bcol_ptpcoll_module_t *ptpcoll_module)
{
uint64_t i;
/* Pointer to component */
ptpcoll_module->narray_node = NULL;
ptpcoll_module->allgather_offsets = NULL;
ptpcoll_module->super.bcol_component = (mca_bcol_base_component_t *) &mca_bcol_ptpcoll_component;
ptpcoll_module->super.list_n_connected = NULL;
ptpcoll_module->super.hier_scather_offset = 0;
/* no header support in ptp */
ptpcoll_module->super.header_size = 0;
/* No network context */
ptpcoll_module->super.network_context = NULL;
/* set the upper limit on the tag */
i = 2;
ptpcoll_module->tag_mask = 1;
while ( i <= (uint64_t) mca_pml.pml_max_tag && i > 0) {
i <<= 1;
}
ptpcoll_module->ml_mem.ml_buf_desc = NULL;
ptpcoll_module->tag_mask = i - 1;
}
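/*
 * Editor's sketch (not part of the removed code): the loop in the module
 * constructor above derives a power-of-two tag mask from the PML's maximum
 * tag. The standalone helper below reproduces that arithmetic so the intent
 * is easier to see; the name is illustrative only.
 */
#include <stdint.h>

static uint64_t ptpcoll_sketch_tag_mask(uint64_t pml_max_tag)
{
    uint64_t i = 2;

    /* double until we pass the largest tag the PML supports */
    while (i <= pml_max_tag && i > 0) {
        i <<= 1;
    }
    /* every bit below that power of two is usable for collective tags */
    return i - 1;
}
/* Example: for pml_max_tag = 2^31 - 1 the loop stops at i = 2^31,
 * so the mask is 0x7fffffff, i.e. the full positive tag range. */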
static void
mca_bcol_ptpcoll_module_destruct(mca_bcol_ptpcoll_module_t *ptpcoll_module)
{
int i;
mca_bcol_ptpcoll_local_mlmem_desc_t *ml_mem = &ptpcoll_module->ml_mem;
if (NULL != ml_mem->ml_buf_desc) {
/* Release the memory structs that were caching ML memory data */
uint32_t i, j, ci;
for (i = 0; i < ml_mem->num_banks; i++) {
for (j = 0; j < ml_mem->num_buffers_per_bank; j++) {
ci = i * ml_mem->num_buffers_per_bank + j;
if (NULL != ml_mem->ml_buf_desc[ci].requests) {
free(ml_mem->ml_buf_desc[ci].requests);
}
}
}
/* release the buffer descriptor */
free(ml_mem->ml_buf_desc);
ml_mem->ml_buf_desc = NULL;
}
if (NULL != ptpcoll_module->allgather_offsets) {
free_allreduce_offsets_array(ptpcoll_module);
}
if (NULL != ptpcoll_module->narray_node) {
for (i = 0; i < ptpcoll_module->group_size; i++) {
if (NULL != ptpcoll_module->narray_node[i].children_ranks) {
free(ptpcoll_module->narray_node[i].children_ranks);
}
}
free(ptpcoll_module->narray_node);
ptpcoll_module->narray_node = NULL;
}
OBJ_DESTRUCT(&ptpcoll_module->collreqs_free);
if (NULL != ptpcoll_module->super.list_n_connected) {
free(ptpcoll_module->super.list_n_connected);
ptpcoll_module->super.list_n_connected = NULL;
}
for (i = 0; i < BCOL_NUM_OF_FUNCTIONS; i++){
OPAL_LIST_DESTRUCT((&ptpcoll_module->super.bcol_fns_table[i]));
}
if (NULL != ptpcoll_module->kn_proxy_extra_index) {
free(ptpcoll_module->kn_proxy_extra_index);
ptpcoll_module->kn_proxy_extra_index = NULL;
}
if (NULL != ptpcoll_module->alltoall_iovec) {
free(ptpcoll_module->alltoall_iovec);
ptpcoll_module->alltoall_iovec = NULL;
}
if (NULL != ptpcoll_module->narray_knomial_proxy_extra_index) {
free(ptpcoll_module->narray_knomial_proxy_extra_index);
ptpcoll_module->narray_knomial_proxy_extra_index = NULL;
}
if (NULL != ptpcoll_module->narray_knomial_node) {
for(i = 0; i < ptpcoll_module->full_narray_tree_size; i++) {
netpatterns_cleanup_narray_knomial_tree (ptpcoll_module->narray_knomial_node + i);
}
free(ptpcoll_module->narray_knomial_node);
ptpcoll_module->narray_knomial_node = NULL;
}
netpatterns_cleanup_recursive_knomial_allgather_tree_node(&ptpcoll_module->knomial_allgather_tree);
netpatterns_cleanup_recursive_knomial_tree_node(&ptpcoll_module->knomial_exchange_tree);
}
OBJ_CLASS_INSTANCE(mca_bcol_ptpcoll_module_t,
mca_bcol_base_module_t,
mca_bcol_ptpcoll_module_construct,
mca_bcol_ptpcoll_module_destruct);
static int init_ml_buf_desc(mca_bcol_ptpcoll_ml_buffer_desc_t **desc, void *base_addr, uint32_t num_banks,
uint32_t num_buffers_per_bank, uint32_t size_buffer, uint32_t header_size, int group_size, int pow_k)
{
uint32_t i, j, ci;
mca_bcol_ptpcoll_ml_buffer_desc_t *tmp_desc = NULL;
int k_nomial_radix = mca_bcol_ptpcoll_component.k_nomial_radix;
int pow_k_val = (0 == pow_k) ? 1 : pow_k;
int num_to_alloc =
((k_nomial_radix - 1) * pow_k_val * 2 + 1 > mca_bcol_ptpcoll_component.narray_radix) ?
(k_nomial_radix - 1) * pow_k_val * 2 + 1 :
mca_bcol_ptpcoll_component.narray_radix * 2;
*desc = (mca_bcol_ptpcoll_ml_buffer_desc_t *)calloc(num_banks * num_buffers_per_bank,
sizeof(mca_bcol_ptpcoll_ml_buffer_desc_t));
if (NULL == *desc) {
PTPCOLL_ERROR(("Failed to allocate memory"));
return OMPI_ERROR;
}
tmp_desc = *desc;
for (i = 0; i < num_banks; i++) {
for (j = 0; j < num_buffers_per_bank; j++) {
ci = i * num_buffers_per_bank + j;
tmp_desc[ci].bank_index = i;
tmp_desc[ci].buffer_index = j;
/* *2 is for gather session +1 for extra peer */
tmp_desc[ci].requests = (ompi_request_t **)
calloc(num_to_alloc, sizeof(ompi_request_t *));
if (NULL == tmp_desc[ci].requests) {
PTPCOLL_ERROR(("Failed to allocate memory for requests"));
return OMPI_ERROR;
}
/*
* ptpcoll does not have any header, but other bcols may. So
* we need to take that into account.
*/
tmp_desc[ci].data_addr = (void *)
((unsigned char*)base_addr + ci * size_buffer + header_size);
PTPCOLL_VERBOSE(10, ("ml memory cache setup %d %d - %p", i, j, tmp_desc[ci].data_addr));
/* init reduce implementation flags */
tmp_desc[ci].reduce_init_called = false;
tmp_desc[ci].reduction_status = 0;
}
}
return OMPI_SUCCESS;
}
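/*
 * Editor's sketch (not part of the removed code): the address arithmetic
 * used by init_ml_buf_desc() above. Every (bank, buffer) pair maps to one
 * contiguous slot of size_buffer bytes inside the ML block, and the payload
 * starts header_size bytes into that slot. Names are illustrative only.
 */
static void *ptpcoll_sketch_buffer_addr(void *base_addr, uint32_t bank,
                                        uint32_t buffer,
                                        uint32_t num_buffers_per_bank,
                                        uint32_t size_buffer,
                                        uint32_t header_size)
{
    uint32_t ci = bank * num_buffers_per_bank + buffer;   /* flat buffer index */
    return (void *) ((unsigned char *) base_addr +
                     (size_t) ci * size_buffer + header_size);
}
/* Example: 2 banks x 4 buffers of 4096 bytes with a 64-byte header:
 * bank 1, buffer 2 -> ci = 6 -> base + 6 * 4096 + 64 = base + 24640. */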
static void mca_bcol_ptpcoll_set_small_msg_thresholds(struct mca_bcol_base_module_t *super)
{
mca_bcol_ptpcoll_module_t *ptpcoll_module =
(mca_bcol_ptpcoll_module_t *) super;
mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component;
/* Subtract out the maximum header size when calculating the thresholds. This
* will account for the headers used by the basesmuma component. If we do not
* take these headers into account we may overrun our buffer. */
/* Set the Allgather threshold equals to a ML buff size */
super->small_message_thresholds[BCOL_ALLGATHER] =
(ptpcoll_module->ml_mem.size_buffer - BCOL_HEADER_MAX) /
ompi_comm_size(ptpcoll_module->super.sbgp_partner_module->group_comm);
/* Set the Bcast threshold, all Bcast algorithms have the same threshold */
super->small_message_thresholds[BCOL_BCAST] =
(ptpcoll_module->ml_mem.size_buffer - BCOL_HEADER_MAX);
/* Set the Alltoall threshold, the Ring algorithm sets some limitation */
super->small_message_thresholds[BCOL_ALLTOALL] =
(ptpcoll_module->ml_mem.size_buffer - BCOL_HEADER_MAX) / 2;
/* Set the Allreduce threshold, the NARRAY algorithm sets some limitation */
super->small_message_thresholds[BCOL_ALLREDUCE] =
(ptpcoll_module->ml_mem.size_buffer - BCOL_HEADER_MAX) / ptpcoll_module->k_nomial_radix;
/* Set the Reduce threshold, the NARRAY algorithm sets some limitation */
super->small_message_thresholds[BCOL_REDUCE] =
(ptpcoll_module->ml_mem.size_buffer - BCOL_HEADER_MAX) / cm->narray_radix;
}
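/*
 * Editor's sketch (not part of the removed code): every threshold above is
 * derived from the usable part of one ML buffer. The helper below mirrors
 * the allgather case with plain integers; the concrete numbers in the
 * comment are assumptions for illustration only, not component defaults.
 */
static int ptpcoll_sketch_allgather_threshold(int size_buffer, int header_max,
                                              int comm_size)
{
    /* each rank's contribution must fit after the worst-case bcol header */
    return (size_buffer - header_max) / comm_size;
}
/* Example (assumed values): size_buffer = 65536, header_max = 128,
 * comm_size = 8 -> allgather threshold = 65408 / 8 = 8176 bytes, while the
 * bcast threshold would be the full 65408 bytes. */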
/*
* Cache information about ML memory
*/
static int mca_bcol_ptpcoll_cache_ml_memory_info(struct mca_bcol_base_memory_block_desc_t *payload_block,
uint32_t data_offset,
struct mca_bcol_base_module_t *bcol,
void *reg_data)
{
mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *) bcol;
mca_bcol_ptpcoll_local_mlmem_desc_t *ml_mem = &ptpcoll_module->ml_mem;
struct mca_bcol_base_memory_block_desc_t *desc = payload_block;
int group_size = ptpcoll_module->super.sbgp_partner_module->group_size;
PTPCOLL_VERBOSE(10, ("mca_bcol_ptpcoll_init_buffer_memory was called"));
/* cache ml mem desc tunings locally */
ml_mem->num_banks = desc->num_banks;
ml_mem->num_buffers_per_bank = desc->num_buffers_per_bank;
ml_mem->size_buffer = desc->size_buffer;
PTPCOLL_VERBOSE(10, ("ML buffer configuration num banks %d num_per_bank %d size %d base addr %p",
desc->num_banks, desc->num_buffers_per_bank, desc->size_buffer, desc->block->base_addr));
/* Set first bank index for release */
ml_mem->bank_index_for_release = 0;
if (OMPI_SUCCESS != init_ml_buf_desc(&ml_mem->ml_buf_desc,
desc->block->base_addr,
ml_mem->num_banks,
ml_mem->num_buffers_per_bank,
ml_mem->size_buffer,
data_offset,
group_size,
ptpcoll_module->pow_k)) {
PTPCOLL_VERBOSE(10, ("Failed to allocate rdma memory descriptor\n"));
return OMPI_ERROR;
}
PTPCOLL_VERBOSE(10, ("ptpcoll_module = %p, ml_mem_desc = %p.\n",
ptpcoll_module));
return OMPI_SUCCESS;
}
/*
* Load ptpcoll bcol functions
*/
static void load_func(mca_bcol_ptpcoll_module_t *ptpcoll_module)
{
int fnc;
/* reset everything to NULL */
for (fnc = 0; fnc < BCOL_NUM_OF_FUNCTIONS; fnc++) {
/*ptpcoll_module->super.bcol_function_table[fnc] = NULL;*/
ptpcoll_module->super.bcol_function_table[fnc] = NULL;
ptpcoll_module->super.bcol_function_init_table[fnc] = NULL;
}
ptpcoll_module->super.bcol_function_init_table[BCOL_BARRIER] = bcol_ptpcoll_barrier_init;
ptpcoll_module->super.bcol_function_init_table[BCOL_BCAST] = bcol_ptpcoll_bcast_init;
ptpcoll_module->super.bcol_function_init_table[BCOL_ALLREDUCE] = bcol_ptpcoll_allreduce_init;
ptpcoll_module->super.bcol_function_init_table[BCOL_ALLGATHER] = bcol_ptpcoll_allgather_init;
ptpcoll_module->super.bcol_function_table[BCOL_BCAST] = bcol_ptpcoll_bcast_k_nomial_anyroot;
ptpcoll_module->super.bcol_function_init_table[BCOL_ALLTOALL] = NULL;
ptpcoll_module->super.bcol_function_init_table[BCOL_SYNC] = mca_bcol_ptpcoll_memsync_init;
ptpcoll_module->super.bcol_function_init_table[BCOL_REDUCE] = bcol_ptpcoll_reduce_init;
/* ML memory cacher */
ptpcoll_module->super.bcol_memory_init = mca_bcol_ptpcoll_cache_ml_memory_info;
/* Set thresholds */
ptpcoll_module->super.set_small_msg_thresholds = mca_bcol_ptpcoll_set_small_msg_thresholds;
/* setup recursive k-ing tree */
ptpcoll_module->super.k_nomial_tree = mca_bcol_ptpcoll_setup_knomial_tree;
}
int mca_bcol_ptpcoll_setup_knomial_tree(mca_bcol_base_module_t *super)
{
mca_bcol_ptpcoll_module_t *p2p_module = (mca_bcol_ptpcoll_module_t *) super;
int rc = 0;
rc = netpatterns_setup_recursive_knomial_allgather_tree_node(
p2p_module->super.sbgp_partner_module->group_size,
p2p_module->super.sbgp_partner_module->my_index,
mca_bcol_ptpcoll_component.k_nomial_radix,
super->list_n_connected,
&p2p_module->knomial_allgather_tree);
return rc;
}
/* Calculate the size of the largest complete radix-ary tree that fits within the group */
static int calc_full_tree_size(int radix, int group_size, int *num_leafs)
{
int level_cnt = 1;
int total_cnt = 0;
while( total_cnt < group_size ) {
total_cnt += level_cnt;
level_cnt *= radix;
}
if (total_cnt > group_size) {
*num_leafs = level_cnt / radix;
return total_cnt - level_cnt / radix;
} else {
*num_leafs = level_cnt;
return group_size;
}
}
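/*
 * Editor's worked example (not part of the removed code): tracing
 * calc_full_tree_size(radix = 3, group_size = 10):
 *
 *   level_cnt: 1 -> 3 -> 9 -> 27      total_cnt: 1 -> 4 -> 13
 *
 * total_cnt (13) overshoots the group, so the last level is dropped: the
 * function returns 13 - 27/3 = 4 (the largest complete 3-ary tree that
 * fits) and *num_leafs is set to 27/3 = 9, the number of child slots
 * available directly below that complete tree for the remaining processes.
 */
#include <assert.h>
static void ptpcoll_sketch_tree_size_example(void)
{
    int num_leafs = 0;
    assert(4 == calc_full_tree_size(3, 10, &num_leafs));
    assert(9 == num_leafs);
}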
/* Setup N-array scatter Knomial-gather static information */
static int load_narray_knomial_tree (mca_bcol_ptpcoll_module_t *ptpcoll_module)
{
int rc, i, peer;
mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component;
ptpcoll_module->full_narray_tree_size = calc_full_tree_size(
cm->narray_knomial_radix,
ptpcoll_module->group_size,
&ptpcoll_module->full_narray_tree_num_leafs);
ptpcoll_module->narray_knomial_proxy_extra_index = (int *)
malloc(sizeof(int) * (cm->narray_knomial_radix));
if (NULL == ptpcoll_module->narray_knomial_proxy_extra_index) {
PTPCOLL_ERROR(("Failed to allocate memory"));
goto Error;
}
ptpcoll_module->narray_knomial_node = calloc(
ptpcoll_module->full_narray_tree_size,
sizeof(netpatterns_narray_knomial_tree_node_t));
if(NULL == ptpcoll_module->narray_knomial_node) {
goto Error;
}
PTPCOLL_VERBOSE(10 ,("My type is proxy, full tree size = %d [%d]",
ptpcoll_module->full_narray_tree_size,
cm->narray_knomial_radix
));
if (ptpcoll_module->super.sbgp_partner_module->my_index <
ptpcoll_module->full_narray_tree_size) {
if (ptpcoll_module->super.sbgp_partner_module->my_index <
ptpcoll_module->group_size - ptpcoll_module->full_narray_tree_size) {
ptpcoll_module->narray_type = PTPCOLL_PROXY;
for (i = 0; i < cm->narray_knomial_radix; i++) {
peer =
ptpcoll_module->super.sbgp_partner_module->my_index *
cm->narray_knomial_radix + i +
ptpcoll_module->full_narray_tree_size;
if (peer >= ptpcoll_module->group_size) {
break;
}
ptpcoll_module->narray_knomial_proxy_extra_index[i] = peer;
}
ptpcoll_module->narray_knomial_proxy_num = i;
} else {
ptpcoll_module->narray_type = PTPCOLL_IN_GROUP;
}
/* Setting node info */
for(i = 0; i < ptpcoll_module->full_narray_tree_size; i++) {
rc = netpatterns_setup_narray_knomial_tree(
cm->narray_knomial_radix,
i,
ptpcoll_module->full_narray_tree_size,
&ptpcoll_module->narray_knomial_node[i]);
if(OMPI_SUCCESS != rc) {
goto Error;
}
}
} else {
ptpcoll_module->narray_type = PTPCOLL_EXTRA;
ptpcoll_module->narray_knomial_proxy_extra_index[0] =
(ptpcoll_module->super.sbgp_partner_module->my_index -
ptpcoll_module->full_narray_tree_size) /
cm->narray_knomial_radix;
}
return OMPI_SUCCESS;
Error:
if (NULL != ptpcoll_module->narray_knomial_node) {
free(ptpcoll_module->narray_knomial_node);
}
if (NULL != ptpcoll_module->narray_knomial_proxy_extra_index) {
free(ptpcoll_module->narray_knomial_proxy_extra_index);
}
return OMPI_ERROR;
}
/* Setup N-array static information */
static int load_narray_tree(mca_bcol_ptpcoll_module_t *ptpcoll_module)
{
int rc, i;
mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component;
ptpcoll_module->narray_node = calloc(ptpcoll_module->group_size,
sizeof(netpatterns_tree_node_t));
if(NULL == ptpcoll_module->narray_node ) {
goto Error;
}
for(i = 0; i < ptpcoll_module->group_size; i++) {
rc = netpatterns_setup_narray_tree(
cm->narray_radix,
i,
ptpcoll_module->group_size,
&ptpcoll_module->narray_node[i]);
if(OMPI_SUCCESS != rc) {
goto Error;
}
}
return OMPI_SUCCESS;
Error:
if (NULL != ptpcoll_module->narray_node) {
free(ptpcoll_module->narray_node);
}
return OMPI_ERROR;
}
static int load_knomial_info(mca_bcol_ptpcoll_module_t *ptpcoll_module)
{
int i;
mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component;
ptpcoll_module->k_nomial_radix =
cm->k_nomial_radix > ptpcoll_module->group_size ?
ptpcoll_module->group_size :
cm->k_nomial_radix;
ptpcoll_module->pow_k = pow_k_calc(ptpcoll_module->k_nomial_radix,
ptpcoll_module->group_size,
&ptpcoll_module->pow_knum);
ptpcoll_module->kn_proxy_extra_index = (int *)
malloc(sizeof(int) * (ptpcoll_module->k_nomial_radix - 1));
if (NULL == ptpcoll_module->kn_proxy_extra_index) {
PTPCOLL_ERROR(("Failed to allocate memory"));
goto Error;
}
/* Setting peer type for K-nomial algorithm*/
if (ptpcoll_module->super.sbgp_partner_module->my_index < ptpcoll_module->pow_knum ) {
if (ptpcoll_module->super.sbgp_partner_module->my_index <
ptpcoll_module->group_size - ptpcoll_module->pow_knum) {
for (i = 0;
i < (ptpcoll_module->k_nomial_radix - 1) &&
ptpcoll_module->super.sbgp_partner_module->my_index *
(ptpcoll_module->k_nomial_radix - 1) +
i + ptpcoll_module->pow_knum < ptpcoll_module->group_size
; i++) {
ptpcoll_module->pow_ktype = PTPCOLL_KN_PROXY;
ptpcoll_module->kn_proxy_extra_index[i] =
ptpcoll_module->super.sbgp_partner_module->my_index *
(ptpcoll_module->k_nomial_radix - 1) +
i + ptpcoll_module->pow_knum;
PTPCOLL_VERBOSE(10 ,("My type is proxy, pow_knum = %d [%d] my extra %d",
ptpcoll_module->pow_knum,
ptpcoll_module->pow_k,
ptpcoll_module->kn_proxy_extra_index[i]));
}
ptpcoll_module->kn_proxy_extra_num = i;
} else {
PTPCOLL_VERBOSE(10 ,("My type is in group, pow_knum = %d [%d]", ptpcoll_module->pow_knum,
ptpcoll_module->pow_k));
ptpcoll_module->pow_ktype = PTPCOLL_KN_IN_GROUP;
}
} else {
ptpcoll_module->pow_ktype = PTPCOLL_KN_EXTRA;
ptpcoll_module->kn_proxy_extra_index[0] = (ptpcoll_module->super.sbgp_partner_module->my_index -
ptpcoll_module->pow_knum) / (ptpcoll_module->k_nomial_radix - 1);
PTPCOLL_VERBOSE(10 ,("My type is extra , pow_knum = %d [%d] my proxy %d",
ptpcoll_module->pow_knum,
ptpcoll_module->pow_k,
ptpcoll_module->kn_proxy_extra_index[0]));
}
return OMPI_SUCCESS;
Error:
if (NULL != ptpcoll_module->kn_proxy_extra_index) {
free(ptpcoll_module->kn_proxy_extra_index);
}
return OMPI_ERROR;
}
static int load_binomial_info(mca_bcol_ptpcoll_module_t *ptpcoll_module)
{
ptpcoll_module->pow_2 = pow_k_calc(2,
ptpcoll_module->group_size,
&ptpcoll_module->pow_2num);
assert(ptpcoll_module->pow_2num == 1 << ptpcoll_module->pow_2);
assert(ptpcoll_module->pow_2num <= ptpcoll_module->group_size);
/* Setting peer type for binary algorithm*/
if (ptpcoll_module->super.sbgp_partner_module->my_index < ptpcoll_module->pow_2num ) {
if (ptpcoll_module->super.sbgp_partner_module->my_index <
ptpcoll_module->group_size - ptpcoll_module->pow_2num) {
PTPCOLL_VERBOSE(10 ,("My type is proxy, pow_2num = %d [%d]", ptpcoll_module->pow_2num,
ptpcoll_module->pow_2));
ptpcoll_module->pow_2type = PTPCOLL_PROXY;
ptpcoll_module->proxy_extra_index = ptpcoll_module->super.sbgp_partner_module->my_index +
ptpcoll_module->pow_2num;
} else {
PTPCOLL_VERBOSE(10 ,("My type is in group, pow_2num = %d [%d]", ptpcoll_module->pow_2num,
ptpcoll_module->pow_2));
ptpcoll_module->pow_2type = PTPCOLL_IN_GROUP;
}
} else {
PTPCOLL_VERBOSE(10 ,("My type is extra , pow_2num = %d [%d]", ptpcoll_module->pow_2num,
ptpcoll_module->pow_2));
ptpcoll_module->pow_2type = PTPCOLL_EXTRA;
ptpcoll_module->proxy_extra_index = ptpcoll_module->super.sbgp_partner_module->my_index -
ptpcoll_module->pow_2num;
}
return OMPI_SUCCESS;
}
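/*
 * Editor's worked example (not part of the removed code): how
 * load_binomial_info() classifies ranks for a group of 6 processes.
 * pow_k_calc(2, 6, &pow_2num) yields pow_2 = 2 and pow_2num = 4, so the
 * recursive-doubling core is ranks 0..3 and the two leftover ranks fold in
 * through proxies:
 *
 *   ranks 0, 1 -> PTPCOLL_PROXY     (paired with extra ranks 4 and 5)
 *   ranks 2, 3 -> PTPCOLL_IN_GROUP  (participate directly)
 *   ranks 4, 5 -> PTPCOLL_EXTRA     (their proxies are ranks 0 and 1)
 */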
static int load_recursive_knomial_info(mca_bcol_ptpcoll_module_t *ptpcoll_module)
{
int rc = OMPI_SUCCESS;
rc = netpatterns_setup_recursive_knomial_tree_node(
ptpcoll_module->group_size,
ptpcoll_module->super.sbgp_partner_module->my_index,
mca_bcol_ptpcoll_component.k_nomial_radix,
&ptpcoll_module->knomial_exchange_tree);
return rc;
}
static int bcol_ptpcoll_collreq_init(opal_free_list_item_t *item, void* ctx)
{
mca_bcol_ptpcoll_module_t *ptpcoll_module= (mca_bcol_ptpcoll_module_t *) ctx;
mca_bcol_ptpcoll_collreq_t *collreq = (mca_bcol_ptpcoll_collreq_t *) item;
switch(mca_bcol_ptpcoll_component.barrier_alg) {
case 1:
collreq->requests = (ompi_request_t **)
calloc(2, sizeof(ompi_request_t *));
break;
case 2:
collreq->requests = (ompi_request_t **)
calloc(2 * ptpcoll_module->k_nomial_radix, sizeof(ompi_request_t *));
break;
}
if (NULL == collreq->requests) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
return OPAL_SUCCESS;
}
/* query to see if the module is available for use on the given
* communicator, and if so, what its priority is. This is where
* the backing shared-memory file is created.
*/
mca_bcol_base_module_t **mca_bcol_ptpcoll_comm_query(mca_sbgp_base_module_t *sbgp,
int *num_modules)
{
int rc;
/* local variables */
struct ompi_communicator_t *comm = sbgp->group_comm;
mca_bcol_ptpcoll_module_t *ptpcoll_module = NULL;
mca_bcol_base_module_t **ptpcoll_modules = NULL;
int iovec_size;
/* initialize local variables */
*num_modules = 0;
/*
* This is activated only for intra-communicators
*/
if (OMPI_COMM_IS_INTER(comm) ) {
return NULL;
}
/* allocate and initialize a ptpcoll module */
ptpcoll_modules = (mca_bcol_base_module_t **) malloc(sizeof(mca_bcol_base_module_t *));
if (NULL == ptpcoll_modules) {
return NULL;
}
ptpcoll_module = OBJ_NEW(mca_bcol_ptpcoll_module_t);
if (NULL == ptpcoll_module) {
free(ptpcoll_modules);
return NULL;
}
/* At this stage we support only a single module */
ptpcoll_modules[*num_modules] = &(ptpcoll_module->super);
(*num_modules)++;
/* set the subgroup */
ptpcoll_module->super.sbgp_partner_module = sbgp;
/* caching some useful information */
ptpcoll_module->group_size =
ptpcoll_module->super.sbgp_partner_module->group_size;
rc = load_binomial_info(ptpcoll_module);
if (OMPI_SUCCESS != rc) {
PTPCOLL_VERBOSE(10, ("Failed to load knomial info"));
goto CLEANUP;
}
rc = load_knomial_info(ptpcoll_module);
if (OMPI_SUCCESS != rc) {
PTPCOLL_VERBOSE(10, ("Failed to load knomial info"));
goto CLEANUP;
}
rc = load_narray_tree(ptpcoll_module);
if (OMPI_SUCCESS != rc) {
PTPCOLL_VERBOSE(10, ("Failed to load narray tree"));
goto CLEANUP;
}
rc = load_narray_knomial_tree(ptpcoll_module);
if (OMPI_SUCCESS != rc) {
PTPCOLL_VERBOSE(10, ("Failed to load narray-knomila tree"));
goto CLEANUP;
}
rc = load_recursive_knomial_info(ptpcoll_module);
if (OMPI_SUCCESS != rc) {
PTPCOLL_VERBOSE(10, ("Failed to load recursive knomial tree"));
goto CLEANUP;
}
/* creating collfrag free list */
OBJ_CONSTRUCT(&ptpcoll_module->collreqs_free, opal_free_list_t);
rc = opal_free_list_init (&ptpcoll_module->collreqs_free,
sizeof(mca_bcol_ptpcoll_collreq_t),
BCOL_PTP_CACHE_LINE_SIZE,
OBJ_CLASS(mca_bcol_ptpcoll_collreq_t),
0, BCOL_PTP_CACHE_LINE_SIZE,
256 /* free_list_num */,
-1 /* free_list_max, -1 = infinite */,
32 /* free_list_inc */,
NULL, 0, NULL,
bcol_ptpcoll_collreq_init,
ptpcoll_module);
if (OMPI_SUCCESS != rc) {
goto CLEANUP;
}
load_func(ptpcoll_module);
rc = alloc_allreduce_offsets_array(ptpcoll_module);
if (OMPI_SUCCESS != rc) {
goto CLEANUP;
}
/* Allocating iovec for PTP alltoall */
iovec_size = ptpcoll_module->group_size / 2 + ptpcoll_module->group_size % 2;
ptpcoll_module->alltoall_iovec = (struct iovec *) malloc(sizeof(struct iovec)
* iovec_size);
ptpcoll_module->log_group_size = lognum(ptpcoll_module->group_size);
rc = mca_bcol_base_bcol_fns_table_init(&(ptpcoll_module->super));
if (OMPI_SUCCESS != rc) {
goto CLEANUP;
}
/* Zero copy is supported */
ptpcoll_module->super.supported_mode = MCA_BCOL_BASE_ZERO_COPY;
/* return */
return ptpcoll_modules;
CLEANUP:
OBJ_RELEASE(ptpcoll_module);
free(ptpcoll_modules);
return NULL;
}

View file

@ -1,405 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/include/ompi/constants.h"
#include "ompi/mca/bcol/bcol.h"
#include "bcol_ptpcoll_reduce.h"
#include "bcol_ptpcoll_utils.h"
static int bcol_ptpcoll_reduce_narray_progress(bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args);
static int bcol_ptpcoll_reduce_narray(bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args);
#define NARRAY_RECV_NB(narray_node, process_shift, group_size, \
recv_buffer, pack_len, tag, comm, recv_requests, \
num_pending_recvs) \
do { \
int n, rc = OMPI_SUCCESS; \
int dst; \
int comm_dst; \
int offset = 0 ; \
\
/* Receive data from all relevant children */ \
for (n = 0; n < narray_node->n_children; n++) { \
\
dst = narray_node->children_ranks[n] + process_shift; \
if (dst >= group_size) { \
dst -= group_size; \
} \
comm_dst = group_list[dst]; \
\
/* Non blocking send .... */ \
PTPCOLL_VERBOSE(1 , ("Reduce, Irecv data to %d[%d], count %d, tag %d, addr %p", \
dst, comm_dst, pack_len, tag, \
data_buffer)); \
rc = MCA_PML_CALL(irecv((void *)((unsigned char*)recv_buffer + offset), pack_len, MPI_BYTE, \
comm_dst, tag, comm, \
&(recv_requests[*num_pending_recvs]))); \
if( OMPI_SUCCESS != rc ) { \
PTPCOLL_VERBOSE(10, ("Failed to start non-blocking receive")); \
return OMPI_ERROR; \
} \
++(*num_pending_recvs); \
offset += pack_len; \
} \
} while(0)
static inline int narray_reduce(void *data_buffer, void *recv_buffer,
int nrecvs, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op,
int *reduction_status) {
int pack_len = count * dtype->super.size;
int i = 0;
void *source_buffer = NULL, *result_buffer = NULL;
source_buffer = data_buffer;
result_buffer = recv_buffer;
for (i = 0; i < nrecvs; i++) {
ompi_op_reduce(op, (void*)((unsigned char*) source_buffer) ,
(void*)((unsigned char*) result_buffer),
count,dtype);
source_buffer = (void *)((unsigned char*)recv_buffer
+ (i+1) * pack_len);
}
*reduction_status = 1;
return OMPI_SUCCESS;
}
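/*
 * Editor's sketch (not part of the removed code): narray_reduce() above
 * folds the local contribution and every child's buffer into the first
 * slot of the receive buffer. The integer-sum version below shows the same
 * accumulation pattern without the ompi_op machinery; names are
 * illustrative only.
 */
static void ptpcoll_sketch_narray_sum(const int *local, int *recv_slots,
                                      int nrecvs, int count)
{
    int child, c;

    /* slot 0 first absorbs the local data ... */
    for (c = 0; c < count; c++) {
        recv_slots[c] += local[c];
    }
    /* ... and then every other child's slot */
    for (child = 1; child < nrecvs; child++) {
        for (c = 0; c < count; c++) {
            recv_slots[c] += recv_slots[child * count + c];
        }
    }
    /* the reduced vector now lives in recv_slots[0 .. count-1] */
}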
static int bcol_ptpcoll_reduce_narray_progress(bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args)
{
mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module;
int tag = -1;
int rc;
int group_size = ptpcoll_module->group_size;
int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list;
uint32_t buffer_index = input_args->buffer_index;
struct ompi_op_t *op = input_args->op;
ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm;
ompi_request_t **send_request =
&ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests[0];
ompi_request_t **recv_requests =
&ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests[1];
void *data_buffer = NULL;
void *src_buffer = (void *) (
(unsigned char *)input_args->sbuf +
(size_t)input_args->sbuf_offset);
void *recv_buffer = (void *) (
(unsigned char *)input_args->rbuf +
(size_t)input_args->rbuf_offset);
int count = input_args->count;
struct ompi_datatype_t *dtype = input_args->dtype;
int pack_len = input_args->count * input_args->dtype->super.size;
int *active_requests =
&(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests);
int matched = false;
int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index;
int relative_group_index = 0;
netpatterns_tree_node_t *narray_node = NULL;
bool not_sent = false;
int parent_rank = -1, comm_parent_rank = -1;
int group_root_index = input_args->root;
if (!ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].reduce_init_called) {
bcol_ptpcoll_reduce_narray(input_args, const_args);
}
/*
* By default the src buffer is the data buffer,
* only after reduction, the recv buffer becomes the
* data buffer
*/
data_buffer = src_buffer;
relative_group_index = my_group_index - group_root_index;
if (relative_group_index < 0) {
relative_group_index +=group_size;
}
/* keep tag within the limit supported by the pml */
tag = (PTPCOLL_TAG_OFFSET + input_args->sequence_num * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask);
/* mark this as a collective tag, to avoid conflict with user-level tags */
tag = -tag;
narray_node = &ptpcoll_module->narray_node[relative_group_index];
PTPCOLL_VERBOSE(3, ("reduce, Narray tree Progress"));
PTPCOLL_VERBOSE(8, ("bcol_ptpcoll_reduce_narray, buffer index: %d "
"tag: %d "
"tag_mask: %d "
"sn: %d "
"root: %d [%d]"
"buff: %p ",
buffer_index, tag,
ptpcoll_module->tag_mask, input_args->sequence_num,
input_args->root_flag, input_args->root_route->rank,
data_buffer));
/*
Check if the data was received
*/
if (0 != *active_requests) {
matched = mca_bcol_ptpcoll_test_all_for_match
(active_requests, recv_requests, &rc);
if (OMPI_SUCCESS != rc) {
return OMPI_ERROR;
}
/* All data was received, then do a reduction*/
if(matched) {
narray_reduce(data_buffer, recv_buffer, narray_node->n_children, count, dtype, op,
&ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].reduction_status);
/*
* The reduction result is in the recv buffer, so it is the new data
* buffer
*/
data_buffer = recv_buffer;
/* The reduction only completed just now, so the send to the parent has not been posted yet */
not_sent = true;
} else {
PTPCOLL_VERBOSE(10, ("reduce root is started"));
return BCOL_FN_STARTED;
}
}
/* I'm root, I'm done */
if (input_args->root_flag) {
return BCOL_FN_COMPLETE;
}
PTPCOLL_VERBOSE(1,("Testing Sending Match"));
/* If send was not posted */
/* Manju: Leaf node should never post in the progress logic */
if (not_sent) {
parent_rank =
ptpcoll_module->narray_node[relative_group_index].parent_rank +
group_root_index;
if (parent_rank >= group_size) {
parent_rank -= group_size;
}
comm_parent_rank = group_list[parent_rank];
PTPCOLL_VERBOSE(1,("Sending data to %d ",comm_parent_rank));
rc = MCA_PML_CALL(isend(data_buffer, pack_len, MPI_BYTE,
comm_parent_rank,
tag, MCA_PML_BASE_SEND_STANDARD, comm, send_request));
if( OMPI_SUCCESS != rc ) {
PTPCOLL_VERBOSE(10, ("Failed to send data"));
return OMPI_ERROR;
}
}
if (0 == mca_bcol_ptpcoll_test_for_match(send_request, &rc)) {
PTPCOLL_VERBOSE(10, ("Test was not matched - %d", rc));
/* Data has not been sent. Return that the collective has been started
* because we MUST call test on this request once it is finished to
* ensure that it is properly freed. */
return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED;
}
return BCOL_FN_COMPLETE;
}
static int bcol_ptpcoll_reduce_narray(bcol_function_args_t *input_args,
struct mca_bcol_base_function_t *const_args)
{
mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *)const_args->bcol_module;
int tag;
int rc;
int group_size = ptpcoll_module->group_size;
int *group_list = ptpcoll_module->super.sbgp_partner_module->group_list;
uint32_t buffer_index = input_args->buffer_index;
struct ompi_op_t *op = input_args->op;
ompi_communicator_t* comm = ptpcoll_module->super.sbgp_partner_module->group_comm;
ompi_request_t **recv_requests =
&ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests[1];
ompi_request_t **send_request =
&ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].requests[0];
void *data_buffer = NULL;
void *src_buffer = (void *) (
(unsigned char *)input_args->sbuf +
(size_t)input_args->sbuf_offset);
void *recv_buffer = (void *) (
(unsigned char *)input_args->rbuf +
(size_t)input_args->rbuf_offset);
int count = input_args->count;
struct ompi_datatype_t *dtype = input_args->dtype;
int pack_len = input_args->count * input_args->dtype->super.size;
int *active_requests =
&(ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].active_requests);
int matched = true;
int my_group_index = ptpcoll_module->super.sbgp_partner_module->my_index;
int group_root_index = -1;
int relative_group_index = 0;
netpatterns_tree_node_t *narray_node = NULL;
int parent_rank = -1, comm_parent_rank = -1;
/* This is the first function that should be called, not the progress
 * function. The fragmentation code calls progress first, so progress
 * redirects here. The flag indicates whether we have entered this code.
 */
ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].reduce_init_called = true;
PTPCOLL_VERBOSE(1, ("Reduce, Narray tree"));
/* reset active request counter */
(*active_requests) = 0;
/* keep tag within the limit supported by the pml */
tag = (PTPCOLL_TAG_OFFSET + input_args->sequence_num * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask);
/* mark this as a collective tag, to avoid conflict with user-level tags */
tag = -tag;
PTPCOLL_VERBOSE(1, ("bcol_ptpcoll_reduce_narray, buffer index: %d "
"tag: %d "
"tag_mask: %d "
"sn: %d "
"root: %d "
"buff: %p ",
buffer_index, tag,
ptpcoll_module->tag_mask, input_args->sequence_num,
input_args->root_flag,
src_buffer));
/* Compute Root Index Shift */
group_root_index = input_args->root;
relative_group_index = my_group_index - group_root_index;
if (relative_group_index < 0) {
relative_group_index += group_size;
}
narray_node = &ptpcoll_module->narray_node[relative_group_index];
if (0 == narray_node->n_children) {
PTPCOLL_VERBOSE(10, ("I'm leaf of the data"));
/*
 * I'm a leaf of the tree,
 * just send my data up to the parent
 */
data_buffer = src_buffer;
goto NARRAY_SEND_DATA;
}
/* Not leaf, either an internal node or root */
NARRAY_RECV_NB(narray_node, group_root_index, group_size,
recv_buffer, pack_len, tag, comm, recv_requests,
active_requests);
/* We have not done reduction, yet */
ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].reduction_status = 0;
/* We can not block, so run a couple of tests for data arrival */
matched = mca_bcol_ptpcoll_test_all_for_match
(active_requests, recv_requests, &rc);
/* Check if received the data */
if(matched) {
narray_reduce(src_buffer, recv_buffer, narray_node->n_children,
count, dtype, op, &ptpcoll_module->ml_mem.ml_buf_desc[buffer_index].reduction_status);
PTPCOLL_VERBOSE(1, ("Reduce, received data from all childrend "));
data_buffer = recv_buffer;
} else {
PTPCOLL_VERBOSE(1, ("reduce root is started"));
return BCOL_FN_STARTED;
}
/* I'm root, I'm done */
if (input_args->root_flag) {
return BCOL_FN_COMPLETE;
}
NARRAY_SEND_DATA:
/*
* Send the data (reduce in case of internal nodes, or just data in
* case of leaf nodes) to the parent
*/
narray_node = &ptpcoll_module->narray_node[relative_group_index];
parent_rank =
ptpcoll_module->narray_node[relative_group_index].parent_rank +
group_root_index;
if (parent_rank >= group_size) {
parent_rank -= group_size;
}
comm_parent_rank = group_list[parent_rank];
PTPCOLL_VERBOSE(1,("Sending data to %d ",comm_parent_rank));
rc = MCA_PML_CALL(isend(data_buffer, pack_len, MPI_BYTE,
comm_parent_rank,
tag, MCA_PML_BASE_SEND_STANDARD, comm, send_request));
if( OMPI_SUCCESS != rc ) {
PTPCOLL_VERBOSE(10, ("Failed to send data"));
return OMPI_ERROR;
}
/* We can not block, so run a couple of tests for send completion */
if (0 == mca_bcol_ptpcoll_test_for_match(send_request, &rc)) {
PTPCOLL_VERBOSE(10, ("Test was not matched - %d", rc));
/* The send has not completed yet, report that the collective has started */
return (OMPI_SUCCESS != rc) ? rc : BCOL_FN_STARTED;
}
return BCOL_FN_COMPLETE;
}
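/*
 * Editor's sketch (not part of the removed code): both reduce entry points
 * above rotate group indices so that the tree is always rooted at index 0.
 * The helpers below show that rotation in isolation; the tree shape itself
 * comes from netpatterns and is not reproduced here.
 */
static int ptpcoll_sketch_to_tree_index(int my_index, int root, int group_size)
{
    int rel = my_index - root;                      /* index inside the rooted tree */
    return (rel < 0) ? rel + group_size : rel;
}

static int ptpcoll_sketch_to_group_rank(int tree_index, int root, int group_size)
{
    int rank = tree_index + root;                   /* back to a real group rank */
    return (rank >= group_size) ? rank - group_size : rank;
}
/* Example: group_size = 8, root = 5, my_index = 2 -> tree index 5; a
 * hypothetical tree-level parent index of 2 maps back to group rank 7. */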
int bcol_ptpcoll_reduce_init(mca_bcol_base_module_t *super)
{
mca_bcol_base_coll_fn_comm_attributes_t comm_attribs;
mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs;
PTPCOLL_VERBOSE(1,("Initialization Reduce - Narray"));
comm_attribs.bcoll_type = BCOL_REDUCE;
comm_attribs.comm_size_min = 0;
comm_attribs.comm_size_max = 1024 * 1024;
comm_attribs.waiting_semantics = NON_BLOCKING;
inv_attribs.bcol_msg_min = 0;
inv_attribs.bcol_msg_max = 20000; /* range 1 */
inv_attribs.datatype_bitmap = 0xffffffff;
inv_attribs.op_types_bitmap = 0xffffffff;
comm_attribs.data_src = DATA_SRC_KNOWN;
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
bcol_ptpcoll_reduce_narray,
bcol_ptpcoll_reduce_narray_progress);
comm_attribs.data_src = DATA_SRC_KNOWN;
return OMPI_SUCCESS;
}

View file

@ -1,25 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BCOL_PTPCOLL_REDUCE_H
#define MCA_BCOL_PTPCOLL_REDUCE_H
#include "ompi_config.h"
#include "bcol_ptpcoll.h"
#include "bcol_ptpcoll_utils.h"
BEGIN_C_DECLS
int bcol_ptpcoll_reduce_init(mca_bcol_base_module_t *super);
END_C_DECLS
#endif /* MCA_BCOL_PTPCOLL_REDUCE_H */

View file

@ -1,139 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "bcol_ptpcoll.h"
#include "bcol_ptpcoll_utils.h"
/*
* Return the largest power of K that does not exceed the number (as an exponent), and optionally K^power itself
*/
int pow_k_calc(int k, int number, int *out_number)
{
int power = 0;
int n = 1;
while (n < number) {
n *= k;
++power;
}
if (n > number) {
n /= k;
--power;
}
if (NULL != out_number) {
*out_number = n;
}
return power;
}
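/*
 * Editor's worked example (not part of the removed code):
 *
 *   pow_k_calc(3, 20, &n) -> returns 2, n = 9   (3^2 is the largest power of 3 <= 20)
 *   pow_k_calc(2, 16, &n) -> returns 4, n = 16  (exact powers are returned as-is)
 */
#include <assert.h>
static void ptpcoll_sketch_pow_k_example(void)
{
    int n = 0;
    assert(2 == pow_k_calc(3, 20, &n) && 9 == n);
    assert(4 == pow_k_calc(2, 16, &n) && 16 == n);
}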
/*
* Communicator rank to group index conversion function for K-nomial tree.
* Complexity: (K-1) * log_K(N)
*
* Input:
* my_group_index - my process index in the group
* comm_source - the communicator rank of the source of data
* radix - radix of K-nomial tree
* group_size - the size of my group
* group_array[] - one to one map from group index to communicator rank
*
* Output:
* Group index for comm_source.
*/
int get_group_index_and_distance_for_binomial(int my_group_index, int comm_source,
int group_size, int *group_array, int *pow_distance)
{
int group_index;
int i;
*pow_distance = 0;
for (i = 1; i < group_size; i<<=1, (*pow_distance)++) {
group_index = my_group_index ^ i;
if (comm_source == group_array[group_index]) {
return group_index;
}
}
*pow_distance = -1;
return -1;
}
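/*
 * Editor's worked example (not part of the removed code): for the binomial
 * variant above, the only possible peers of group index 5 in a group of 8
 * are the ranks reachable by flipping a single bit:
 *
 *   5 ^ 1 = 4 (distance 0),  5 ^ 2 = 7 (distance 1),  5 ^ 4 = 1 (distance 2)
 *
 * The identity group_array below is an assumption made purely for this
 * illustration (group index == communicator rank).
 */
#include <assert.h>
static void ptpcoll_sketch_binomial_example(void)
{
    int group_array[8] = {0, 1, 2, 3, 4, 5, 6, 7};
    int dist = -1;
    assert(7 == get_group_index_and_distance_for_binomial(5, 7, 8,
                                                          group_array, &dist));
    assert(1 == dist);
}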
int get_group_index_and_distance_for_k_nomial(int my_group_index, int comm_source, int radix,
int group_size, int *group_array, int *pow_distance)
{
int group_index;
int offset = 1; /* offset equal to 1 (radix_power) */
int radix_power = 1; /* radix power 0 */
*pow_distance = 0;
/*
* Go through the range of possible offsets from my rank;
* for each offset we calculate k-nomial tree root.
*/
while(offset < group_size) {
/* K-nomial tree root calculation for the offset */
if (offset % (radix * radix_power)) {
group_index = my_group_index - offset;
/* wrap around if the group is negative */
if (group_index < 0) {
group_index += group_size;
}
PTPCOLL_VERBOSE(10, ("Checking %d", group_index));
if (comm_source == group_array[group_index]) {
return group_index;
}
offset += radix_power;
} else {
/* we are done with this section of the tree, go to the next one */
radix_power *= radix;
(*pow_distance)++;
}
}
/* No source was found, return -1 */
*pow_distance = -1;
return -1;
}
int get_group_index_for_k_nomial(int my_group_index, int comm_source, int radix, int group_size, int *group_array)
{
int group_index;
int radix_power = 1; /* radix power 0 */
int offset = 1; /* offset equal to 1 (radix_power) */
/*
* Go through the range of possible offsets from my rank;
* for each offset we calculate k-nomial tree root.
*/
while(offset < group_size) {
/* K-nomial tree root calculation for the offset */
if (offset % (radix * radix_power)) {
group_index = my_group_index - offset;
/* wrap around if the group is negative */
if (group_index < 0) {
group_index += group_size;
}
if (comm_source == group_array[group_index]) {
return group_index;
}
offset += radix_power;
} else {
/* we are done with this section of the tree, go to the next one */
radix_power *= radix;
}
}
/* No source was found, return -1 */
return -1;
}

View file

@ -1,80 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BCOL_PTPCOLL_UTILS_H
#define MCA_BCOL_PTPCOLL_UTILS_H
#include "ompi_config.h"
#include "ompi/mca/rte/rte.h"
BEGIN_C_DECLS
/*
* Return the largest power of K that does not exceed the number
*/
int pow_k_calc(int k, int number, int *out_number);
/*
* Communicator rank to group index conversion function for K-nomial tree.
*/
int get_group_index_for_k_nomial(int my_group_index, int comm_source, int radix, int group_size, int *group_array);
/* the same as above, but returns more information */
int get_group_index_and_distance_for_k_nomial(int my_group_index, int comm_source, int radix,
int group_size, int *group_array, int *pow_distance);
int get_group_index_and_distance_for_binomial(int my_group_index, int comm_source,
int group_size, int *group_array, int *pow_distance);
/*
* Error and debug Macros/Functions
*/
static inline int mca_bcol_ptpcoll_err(const char* fmt, ...)
{
va_list list;
int ret;
va_start(list, fmt);
ret = vfprintf(stderr, fmt, list);
va_end(list);
return ret;
}
#define PTPCOLL_ERROR(args) \
do { \
mca_bcol_ptpcoll_err("[%s]%s[%s:%d:%s] PTPCOLL ", \
ompi_process_info.nodename, \
OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \
__FILE__, __LINE__, __func__); \
mca_bcol_ptpcoll_err args; \
mca_bcol_ptpcoll_err("\n"); \
} while(0)
#if OPAL_ENABLE_DEBUG
#define PTPCOLL_VERBOSE(level, args) \
do { \
if (mca_bcol_ptpcoll_component.verbose >= level) { \
mca_bcol_ptpcoll_err("[%s]%s[%s:%d:%s] PTPCOLL ", \
ompi_process_info.nodename, \
OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), \
__FILE__, __LINE__, __func__); \
mca_bcol_ptpcoll_err args; \
mca_bcol_ptpcoll_err("\n"); \
} \
} while(0)
#else
#define PTPCOLL_VERBOSE(level, args)
#endif
END_C_DECLS
#endif

View file

@ -1,7 +0,0 @@
#
# owner/status file
# owner: institution that is responsible for this package
# status: e.g. active, maintenance, unmaintained
#
owner: ORNL
status: unmaintained

View file

@ -1,89 +0,0 @@
#
# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
# Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
# Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
# reserved.
# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2016 Research Organization for Information Science
# and Technology (RIST). All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
AM_LFLAGS = -Pcoll_ml_config_yy
LEX_OUTPUT_ROOT = lex.coll_ml_config_yy
dist_ompidata_DATA = \
mca-coll-ml.config \
help-mpi-coll-ml.txt
sources = coll_ml.h \
coll_ml_inlines.h \
coll_ml_module.c \
coll_ml_allocation.h \
coll_ml_allocation.c \
coll_ml_barrier.c \
coll_ml_bcast.c \
coll_ml_colls.h \
coll_ml_component.c \
coll_ml_copy_fns.c \
coll_ml_descriptors.c \
coll_ml_functions.h \
coll_ml_hier_algorithms.c \
coll_ml_hier_algorithms_setup.c \
coll_ml_hier_algorithms_bcast_setup.c \
coll_ml_hier_algorithms_allreduce_setup.c \
coll_ml_hier_algorithms_reduce_setup.c \
coll_ml_hier_algorithms_common_setup.c \
coll_ml_hier_algorithms_common_setup.h \
coll_ml_hier_algorithms_allgather_setup.c \
coll_ml_hier_algorithm_memsync_setup.c \
coll_ml_custom_utils.h \
coll_ml_custom_utils.c \
coll_ml_progress.c \
coll_ml_reduce.c \
coll_ml_allreduce.c \
coll_ml_allgather.c \
coll_ml_mca.h \
coll_ml_mca.c \
coll_ml_lmngr.h \
coll_ml_lmngr.c \
coll_ml_hier_algorithms_barrier_setup.c \
coll_ml_select.h \
coll_ml_select.c \
coll_ml_memsync.c \
coll_ml_lex.h \
coll_ml_lex.l \
coll_ml_config.c \
coll_ml_config.h
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
component_noinst =
component_install =
if MCA_BUILD_ompi_coll_ml_DSO
component_install += mca_coll_ml.la
else
component_noinst += libmca_coll_ml.la
endif
# See ompi/mca/btl/ml/Makefile.am for an explanation of
# libmca_common_ml.la.
mcacomponentdir = $(ompilibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_coll_ml_la_SOURCES = $(sources)
mca_coll_ml_la_LDFLAGS = -module -avoid-version
mca_coll_ml_la_LIBADD =
noinst_LTLIBRARIES = $(component_noinst)
libmca_coll_ml_la_SOURCES =$(sources)
libmca_coll_ml_la_LDFLAGS = -module -avoid-version
maintainer-clean-local:
rm -f coll_ml_lex.c

The file diff is not shown because it is too large. Load diff

View file

@ -1,633 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file */
#include "ompi_config.h"
#include <stdlib.h>
#include "ompi/constants.h"
#include "opal/threads/mutex.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/bcol/bcol.h"
#include "opal/sys/atomic.h"
#include "coll_ml.h"
#include "coll_ml_select.h"
#include "coll_ml_allocation.h"
static int mca_coll_ml_allgather_small_unpack_data(mca_coll_ml_collective_operation_progress_t *coll_op)
{
bool rcontig = coll_op->full_message.recv_data_continguous;
int n_ranks_in_comm = ompi_comm_size(OP_ML_MODULE(coll_op)->comm);
void *dest = (void *)((uintptr_t)coll_op->full_message.dest_user_addr +
(uintptr_t)coll_op->full_message.n_bytes_delivered);
void *src = (void *)((uintptr_t)coll_op->fragment_data.buffer_desc->data_addr +
(size_t)coll_op->variable_fn_params.rbuf_offset);
if (rcontig) {
memcpy(dest, src, n_ranks_in_comm * coll_op->full_message.n_bytes_scheduled);
} else {
mca_coll_ml_convertor_unpack(src, n_ranks_in_comm * coll_op->full_message.n_bytes_scheduled,
&coll_op->fragment_data.message_descriptor->recv_convertor);
}
return OMPI_SUCCESS;
}
static inline void copy_data (mca_coll_ml_collective_operation_progress_t *coll_op, rank_properties_t *rank_props, int soffset) {
bool rcontig = coll_op->fragment_data.message_descriptor->recv_data_continguous;
size_t total_bytes = coll_op->fragment_data.message_descriptor->n_bytes_total;
size_t pack_len = coll_op->fragment_data.fragment_size;
int doffset = rank_props->rank;
void *dest, *src;
src = (void *) ((uintptr_t)coll_op->fragment_data.buffer_desc->data_addr +
(size_t)coll_op->variable_fn_params.rbuf_offset + soffset * pack_len);
if (rcontig) {
dest = (void *) ((uintptr_t) coll_op->full_message.dest_user_addr +
(uintptr_t) coll_op->fragment_data.offset_into_user_buffer +
doffset * total_bytes);
memcpy(dest, src, pack_len);
} else {
size_t position;
opal_convertor_t *recv_convertor =
&coll_op->fragment_data.message_descriptor->recv_convertor;
position = (size_t) coll_op->fragment_data.offset_into_user_buffer +
doffset * total_bytes;
opal_convertor_set_position(recv_convertor, &position);
mca_coll_ml_convertor_unpack(src, pack_len, recv_convertor);
}
}
static int mca_coll_ml_allgather_noncontiguous_unpack_data(mca_coll_ml_collective_operation_progress_t *coll_op)
{
int i, j, n_level_one_sbgps;
size_t soffset;
mca_coll_ml_topology_t *topo_info = coll_op->coll_schedule->topo_info;
sub_group_params_t *array_of_all_subgroup_ranks = topo_info->array_of_all_subgroups;
n_level_one_sbgps = array_of_all_subgroup_ranks->level_one_index;
for (i = 0 ; i < n_level_one_sbgps; i++) {
/* determine where in the source buffer the data can be found */
soffset = array_of_all_subgroup_ranks[i].index_of_first_element;
for (j = 0 ; j < array_of_all_subgroup_ranks[i].n_ranks; j++, ++soffset) {
copy_data (coll_op, array_of_all_subgroup_ranks[i].rank_data + j, soffset);
}
}
return OMPI_SUCCESS;
}
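/*
 * Editor's note (not part of the removed code): for the contiguous case,
 * copy_data() above places each peer's fragment at
 *
 *   dest_user_addr + offset_into_user_buffer + rank * n_bytes_total
 *
 * i.e. the user buffer is carved into one n_bytes_total-sized slice per
 * rank and successive fragments fill each slice left to right. With
 * assumed sizes of n_bytes_total = 1024 and 256-byte fragments, the
 * fragment that starts at user-buffer offset 512 lands, for rank 3, at
 * byte 3 * 1024 + 512 = 3584 of the receive buffer.
 */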
/* Allgather dependencies seem easy, everyone needs to work from the "bottom up".
* Following Pasha, I too will put in the simplest dependency graph and change it later
* when we add hierarchy. Basically, allgather has the same dependency profile as the
* sequential broadcast except that there is only a single ordering of tasks.
*/
static int mca_coll_ml_allgather_task_setup(mca_coll_ml_collective_operation_progress_t *coll_op)
{
int fn_idx, h_level, my_index, root;
mca_sbgp_base_module_t *sbgp;
mca_coll_ml_topology_t *topo = coll_op->coll_schedule->topo_info;
fn_idx = coll_op->sequential_routine.current_active_bcol_fn;
h_level = coll_op->coll_schedule->component_functions[fn_idx].h_level;
sbgp = topo->component_pairs[h_level].
subgroup_module;
my_index = sbgp->my_index;
/* In the case of allgather, the local leader is always the root */
root = 0;
if (my_index == root) {
coll_op->variable_fn_params.root_flag = true;
coll_op->variable_fn_params.root_route = NULL;
} else {
coll_op->variable_fn_params.root_flag = false;
coll_op->variable_fn_params.root_route = &topo->route_vector[root];
}
return OMPI_SUCCESS;
}
static int mca_coll_ml_allgather_frag_progress(mca_coll_ml_collective_operation_progress_t *coll_op)
{
/* local variables */
int ret;
size_t frag_len, dt_size;
const void *buf;
mca_bcol_base_payload_buffer_desc_t *src_buffer_desc;
mca_coll_ml_collective_operation_progress_t *new_op;
mca_coll_ml_module_t *ml_module = OP_ML_MODULE(coll_op);
bool scontig = coll_op->fragment_data.message_descriptor->send_data_continguous;
ompi_datatype_type_size(coll_op->variable_fn_params.dtype, &dt_size);
/* Keep the pipeline filled with fragments */
while (coll_op->fragment_data.message_descriptor->n_active <
coll_op->fragment_data.message_descriptor->pipeline_depth) {
/* If an active fragment happens to have completed the collective during
* a hop into the progress engine, then don't launch a new fragment,
* instead break and return.
*/
if (coll_op->fragment_data.message_descriptor->n_bytes_scheduled
== coll_op->fragment_data.message_descriptor->n_bytes_total) {
break;
}
/* Get an ml buffer */
src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module);
if (NULL == src_buffer_desc) {
/* If there exist outstanding fragments, then break out
* and let an active fragment deal with this later,
* there are no buffers available.
*/
if (0 < coll_op->fragment_data.message_descriptor->n_active) {
return OMPI_SUCCESS;
} else {
/* The fragment is already on the list and
 * we still have no ml resources.
 * Return busy */
if (coll_op->pending & REQ_OUT_OF_MEMORY) {
ML_VERBOSE(10,("Out of resources %p", coll_op));
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
}
coll_op->pending |= REQ_OUT_OF_MEMORY;
opal_list_append(&((OP_ML_MODULE(coll_op))->waiting_for_memory_list),
(opal_list_item_t *)coll_op);
ML_VERBOSE(10,("Out of resources %p adding to pending queue", coll_op));
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
}
}
/* Get a new collective descriptor and initialize it */
new_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module,
ml_module->coll_ml_allgather_functions[ML_SMALL_DATA_ALLGATHER],
coll_op->fragment_data.message_descriptor->src_user_addr,
coll_op->fragment_data.message_descriptor->dest_user_addr,
coll_op->fragment_data.message_descriptor->n_bytes_total,
coll_op->fragment_data.message_descriptor->n_bytes_scheduled);
new_op->fragment_data.current_coll_op = coll_op->fragment_data.current_coll_op;
new_op->fragment_data.message_descriptor = coll_op->fragment_data.message_descriptor;
/* set the task setup callback */
new_op->sequential_routine.seq_task_setup = mca_coll_ml_allgather_task_setup;
/*
MCA_COLL_IBOFFLOAD_SET_ML_BUFFER_INFO(new_op,
src_buffer_desc->buffer_index, src_buffer_desc);
*/
/* We need this address for pointer arithmetic in memcpy */
buf = coll_op->fragment_data.message_descriptor->src_user_addr;
if (!scontig) {
frag_len = ml_module->small_message_thresholds[BCOL_ALLGATHER];
mca_coll_ml_convertor_get_send_frag_size(
ml_module, &frag_len,
coll_op->fragment_data.message_descriptor);
mca_coll_ml_convertor_pack(
(void *) ((uintptr_t) src_buffer_desc->data_addr +
frag_len * coll_op->coll_schedule->topo_info->hier_layout_info[0].offset +
frag_len * coll_op->coll_schedule->topo_info->hier_layout_info[0].level_one_index),
frag_len, &coll_op->fragment_data.message_descriptor->send_convertor);
} else {
/* calculate new frag length, there are some issues here */
frag_len = (coll_op->fragment_data.message_descriptor->n_bytes_total -
coll_op->fragment_data.message_descriptor->n_bytes_scheduled <
coll_op->fragment_data.fragment_size ?
coll_op->fragment_data.message_descriptor->n_bytes_total -
coll_op->fragment_data.message_descriptor->n_bytes_scheduled :
coll_op->fragment_data.fragment_size);
/* everybody copies in, based on the new values */
memcpy((void *) ((uintptr_t)src_buffer_desc->data_addr +
frag_len * new_op->coll_schedule->topo_info->hier_layout_info[0].offset +
frag_len * new_op->coll_schedule->topo_info->hier_layout_info[0].level_one_index),
(void *) ((uintptr_t) buf + (uintptr_t)
coll_op->fragment_data.message_descriptor->n_bytes_scheduled), frag_len);
}
new_op->variable_fn_params.sbuf = (void *) src_buffer_desc->data_addr;
new_op->variable_fn_params.rbuf = (void *) src_buffer_desc->data_addr;
/* update the number of bytes scheduled */
new_op->fragment_data.message_descriptor->n_bytes_scheduled += frag_len;
/* everyone needs an unpack function */
new_op->process_fn = mca_coll_ml_allgather_noncontiguous_unpack_data;
new_op->fragment_data.fragment_size = frag_len;
new_op->fragment_data.buffer_desc = src_buffer_desc;
/* Setup fragment specific data */
++(new_op->fragment_data.message_descriptor->n_active);
ML_VERBOSE(10, ("Start more, My index %d ",
new_op->fragment_data.buffer_desc->buffer_index));
/* this is a bit buggy */
ML_SET_VARIABLE_PARAMS_BCAST(
new_op,
OP_ML_MODULE(new_op),
frag_len /* yes, we have consistent units, so this makes sense */,
MPI_BYTE /* we fragment according to buffer size
* we don't reduce the data thus we needn't
* keep "whole" datatypes, we may freely
* fragment without regard for multiples
* of any specific datatype
*/,
src_buffer_desc,
0,
0,
frag_len,
src_buffer_desc->data_addr);
/* initialize first coll */
ret = new_op->sequential_routine.seq_task_setup(new_op);
if (OMPI_SUCCESS != ret) {
ML_VERBOSE(3, ("Fragment failed to initialize itself"));
return ret;
}
new_op->variable_fn_params.buffer_size = frag_len;
new_op->variable_fn_params.hier_factor = coll_op->variable_fn_params.hier_factor;
new_op->variable_fn_params.root = 0;
MCA_COLL_ML_SET_NEW_FRAG_ORDER_INFO(new_op);
/* append this collective !! */
OPAL_THREAD_LOCK(&(mca_coll_ml_component.sequential_collectives_mutex));
opal_list_append(&mca_coll_ml_component.sequential_collectives,
(opal_list_item_t *)new_op);
OPAL_THREAD_UNLOCK(&(mca_coll_ml_component.sequential_collectives_mutex));
}
return OMPI_SUCCESS;
}
static inline __opal_attribute_always_inline__
int mca_coll_ml_allgather_start (const void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
ompi_request_t **req)
{
size_t pack_len, sdt_size;
int ret, n_fragments = 1, comm_size;
mca_coll_ml_topology_t *topo_info;
mca_bcol_base_payload_buffer_desc_t *src_buffer_desc;
mca_coll_ml_component_t *cm = &mca_coll_ml_component;
mca_coll_ml_collective_operation_progress_t *coll_op;
mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *) module;
ptrdiff_t lb, extent;
bool scontig, rcontig, in_place = false;
/* check for in place setting */
if (MPI_IN_PLACE == sbuf) {
in_place = true;
sdtype = rdtype;
scount = rcount;
}
/* scontig could be != to rcontig */
scontig = ompi_datatype_is_contiguous_memory_layout(sdtype, scount);
rcontig = ompi_datatype_is_contiguous_memory_layout(rdtype, rcount);
comm_size = ompi_comm_size(comm);
ML_VERBOSE(10, ("Starting allgather"));
assert(NULL != sdtype);
/* Calculate size of the data,
* at this stage, only contiguous data is supported */
/* this is valid for allgather */
ompi_datatype_type_size(sdtype, &sdt_size);
pack_len = scount * sdt_size;
if (in_place) {
sbuf = (char *) rbuf + ompi_comm_rank(comm) * pack_len;
}
/* Allocate collective schedule and pack message */
/* this is the total ending message size that will need to fit in the ml-buffer */
if (pack_len <= (size_t) ml_module->small_message_thresholds[BCOL_ALLGATHER]) {
/* The len of the message can not be larger than ML buffer size */
ML_VERBOSE(10, ("Single frag %d %d %d", pack_len, comm_size, ml_module->payload_block->size_buffer));
assert(pack_len * comm_size <= ml_module->payload_block->size_buffer);
src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module);
while (NULL == src_buffer_desc) {
opal_progress();
src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module);
}
/* change 1 */
coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module,
ml_module->coll_ml_allgather_functions[ML_SMALL_DATA_ALLGATHER],
sbuf, rbuf, pack_len, 0 /* offset for first pack */);
MCA_COLL_IBOFFLOAD_SET_ML_BUFFER_INFO(coll_op,
src_buffer_desc->buffer_index, src_buffer_desc);
coll_op->fragment_data.current_coll_op = ML_SMALL_DATA_ALLGATHER;
/* task setup callback function */
coll_op->sequential_routine.seq_task_setup = mca_coll_ml_allgather_task_setup;
/* change 2 */
if (!scontig) {
coll_op->full_message.n_bytes_scheduled =
mca_coll_ml_convertor_prepare(sdtype, scount, sbuf,
&coll_op->full_message.send_convertor, MCA_COLL_ML_NET_STREAM_SEND);
mca_coll_ml_convertor_pack(
(void *) ((uintptr_t) src_buffer_desc->data_addr + pack_len *
(coll_op->coll_schedule->topo_info->hier_layout_info[0].offset +
coll_op->coll_schedule->topo_info->hier_layout_info[0].level_one_index)),
pack_len, &coll_op->full_message.send_convertor);
} else {
/* change 3 */
memcpy((void *)((uintptr_t) src_buffer_desc->data_addr + pack_len *
(coll_op->coll_schedule->topo_info->hier_layout_info[0].offset +
coll_op->coll_schedule->topo_info->hier_layout_info[0].level_one_index)),
sbuf, pack_len);
coll_op->full_message.n_bytes_scheduled = pack_len;
}
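/* Each rank deposits its contribution at a rank-specific slot inside the
 * shared ML buffer. Illustrative example (assumed layout values): with
 * pack_len = 1024, hier_layout_info[0].offset = 0 and level_one_index = 2,
 * the local data lands at byte 2048 of the ML buffer, leaving room for the
 * other ranks' slots on either side. */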
if (!rcontig) {
mca_coll_ml_convertor_prepare(rdtype, rcount * comm_size, rbuf,
&coll_op->full_message.recv_convertor, MCA_COLL_ML_NET_STREAM_RECV);
}
if (coll_op->coll_schedule->topo_info->ranks_contiguous) {
coll_op->process_fn = mca_coll_ml_allgather_small_unpack_data;
} else {
coll_op->process_fn = mca_coll_ml_allgather_noncontiguous_unpack_data;
}
/* whole ml-buffer is used to send AND receive */
coll_op->variable_fn_params.sbuf = (void *) src_buffer_desc->data_addr;
coll_op->variable_fn_params.rbuf = (void *) src_buffer_desc->data_addr;
/* we can set the initial offset here */
coll_op->variable_fn_params.sbuf_offset = 0;
coll_op->variable_fn_params.rbuf_offset = 0;
coll_op->variable_fn_params.count = scount;
coll_op->fragment_data.fragment_size =
coll_op->full_message.n_bytes_scheduled;
/* For small CINCO, we may use the native datatype */
coll_op->variable_fn_params.dtype = sdtype;
coll_op->variable_fn_params.buffer_size = pack_len;
coll_op->variable_fn_params.root = 0;
} else if (cm->enable_fragmentation || pack_len * comm_size < (1 << 20)) {
/* calculate the number of fragments and the size of each frag */
size_t n_dts_per_frag, frag_len;
int pipeline_depth = mca_coll_ml_component.pipeline_depth;
/* Calculate the number of fragments required for this message; be careful with the integer division! */
frag_len = (pack_len <= (size_t) ml_module->small_message_thresholds[BCOL_ALLGATHER] ?
pack_len : (size_t) ml_module->small_message_thresholds[BCOL_ALLGATHER]);
n_dts_per_frag = frag_len / sdt_size;
n_fragments = (pack_len + sdt_size * n_dts_per_frag - 1) / (sdt_size * n_dts_per_frag);
pipeline_depth = (n_fragments < pipeline_depth ? n_fragments : pipeline_depth);
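/* Worked example with assumed values: pack_len = 10000 bytes, sdt_size = 8,
 * small_message_thresholds[BCOL_ALLGATHER] = 4096. Then frag_len = 4096,
 * n_dts_per_frag = 512, n_fragments = (10000 + 4095) / 4096 = 3 (a ceiling
 * division), and with a configured pipeline depth of 4 the effective depth
 * becomes min(3, 4) = 3. */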
src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module);
while (NULL == src_buffer_desc) {
opal_progress();
src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module);
}
/* change 4 */
coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module,
ml_module->coll_ml_allgather_functions[ML_SMALL_DATA_ALLGATHER],
sbuf, rbuf, pack_len,
0 /* offset for first pack */);
MCA_COLL_IBOFFLOAD_SET_ML_BUFFER_INFO(coll_op,
src_buffer_desc->buffer_index, src_buffer_desc);
topo_info = coll_op->coll_schedule->topo_info;
/* task setup callback function */
coll_op->sequential_routine.seq_task_setup = mca_coll_ml_allgather_task_setup;
if (!scontig) {
coll_op->full_message.send_converter_bytes_packed =
mca_coll_ml_convertor_prepare(
sdtype, scount, NULL,
&coll_op->full_message.dummy_convertor,
MCA_COLL_ML_NET_STREAM_SEND);
coll_op->full_message.dummy_conv_position = 0;
mca_coll_ml_convertor_get_send_frag_size(
ml_module, &frag_len,
&coll_op->full_message);
/* change 5 */
mca_coll_ml_convertor_prepare(sdtype, scount, sbuf,
&coll_op->full_message.send_convertor, MCA_COLL_ML_NET_STREAM_SEND);
mca_coll_ml_convertor_pack(
(void *) ((uintptr_t) src_buffer_desc->data_addr + frag_len *
(topo_info->hier_layout_info[0].offset +
topo_info->hier_layout_info[0].level_one_index)),
frag_len, &coll_op->full_message.send_convertor);
} else {
/* change 6 */
memcpy((void *)((uintptr_t)src_buffer_desc->data_addr + frag_len *
(topo_info->hier_layout_info[0].offset +
topo_info->hier_layout_info[0].level_one_index)),
sbuf, frag_len);
}
if (!rcontig) {
mca_coll_ml_convertor_prepare(rdtype, rcount * comm_size, rbuf,
&coll_op->full_message.recv_convertor, MCA_COLL_ML_NET_STREAM_RECV);
}
coll_op->process_fn = mca_coll_ml_allgather_noncontiguous_unpack_data;
/* hopefully this doesn't royally screw things up; the idea here is that the
* whole ml-buffer is used to send and receive
*/
coll_op->variable_fn_params.sbuf = (void *) src_buffer_desc->data_addr;
coll_op->variable_fn_params.rbuf = (void *) src_buffer_desc->data_addr;
/* we can set the initial offset here */
coll_op->variable_fn_params.sbuf_offset = 0;
coll_op->variable_fn_params.rbuf_offset = 0;
coll_op->fragment_data.buffer_desc = src_buffer_desc;
coll_op->fragment_data.fragment_size = frag_len;
coll_op->fragment_data.message_descriptor->n_active = 1;
coll_op->full_message.n_bytes_scheduled = frag_len;
coll_op->full_message.fragment_launcher = mca_coll_ml_allgather_frag_progress;
coll_op->full_message.pipeline_depth = pipeline_depth;
coll_op->fragment_data.current_coll_op = ML_SMALL_DATA_ALLGATHER;
/* remember this is different for frags !! Caused data corruption when
* not properly set. Need to be sure you have consistent units.
*/
coll_op->variable_fn_params.count = frag_len;
coll_op->variable_fn_params.dtype = MPI_BYTE; /* for fragmented data, we work in
* units of bytes. This means that
* all of our arithmetic is done
* in terms of bytes
*/
coll_op->variable_fn_params.root = 0;
coll_op->variable_fn_params.frag_size = frag_len;
coll_op->variable_fn_params.buffer_size = frag_len;
} else {
/* change 7 */
ML_VERBOSE(10, ("ML_ALLGATHER_LARGE_DATA_KNOWN case."));
coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module,
ml_module->coll_ml_allgather_functions[ML_LARGE_DATA_ALLGATHER],
sbuf, rbuf, pack_len, 0 /* offset for first pack */);
topo_info = coll_op->coll_schedule->topo_info;
if (MCA_BCOL_BASE_NO_ML_BUFFER_FOR_LARGE_MSG & topo_info->all_bcols_mode) {
MCA_COLL_IBOFFLOAD_SET_ML_BUFFER_INFO(coll_op, MCA_COLL_ML_NO_BUFFER, NULL);
} else {
src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module);
while (NULL == src_buffer_desc) {
opal_progress();
src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module);
}
MCA_COLL_IBOFFLOAD_SET_ML_BUFFER_INFO(coll_op, src_buffer_desc->buffer_index, src_buffer_desc);
}
/* not sure if I really need this here */
coll_op->sequential_routine.seq_task_setup = mca_coll_ml_allgather_task_setup;
coll_op->process_fn = NULL;
/* probably the most important piece */
coll_op->variable_fn_params.sbuf = sbuf;
coll_op->variable_fn_params.rbuf = rbuf;
coll_op->variable_fn_params.sbuf_offset = 0;
coll_op->variable_fn_params.rbuf_offset = 0;
coll_op->variable_fn_params.count = scount;
coll_op->variable_fn_params.dtype = sdtype;/* for zero copy, we want the
* native datatype and actual count
*/
coll_op->variable_fn_params.root = 0;
/* you still need to copy in your own data into the rbuf */
/* don't need to do this if you have in place data */
if (!in_place) {
memcpy((char *) rbuf + ompi_comm_rank(comm) * pack_len, sbuf, pack_len);
}
}
coll_op->full_message.send_count = scount;
coll_op->full_message.recv_count = rcount;
coll_op->full_message.send_data_continguous = scontig;
coll_op->full_message.recv_data_continguous = rcontig;
ompi_datatype_get_extent(sdtype, &lb, &extent);
coll_op->full_message.send_extent = (size_t) extent;
ompi_datatype_get_extent(rdtype, &lb, &extent);
coll_op->full_message.recv_extent = (size_t) extent;
/* Fill in the function arguments */
coll_op->variable_fn_params.sequence_num =
OPAL_THREAD_ADD32(&(ml_module->collective_sequence_num), 1);
coll_op->variable_fn_params.hier_factor = comm_size;
MCA_COLL_ML_SET_ORDER_INFO(coll_op, n_fragments);
ret = mca_coll_ml_launch_sequential_collective (coll_op);
if (OMPI_SUCCESS != ret) {
ML_VERBOSE(10, ("Failed to launch"));
return ret;
}
*req = &coll_op->full_message.super;
return OMPI_SUCCESS;
}
int mca_coll_ml_allgather(const void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
ompi_request_t *req;
int ret;
ML_VERBOSE(10, ("Starting blocking allgather"));
ret = mca_coll_ml_allgather_start (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module, &req);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
return ret;
}
ret = ompi_request_wait (&req, MPI_STATUS_IGNORE);
ML_VERBOSE(10, ("Blocking allgather is complete"));
return ret;
}
int mca_coll_ml_allgather_nb(const void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
ompi_request_t **req,
mca_coll_base_module_t *module)
{
int ret;
ML_VERBOSE(10, ("Starting non-blocking allgather"));
ret = mca_coll_ml_allgather_start (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module, req);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
return ret;
}
ML_VERBOSE(10, ("Non-blocking allgather started"));
return ret;
}


@ -1,213 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <stdlib.h>
#include "coll_ml.h"
#include "coll_ml_inlines.h"
#include "coll_ml_allocation.h"
mca_bcol_base_memory_block_desc_t *mca_coll_ml_allocate_block(struct mca_coll_ml_component_t *ml_component,
mca_bcol_base_memory_block_desc_t *ml_memblock)
{
mca_bcol_base_memory_block_desc_t *ret = NULL;
mca_bcol_base_memory_block_desc_t *memory_block = NULL;
mca_coll_ml_lmngr_t *memory_manager = NULL;
if (ml_memblock) {
ML_ERROR(("Memory already allocated - expecting NULL pointer"));
return ret;
}
memory_block = (mca_bcol_base_memory_block_desc_t*) calloc(1, sizeof(mca_bcol_base_memory_block_desc_t));
if (NULL == memory_block){
ML_ERROR(("Couldn't allocate memory for ml_memblock"));
return ret;
}
memory_manager = &ml_component->memory_manager;
memory_block->block = mca_coll_ml_lmngr_alloc(memory_manager);
memory_block->size_block = memory_manager->list_block_size;
if (!memory_block->block){
ML_VERBOSE(1, ("lmngr failed."));
free(memory_block);
return NULL;
}
return memory_block;
}
void mca_coll_ml_free_block (mca_bcol_base_memory_block_desc_t *ml_memblock)
{
if (!ml_memblock)
return;
if (ml_memblock->buffer_descs){
free(ml_memblock->buffer_descs);
}
mca_coll_ml_lmngr_free(ml_memblock->block);
free(ml_memblock->bank_release_counters);
free(ml_memblock->ready_for_memsync);
free(ml_memblock->bank_is_busy);
free(ml_memblock);
}
int mca_coll_ml_initialize_block(mca_bcol_base_memory_block_desc_t *ml_memblock,
uint32_t num_buffers,
uint32_t num_banks,
uint32_t buffer_size,
int32_t data_offset,
opal_list_t *bcols_in_use)
{
int ret = OMPI_SUCCESS;
uint32_t bank_loop, buff_loop;
uint64_t addr_offset = 0;
mca_bcol_base_payload_buffer_desc_t *pbuff_descs = NULL,*pbuff_desc = NULL;
if (0 == num_banks || 0 == num_buffers || 0 == buffer_size) {
return OMPI_ERR_BAD_PARAM;
}
if (NULL == ml_memblock){
ML_ERROR(("Memory block not initialized"));
ret = OMPI_ERROR;
goto exit_ERROR;
}
if (ml_memblock->size_block < (num_buffers * num_banks * buffer_size) ){
ML_ERROR(("Not enough memory for all buffers and banks in the memory block"));
ret = OMPI_ERROR;
goto exit_ERROR;
}
pbuff_descs = (mca_bcol_base_payload_buffer_desc_t*) malloc(sizeof(mca_bcol_base_payload_buffer_desc_t)
* num_banks * num_buffers);
if (NULL == pbuff_descs) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
for(bank_loop = 0; bank_loop < num_banks; bank_loop++)
for(buff_loop = 0; buff_loop < num_buffers; buff_loop++){
pbuff_desc = &pbuff_descs[bank_loop*num_buffers + buff_loop];
pbuff_desc->base_data_addr = (void *)
((char *)ml_memblock->block->base_addr + addr_offset);
pbuff_desc->data_addr = (void *)
((char *)pbuff_desc->base_data_addr + (size_t)data_offset);
addr_offset+=buffer_size;
pbuff_desc->buffer_index = BUFFER_INDEX(bank_loop,num_buffers,buff_loop);
pbuff_desc->bank_index=bank_loop;
pbuff_desc->generation_number=0;
}
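/* Layout sketch with assumed values: num_banks = 2, num_buffers = 4,
 * buffer_size = 8192. Descriptors are laid out bank-major, so the buffer at
 * (bank 1, buffer 2) is the 7th descriptor, starts 6 * 8192 = 49152 bytes
 * into the block, and (judging from the index decomposition in
 * mca_coll_ml_alloc_buffer below) gets buffer_index 1 * 4 + 2 = 6. */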
/* Initialize ml memory block */
/* gvm FIX: This counter, when zero, indicates that the bank is ready for
* recycling. It is initialized to the number of bcol components, as each bcol
* is responsible for releasing the buffers of a bank. This initialization will
* misbehave, for example, when multiple interfaces are present and more than
* one bcol module of the same component type is in use.
*/
ml_memblock->bank_release_counters = (uint32_t *) calloc(num_banks, sizeof(uint32_t));
if (NULL == ml_memblock->bank_release_counters) {
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto exit_ERROR;
}
ml_memblock->ready_for_memsync = (bool *) calloc(num_banks, sizeof(bool));
if (NULL == ml_memblock->ready_for_memsync) {
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto exit_ERROR;
}
ml_memblock->bank_is_busy = (bool *) calloc(num_banks, sizeof(bool));
if (NULL == ml_memblock->bank_is_busy) {
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto exit_ERROR;
}
/* Set index for first bank to sync */
ml_memblock->memsync_counter = 0;
/* use first bank and first buffer */
ml_memblock->next_free_buffer = 0;
ml_memblock->block_addr_offset = addr_offset;
ml_memblock->num_buffers_per_bank = num_buffers;
ml_memblock->num_banks = num_banks;
ml_memblock->size_buffer = buffer_size;
ml_memblock->buffer_descs = pbuff_descs;
return ret;
exit_ERROR:
/* Free all buffer descriptors */
if (pbuff_descs){
free(pbuff_descs);
}
return ret;
}
mca_bcol_base_payload_buffer_desc_t *mca_coll_ml_alloc_buffer (mca_coll_ml_module_t *module)
{
uint64_t bindex;
uint32_t bank, buffer, num_buffers;
mca_bcol_base_memory_block_desc_t *ml_memblock = module->payload_block;
mca_bcol_base_payload_buffer_desc_t *pbuff_descs = NULL,
*ml_membuffer = NULL;
/* Return a buffer */
num_buffers = ml_memblock->num_buffers_per_bank;
pbuff_descs = ml_memblock->buffer_descs;
bindex = ml_memblock->next_free_buffer;
buffer = bindex % num_buffers;
bank = bindex/num_buffers;
ML_VERBOSE(10, ("ML allocator: allocating buffer index %d, bank index %d", buffer, bank));
/* First buffer in a bank: check whether the bank is free before using it */
if (0 == buffer) {
if(!ml_memblock->bank_is_busy[bank]) {
/* the bank is free, mark it busy */
ml_memblock->bank_is_busy[bank] = true;
ML_VERBOSE(10, ("ML allocator: reset bank %d to value %d", bank,
ml_memblock->bank_release_counters[bank]));
} else {
/* the bank is busy, return NULL and upper layer will handle it */
ML_VERBOSE(10, ("No free payload buffers are available for use."
" Next memory bank is still used by one of bcols"));
return NULL;
}
}
assert(true == ml_memblock->bank_is_busy[bank]);
ml_membuffer = &pbuff_descs[bindex];
ML_VERBOSE(10, ("ML allocator: ml buffer index %d", bindex));
/* Compute next free buffer */
buffer = (buffer == num_buffers - 1) ? 0 : buffer + 1;
if (0 == buffer) {
bank = (bank == ml_memblock->num_banks - 1) ? 0 : bank + 1;
}
ml_memblock->next_free_buffer = BUFFER_INDEX(bank,num_buffers,buffer);
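/* Index arithmetic example (hypothetical sizes): with num_buffers_per_bank = 4
 * and num_banks = 2, bindex 6 decomposes to buffer 6 % 4 = 2 in bank 6 / 4 = 1.
 * After handing that buffer out, next_free_buffer advances to buffer 3 of
 * bank 1 (bindex 7) and then wraps around to buffer 0 of bank 0. */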
return ml_membuffer;
}


@ -1,111 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_ML_ALLOC_H
#define MCA_ML_ALLOC_H
#include "ompi_config.h"
#include "ompi/include/ompi/constants.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/bcol/base/base.h"
#include "opal/sys/atomic.h"
#include "opal/mca/mpool/base/base.h"
#include "coll_ml_lmngr.h"
/*
Returns a block of memory from mpool
ARGS:
IN ml_component: component descriptor
OUT ml_memblock: block_addr - Starting address of the memory block
size - Size of the block
register_info - Register information passed from the mpool
Return
On Success: Returns a pointer to the allocated memory block descriptor
On Failure: Returns NULL
*/
struct mca_coll_ml_component_t;
struct mca_coll_ml_module_t;
mca_bcol_base_memory_block_desc_t *mca_coll_ml_allocate_block(
struct mca_coll_ml_component_t *ml_component,
struct mca_bcol_base_memory_block_desc_t *ml_memblock
);
/* Allocate the memory from mpool */
/* Register the memory block with bcols */
void mca_coll_ml_free_block(
mca_bcol_base_memory_block_desc_t *ml_memblock
);
/*
Initialize the memory block, map it into buffers and memory banks, and
initialize the buffer descriptors.
IN ml_memblock: Memory block descriptor
IN num_buffers: number of buffers
IN num_banks: number of banks
Return
On Success: OMPI_SUCCESS
On Failure: OMPI_ERROR
*/
int mca_coll_ml_initialize_block(
mca_bcol_base_memory_block_desc_t *ml_memblock,
uint32_t num_buffers,
uint32_t num_banks,
uint32_t buffer_size,
int32_t data_offset,
opal_list_t *bcols_in_use
);
/* Map blocks into buffers and banks */
/* Initialize the descriptors */
/*
Allocate a memory buffer from the block
IN ml_memblock: Memory block descriptor
OUT ml_membuffer: Buffer allocated for data from the block
Return
On Success: OMPI_SUCCESS
On Failure: OMPI_ERROR
*/
mca_bcol_base_payload_buffer_desc_t *mca_coll_ml_alloc_buffer(
struct mca_coll_ml_module_t *module);
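/* A rough sketch of the intended call order, as suggested by the comments
 * above (not a definitive contract): mca_coll_ml_allocate_block() obtains the
 * raw region from the list manager, mca_coll_ml_initialize_block() carves it
 * into banks and per-buffer descriptors, mca_coll_ml_alloc_buffer() then hands
 * out individual payload buffers at collective start time, and
 * mca_coll_ml_free_block() tears everything down. */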
int mca_coll_ml_free_buffer(
mca_bcol_base_memory_block_desc_t *ml_memblock,
struct mca_bcol_base_payload_buffer_desc_t *ml_membuffer
);
/*
Register the memory block with bcol component
IN ml_memblock: Memory block descriptor
OUT registrations (ml_memblock)
Return
On Success: OMPI_SUCCESS
On Failure: OMPI_ERROR
*/
int mca_coll_ml_register_block_bcol(
mca_bcol_base_memory_block_desc_t *ml_memblock
);
#endif /* MCA_ML_ALLOC_H */


@ -1,553 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file */
#include "ompi_config.h"
#include <stdlib.h>
#include "ompi/constants.h"
#include "opal/threads/mutex.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/bcol/bcol.h"
#include "opal/sys/atomic.h"
#include "coll_ml.h"
#include "coll_ml_select.h"
#include "coll_ml_allocation.h"
static int mca_coll_ml_allreduce_small_unpack(mca_coll_ml_collective_operation_progress_t *coll_op)
{
int ret;
/* need to put in more */
int count = coll_op->variable_fn_params.count;
ompi_datatype_t *dtype = coll_op->variable_fn_params.dtype;
void *dest = (void *)((uintptr_t)coll_op->full_message.dest_user_addr +
(uintptr_t)coll_op->fragment_data.offset_into_user_buffer);
void *src = (void *)((uintptr_t)coll_op->fragment_data.buffer_desc->data_addr +
(size_t)coll_op->variable_fn_params.rbuf_offset);
ret = ompi_datatype_copy_content_same_ddt(dtype, (int32_t) count, (char *) dest,
(char *) src);
if (ret < 0) {
return OMPI_ERROR;
}
ML_VERBOSE(10, ("sbuf addr %p, sbuf offset %d, rbuf addr %p, rbuf offset %d.",
src, coll_op->variable_fn_params.sbuf_offset, dest,
coll_op->variable_fn_params.rbuf_offset));
return OMPI_SUCCESS;
}
static int mca_coll_ml_allreduce_task_setup(mca_coll_ml_collective_operation_progress_t *coll_op)
{
int fn_idx, h_level, my_index, root;
mca_sbgp_base_module_t *sbgp;
mca_coll_ml_topology_t *topo = coll_op->coll_schedule->topo_info;
fn_idx = coll_op->sequential_routine.current_active_bcol_fn;
h_level = coll_op->coll_schedule->component_functions[fn_idx].h_level;
sbgp = topo->component_pairs[h_level].subgroup_module;
my_index = sbgp->my_index;
/* In the case of allreduce, the local leader is always the root */
root = 0;
if (my_index == root) {
coll_op->variable_fn_params.root_flag = true;
coll_op->variable_fn_params.root_route = NULL;
} else {
coll_op->variable_fn_params.root_flag = false;
coll_op->variable_fn_params.root_route = &topo->route_vector[root];
}
/* NTH: This was copied from the old allreduce launcher. */
if (0 < fn_idx) {
coll_op->variable_fn_params.sbuf = coll_op->variable_fn_params.rbuf;
coll_op->variable_fn_params.userbuf = coll_op->variable_fn_params.rbuf;
}
return OMPI_SUCCESS;
}
static int mca_coll_ml_allreduce_frag_progress(mca_coll_ml_collective_operation_progress_t *coll_op)
{
/* local variables */
const void *buf;
size_t dt_size;
int ret, frag_len, count;
ptrdiff_t lb, extent;
mca_bcol_base_payload_buffer_desc_t *src_buffer_desc;
mca_coll_ml_collective_operation_progress_t *new_op;
mca_coll_ml_module_t *ml_module = OP_ML_MODULE(coll_op);
ret = ompi_datatype_get_extent(coll_op->variable_fn_params.dtype, &lb, &extent);
if (ret < 0) {
return OMPI_ERROR;
}
dt_size = (size_t) extent;
/* Keep the pipeline filled with fragments */
while (coll_op->fragment_data.message_descriptor->n_active <
coll_op->fragment_data.message_descriptor->pipeline_depth) {
/* If an active fragment happens to have completed the collective during
* a hop into the progress engine, then don't launch a new fragment,
* instead break and return.
*/
if (coll_op->fragment_data.message_descriptor->n_bytes_scheduled
== coll_op->fragment_data.message_descriptor->n_bytes_total) {
break;
}
/* Get an ml buffer */
src_buffer_desc = mca_coll_ml_alloc_buffer(OP_ML_MODULE(coll_op));
if (NULL == src_buffer_desc) {
/* If there exist outstanding fragments, then break out
* and let an active fragment deal with this later,
* there are no buffers available.
*/
if (0 < coll_op->fragment_data.message_descriptor->n_active) {
return OMPI_SUCCESS;
}
/* It is useless to call progress from here: ml progress can't be
* executed, so the ml memsync call will not complete and no memory
* will be recycled. Instead we put the element on the list and
* progress it later, once memsync recycles some memory. */
/* The fragment is already on the list and
* we still have no ml resources;
* return busy */
if (!(coll_op->pending & REQ_OUT_OF_MEMORY)) {
coll_op->pending |= REQ_OUT_OF_MEMORY;
opal_list_append(&((OP_ML_MODULE(coll_op))->waiting_for_memory_list),
(opal_list_item_t *)coll_op);
ML_VERBOSE(10,("Out of resources %p adding to pending queue", coll_op));
} else {
ML_VERBOSE(10,("Out of resources %p", coll_op));
}
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
}
/* Get a new collective descriptor and initialize it */
new_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module,
ml_module->coll_ml_allreduce_functions[coll_op->fragment_data.current_coll_op],
coll_op->fragment_data.message_descriptor->src_user_addr,
coll_op->fragment_data.message_descriptor->dest_user_addr,
coll_op->fragment_data.message_descriptor->n_bytes_total,
coll_op->fragment_data.message_descriptor->n_bytes_scheduled);
MCA_COLL_IBOFFLOAD_SET_ML_BUFFER_INFO(new_op,
src_buffer_desc->buffer_index, src_buffer_desc);
new_op->fragment_data.current_coll_op = coll_op->fragment_data.current_coll_op;
new_op->fragment_data.message_descriptor = coll_op->fragment_data.message_descriptor;
/* set the task setup callback */
new_op->sequential_routine.seq_task_setup = mca_coll_ml_allreduce_task_setup;
/* We need this address for pointer arithmetic in memcpy */
buf = coll_op->fragment_data.message_descriptor->src_user_addr;
/* calculate the number of data types in this packet */
count = (coll_op->fragment_data.message_descriptor->n_bytes_total -
coll_op->fragment_data.message_descriptor->n_bytes_scheduled <
(size_t) OP_ML_MODULE(coll_op)->small_message_thresholds[BCOL_ALLREDUCE] ?
(coll_op->fragment_data.message_descriptor->n_bytes_total -
coll_op->fragment_data.message_descriptor->n_bytes_scheduled) / dt_size :
(size_t) coll_op->variable_fn_params.count);
/* calculate the fragment length */
frag_len = count*dt_size;
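/* Example with assumed sizes: n_bytes_total = 6000, n_bytes_scheduled = 4096,
 * small_message_thresholds[BCOL_ALLREDUCE] = 4096, dt_size = 4 and a
 * per-fragment count of 1024. The 1904 remaining bytes fall below the
 * threshold, so count = 1904 / 4 = 476 elements and frag_len = 1904 bytes;
 * otherwise the full per-fragment count would be reused. */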
ret = ompi_datatype_copy_content_same_ddt(coll_op->variable_fn_params.dtype, count,
(char *) src_buffer_desc->data_addr, (char *) ((uintptr_t) buf + (uintptr_t)
coll_op->fragment_data.message_descriptor->n_bytes_scheduled));
if (ret < 0) {
return OMPI_ERROR;
}
/* unpack the result from the ML buffer */
new_op->process_fn = mca_coll_ml_allreduce_small_unpack;
/* Setup fragment specific data */
new_op->fragment_data.message_descriptor->n_bytes_scheduled += frag_len;
new_op->fragment_data.buffer_desc = src_buffer_desc;
new_op->fragment_data.fragment_size = frag_len;
(new_op->fragment_data.message_descriptor->n_active)++;
ML_SET_VARIABLE_PARAMS_BCAST(
new_op,
OP_ML_MODULE(new_op),
count,
MPI_BYTE,
src_buffer_desc,
0,
0,
frag_len,
src_buffer_desc->data_addr);
/* Fill in bcast specific arguments */
/* TBD: remove buffer_size */
new_op->variable_fn_params.buffer_size = frag_len;
new_op->variable_fn_params.count = count;
new_op->variable_fn_params.hier_factor = coll_op->variable_fn_params.hier_factor;
new_op->variable_fn_params.op = coll_op->variable_fn_params.op;
new_op->variable_fn_params.dtype = coll_op->variable_fn_params.dtype;
new_op->variable_fn_params.root = 0;
new_op->variable_fn_params.sbuf = src_buffer_desc->data_addr;
new_op->variable_fn_params.rbuf = src_buffer_desc->data_addr;
new_op->sequential_routine.current_bcol_status = SEQ_TASK_PENDING;
MCA_COLL_ML_SET_NEW_FRAG_ORDER_INFO(new_op);
ML_VERBOSE(10,("FFFF Contig + fragmentation [0-sk, 1-lk, 3-su, 4-lu] %d %d %d",
new_op->variable_fn_params.buffer_size,
new_op->fragment_data.fragment_size,
new_op->fragment_data.message_descriptor->n_bytes_scheduled));
/* initialize first coll */
ret = new_op->sequential_routine.seq_task_setup(new_op);
if (OMPI_SUCCESS != ret) {
ML_VERBOSE(3,("Fragment failed to initialize itself"));
return ret;
}
/* append this collective !! */
OPAL_THREAD_LOCK(&(mca_coll_ml_component.sequential_collectives_mutex));
opal_list_append(&mca_coll_ml_component.sequential_collectives,
(opal_list_item_t *)new_op);
OPAL_THREAD_UNLOCK(&(mca_coll_ml_component.sequential_collectives_mutex));
}
return OMPI_SUCCESS;
}
static inline __opal_attribute_always_inline__
int parallel_allreduce_start(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_ml_module_t *ml_module,
ompi_request_t **req,
int small_data_allreduce,
int large_data_allreduce)
{
int ret, n_fragments = 1, frag_len,
pipeline_depth, n_dts_per_frag ;
ptrdiff_t lb, extent;
size_t pack_len, dt_size;
mca_bcol_base_payload_buffer_desc_t *src_buffer_desc;
mca_coll_ml_collective_operation_progress_t *coll_op;
mca_coll_ml_component_t *cm = &mca_coll_ml_component;
bool contiguous = ompi_datatype_is_contiguous_memory_layout(dtype, count);
if (MPI_IN_PLACE == sbuf) {
sbuf = rbuf;
}
ret = ompi_datatype_get_extent(dtype, &lb, &extent);
if (ret < 0) {
return OMPI_ERROR;
}
dt_size = (size_t) extent;
pack_len = count * dt_size;
ML_VERBOSE(1,("The allreduce requested %d enable fragmentation %d ",
pack_len,
cm->enable_fragmentation));
if (pack_len <= (size_t) ml_module->small_message_thresholds[BCOL_ALLREDUCE]) {
/* The len of the message can not be larger than ML buffer size */
assert(pack_len <= ml_module->payload_block->size_buffer);
ML_VERBOSE(1,("Using small data allreduce (threshold = %d)",
ml_module->small_message_thresholds[BCOL_ALLREDUCE]));
src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module);
while (OPAL_UNLIKELY(NULL == src_buffer_desc)) {
opal_progress();
src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module);
}
coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module,
ml_module->coll_ml_allreduce_functions[small_data_allreduce],
sbuf, rbuf, pack_len, 0);
coll_op->variable_fn_params.rbuf = src_buffer_desc->data_addr;
coll_op->variable_fn_params.sbuf = src_buffer_desc->data_addr;
coll_op->variable_fn_params.count = count;
ret = ompi_datatype_copy_content_same_ddt(dtype, count,
(void *) (uintptr_t) src_buffer_desc->data_addr, (char *) sbuf);
if (ret < 0){
return OMPI_ERROR;
}
/* unpack function */
coll_op->process_fn = mca_coll_ml_allreduce_small_unpack;
} else if (cm->enable_fragmentation || !contiguous) {
ML_VERBOSE(1,("Using Fragmented Allreduce"));
/* fragment the data */
/* guard against datatypes larger than the ML buffer threshold */
if (dt_size > (size_t) ml_module->small_message_thresholds[BCOL_ALLREDUCE]) {
ML_ERROR(("Sorry, but we don't support datatypes that large"));
return OMPI_ERROR;
}
/* calculate the number of data types that can fit per ml-buffer */
n_dts_per_frag = ml_module->small_message_thresholds[BCOL_ALLREDUCE] / dt_size;
/* calculate the number of fragments */
n_fragments = (count + n_dts_per_frag - 1) / n_dts_per_frag; /* round up */
/* calculate the actual pipeline depth */
pipeline_depth = n_fragments < cm->pipeline_depth ? n_fragments : cm->pipeline_depth;
/* calculate the fragment size */
frag_len = n_dts_per_frag * dt_size;
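/* Worked example (assumed values): count = 3000 elements of 8 bytes with a
 * 4096-byte threshold gives n_dts_per_frag = 512, n_fragments =
 * (3000 + 511) / 512 = 6 and frag_len = 4096 bytes; the last fragment only
 * carries the 440 remaining elements, and the pipeline depth is clamped to
 * min(6, cm->pipeline_depth). */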
/* allocate an ml buffer */
src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module);
while (NULL == src_buffer_desc) {
opal_progress();
src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module);
}
coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module,
ml_module->coll_ml_allreduce_functions[small_data_allreduce],
sbuf, rbuf, pack_len, 0 /* offset for first pack */);
/* task setup callback function */
coll_op->sequential_routine.seq_task_setup = mca_coll_ml_allreduce_task_setup;
coll_op->process_fn = mca_coll_ml_allreduce_small_unpack;
coll_op->variable_fn_params.sbuf = (void *) src_buffer_desc->data_addr;
coll_op->variable_fn_params.rbuf = (void *) src_buffer_desc->data_addr;
coll_op->fragment_data.message_descriptor->n_active = 1;
coll_op->full_message.n_bytes_scheduled = frag_len;
coll_op->full_message.fragment_launcher = mca_coll_ml_allreduce_frag_progress;
coll_op->full_message.pipeline_depth = pipeline_depth;
coll_op->fragment_data.current_coll_op = small_data_allreduce;
coll_op->fragment_data.fragment_size = frag_len;
coll_op->variable_fn_params.count = n_dts_per_frag; /* seems fishy */
coll_op->variable_fn_params.buffer_size = frag_len;
/* copy into the ml-buffer */
ret = ompi_datatype_copy_content_same_ddt(dtype, n_dts_per_frag,
(char *) src_buffer_desc->data_addr, (char *) sbuf);
if (ret < 0) {
return OMPI_ERROR;
}
} else {
ML_VERBOSE(1,("Using zero-copy ptp allreduce"));
coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module,
ml_module->coll_ml_allreduce_functions[large_data_allreduce],
sbuf, rbuf, pack_len, 0);
coll_op->variable_fn_params.userbuf =
coll_op->variable_fn_params.sbuf = sbuf;
coll_op->variable_fn_params.rbuf = rbuf;
/* The ML buffer is used for testing. Later, when we
* switch to use knem/mmap/portals this should be replaced
* appropriately
*/
src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module);
while (NULL == src_buffer_desc) {
opal_progress();
src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module);
}
coll_op->variable_fn_params.count = count;
}
MCA_COLL_IBOFFLOAD_SET_ML_BUFFER_INFO(coll_op, src_buffer_desc->buffer_index,
src_buffer_desc);
/* set the offset */
coll_op->variable_fn_params.sbuf_offset = 0;
coll_op->variable_fn_params.rbuf_offset = 0;
/* Fill in the function arguments */
coll_op->variable_fn_params.sequence_num =
OPAL_THREAD_ADD32(&(ml_module->collective_sequence_num), 1);
coll_op->sequential_routine.current_active_bcol_fn = 0;
coll_op->variable_fn_params.dtype = dtype;
coll_op->variable_fn_params.op = op;
coll_op->variable_fn_params.root = 0;
coll_op->sequential_routine.seq_task_setup = mca_coll_ml_allreduce_task_setup; /* invoked after each level in sequential
* progress call
*/
MCA_COLL_ML_SET_ORDER_INFO(coll_op, n_fragments);
ret = mca_coll_ml_launch_sequential_collective (coll_op);
if (ret != OMPI_SUCCESS) {
ML_VERBOSE(10, ("Failed to launch"));
return ret;
}
*req = &coll_op->full_message.super;
return OMPI_SUCCESS;
}
int mca_coll_ml_allreduce(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t*)module;
ompi_request_t *req;
int ret;
if (OPAL_UNLIKELY(!ompi_op_is_commute(op))) {
/* coll/ml does not handle non-commutative operations at this time. fall back
* to another collective module */
return ml_module->fallback.coll_allreduce (sbuf, rbuf, count, dtype, op, comm,
ml_module->fallback.coll_allreduce_module);
}
ret = parallel_allreduce_start(sbuf, rbuf, count, dtype, op, comm,
(mca_coll_ml_module_t *) module, &req,
ML_SMALL_DATA_ALLREDUCE,
ML_LARGE_DATA_ALLREDUCE);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
ML_ERROR(("Failed to launch"));
return ret;
}
ompi_request_wait_completion(req);
ompi_request_free(&req);
ML_VERBOSE(10, ("Blocking NB allreduce is done"));
return OMPI_SUCCESS;
}
int mca_coll_ml_allreduce_nb(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op,
struct ompi_communicator_t *comm,
ompi_request_t **req,
mca_coll_base_module_t *module)
{
mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t*)module;
int ret;
if (OPAL_UNLIKELY(!ompi_op_is_commute(op))) {
/* coll/ml does not handle non-commutative operations at this time. fall back
* to another collective module */
return ml_module->fallback.coll_iallreduce (sbuf, rbuf, count, dtype, op, comm, req,
ml_module->fallback.coll_iallreduce_module);
}
ret = parallel_allreduce_start(sbuf, rbuf, count, dtype, op, comm,
(mca_coll_ml_module_t *) module, req,
ML_SMALL_DATA_ALLREDUCE,
ML_LARGE_DATA_ALLREDUCE);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
ML_ERROR(("Failed to launch"));
return ret;
}
ML_VERBOSE(10, ("Blocking NB allreduce is done"));
return OMPI_SUCCESS;
}
int mca_coll_ml_allreduce_dispatch(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_op_t *op,
struct ompi_communicator_t *comm, mca_coll_base_module_t *module)
{
int rc;
bool use_extra_topo;
ompi_request_t *req;
mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *) module;
use_extra_topo = (count > 1) ?
!ml_module->allreduce_matrix[op->op_type][dtype->id][BCOL_MULTI_ELEM_TYPE] :
!ml_module->allreduce_matrix[op->op_type][dtype->id][BCOL_SINGLE_ELEM_TYPE];
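/* In other words: fall back to the extra topology whenever the capability
 * matrix does not mark this (op, datatype) pair as supported for the relevant
 * element-count class (single element vs. multiple elements). */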
if (use_extra_topo) {
rc = parallel_allreduce_start(sbuf, rbuf, count, dtype,
op, comm, ml_module, &req,
ML_SMALL_DATA_EXTRA_TOPO_ALLREDUCE,
ML_LARGE_DATA_EXTRA_TOPO_ALLREDUCE);
} else {
rc = parallel_allreduce_start(sbuf, rbuf, count, dtype,
op, comm, ml_module, &req,
ML_SMALL_DATA_ALLREDUCE,
ML_LARGE_DATA_ALLREDUCE);
}
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
ML_ERROR(("Failed to launch"));
return rc;
}
ompi_request_wait_completion(req);
ompi_request_free(&req);
return OMPI_SUCCESS;
}
int mca_coll_ml_allreduce_dispatch_nb(const void *sbuf, void *rbuf, int count,
ompi_datatype_t *dtype, ompi_op_t *op,
ompi_communicator_t *comm,
ompi_request_t **req,
mca_coll_base_module_t *module)
{
int rc;
bool use_extra_topo;
mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *) module;
use_extra_topo = (count > 1) ?
!ml_module->allreduce_matrix[op->op_type][dtype->id][BCOL_MULTI_ELEM_TYPE] :
!ml_module->allreduce_matrix[op->op_type][dtype->id][BCOL_SINGLE_ELEM_TYPE];
if (use_extra_topo) {
rc = parallel_allreduce_start(sbuf, rbuf, count, dtype,
op, comm, ml_module, req,
ML_SMALL_DATA_EXTRA_TOPO_ALLREDUCE,
ML_LARGE_DATA_EXTRA_TOPO_ALLREDUCE);
} else {
rc = parallel_allreduce_start(sbuf, rbuf, count, dtype,
op, comm, ml_module, req,
ML_SMALL_DATA_ALLREDUCE,
ML_LARGE_DATA_ALLREDUCE);
}
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
ML_ERROR(("Failed to launch"));
return rc;
}
return OMPI_SUCCESS;
}


@ -1,146 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file */
#include "ompi_config.h"
#include "ompi/constants.h"
#include "opal/threads/mutex.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/bcol/bcol.h"
#include "ompi/mca/coll/coll.h"
#include "opal/sys/atomic.h"
#include "ompi/mca/coll/ml/coll_ml.h"
#include "ompi/mca/coll/ml/coll_ml_inlines.h"
static void mca_coll_ml_barrier_task_setup(
mca_coll_ml_task_status_t *task_status,
int index, mca_coll_ml_compound_functions_t *func)
{
task_status->rt_num_dependencies = func->num_dependencies;
task_status->rt_num_dependent_tasks = func->num_dependent_tasks;
task_status->rt_dependent_task_indices = func->dependent_task_indices;
}
static int mca_coll_ml_barrier_launch(mca_coll_ml_module_t *ml_module,
ompi_request_t **req)
{
opal_free_list_item_t *item;
mca_coll_ml_collective_operation_progress_t *coll_op;
mca_bcol_base_payload_buffer_desc_t *src_buffer_desc = NULL;
/* allocate an ml buffer for signaling purposes */
src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module);
while (NULL == src_buffer_desc) {
opal_progress();
src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module);
}
/* Blocking call on fragment allocation (Maybe we want to make it non blocking ?) */
item = opal_free_list_wait (&(ml_module->coll_ml_collective_descriptors));
coll_op = (mca_coll_ml_collective_operation_progress_t *) item;
assert(NULL != coll_op);
ML_VERBOSE(10, ("Get coll request %p", coll_op));
MCA_COLL_ML_OP_BASIC_SETUP(coll_op, 0, 0, NULL, NULL, ml_module->coll_ml_barrier_function);
coll_op->fragment_data.buffer_desc = src_buffer_desc;
coll_op->dag_description.num_tasks_completed = 0;
coll_op->variable_fn_params.buffer_index = src_buffer_desc->buffer_index;
coll_op->variable_fn_params.sequence_num =
OPAL_THREAD_ADD32(&(ml_module->collective_sequence_num), 1);
/* Pointer to a coll finalize function */
coll_op->process_fn = NULL;
(*req) = &coll_op->full_message.super;
OMPI_REQUEST_INIT((*req), false);
(*req)->req_status._cancelled = 0;
(*req)->req_state = OMPI_REQUEST_ACTIVE;
(*req)->req_status.MPI_ERROR = OMPI_SUCCESS;
/* Set order info if there is a bcol that needs ordering */
MCA_COLL_ML_SET_ORDER_INFO(coll_op, 1);
return mca_coll_ml_generic_collectives_launcher(coll_op, mca_coll_ml_barrier_task_setup);
}
/**
* Hierarchical blocking barrier
*/
int mca_coll_ml_barrier_intra(struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int rc;
ompi_request_t *req;
mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *) module;
#if OPAL_ENABLE_DEBUG
static int barriers_count = 0;
#endif
ML_VERBOSE(10, ("Barrier num %d start.", ++barriers_count));
rc = mca_coll_ml_barrier_launch(ml_module, &req);
if (OPAL_UNLIKELY(rc != OMPI_SUCCESS)) {
ML_ERROR(("Failed to launch a barrier."));
return rc;
}
/* Blocking barrier */
ompi_request_wait_completion(req);
ompi_request_free(&req);
ML_VERBOSE(10, ("Barrier num %d was done.", barriers_count));
return OMPI_SUCCESS;
}
/**
* Hierarchical non-blocking barrier
*/
int mca_coll_ml_ibarrier_intra(struct ompi_communicator_t *comm,
ompi_request_t **req,
mca_coll_base_module_t *module)
{
int rc;
mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *) module;
#if OPAL_ENABLE_DEBUG
static int barriers_count = 0;
#endif
ML_VERBOSE(10, ("IBarrier num %d start.", ++barriers_count));
rc = mca_coll_ml_barrier_launch(ml_module, req);
if (OPAL_UNLIKELY(rc != OMPI_SUCCESS)) {
ML_ERROR(("Failed to launch a barrier."));
return rc;
}
ML_VERBOSE(10, ("IBarrier num %d was done.", barriers_count));
return OMPI_SUCCESS;
}


@ -1,849 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file */
#include "ompi_config.h"
#include <unistd.h>
#include <sys/uio.h>
#include "opal/threads/mutex.h"
#include "opal/sys/atomic.h"
#include "ompi/constants.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/bcol/bcol.h"
#include "coll_ml.h"
#include "coll_ml_inlines.h"
#include "coll_ml_colls.h"
#include "coll_ml_allocation.h"
#define ML_BUFFER_ALLOC_WAIT(ml, buffer) \
do { \
buffer = mca_coll_ml_alloc_buffer(ml); \
while (NULL == buffer) { \
opal_progress(); \
buffer = mca_coll_ml_alloc_buffer(ml); \
} \
} while (0)
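/* This is the same spin-until-available pattern written out inline in several
 * other functions in this component: opal_progress() keeps being driven until
 * the ML allocator returns a payload buffer, so the wait blocks the caller but
 * still advances outstanding communication. */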
#define COLL_ML_SETUP_ORDERING_INFO(op, last, prev) \
do { \
/* Don't change order of commands !!!! */ \
(op)->prev_frag = prev; \
(op)->fragment_data.message_descriptor->last_started_frag = last; \
/* op->next_to_process_frag = NULL; */ \
} while (0)
#define ALLOCATE_AND_PACK_CONTIG_BCAST_FRAG(ml_module, op, coll_index, root, \
total_len, frag_len, buf, ml_buff_desc) \
do { \
op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module, \
ml_module->coll_ml_bcast_functions[coll_index], \
buf, buf, \
total_len, \
0 /* offset for first pack */); \
if (OPAL_LIKELY(frag_len > 0)) { \
if (ompi_comm_rank(ml_module->comm) == root) { \
/* single frag, pack the data */ \
memcpy((void *)(uintptr_t)(ml_buff_desc)->data_addr, \
buf, frag_len); \
/* No unpack for root */ \
op->process_fn = NULL; \
} else { \
op->process_fn = mca_coll_ml_bcast_small_unpack_data; \
} \
} \
op->full_message.n_bytes_scheduled = frag_len; \
} while (0)
#define SMALL_BCAST 0
#define LARGE_BCAST (SMALL_BCAST + 1)
/* bcast data unpack */
static int mca_coll_ml_bcast_converter_unpack_data(mca_coll_ml_collective_operation_progress_t *coll_op)
{
struct iovec iov;
uint32_t iov_count = 1;
size_t max_data = 0;
mca_coll_ml_collective_operation_progress_t *next_op;
mca_coll_ml_module_t *ml_module =
(mca_coll_ml_module_t *) coll_op->coll_module;
size_t max_index =
ml_module->payload_block->num_banks * ml_module->payload_block->num_buffers_per_bank;
bool is_first = true;
int ret;
/* Check if the fragment was delivered in order */
if (coll_op->fragment_data.buffer_desc->buffer_index !=
coll_op->fragment_data.message_descriptor->next_expected_index) {
mca_coll_ml_collective_operation_progress_t *prev_coll_op = coll_op->prev_frag;
assert(NULL == prev_coll_op->next_to_process_frag);
/* make sure that the previous fragment holds a pointer to the
out-of-order one */
prev_coll_op->next_to_process_frag = coll_op;
assert(!(coll_op->pending & REQ_OUT_OF_ORDER));
coll_op->pending |= REQ_OUT_OF_ORDER;
/* we will unpack it later */
ML_VERBOSE(10, ("Get %d expecting %d previous %d",
coll_op->fragment_data.buffer_desc->buffer_index,
coll_op->fragment_data.message_descriptor->next_expected_index,
prev_coll_op->fragment_data.buffer_desc->buffer_index));
return ORTE_ERR_NO_MATCH_YET;
}
do {
iov.iov_len = coll_op->fragment_data.fragment_size;
iov.iov_base = (void *)((uintptr_t) coll_op->fragment_data.buffer_desc->data_addr);
ML_VERBOSE(10, ("Data unpack with convertern index %d",
coll_op->fragment_data.buffer_desc->buffer_index));
opal_convertor_unpack(&coll_op->fragment_data.message_descriptor->recv_convertor,
&iov, &iov_count, &max_data);
/* update next index */
++coll_op->fragment_data.message_descriptor->next_expected_index;
if (coll_op->fragment_data.message_descriptor->next_expected_index >= max_index) {
coll_op->fragment_data.message_descriptor->next_expected_index = 0;
}
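/* next_expected_index cycles 0 .. max_index-1, i.e. over every buffer index in
 * the payload block (num_banks * num_buffers_per_bank). Fragments whose
 * buffer_index does not match are chained onto the previous fragment via
 * next_to_process_frag (see the check above) and unpacked here once the
 * in-order fragment arrives. */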
/* Return to queue if the packet is done;
the exception is the first packet, which we release later.
*/
next_op = coll_op->next_to_process_frag;
coll_op->next_to_process_frag = NULL;
if ((!is_first) &&
(0 != coll_op->fragment_data.offset_into_user_buffer)) {
assert(coll_op->pending & REQ_OUT_OF_ORDER);
coll_op->pending ^= REQ_OUT_OF_ORDER;
/* Pasha: On one hand - I'm not sure that conceptually this is the right place to call buffer recycling. Potentially,
coll_ml_fragment_completion_processing() sounds like the right place for out-of-order unpack/sync handling.
* On the other hand - non-contiguous data is not super common and we would like to minimize the effect on the critical path
* for non-contiguous datatypes. */
ret = mca_coll_ml_buffer_recycling(coll_op);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
return OMPI_ERROR;
}
CHECK_AND_RECYCLE(coll_op);
}
coll_op = next_op;
is_first = false;
} while (NULL != coll_op);
return OMPI_SUCCESS;
}
static int mca_coll_ml_bcast_small_unpack_data(mca_coll_ml_collective_operation_progress_t *coll_op)
{
void * dest = (void *)((uintptr_t) coll_op->full_message.dest_user_addr +
(uintptr_t) coll_op->full_message.n_bytes_delivered);
void * src = (void *)((uintptr_t) coll_op->fragment_data.buffer_desc->data_addr);
memcpy(dest, src, coll_op->fragment_data.fragment_size);
return OMPI_SUCCESS;
}
static int mca_coll_ml_bcast_large_unpack_data(mca_coll_ml_collective_operation_progress_t *coll_op)
{
void * dest = (void *)((uintptr_t) coll_op->fragment_data.message_descriptor->dest_user_addr +
(uintptr_t) coll_op->fragment_data.offset_into_user_buffer);
void * src = (void *)((uintptr_t) coll_op->fragment_data.buffer_desc->data_addr);
memcpy(dest, src, coll_op->fragment_data.fragment_size);
return OMPI_SUCCESS;
}
static int mca_coll_ml_bcast_frag_converter_progress(mca_coll_ml_collective_operation_progress_t *coll_op)
{
/* local variables */
int ret, frag_len;
size_t max_data = 0;
mca_bcol_base_payload_buffer_desc_t *src_buffer_desc = NULL;
mca_coll_ml_collective_operation_progress_t *new_op = NULL;
mca_coll_ml_task_setup_fn_t task_setup = NULL;
mca_coll_ml_module_t *ml_module = OP_ML_MODULE(coll_op);
/* Keep the pipeline filled with fragments */
while (coll_op->fragment_data.message_descriptor->n_active <
mca_coll_ml_component.pipeline_depth) {
/* If an active fragment happens to have completed the collective during
* a hop into the progress engine, then don't launch a new fragment,
* instead break and return.
*/
if (coll_op->fragment_data.message_descriptor->n_bytes_scheduled
== coll_op->fragment_data.message_descriptor->n_bytes_total) {
break;
}
/* Get an ml buffer */
src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module);
if (OPAL_UNLIKELY(NULL == src_buffer_desc)) {
/* If there exist outstanding fragments, then break out
* and let an active fragment deal with this later,
* there are no buffers available.
*/
if (0 < coll_op->fragment_data.message_descriptor->n_active) {
return OMPI_SUCCESS;
}
/* It is useless to call progress from here: ml progress can't be
* executed, so the ml memsync call will not complete and no memory
* will be recycled. Instead we put the element on the list and
* progress it later, once memsync recycles some memory. */
/* The fragment is already on the list and
* we still have no ml resources;
* return busy */
if (!(coll_op->pending & REQ_OUT_OF_MEMORY)) {
coll_op->pending |= REQ_OUT_OF_MEMORY;
opal_list_append(&ml_module->waiting_for_memory_list,
(opal_list_item_t *)coll_op);
}
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
}
/* Get a new collective descriptor and initialize it */
new_op = mca_coll_ml_duplicate_op_prog_single_frag_dag
(ml_module, coll_op);
/* Compute the fragment size for this pass */
frag_len = ML_GET_FRAG_SIZE(coll_op, BCOL_BCAST);
/* Decide based on global flag, not variable one */
if (coll_op->fragment_data.message_descriptor->root) {
struct iovec iov;
uint32_t iov_count = 1;
/* OBJ_RETAIN(new_op->variable_fn_params.dtype); */
iov.iov_base = (IOVBASE_TYPE*) src_buffer_desc->data_addr;
iov.iov_len = ml_module->small_message_thresholds[BCOL_BCAST];
assert(0 != iov.iov_len);
max_data = ml_module->small_message_thresholds[BCOL_BCAST];
opal_convertor_pack(&new_op->fragment_data.message_descriptor->send_convertor,
&iov, &iov_count, &max_data);
new_op->process_fn = NULL;
new_op->variable_fn_params.root_flag = true;
new_op->variable_fn_params.root_route = NULL;
task_setup = OP_ML_MODULE(new_op)->
coll_ml_bcast_functions[new_op->fragment_data.current_coll_op]->
task_setup_fn[COLL_ML_ROOT_TASK_FN];
} else {
new_op->process_fn = mca_coll_ml_bcast_converter_unpack_data;
new_op->variable_fn_params.root_flag = false;
new_op->variable_fn_params.root_route = coll_op->variable_fn_params.root_route;
task_setup = OP_ML_MODULE(new_op)->
coll_ml_bcast_functions[new_op->fragment_data.current_coll_op]->
task_setup_fn[COLL_ML_GENERAL_TASK_FN];
max_data = ml_module->small_message_thresholds[BCOL_BCAST];
mca_coll_ml_convertor_get_send_frag_size(
ml_module, &max_data,
new_op->fragment_data.message_descriptor);
}
new_op->fragment_data.message_descriptor->n_bytes_scheduled += max_data;
new_op->fragment_data.fragment_size = max_data;
new_op->fragment_data.buffer_desc = src_buffer_desc;
/* Setup fragment specific data */
++(new_op->fragment_data.message_descriptor->n_active);
COLL_ML_SETUP_ORDERING_INFO(new_op, new_op,
new_op->fragment_data.message_descriptor->last_started_frag);
ML_VERBOSE(10, ("Start more, My index %d my prev %d",
new_op->fragment_data.buffer_desc->buffer_index,
new_op->prev_frag->fragment_data.buffer_desc->buffer_index));
ML_SET_VARIABLE_PARAMS_BCAST(
new_op,
OP_ML_MODULE(new_op),
frag_len,
MPI_BYTE,
src_buffer_desc,
0,
0,
frag_len,
src_buffer_desc->data_addr);
/* TBD: remove buffer_size */
new_op->variable_fn_params.buffer_size = coll_op->variable_fn_params.buffer_size;
new_op->variable_fn_params.hier_factor = coll_op->variable_fn_params.hier_factor;
/* Set order info for the new frag if there is a bcol that needs ordering */
MCA_COLL_ML_SET_NEW_FRAG_ORDER_INFO(new_op);
/* Launch this collective !! */
ret = mca_coll_ml_generic_collectives_append_to_queue(new_op, task_setup);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
ML_ERROR(("Failed to launch"));
return ret;
}
}
return OMPI_SUCCESS;
}
static int mca_coll_ml_bcast_frag_progress(mca_coll_ml_collective_operation_progress_t *coll_op)
{
/* local variables */
int ret;
int frag_len, current_coll_op = coll_op->fragment_data.current_coll_op;
size_t dt_size;
void *buf;
mca_bcol_base_payload_buffer_desc_t *src_buffer_desc = NULL;
mca_coll_ml_collective_operation_progress_t *new_op = NULL;
mca_coll_ml_task_setup_fn_t task_setup = NULL;
ompi_datatype_type_size(coll_op->variable_fn_params.dtype, &dt_size);
/* Keep the pipeline filled with fragments */
while (coll_op->fragment_data.message_descriptor->n_active <
coll_op->fragment_data.message_descriptor->pipeline_depth) {
/* If an active fragment happens to have completed the collective during
* a hop into the progress engine, then don't launch a new fragment,
* instead break and return.
*/
if (coll_op->fragment_data.message_descriptor->n_bytes_scheduled
== coll_op->fragment_data.message_descriptor->n_bytes_total) {
break;
}
/* Get an ml buffer */
src_buffer_desc = mca_coll_ml_alloc_buffer(OP_ML_MODULE(coll_op));
if (NULL == src_buffer_desc) {
/* If there exist outstanding fragments, then break out
* and let an active fragment deal with this later,
* there are no buffers available.
*/
if (0 < coll_op->fragment_data.message_descriptor->n_active) {
return OMPI_SUCCESS;
}
/* It is useless to call progress from here: ml progress can't be
* executed, so the ml memsync call will not complete and no memory
* will be recycled. Instead we put the element on the list and
* progress it later, once memsync recycles some memory. */
/* The fragment is already on the list and
* we still have no ml resources;
* return busy */
if (!(coll_op->pending & REQ_OUT_OF_MEMORY)) {
ML_VERBOSE(10,("Out of resources %p adding to pending queue", coll_op));
coll_op->pending |= REQ_OUT_OF_MEMORY;
opal_list_append(&((OP_ML_MODULE(coll_op))->waiting_for_memory_list),
(opal_list_item_t *) coll_op);
} else {
ML_VERBOSE(10,("Out of resources %p", coll_op));
}
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
}
/* Get a new collective descriptor and initialize it */
new_op = mca_coll_ml_duplicate_op_prog_single_frag_dag
(OP_ML_MODULE(coll_op), coll_op);
/* We need this address for pointer arithmetic in memcpy */
buf = coll_op->fragment_data.message_descriptor->dest_user_addr;
frag_len = ML_GET_FRAG_SIZE(coll_op, BCOL_BCAST);
/* Decide based on global flag, not variable one */
if (coll_op->fragment_data.message_descriptor->root) {
memcpy((void *)(uintptr_t)src_buffer_desc->data_addr,
(void *) ((uintptr_t) buf + (uintptr_t) coll_op->
fragment_data.message_descriptor->n_bytes_scheduled) , frag_len);
/* No unpack for root */
new_op->process_fn = NULL;
new_op->variable_fn_params.root_flag = true;
new_op->variable_fn_params.root_route = NULL;
task_setup = OP_ML_MODULE(new_op)->coll_ml_bcast_functions[current_coll_op]->
task_setup_fn[COLL_ML_ROOT_TASK_FN];
} else {
new_op->process_fn = mca_coll_ml_bcast_large_unpack_data;
new_op->variable_fn_params.root_flag = false;
new_op->variable_fn_params.root_route = coll_op->variable_fn_params.root_route;
task_setup = OP_ML_MODULE(new_op)->coll_ml_bcast_functions[current_coll_op]->
task_setup_fn[COLL_ML_GENERAL_TASK_FN];
}
/* Setup fragment specific data */
new_op->fragment_data.message_descriptor->n_bytes_scheduled += frag_len;
new_op->fragment_data.buffer_desc = src_buffer_desc;
new_op->fragment_data.fragment_size = frag_len;
new_op->fragment_data.message_descriptor->n_active++;
ML_SET_VARIABLE_PARAMS_BCAST(
new_op,
OP_ML_MODULE(new_op),
frag_len,
MPI_BYTE,
src_buffer_desc,
0,
0,
frag_len,
src_buffer_desc->data_addr);
/* Fill in bcast specific arguments */
/* TBD: remove buffer_size */
new_op->variable_fn_params.buffer_size = coll_op->variable_fn_params.buffer_size;
new_op->variable_fn_params.hier_factor = coll_op->variable_fn_params.hier_factor;
/* Set order info for the new frag if there is a bcol that needs ordering */
MCA_COLL_ML_SET_NEW_FRAG_ORDER_INFO(new_op);
ML_VERBOSE(10, ("FFFF Contig + fragmentation [0-sk, 1-lk, 3-su, 4-lu] %d %d %d",
new_op->variable_fn_params.buffer_size ,
new_op->fragment_data.fragment_size,
new_op->fragment_data.message_descriptor->n_bytes_scheduled));
/* Launch this collective !! */
ret = mca_coll_ml_generic_collectives_append_to_queue(new_op, task_setup);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
ML_VERBOSE(10, ("Failed to launch"));
return ret;
}
}
return OMPI_SUCCESS;
}
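/* Fragmentation appears to be enabled when the function selected for
 * LARGE_BCAST lies below ML_BCAST_LARGE_DATA_KNOWN in the algorithm
 * enumeration, i.e. a small-data (fragmenting) variant was configured for
 * large messages. */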
#define BCAST_FRAGMENTATION_IS_ENABLED(module) \
(module->bcast_fn_index_table[LARGE_BCAST] < ML_BCAST_LARGE_DATA_KNOWN)
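/* Start an ML broadcast. Depending on the datatype layout and message size
 * this routine chooses between copy-in for small contiguous messages, a
 * fragmented pipeline for large contiguous messages (when fragmentation is
 * enabled), zero copy for large contiguous messages, and a convertor-based
 * path for non-contiguous datatypes. The full-message request is returned
 * through req. */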
static inline __opal_attribute_always_inline__
int parallel_bcast_start(void *buf, int count, struct ompi_datatype_t *dtype,
int root, mca_coll_base_module_t *module, ompi_request_t **req)
{
size_t pack_len = 0;
size_t dt_size = 0;
bool contig = false;
int bcast_index, n_fragments = 1;
mca_coll_ml_collective_operation_progress_t * coll_op = NULL;
mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *) module;
mca_bcol_base_payload_buffer_desc_t *src_buffer_desc = NULL;
mca_coll_ml_task_setup_fn_t task_setup;
OPAL_PTRDIFF_TYPE lb, extent;
/* actual starting place of the user buffer (lb added) */
void *actual_buf;
ML_VERBOSE(10, ("Starting bcast, mca_coll_ml_bcast_uknown_root buf: %p", buf));
ompi_datatype_type_size(dtype, &dt_size);
pack_len = count * dt_size;
/* Setup data buffer */
ML_BUFFER_ALLOC_WAIT(ml_module, src_buffer_desc);
/* Get information about memory layout */
contig = opal_datatype_is_contiguous_memory_layout((opal_datatype_t *)dtype, count);
ompi_datatype_get_extent (dtype, &lb, &extent);
actual_buf = (void *) ((uintptr_t) buf + lb);
/* Allocate collective schedule and pack message */
if (contig) {
if (pack_len <= (size_t) ml_module->small_message_thresholds[BCOL_BCAST]) {
assert(pack_len <= ml_module->payload_block->size_buffer);
bcast_index = ml_module->bcast_fn_index_table[SMALL_BCAST];
ML_VERBOSE(10, ("Contig + small message %d [0-sk, 1-lk, 3-su, 4-lu]", bcast_index));
ALLOCATE_AND_PACK_CONTIG_BCAST_FRAG(ml_module, coll_op, bcast_index, root, pack_len,
pack_len, actual_buf, src_buffer_desc);
ML_SET_VARIABLE_PARAMS_BCAST(coll_op, ml_module, count, dtype,
src_buffer_desc, 0, 0, ml_module->payload_block->size_buffer,
(src_buffer_desc->data_addr));
} else if (BCAST_FRAGMENTATION_IS_ENABLED(ml_module)) {
            /* We moved the fragmentation decision from communicator creation time to
               runtime, since for large messages the latency is not so critical */
size_t n_dts_per_frag;
int frag_len, pipeline_depth = mca_coll_ml_component.pipeline_depth;
bcast_index = ml_module->bcast_fn_index_table[LARGE_BCAST];
ML_VERBOSE(10, ("Contig + fragmentation %d [0-sk, 1-lk, 3-su, 4-lu]", bcast_index));
/* Calculate the number of fragments required for this message */
frag_len = (pack_len < (size_t) ml_module->small_message_thresholds[BCOL_BCAST] ?
pack_len : (size_t) ml_module->small_message_thresholds[BCOL_BCAST]);
n_dts_per_frag = frag_len/dt_size;
n_fragments = (pack_len + dt_size*n_dts_per_frag - 1)/(dt_size*n_dts_per_frag);
pipeline_depth = (n_fragments < pipeline_depth ? n_fragments : pipeline_depth);
ALLOCATE_AND_PACK_CONTIG_BCAST_FRAG(ml_module, coll_op, bcast_index, root, pack_len,
frag_len, actual_buf, src_buffer_desc);
ML_SET_VARIABLE_PARAMS_BCAST(coll_op, ml_module, (frag_len/dt_size), dtype,
src_buffer_desc, 0, 0, frag_len, (src_buffer_desc->data_addr));
coll_op->full_message.fragment_launcher = mca_coll_ml_bcast_frag_progress;
coll_op->full_message.pipeline_depth = pipeline_depth;
/* Initialize fragment specific information */
coll_op->fragment_data.current_coll_op = bcast_index;
/* coll_op->fragment_data.message_descriptor->n_bytes_scheduled += frag_len; */
coll_op->fragment_data.fragment_size = frag_len;
coll_op->fragment_data.message_descriptor->n_active++;
/* should be removed */
coll_op->variable_fn_params.buffer_size = frag_len;
ML_VERBOSE(10, ("Contig + fragmentation [0-sk, 1-lk, 3-su, 4-lu] %d %d",
coll_op->variable_fn_params.buffer_size,
coll_op->fragment_data.fragment_size));
} else {
bcast_index = ml_module->bcast_fn_index_table[LARGE_BCAST];
ML_VERBOSE(10, ("Contig + zero copy %d [0-sk, 1-lk, 3-su, 4-lu]", bcast_index));
coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module,
ml_module->coll_ml_bcast_functions[bcast_index],
actual_buf, actual_buf, pack_len,
0 /* offset for first pack */);
/* For large messages (bcast) this points to userbuf */
            /* Pasha: temporary workaround for basesmuma, userbuf should
               be removed */
coll_op->variable_fn_params.userbuf = buf;
coll_op->process_fn = NULL;
coll_op->full_message.n_bytes_scheduled = pack_len;
ML_SET_VARIABLE_PARAMS_BCAST(coll_op, ml_module, count, dtype,
src_buffer_desc, 0, 0,
ml_module->payload_block->size_buffer, buf);
}
} else {
/* Non contiguous data type */
bcast_index = ml_module->bcast_fn_index_table[SMALL_BCAST];
ML_VERBOSE(10, ("NON Contig + fragmentation %d [0-sk, 1-lk, 3-su, 4-lu]", bcast_index));
coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module,
ml_module->coll_ml_bcast_functions[bcast_index],
actual_buf, actual_buf, pack_len,
0 /* offset for first pack */);
if (OPAL_LIKELY(pack_len > 0)) {
size_t max_data = 0;
if (ompi_comm_rank(ml_module->comm) == root) {
struct iovec iov;
uint32_t iov_count = 1;
opal_convertor_copy_and_prepare_for_send(
ompi_mpi_local_convertor,
&dtype->super, count, buf, 0,
&coll_op->full_message.send_convertor);
opal_convertor_get_packed_size(&coll_op->full_message.send_convertor,
&coll_op->full_message.send_converter_bytes_packed);
coll_op->full_message.n_bytes_total =
coll_op->full_message.send_converter_bytes_packed;
iov.iov_base = (IOVBASE_TYPE*) src_buffer_desc->data_addr;
iov.iov_len = ml_module->small_message_thresholds[BCOL_BCAST];
max_data = ml_module->small_message_thresholds[BCOL_BCAST];
opal_convertor_pack(&coll_op->full_message.send_convertor,
&iov, &iov_count, &max_data);
coll_op->process_fn = NULL;
coll_op->full_message.n_bytes_scheduled = max_data;
                /* We need to prepare the data for future pipelined communication */
coll_op->full_message.fragment_launcher = mca_coll_ml_bcast_frag_converter_progress;
coll_op->full_message.pipeline_depth = mca_coll_ml_component.pipeline_depth;
coll_op->full_message.root = true;
} else {
opal_convertor_copy_and_prepare_for_send(
ompi_mpi_local_convertor,
&dtype->super, count, NULL, 0,
&coll_op->full_message.dummy_convertor);
                /* In the non-root case we use it for the #bytes remaining to receive */
opal_convertor_get_packed_size(&coll_op->full_message.dummy_convertor,
&coll_op->full_message.send_converter_bytes_packed);
opal_convertor_copy_and_prepare_for_recv(
ompi_mpi_local_convertor,
&dtype->super, count, buf, 0,
&coll_op->full_message.recv_convertor);
opal_convertor_get_unpacked_size(&coll_op->full_message.recv_convertor,
&coll_op->full_message.recv_converter_bytes_packed);
coll_op->full_message.root = false;
coll_op->full_message.n_bytes_total =
coll_op->full_message.recv_converter_bytes_packed;
coll_op->process_fn = mca_coll_ml_bcast_converter_unpack_data;
coll_op->full_message.fragment_launcher = mca_coll_ml_bcast_frag_converter_progress;
coll_op->full_message.pipeline_depth = mca_coll_ml_component.pipeline_depth;
max_data = ml_module->small_message_thresholds[BCOL_BCAST];
coll_op->full_message.dummy_conv_position = 0;
mca_coll_ml_convertor_get_send_frag_size(
ml_module, &max_data,
&coll_op->full_message);
coll_op->full_message.n_bytes_scheduled = max_data;
}
}
coll_op->fragment_data.current_coll_op = bcast_index;
coll_op->fragment_data.message_descriptor->n_active++;
coll_op->fragment_data.fragment_size = coll_op->full_message.n_bytes_scheduled;
/* Set initial index */
coll_op->full_message.next_expected_index = src_buffer_desc->buffer_index;
/* Prepare linking information for future frags */
COLL_ML_SETUP_ORDERING_INFO(coll_op, coll_op, NULL);
/* Since the data is already packed we will use MPI_BYTE and byte count as datatype */
ML_SET_VARIABLE_PARAMS_BCAST(coll_op, ml_module, coll_op->full_message.n_bytes_scheduled, MPI_BYTE,
src_buffer_desc, 0, 0, ml_module->payload_block->size_buffer,(src_buffer_desc->data_addr));
n_fragments = (coll_op->full_message.n_bytes_total +
ml_module->small_message_thresholds[BCOL_BCAST] - 1) / ml_module->small_message_thresholds[BCOL_BCAST];
}
coll_op->variable_fn_params.hier_factor = 1;
coll_op->fragment_data.buffer_desc = src_buffer_desc;
/* Set order info if there is a bcol needs ordering */
MCA_COLL_ML_SET_ORDER_INFO(coll_op, n_fragments);
if (ompi_comm_rank(ml_module->comm) == root) {
coll_op->full_message.root =
coll_op->variable_fn_params.root_flag = true;
coll_op->variable_fn_params.root_route = NULL;
task_setup = ml_module->coll_ml_bcast_functions[bcast_index]->
task_setup_fn[COLL_ML_ROOT_TASK_FN];
} else {
coll_op->full_message.root =
coll_op->variable_fn_params.root_flag = false;
coll_op->variable_fn_params.root_route =
(NULL == coll_op->coll_schedule->topo_info->route_vector ?
NULL : &coll_op->coll_schedule->topo_info->route_vector[root]);
task_setup = ml_module->coll_ml_bcast_functions[bcast_index]->
task_setup_fn[COLL_ML_GENERAL_TASK_FN];
}
*req = &coll_op->full_message.super;
return mca_coll_ml_generic_collectives_launcher(coll_op, task_setup);
}
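/* Blocking broadcast entry point: launch the collective and wait on the
 * full-message request. */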
int mca_coll_ml_parallel_bcast(void *buf, int count, struct ompi_datatype_t *dtype,
int root, struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int ret;
ompi_request_t *req;
ret = parallel_bcast_start(buf, count, dtype, root, module, &req);
if (OPAL_UNLIKELY(ret != OMPI_SUCCESS)) {
ML_VERBOSE(10, ("Failed to launch"));
return ret;
}
/* Blocking bcast */
ompi_request_wait_completion(req);
ompi_request_free(&req);
ML_VERBOSE(10, ("Bcast is done mca_coll_ml_bcast_known"));
return OMPI_SUCCESS;
}
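/* Non-blocking broadcast entry point: launch the collective and hand the
 * full-message request back to the caller. */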
int mca_coll_ml_parallel_bcast_nb(void *buf, int count, struct ompi_datatype_t *dtype,
int root, struct ompi_communicator_t *comm,
ompi_request_t **req,
mca_coll_base_module_t *module)
{
int ret;
ret = parallel_bcast_start(buf, count, dtype, root, module, req);
if (OPAL_UNLIKELY(ret != OMPI_SUCCESS)) {
ML_VERBOSE(10, ("Failed to launch"));
return ret;
}
ML_VERBOSE(10, ("Bcast is done mca_coll_ml_bcast_known"));
return OMPI_SUCCESS;
}
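/* Broadcast over a schedule fixed by the root's position in the hierarchy:
 * the bcol functions are invoked one after another from the calling context,
 * and the operation is appended to the component's sequential queue whenever
 * a bcol function cannot complete immediately. Only contiguous data is
 * supported on this path. */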
int mca_coll_ml_bcast_sequential_root(void *buf, int count, struct ompi_datatype_t *dtype,
int root, struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
/* local variables */
int ret, fn_idx;
size_t pack_len = 0;
size_t dt_size = 0;
mca_coll_ml_collective_operation_progress_t * coll_op = NULL;
mca_coll_ml_compound_functions_t *fixed_schedule;
mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *) module;
mca_bcol_base_payload_buffer_desc_t *src_buffer_desc = NULL;
mca_bcol_base_coll_fn_desc_t *func;
OPAL_PTRDIFF_TYPE lb, extent;
/* actual starting place of the user buffer (lb added) */
void *actual_buf;
ML_VERBOSE(10, ("Starting static bcast, small messages"));
assert(NULL != dtype);
    /* Calculate the size of the data;
     * at this stage only contiguous data is supported */
ompi_datatype_type_size(dtype, &dt_size);
pack_len = count * dt_size;
ompi_datatype_get_extent (dtype, &lb, &extent);
actual_buf = (void *) ((uintptr_t) buf + lb);
/* Setup data buffer */
src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module);
while (NULL == src_buffer_desc) {
opal_progress();
src_buffer_desc = mca_coll_ml_alloc_buffer(ml_module);
}
/* Allocate collective schedule and pack message */
if (pack_len <= (size_t) ml_module->small_message_thresholds[BCOL_BCAST]) {
        /* The length of the message cannot be larger than the ML buffer size */
assert(pack_len <= ml_module->payload_block->size_buffer);
coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module,
ml_module->coll_ml_bcast_functions[ML_BCAST_SMALL_DATA_SEQUENTIAL],
actual_buf, actual_buf, pack_len,
0 /* offset for first pack */);
if (ompi_comm_rank(comm) == root) {
/* single frag, pack the data */
memcpy((void *)(uintptr_t)src_buffer_desc->data_addr,
buf, pack_len);
/* No unpack for root */
coll_op->process_fn = NULL;
} else {
coll_op->process_fn = mca_coll_ml_bcast_small_unpack_data;
}
coll_op->variable_fn_params.sbuf =
src_buffer_desc->data_addr;
} else {
ML_VERBOSE(10, ("ML_BCAST_LARGE_DATA_KNOWN case."));
coll_op = mca_coll_ml_alloc_op_prog_single_frag_dag(ml_module,
ml_module->coll_ml_bcast_functions[ML_BCAST_LARGE_DATA_SEQUENTIAL],
actual_buf, actual_buf, pack_len,
0 /* offset for first pack */);
/* For large messages (bcast) this points to userbuf */
        /* Pasha: temporary workaround for basesmuma, userbuf should
           be removed */
coll_op->variable_fn_params.userbuf =
coll_op->variable_fn_params.sbuf = actual_buf;
coll_op->process_fn = NULL;
}
/* Fill in the function arguments */
coll_op->variable_fn_params.sequence_num =
OPAL_THREAD_ADD32(&(ml_module->collective_sequence_num), 1);
coll_op->variable_fn_params.count = count;
coll_op->variable_fn_params.dtype = dtype;
coll_op->variable_fn_params.buffer_index = src_buffer_desc->buffer_index;
coll_op->variable_fn_params.src_desc = src_buffer_desc;
coll_op->variable_fn_params.sbuf_offset = 0;
coll_op->variable_fn_params.rbuf_offset = 0;
    /* pasha - why do we duplicate it? */
coll_op->fragment_data.buffer_desc = src_buffer_desc;
    /* pack data into payload buffer - NOTE: assume no fragmentation at this stage */
if (ompi_comm_rank(comm) == root) {
coll_op->variable_fn_params.root_flag = true;
coll_op->variable_fn_params.root_route =
&coll_op->coll_schedule->topo_info->route_vector[root];
coll_op->full_message.n_bytes_scheduled = pack_len;
} else {
coll_op->variable_fn_params.root_flag = false;
coll_op->variable_fn_params.root_route =
&coll_op->coll_schedule->topo_info->route_vector[root];
}
/* seems like we should fix a schedule here and now */
fixed_schedule = coll_op->coll_schedule->
comp_fn_arr[coll_op->variable_fn_params.root_route->level];
/* now we set this schedule as the compound function list */
coll_op->coll_schedule->component_functions = fixed_schedule;
coll_op->sequential_routine.current_active_bcol_fn = 0;
while (true) {
/* ready, aim, fire collective(s)!! */
fn_idx = coll_op->sequential_routine.current_active_bcol_fn;
func = fixed_schedule[fn_idx].bcol_function;
ret = func->coll_fn(&coll_op->variable_fn_params,
(struct mca_bcol_base_function_t *) &fixed_schedule[fn_idx].constant_group_data);
/* set the coll_fn_started flag to true */
if (BCOL_FN_COMPLETE == ret) {
/* done with this routine, bump the active counter */
coll_op->sequential_routine.current_active_bcol_fn++;
coll_op->variable_fn_params.root_flag = true;
/* check for collective completion */
if (coll_op->sequential_routine.current_active_bcol_fn ==
coll_op->coll_schedule->n_fns) {
/* handle fragment completion */
ret = coll_ml_fragment_completion_processing(coll_op);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
mca_coll_ml_abort_ml("Failed to run coll_ml_fragment_completion_processing");
}
/* break out of while loop */
break;
}
} else {
            /* put the entire collective operation onto the sequential queue */
opal_list_append(&mca_coll_ml_component.sequential_collectives,
(opal_list_item_t *) coll_op);
break;
}
}
/* Blocking bcast */
ompi_request_wait_completion(&coll_op->full_message.super);
ompi_request_free((ompi_request_t **) &coll_op);
ML_VERBOSE(10, ("Bcast is done"));
return OMPI_SUCCESS;
}

View file

@ -1,552 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_COLL_ML_COLLS_H
#define MCA_COLL_ML_COLLS_H
#include "ompi_config.h"
#include "ompi/mca/bcol/bcol.h"
#define COLL_ML_FN_NAME_LEN 256
/* utility information used to coordinate activities, such as resource
* management between different functions in the hierarchy
*/
struct mca_coll_ml_utility_data_t {
/* RLG - temp fix !!!! - really need to remove this, but right now
do not want to change the signature of the collective primitives to
use coll_ml_utility_data_t rather than mca_bcol_base_function_t */
int dummy;
/* module */
struct mca_bcol_base_module_t *bcol_module;
/* */
int index_in_consecutive_same_bcol_calls;
/* number of times functions from this bcol are called in order */
int n_of_this_type_in_a_row;
/* number of times functions from this module are called
* in the collective operation. */
int n_of_this_type_in_collective;
int index_of_this_type_in_collective;
};
typedef struct mca_coll_ml_utility_data_t mca_coll_ml_utility_data_t;
/* forward declaration */
struct mca_coll_ml_collective_operation_progress_t;
struct mca_coll_ml_task_status_t;
typedef int (* mca_coll_ml_process_op_fn_t)
(struct mca_coll_ml_collective_operation_progress_t *coll_op);
typedef int (* mca_coll_ml_task_comp_fn_t)
(struct mca_coll_ml_task_status_t *coll_op);
typedef int (* mca_coll_ml_fragment_launch_fn_t)
( struct mca_coll_ml_collective_operation_progress_t *coll_op);
typedef int (* mca_coll_ml_sequential_task_setup_fn_t)
( struct mca_coll_ml_collective_operation_progress_t *coll_op);
/* This data structure defines the dependencies for a given
* compound operation. We will use this as a basis for implementing
* collective operations.
*/
struct mca_coll_ml_compound_functions_t {
/* label */
char fn_name[COLL_ML_FN_NAME_LEN];
/* hierarchy level that is used for this bcol */
int h_level;
/* the list of functions that make up this task */
/* coll_bcol_collective_description_t *bcol_function; */
mca_bcol_base_coll_fn_desc_t *bcol_function;
/* task completion function for this compound function */
mca_coll_ml_task_comp_fn_t task_comp_fn;
/* module specific information that is a constant on a per group
* basis
*/
mca_coll_ml_utility_data_t constant_group_data;
    /* number of dependencies to be satisfied before this function can be
     * started */
int num_dependencies;
/*
* number of notifications to perform on completion. The assumption
* is that a counter will be incremented.
*/
int num_dependent_tasks;
/*
     * pointers to counters that need to be updated. This assumes
     * an array of tasks is used to describe the ML level
     * collective operation, with these indices referencing elements
     * in this array.
*/
int *dependent_task_indices;
};
typedef struct mca_coll_ml_compound_functions_t mca_coll_ml_compound_functions_t;
/* Forward declaration for operation_description_t */
struct mca_coll_ml_module_t;
enum {
COLL_ML_GENERAL_TASK_FN,
COLL_ML_ROOT_TASK_FN,
COLL_ML_MAX_TASK_FN
};
enum {
SEQ_TASK_NOT_STARTED,
SEQ_TASK_PENDING,
SEQ_TASK_IN_PROG
};
typedef void (*mca_coll_ml_task_setup_fn_t) (struct mca_coll_ml_task_status_t *task_status, int index, struct mca_coll_ml_compound_functions_t *func);
/*
* Collective operation definition
*/
struct mca_coll_ml_collective_operation_description_t {
/*
     * Type of collective operation - there are two types:
     * 1) sequential progress through the collectives is sufficient
     * 2) general treatment, popping tasks onto execution queues is needed.
*/
int progress_type;
struct mca_coll_ml_topology_t *topo_info;
/*
* number of functions in collective operation
*/
int n_fns;
/*
* list of functions
*/
mca_coll_ml_compound_functions_t *component_functions;
/*
* array of lists of functions
*/
mca_coll_ml_compound_functions_t **comp_fn_arr;
/*
* indices into the list - fixes a sequential schedule
*/
int *sch_idx;
/*
* Task setup functions, so far we have only 3 - root and non-root
*/
mca_coll_ml_task_setup_fn_t task_setup_fn[COLL_ML_MAX_TASK_FN];
/* number of functions are called for bcols need ordering */
int n_fns_need_ordering;
};
typedef struct mca_coll_ml_collective_operation_description_t
mca_coll_ml_collective_operation_description_t;
/* Data structure used to track the state of individual bcol
* functions. This is used to track dependencies and completion
* to progress the ML level function correctly.
*
* mca_coll_ml_task_status_t will be associated with an
* mca_coll_ml_collective_operation_progress_t structure for
* the duration of the lifetime of a communicator.
* An array of task statuses will be stored with
* the mca_coll_ml_collective_operation_progress_t data structure, so
 * that the task status elements do not need to be moved back to
* a free list before they are re-used. When the ML level function
* is complete, all mca_coll_ml_task_status_t are available for
* re-use.
*/
struct mca_coll_ml_task_status_t{
/* need to move this between lists to progress this correctly */
opal_list_item_t item;
/* number of dependencies satisfied */
int n_dep_satisfied;
/* ***************************************************************
* Pasha:
     * I'm adding to the status: num_dependencies, num_dependent_tasks and
     * dependent_task_indices. The information originally resided in mca_coll_ml_compound_functions_t.
     * For collective operations with a static nature this is not a problem,
     * but for the Bcast operation run-time parameters, like root, actually
     * define the dependency. The rt prefix means run-time.
*/
    /* number of dependencies to be satisfied before this function can be
     * started */
int rt_num_dependencies;
/*
* number of notifications to perform on completion. The assumption
* is that a counter will be incremented.
*/
int rt_num_dependent_tasks;
/*
     * pointers to counters that need to be updated. This assumes
     * an array of tasks is used to describe the ML level
     * collective operation, with these indices referencing elements
     * in this array.
*/
int *rt_dependent_task_indices;
/*
*
* ***************************************************************/
/* index in collective schedule */
int my_index_in_coll_schedule;
/* function pointers */
mca_bcol_base_coll_fn_desc_t *bcol_fn;
/* association with a specific collective task - the ML
* mca_coll_ml_collective_operation_progress_t stores the
* specific function parameters */
struct mca_coll_ml_collective_operation_progress_t *ml_coll_operation;
mca_coll_ml_task_comp_fn_t task_comp_fn;
};
typedef struct mca_coll_ml_task_status_t mca_coll_ml_task_status_t;
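/* Reasons a fragment may be parked instead of progressed: it arrived out of
 * order, or no ML payload buffers were available at the time. */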
typedef enum mca_coll_ml_pending_type_t {
REQ_OUT_OF_ORDER = 1,
REQ_OUT_OF_MEMORY = 1 << 1
} mca_coll_ml_pending_type_t;
/* Forward declaration */
struct mca_bcol_base_payload_buffer_desc_t;
/* Data structure used to track ML level collective operation
* progress.
*/
struct mca_coll_ml_collective_operation_progress_t {
/* need this to put on a list properly */
/* Full message information */
struct full_message_t {
/* make this a list item */
ompi_request_t super;
        /* Next expected fragment.
         * It is used for controlling the order of the convertor unpack operation */
        size_t next_expected_index;
        /* Pointer to the last initialized fragment.
         * It is used for controlling the order of the convertor unpack operation */
struct mca_coll_ml_collective_operation_progress_t *last_started_frag;
/* destination data address in user memory */
void *dest_user_addr;
/* source data address in user memory */
const void *src_user_addr;
/* total message size */
size_t n_bytes_total;
/* per-process total message size - relevant for operations
* such as gather and scatter, where each rank has it's
* own unique data
*/
size_t n_bytes_per_proc_total;
size_t max_n_bytes_per_proc_total;
        /* data processed - from a local perspective */
size_t n_bytes_delivered;
/* current offset - where to continue with next fragment */
size_t n_bytes_scheduled;
/* number of fragments needed to process this message */
size_t n_fragments;
/* number of active frags */
int n_active;
/* actual pipeline depth */
int pipeline_depth;
/* am I the real root of the collective ? */
bool root;
/* collective fragment launcher */
mca_coll_ml_fragment_launch_fn_t fragment_launcher;
        /* is the data contiguous */
bool send_data_continguous;
bool recv_data_continguous;
/* data type count */
int64_t send_count;
int64_t recv_count;
/* extent of the data types */
size_t send_extent;
size_t recv_extent;
/* send data type */
struct ompi_datatype_t * send_data_type;
        /* needed for non-contiguous buffers */
size_t offset_into_send_buffer;
/* receive data type */
struct ompi_datatype_t * recv_data_type;
        /* needed for non-contiguous buffers */
size_t offset_into_recv_buffer;
        /* Convertors for non-contiguous data */
opal_convertor_t send_convertor;
opal_convertor_t recv_convertor;
/* Will be used by receiver for #bytes calc in the next frag */
opal_convertor_t dummy_convertor;
size_t dummy_conv_position;
/* Size of packed data */
size_t send_converter_bytes_packed;
size_t recv_converter_bytes_packed;
        /* In case ordering is needed: order number for the next frag */
int next_frag_num;
/* The variable is used by non-blocking memory synchronization code
* for caching bank index */
int bank_index_to_recycle;
/* need a handle for collective progress e.g. alltoall*/
bcol_fragment_descriptor_t frag_info;
} full_message;
/* collective operation being progressed */
mca_coll_ml_collective_operation_description_t *coll_schedule;
/* */
mca_coll_ml_process_op_fn_t process_fn;
mca_coll_base_module_t *coll_module;
/* If not null , we have to release next fragment */
struct mca_coll_ml_collective_operation_progress_t *next_to_process_frag;
/* pointer to previous fragment */
struct mca_coll_ml_collective_operation_progress_t *prev_frag;
    /* This flag marks that the fragment is pending, waiting
     * to be processed prior to recycling
*/
enum mca_coll_ml_pending_type_t pending;
/* Fragment data */
struct fragment_data_t {
/* current buffer pointer - offset (in bytes) into the user data */
size_t offset_into_user_buffer;
size_t offset_into_user_buffer_per_proc;
/* amount of data (in bytes) in this fragment - amount of data
* actually processed */
size_t fragment_size;
size_t per_rank_fragment_size;
size_t data_type_count_per_frag;
/* pointer to full message progress data */
struct full_message_t *message_descriptor;
/* ML buffer descriptor attached to this buffer */
struct mca_bcol_base_payload_buffer_desc_t *buffer_desc;
/* handle for collective progress, e.g. alltoall */
bcol_fragment_descriptor_t bcol_fragment_desc;
/* Which collective algorithm */
int current_coll_op;
} fragment_data;
/* specific function parameters */
/* the assumption is that the variable parameters passed into
* the ML level function will persist until the collective operation
* is complete. For a blocking function this is until the collective
* function is exited, and for nonblocking collective functions this
* is until test or wait completes the collective.
*/
int global_root;
bcol_function_args_t variable_fn_params;
struct{
/* current active function - for sequential algorithms */
int current_active_bcol_fn;
/* current function status - not started, or in progress.
* When the routine has completed, the active bcol index is
* incremented, so no need to keep track of a completed
* status.
*/
int current_bcol_status;
/* use this call back to setup algorithm specific info
after each level necessary
*/
mca_coll_ml_sequential_task_setup_fn_t seq_task_setup;
} sequential_routine;
struct{
/*
* BCOL function status - individual elements will be posted to
* ml level component queues, as appropriate.
*/
mca_coll_ml_task_status_t *status_array;
/* number of completed tasks - need this for collective completion.
* Resource completion is tracked by each BCOL module .
*/
int num_tasks_completed;
} dag_description;
};
typedef struct mca_coll_ml_collective_operation_progress_t
mca_coll_ml_collective_operation_progress_t;
OBJ_CLASS_DECLARATION(mca_coll_ml_collective_operation_progress_t);
#define OP_ML_MODULE(op) ((mca_coll_ml_module_t *)((op)->coll_module))
#define GET_COMM(op) ((OP_ML_MODULE(op))->comm)
#define IS_COLL_SYNCMEM(op) (ML_MEMSYNC == op->fragment_data.current_coll_op)
#define CHECK_AND_RECYCLE(op) \
do { \
if (0 == (op)->pending) { \
            /* Caching 2 values that we can't touch on op after returning it */    \
            /* back to the free list (the free list may release memory on destruct) */ \
struct ompi_communicator_t *comm = GET_COMM(op); \
bool is_coll_sync = IS_COLL_SYNCMEM(op); \
ML_VERBOSE(10, ("Releasing %p", op)); \
OMPI_REQUEST_FINI(&(op)->full_message.super); \
opal_free_list_return (&(((mca_coll_ml_module_t *)(op)->coll_module)-> \
coll_ml_collective_descriptors), \
(opal_free_list_item_t *)op); \
/* Special check for memory synchronization completion */ \
            /* We have to return it to the free list first, since the communicator */ \
            /* release may potentially trigger ML module destruction, and having */   \
            /* the element off the list could cause a memory leak. */                 \
if (OPAL_UNLIKELY(is_coll_sync)) { \
if (OMPI_COMM_IS_INTRINSIC(comm)) { \
opal_show_help("help-mpi-coll-ml.txt", \
"coll-ml-check-fatal-error", true, \
comm->c_name); \
ompi_mpi_abort(comm, 6); \
} else { \
opal_show_help("help-mpi-coll-ml.txt", \
"coll-ml-check-error", true, \
comm->c_name); \
/* After this point it is UNSAFE to touch ml module */ \
/* or communicator */ \
OBJ_RELEASE(comm); \
} \
} \
} \
} while (0)
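/* Ordering support: when one or more bcols in the schedule require ordered
 * delivery, every fragment is stamped with a monotonically increasing order
 * number taken from the topology, and the order number expected for the next
 * fragment is kept in the full-message descriptor. */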
#define MCA_COLL_ML_SET_ORDER_INFO(coll_progress, num_frags) \
do { \
mca_coll_ml_topology_t *topo = (coll_progress)->coll_schedule->topo_info; \
bcol_function_args_t *variable_params = &(coll_progress)->variable_fn_params; \
if (topo->topo_ordering_info.num_bcols_need_ordering > 0) { \
variable_params->order_info.bcols_started = 0; \
variable_params->order_info.order_num = \
topo->topo_ordering_info.next_order_num; \
variable_params->order_info.n_fns_need_ordering = \
(coll_progress)->coll_schedule->n_fns_need_ordering; \
topo->topo_ordering_info.next_order_num += num_frags; \
(coll_progress)->fragment_data.message_descriptor->next_frag_num = \
variable_params->order_info.order_num + 1; \
} \
} while (0)
#define MCA_COLL_ML_SET_NEW_FRAG_ORDER_INFO(coll_progress) \
do { \
mca_coll_ml_topology_t *topo = (coll_progress)->coll_schedule->topo_info; \
if (topo->topo_ordering_info.num_bcols_need_ordering > 0) { \
bcol_function_args_t *variable_params = &(coll_progress)->variable_fn_params; \
struct fragment_data_t *frag_data = &(coll_progress)->fragment_data; \
variable_params->order_info.bcols_started = 0; \
variable_params->order_info.order_num = frag_data->message_descriptor->next_frag_num; \
variable_params->order_info.n_fns_need_ordering = \
(coll_progress)->coll_schedule->n_fns_need_ordering; \
frag_data->message_descriptor->next_frag_num++; \
} \
} while (0)
#define MCA_COLL_ML_SET_SCHEDULE_ORDER_INFO(schedule) \
do { \
int i; \
(schedule)->n_fns_need_ordering = 0; \
for (i = 0; i < (schedule)->n_fns; ++i) { \
mca_bcol_base_module_t *current_bcol = \
(schedule)->component_functions[i].constant_group_data.bcol_module; \
assert (NULL != current_bcol); \
if (current_bcol->bcol_component->need_ordering) { \
(schedule)->n_fns_need_ordering++; \
} \
} \
} while (0)
enum {
MCA_COLL_ML_NET_STREAM_SEND,
MCA_COLL_ML_NET_STREAM_RECV
};
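/* Thin wrappers around the OPAL convertor: prepare a convertor for the send
 * or receive stream and report the packed size, or pack/unpack one buffer
 * and report the number of bytes actually processed. */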
static inline __opal_attribute_always_inline__
int mca_coll_ml_convertor_prepare(ompi_datatype_t *dtype, int count, const void *buff,
opal_convertor_t *convertor, int stream)
{
size_t bytes_packed;
if (MCA_COLL_ML_NET_STREAM_SEND == stream) {
opal_convertor_copy_and_prepare_for_send(
ompi_mpi_local_convertor,
&dtype->super, count, buff, 0,
convertor);
} else {
opal_convertor_copy_and_prepare_for_recv(
ompi_mpi_local_convertor,
&dtype->super, count, buff, 0,
convertor);
}
opal_convertor_get_packed_size(convertor, &bytes_packed);
return bytes_packed;
}
static inline __opal_attribute_always_inline__
int mca_coll_ml_convertor_pack(void *data_addr, size_t buff_size,
opal_convertor_t *convertor)
{
struct iovec iov;
size_t max_data = 0;
uint32_t iov_count = 1;
iov.iov_base = (IOVBASE_TYPE*) data_addr;
iov.iov_len = buff_size;
opal_convertor_pack(convertor, &iov, &iov_count, &max_data);
return max_data;
}
static inline __opal_attribute_always_inline__
int mca_coll_ml_convertor_unpack(void *data_addr, size_t buff_size,
opal_convertor_t *convertor)
{
struct iovec iov;
size_t max_data = 0;
uint32_t iov_count = 1;
iov.iov_base = (void *) (uintptr_t) data_addr;
iov.iov_len = buff_size;
opal_convertor_unpack(convertor, &iov, &iov_count, &max_data);
return max_data;
}
#endif /* MCA_COLL_ML_COLLS_H */

View file

@ -1,449 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* Most of the description of the data layout is in the
 * coll_ml_module.c file.
*/
#include "ompi_config.h"
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <fcntl.h>
#include "ompi/constants.h"
#include "ompi/mca/coll/base/base.h"
#include "opal/mca/mpool/base/base.h"
#include "opal/mca/mpool/mpool.h"
#include "ompi/mca/bcol/base/base.h"
#include "ompi/mca/sbgp/base/base.h"
#include "coll_ml.h"
#include "coll_ml_inlines.h"
#include "ompi/patterns/net/netpatterns.h"
#include "coll_ml_mca.h"
#include "coll_ml_custom_utils.h"
/*
* Public string showing the coll ompi_ml V2 component version number
*/
const char *mca_coll_ml_component_version_string =
"Open MPI ml-V2 collective MCA component version " OMPI_VERSION;
/*
* Local functions
*/
static int ml_open(void);
static int ml_close(void);
static int coll_ml_progress(void);
/*
* Instantiate the public struct with all of our public information
* and pointers to our public functions in it
*/
mca_coll_ml_component_t mca_coll_ml_component = {
/* First, fill in the super */
.super = {
/* First, the mca_component_t struct containing meta
information about the component itself */
.collm_version = {
MCA_COLL_BASE_VERSION_2_0_0,
/* Component name and version */
.mca_component_name = "ml",
MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
OMPI_RELEASE_VERSION),
/* Component open, close, and register functions */
.mca_open_component = ml_open,
.mca_close_component = ml_close,
.mca_register_component_params = mca_coll_ml_register_params
},
.collm_data = {
/* The component is not checkpoint ready */
MCA_BASE_METADATA_PARAM_NONE
},
/* Initialization / querying functions */
.collm_init_query = mca_coll_ml_init_query,
.collm_comm_query = mca_coll_ml_comm_query,
},
};
void mca_coll_ml_abort_ml(char *message)
{
ML_ERROR(("ML Collective FATAL ERROR: %s", message));
/* shutdown the MPI */
ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_INTERN);
}
/*
* progress function
*/
#define INDEX(task) ((task)->my_index_in_coll_schedule)
#define ACTIVE_L (&mca_coll_ml_component.active_tasks)
#define PENDING_L (&mca_coll_ml_component.pending_tasks)
#define SEQ_L (&mca_coll_ml_component.sequential_collectives)
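/* Component-level progress: advance sequential collectives first, then the
 * active DAG tasks, and finally try to start pending DAG tasks whose
 * dependencies have been satisfied, moving them to the active list when a
 * bcol function reports that it has started. */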
static int coll_ml_progress()
{
int rc = OMPI_SUCCESS;
int fn_idx;
mca_coll_ml_task_status_t *task_status, *task_status_tmp;
mca_coll_ml_collective_operation_progress_t *seq_coll_op;
mca_coll_ml_collective_operation_progress_t *seq_coll_op_tmp;
mca_bcol_base_module_collective_fn_primitives_t progress_fn,
coll_fn;
mca_coll_ml_utility_data_t *const_args;
mca_coll_ml_component_t *cm = &mca_coll_ml_component;
    /* Pasha: Not sure that this is the correct way to resolve the problem.
       Iprobe calls the progress engine. The progress engine calls our
       progress, and as a result the first element on the list is progressed again,
       so we call Iprobe again... and as a result we get a HUGE stack.
       One way to prevent it - remove the item from the list, and once you finish
       processing it - put it back.
       Another way - put a flag on the component: if progress is already running, exit immediately.
    */
if (cm->progress_is_busy) {
/* We are already working...*/
return OMPI_SUCCESS;
} else {
cm->progress_is_busy = true;
}
/* progress sequential collective operations */
/* RLG - need to do better here for parallel progress */
OPAL_THREAD_LOCK(&(cm->sequential_collectives_mutex));
OPAL_LIST_FOREACH_SAFE(seq_coll_op, seq_coll_op_tmp, SEQ_L, mca_coll_ml_collective_operation_progress_t) {
do {
fn_idx = seq_coll_op->sequential_routine.current_active_bcol_fn;
/* initialize the task */
if (SEQ_TASK_IN_PROG == seq_coll_op->sequential_routine.current_bcol_status){
progress_fn = seq_coll_op->coll_schedule->
component_functions[fn_idx].bcol_function->progress_fn;
} else {
                /* PPP Pasha - apparently task setup should be called only here. see line 190 */
progress_fn = seq_coll_op->coll_schedule->
component_functions[fn_idx].bcol_function->coll_fn;
}
const_args = &seq_coll_op->coll_schedule->component_functions[fn_idx].constant_group_data;
            /* RLG - note: need to move to using coll_ml_utility_data_t as
* collective argument, rather than mca_bcol_base_function_t
*/
rc = progress_fn(&(seq_coll_op->variable_fn_params), (mca_bcol_base_function_t *)const_args);
if (BCOL_FN_COMPLETE == rc) {
/* done with this routine */
seq_coll_op->sequential_routine.current_active_bcol_fn++;
/* this is totally hardwired for bcast, need a general call-back */
fn_idx = seq_coll_op->sequential_routine.current_active_bcol_fn;
if (fn_idx == seq_coll_op->coll_schedule->n_fns) {
/* done with this collective - recycle descriptor */
/* remove from the progress list */
(void) opal_list_remove_item(SEQ_L, (opal_list_item_t *)seq_coll_op);
/* handle fragment completion */
rc = coll_ml_fragment_completion_processing(seq_coll_op);
if (OMPI_SUCCESS != rc) {
mca_coll_ml_abort_ml("Failed to run coll_ml_fragment_completion_processing");
}
} else {
rc = seq_coll_op->sequential_routine.seq_task_setup(seq_coll_op);
if (OMPI_SUCCESS != rc) {
mca_coll_ml_abort_ml("Failed to run sequential task setup");
}
seq_coll_op->sequential_routine.current_bcol_status = SEQ_TASK_PENDING;
continue;
}
} else if (BCOL_FN_NOT_STARTED == rc) {
seq_coll_op->sequential_routine.current_bcol_status = SEQ_TASK_PENDING;
} else if (BCOL_FN_STARTED == rc) {
seq_coll_op->sequential_routine.current_bcol_status = SEQ_TASK_IN_PROG;
}
break;
} while (true);
}
OPAL_THREAD_UNLOCK(&(cm->sequential_collectives_mutex));
/* general dag's */
/* see if active tasks can be progressed */
OPAL_THREAD_LOCK(&(cm->active_tasks_mutex));
OPAL_LIST_FOREACH(task_status, ACTIVE_L, mca_coll_ml_task_status_t) {
/* progress task */
progress_fn = task_status->bcol_fn->progress_fn;
const_args = &task_status->ml_coll_operation->coll_schedule->
component_functions[INDEX(task_status)].constant_group_data;
rc = progress_fn(&(task_status->ml_coll_operation->variable_fn_params),
(mca_bcol_base_function_t *)const_args);
if (BCOL_FN_COMPLETE == rc) {
ML_VERBOSE(3, ("GOT BCOL_COMPLETED!!!!"));
rc = mca_coll_ml_task_completion_processing(&task_status, ACTIVE_L);
if (OMPI_SUCCESS != rc) {
mca_coll_ml_abort_ml("Failed to run mca_coll_ml_task_completion_processing");
}
} else if (BCOL_FN_STARTED == rc) {
/* nothing to do */
} else {
mca_coll_ml_abort_ml("Failed to run mca_coll_ml_task_completion_processing");
}
}
OPAL_THREAD_UNLOCK(&(cm->active_tasks_mutex));
/* see if new tasks can be initiated */
OPAL_THREAD_LOCK(&(cm->pending_tasks_mutex));
OPAL_LIST_FOREACH_SAFE(task_status, task_status_tmp, PENDING_L, mca_coll_ml_task_status_t) {
/* check to see if dependencies are satisfied */
int n_dependencies = task_status->rt_num_dependencies;
int n_dependencies_satisfied = task_status->n_dep_satisfied;
if (n_dependencies == n_dependencies_satisfied) {
/* initiate the task */
coll_fn = task_status->bcol_fn->coll_fn;
const_args = &task_status->ml_coll_operation->coll_schedule->
component_functions[INDEX(task_status)].constant_group_data;
rc = coll_fn(&(task_status->ml_coll_operation->variable_fn_params),
(mca_bcol_base_function_t *)const_args);
if (BCOL_FN_COMPLETE == rc) {
ML_VERBOSE(3, ("GOT BCOL_COMPLETED!"));
rc = mca_coll_ml_task_completion_processing(&task_status, PENDING_L);
if (OMPI_SUCCESS != rc) {
mca_coll_ml_abort_ml("Failed to run mca_coll_ml_task_completion_processing");
}
} else if ( BCOL_FN_STARTED == rc ) {
ML_VERBOSE(3, ("GOT BCOL_STARTED!"));
(void) opal_list_remove_item(PENDING_L, (opal_list_item_t *)task_status);
/* RLG - is there potential for deadlock here ? Need to
* look at this closely
*/
OPAL_THREAD_LOCK(&(cm->active_tasks_mutex));
opal_list_append(ACTIVE_L, (opal_list_item_t *)task_status);
OPAL_THREAD_UNLOCK(&(cm->active_tasks_mutex));
} else if( BCOL_FN_NOT_STARTED == rc ) {
/* nothing to do */
ML_VERBOSE(10, ("GOT BCOL_FN_NOT_STARTED!"));
} else {
OPAL_THREAD_UNLOCK(&(cm->pending_tasks_mutex));
/* error will be returned - RLG : need to reconsider return
* types - we have no way to convey error information
* the way the code is implemented now */
ML_VERBOSE(3, ("GOT error !"));
rc = OMPI_ERROR;
OMPI_ERRHANDLER_RETURN(rc,MPI_COMM_WORLD,rc,"Error returned from bcol function: aborting");
break;
}
}
}
OPAL_THREAD_UNLOCK(&(cm->pending_tasks_mutex));
/* return */
cm->progress_is_busy = false;
return rc;
}
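/* Map the bcast algorithm chosen via MCA parameter onto the cached
 * per-collective configuration (small and large message entries). */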
static void adjust_coll_config_by_mca_param(void)
{
/* setting bcast mca params */
if (COLL_ML_STATIC_BCAST == mca_coll_ml_component.bcast_algorithm) {
mca_coll_ml_component.coll_config[ML_BCAST][ML_SMALL_MSG].algorithm_id = ML_BCAST_SMALL_DATA_KNOWN;
mca_coll_ml_component.coll_config[ML_BCAST][ML_LARGE_MSG].algorithm_id = ML_BCAST_LARGE_DATA_KNOWN;
} else if (COLL_ML_SEQ_BCAST == mca_coll_ml_component.bcast_algorithm) {
mca_coll_ml_component.coll_config[ML_BCAST][ML_SMALL_MSG].algorithm_id = ML_BCAST_SMALL_DATA_SEQUENTIAL;
mca_coll_ml_component.coll_config[ML_BCAST][ML_LARGE_MSG].algorithm_id = ML_BCAST_LARGE_DATA_SEQUENTIAL;
} else { /* Unknown root */
mca_coll_ml_component.coll_config[ML_BCAST][ML_SMALL_MSG].algorithm_id = ML_BCAST_SMALL_DATA_UNKNOWN;
mca_coll_ml_component.coll_config[ML_BCAST][ML_LARGE_MSG].algorithm_id = ML_BCAST_LARGE_DATA_UNKNOWN;
}
}
/*
* Open the component
*/
static int ml_open(void)
{
/* local variables */
int rc, c_idx, m_idx;
mca_coll_ml_component_t *cs = &mca_coll_ml_component;
/* set the starting sequence number */
cs->base_sequence_number = -1;
cs->progress_is_busy = false;
/* If the priority is zero (default) disable the component */
if (mca_coll_ml_component.ml_priority <= 0) {
return OMPI_ERR_NOT_AVAILABLE;
}
/* Init memory structures (no real memory is allocated) */
OBJ_CONSTRUCT(&cs->memory_manager, mca_coll_ml_lmngr_t);
if (OMPI_SUCCESS != (rc = mca_base_framework_open(&ompi_sbgp_base_framework, 0))) {
fprintf(stderr," failure in open mca_sbgp_base_open \n");
return rc;
}
if (OMPI_SUCCESS != (rc = mca_base_framework_open(&ompi_bcol_base_framework, 0))) {
fprintf(stderr," failure in open mca_bcol_base_open \n");
return rc;
}
/* Reset collective tunings cache */
for (c_idx = 0; c_idx < ML_NUM_OF_FUNCTIONS; c_idx++) {
for (m_idx = 0; m_idx < ML_NUM_MSG; m_idx++) {
mca_coll_ml_reset_config(&cs->coll_config[c_idx][m_idx]);
}
}
adjust_coll_config_by_mca_param();
/* Load configuration file and cache the configuration on component */
rc = mca_coll_ml_config_file_init();
if (OMPI_SUCCESS != rc) {
return OMPI_ERROR;
}
    /* register the progress function */
rc = opal_progress_register(coll_ml_progress);
if (OMPI_SUCCESS != rc ) {
fprintf(stderr," failed to register the ml progress function \n");
fflush(stderr);
return rc;
}
OBJ_CONSTRUCT(&(cs->pending_tasks_mutex), opal_mutex_t);
OBJ_CONSTRUCT(&(cs->pending_tasks), opal_list_t);
OBJ_CONSTRUCT(&(cs->active_tasks_mutex), opal_mutex_t);
OBJ_CONSTRUCT(&(cs->active_tasks), opal_list_t);
OBJ_CONSTRUCT(&(cs->sequential_collectives_mutex), opal_mutex_t);
OBJ_CONSTRUCT(&(cs->sequential_collectives), opal_list_t);
rc = netpatterns_init();
if (OMPI_SUCCESS != rc) {
return rc;
}
cs->topo_discovery_fn[COLL_ML_HR_FULL] =
mca_coll_ml_fulltree_hierarchy_discovery;
cs->topo_discovery_fn[COLL_ML_HR_ALLREDUCE] =
mca_coll_ml_allreduce_hierarchy_discovery;
cs->topo_discovery_fn[COLL_ML_HR_NBS] =
mca_coll_ml_fulltree_exclude_basesmsocket_hierarchy_discovery;
cs->topo_discovery_fn[COLL_ML_HR_SINGLE_PTP] =
mca_coll_ml_fulltree_ptp_only_hierarchy_discovery;
cs->topo_discovery_fn[COLL_ML_HR_SINGLE_IBOFFLOAD] =
mca_coll_ml_fulltree_iboffload_only_hierarchy_discovery;
cs->need_allreduce_support = false;
return OMPI_SUCCESS;
}
/*
* Close the component
*/
static int ml_close(void)
{
int ret;
mca_coll_ml_component_t *cs = &mca_coll_ml_component;
    /* There is no need to release/close resources if the
     * priority was set to zero */
if (cs->ml_priority <= 0) {
return OMPI_SUCCESS;
}
OBJ_DESTRUCT(&cs->memory_manager);
OBJ_DESTRUCT(&cs->pending_tasks_mutex);
OBJ_DESTRUCT(&cs->pending_tasks);
OBJ_DESTRUCT(&cs->active_tasks_mutex);
OBJ_DESTRUCT(&cs->active_tasks);
OBJ_DESTRUCT(&cs->sequential_collectives_mutex);
OBJ_DESTRUCT(&cs->sequential_collectives);
/* deregister progress function */
ret = opal_progress_unregister(coll_ml_progress);
if (OMPI_SUCCESS != ret ) {
OMPI_ERROR_LOG(ret);
return ret;
}
/* close the sbgp and bcol frameworks */
if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_sbgp_base_framework))) {
OMPI_ERROR_LOG(ret);
return ret;
}
if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_bcol_base_framework))) {
OMPI_ERROR_LOG(ret);
return ret;
}
return OMPI_SUCCESS;
}
/* query to see if the component is available for use, and can
* satisfy the thread and progress requirements
*/
int mca_coll_ml_init_query(bool enable_progress_threads,
bool enable_mpi_threads)
{
int ret;
    /* at this stage there is no reason to disqualify this component */
    /* Add here bcol init and sbgp init */
ret = mca_sbgp_base_init(enable_progress_threads, enable_mpi_threads);
if (OMPI_SUCCESS != ret) {
return ret;
}
ret = mca_bcol_base_init(enable_progress_threads, enable_mpi_threads);
if (OMPI_SUCCESS != ret) {
return ret;
}
/* done */
return OMPI_SUCCESS;
}

View file

@ -1,613 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include "coll_ml.h"
#include "coll_ml_inlines.h"
#include "coll_ml_config.h"
#include "coll_ml_lex.h"
static char *key_buffer = NULL;
static size_t key_buffer_len = 0;
typedef struct section_config_t {
char *section_name;
int section_id;
per_collective_configuration_t config;
} section_config_t;
typedef struct coll_config_t {
char *coll_name;
int coll_id;
section_config_t section;
} coll_config_t;
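/* Helpers that translate the textual names found in the configuration file
 * (algorithm, hierarchy, message-size section and collective names) into
 * internal ids; ML_UNDEFINED is returned for anything unrecognized. */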
static int algorithm_name_to_id(char *name)
{
assert (NULL != name);
if (!strcasecmp(name,"ML_BCAST_SMALL_DATA_KNOWN"))
return ML_BCAST_SMALL_DATA_KNOWN;
if (!strcasecmp(name,"ML_BCAST_SMALL_DATA_UNKNOWN"))
return ML_BCAST_SMALL_DATA_UNKNOWN;
if (!strcasecmp(name,"ML_BCAST_SMALL_DATA_SEQUENTIAL"))
return ML_BCAST_SMALL_DATA_SEQUENTIAL;
if (!strcasecmp(name,"ML_BCAST_LARGE_DATA_KNOWN"))
return ML_BCAST_LARGE_DATA_KNOWN;
if (!strcasecmp(name,"ML_BCAST_LARGE_DATA_UNKNOWN"))
return ML_BCAST_LARGE_DATA_UNKNOWN;
if (!strcasecmp(name,"ML_BCAST_LARGE_DATA_SEQUENTIAL"))
return ML_BCAST_LARGE_DATA_SEQUENTIAL;
if (!strcasecmp(name,"ML_N_DATASIZE_BINS"))
return ML_N_DATASIZE_BINS;
if (!strcasecmp(name,"ML_NUM_BCAST_FUNCTIONS"))
return ML_NUM_BCAST_FUNCTIONS;
if (!strcasecmp(name,"ML_SCATTER_SMALL_DATA_KNOWN"))
return ML_SCATTER_SMALL_DATA_KNOWN;
if (!strcasecmp(name,"ML_SCATTER_N_DATASIZE_BINS"))
return ML_SCATTER_N_DATASIZE_BINS;
if (!strcasecmp(name,"ML_SCATTER_SMALL_DATA_UNKNOWN"))
return ML_SCATTER_SMALL_DATA_UNKNOWN;
if (!strcasecmp(name,"ML_SCATTER_SMALL_DATA_SEQUENTIAL"))
return ML_SCATTER_SMALL_DATA_SEQUENTIAL;
if (!strcasecmp(name,"ML_NUM_SCATTER_FUNCTIONS"))
return ML_NUM_SCATTER_FUNCTIONS;
if (!strcasecmp(name,"ML_SMALL_DATA_ALLREDUCE"))
return ML_SMALL_DATA_ALLREDUCE;
if (!strcasecmp(name,"ML_LARGE_DATA_ALLREDUCE"))
return ML_LARGE_DATA_ALLREDUCE;
if (!strcasecmp(name,"ML_SMALL_DATA_REDUCE"))
return ML_SMALL_DATA_ALLREDUCE;
if (!strcasecmp(name,"ML_LARGE_DATA_REDUCE"))
return ML_LARGE_DATA_ALLREDUCE;
if (!strcasecmp(name,"ML_SMALL_DATA_REDUCE"))
return ML_SMALL_DATA_REDUCE;
if (!strcasecmp(name,"ML_LARGE_DATA_REDUCE"))
return ML_LARGE_DATA_REDUCE;
if (!strcasecmp(name,"ML_NUM_ALLREDUCE_FUNCTIONS"))
return ML_NUM_ALLREDUCE_FUNCTIONS;
if (!strcasecmp(name,"ML_SMALL_DATA_ALLTOALL"))
return ML_SMALL_DATA_ALLTOALL;
if (!strcasecmp(name,"ML_LARGE_DATA_ALLTOALL"))
return ML_LARGE_DATA_ALLTOALL;
if (!strcasecmp(name,"ML_NUM_ALLTOALL_FUNCTIONS"))
return ML_NUM_ALLTOALL_FUNCTIONS;
if (!strcasecmp(name,"ML_SMALL_DATA_ALLGATHER"))
return ML_SMALL_DATA_ALLGATHER;
if (!strcasecmp(name,"ML_LARGE_DATA_ALLGATHER"))
return ML_LARGE_DATA_ALLGATHER;
if (!strcasecmp(name,"ML_NUM_ALLGATHER_FUNCTIONS"))
return ML_NUM_ALLGATHER_FUNCTIONS;
if (!strcasecmp(name,"ML_SMALL_DATA_GATHER"))
return ML_SMALL_DATA_GATHER;
if (!strcasecmp(name,"ML_LARGE_DATA_GATHER"))
return ML_LARGE_DATA_GATHER;
if (!strcasecmp(name,"ML_NUM_GATHER_FUNCTIONS"))
return ML_NUM_GATHER_FUNCTIONS;
if (!strcasecmp(name,"ML_BARRIER_DEFAULT"))
return ML_BARRIER_DEFAULT;
/* ERROR */
return ML_UNDEFINED;
}
static int hierarchy_name_to_id(char *name)
{
assert (NULL != name);
if (!strcasecmp(name, "FULL_HR")) {
return COLL_ML_HR_FULL;
}
if (!strcasecmp(name, "FULL_HR_NO_BASESOCKET")) {
return COLL_ML_HR_NBS;
}
if (!strcasecmp(name, "PTP_ONLY")) {
return COLL_ML_HR_SINGLE_PTP;
}
if (!strcasecmp(name, "IBOFFLOAD_ONLY")) {
return COLL_ML_HR_SINGLE_IBOFFLOAD;
}
/* Error */
return ML_UNDEFINED;
}
static int section_name_to_id(char *name)
{
assert (NULL != name);
if (!strcasecmp(name, "SMALL")) {
return ML_SMALL_MSG;
}
if (!strcasecmp(name, "LARGE")) {
return ML_LARGE_MSG;
}
/* Error */
return ML_UNDEFINED;
}
static int coll_name_to_id(char *name)
{
assert (NULL != name);
if (!strcasecmp(name, "ALLGATHER")) {
return ML_ALLGATHER;
}
if (!strcasecmp(name, "ALLGATHERV")) {
return ML_ALLGATHERV;
}
if (!strcasecmp(name, "ALLREDUCE")) {
return ML_ALLREDUCE;
}
if (!strcasecmp(name, "ALLTOALL")) {
return ML_ALLTOALL;
}
if (!strcasecmp(name, "ALLTOALLV")) {
return ML_ALLTOALLV;
}
if (!strcasecmp(name, "ALLTOALLW")) {
return ML_ALLTOALLW;
}
if (!strcasecmp(name, "ALLTOALLW")) {
return ML_ALLTOALLW;
}
if (!strcasecmp(name, "BARRIER")) {
return ML_BARRIER;
}
if (!strcasecmp(name, "BCAST")) {
return ML_BCAST;
}
if (!strcasecmp(name, "EXSCAN")) {
return ML_EXSCAN;
}
if (!strcasecmp(name, "GATHER")) {
return ML_GATHER;
}
if (!strcasecmp(name, "GATHERV")) {
return ML_GATHERV;
}
if (!strcasecmp(name, "REDUCE")) {
return ML_REDUCE;
}
if (!strcasecmp(name, "REDUCE_SCATTER")) {
return ML_REDUCE_SCATTER;
}
if (!strcasecmp(name, "SCAN")) {
return ML_SCAN;
}
if (!strcasecmp(name, "SCATTER")) {
return ML_SCATTER;
}
if (!strcasecmp(name, "SCATTERV")) {
return ML_SCATTERV;
}
/* nonblocking functions */
if (!strcasecmp(name, "IALLGATHER")) {
return ML_IALLGATHER;
}
if (!strcasecmp(name, "IALLGATHERV")) {
return ML_IALLGATHERV;
}
if (!strcasecmp(name, "IALLREDUCE")) {
return ML_IALLREDUCE;
}
if (!strcasecmp(name, "IALLTOALL")) {
return ML_IALLTOALL;
}
if (!strcasecmp(name, "IALLTOALLV")) {
return ML_IALLTOALLV;
}
if (!strcasecmp(name, "IALLTOALLW")) {
return ML_IALLTOALLW;
}
if (!strcasecmp(name, "IALLTOALLW")) {
return ML_IALLTOALLW;
}
if (!strcasecmp(name, "IBARRIER")) {
return ML_IBARRIER;
}
if (!strcasecmp(name, "IBCAST")) {
return ML_IBCAST;
}
if (!strcasecmp(name, "IEXSCAN")) {
return ML_IEXSCAN;
}
if (!strcasecmp(name, "IGATHER")) {
return ML_IGATHER;
}
if (!strcasecmp(name, "IGATHERV")) {
return ML_IGATHERV;
}
if (!strcasecmp(name, "IREDUCE")) {
return ML_IREDUCE;
}
if (!strcasecmp(name, "IREDUCE_SCATTER")) {
return ML_IREDUCE_SCATTER;
}
if (!strcasecmp(name, "ISCAN")) {
return ML_ISCAN;
}
if (!strcasecmp(name, "ISCATTER")) {
return ML_ISCATTER;
}
if (!strcasecmp(name, "ISCATTERV")) {
return ML_ISCATTERV;
}
    /* Error - collective name was not matched */
return ML_UNDEFINED;
}
static int set_collective_name(coll_config_t *coll_config)
{
int coll_id =
coll_name_to_id(coll_ml_config_yytext);
if (ML_UNDEFINED == coll_id) {
return OMPI_ERROR;
}
coll_config->coll_id = coll_id;
coll_config->coll_name = strdup(coll_ml_config_yytext);
return OMPI_SUCCESS;
}
static int set_section_name(section_config_t *section_config)
{
int section_id;
section_id = section_name_to_id(coll_ml_config_yytext);
if (ML_UNDEFINED == section_id) {
return OMPI_ERROR;
}
section_config->section_id = section_id;
section_config->section_name = strdup(coll_ml_config_yytext);
return OMPI_SUCCESS;
}
void mca_coll_ml_reset_config(per_collective_configuration_t *config)
{
config->topology_id = ML_UNDEFINED;
config->threshold = ML_UNDEFINED;
config->algorithm_id = ML_UNDEFINED;
config->fragmentation_enabled = ML_UNDEFINED;
}
static void reset_section(section_config_t *section_cf)
{
if (section_cf->section_name) {
free (section_cf->section_name);
section_cf->section_name = NULL;
}
section_cf->section_id = ML_UNDEFINED;
mca_coll_ml_reset_config(&section_cf->config);
}
static void reset_collective(coll_config_t *coll_cf)
{
if (coll_cf->coll_name) {
free (coll_cf->coll_name);
coll_cf->coll_name = NULL;
}
coll_cf->coll_id = ML_UNDEFINED;
reset_section(&coll_cf->section);
}
/*
* String to integer;
*/
static int string_to_int(char *str)
{
while (isspace(*str)) {
++str;
}
/* Nope -- just decimal, so use atoi() */
return atoi(str);
}
static int parse_algorithm_key(section_config_t *section, char *value)
{
int ret;
ret = algorithm_name_to_id(value);
if (ML_UNDEFINED == ret) {
return OMPI_ERROR;
} else {
section->config.algorithm_id = ret;
}
return OMPI_SUCCESS;
}
static int parse_threshold_key(section_config_t *section, char *value)
{
assert (NULL != value);
if(!strcasecmp(value, "unlimited")) {
section->config.threshold = -1;
} else {
section->config.threshold = string_to_int(value);
}
return OMPI_SUCCESS;
}
static int parse_hierarchy_key(section_config_t *section, char *value)
{
int ret;
ret = hierarchy_name_to_id(value);
if (ML_UNDEFINED == ret) {
return OMPI_ERROR;
}
section->config.topology_id = ret;
return OMPI_SUCCESS;
}
static int parse_fragmentation_key(section_config_t *section, char *value)
{
assert (NULL != value);
if(!strcasecmp(value, "enable")) {
section->config.fragmentation_enabled = 1;
} else if (!strcasecmp(value, "disable")) {
section->config.fragmentation_enabled = 0;
} else {
ML_ERROR(("Line %d, unexpected fragmentation value %s. Legal values are: enable/disable",
coll_ml_config_yynewlines, value));
return OMPI_ERROR;
}
return OMPI_SUCCESS;
}
/* Save the configuration that has been collected so far */
static int save_settings(coll_config_t *coll_config)
{
per_collective_configuration_t *cf;
if (ML_UNDEFINED == coll_config->coll_id || ML_UNDEFINED == coll_config->section.section_id) {
return OMPI_ERROR;
}
cf = &mca_coll_ml_component.coll_config[coll_config->coll_id][coll_config->section.section_id];
cf->topology_id = coll_config->section.config.topology_id;
cf->threshold = coll_config->section.config.threshold;
cf->algorithm_id = coll_config->section.config.algorithm_id;
cf->fragmentation_enabled = coll_config->section.config.fragmentation_enabled;
return OMPI_SUCCESS;
}
/*
* Parse a single line
*/
static int parse_line(section_config_t *section)
{
int val, ret = OMPI_SUCCESS;
char *value = NULL;
    /* Save the key name */
if (key_buffer_len < strlen(coll_ml_config_yytext) + 1) {
char *tmp;
key_buffer_len = strlen(coll_ml_config_yytext) + 1;
tmp = (char *) realloc(key_buffer, key_buffer_len);
if (NULL == tmp) {
free(key_buffer);
key_buffer_len = 0;
key_buffer = NULL;
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
}
key_buffer = tmp;
}
strncpy(key_buffer, coll_ml_config_yytext, key_buffer_len);
/* The first thing we have to see is an "=" */
val = coll_ml_config_yylex();
if (coll_ml_config_parse_done || COLL_ML_CONFIG_PARSE_EQUAL != val) {
ML_ERROR(("Line %d, expected = before key: %s",
coll_ml_config_yynewlines,
key_buffer));
return OMPI_ERROR;
}
/* Next we get the value */
val = coll_ml_config_yylex();
if (COLL_ML_CONFIG_PARSE_SINGLE_WORD == val ||
COLL_ML_CONFIG_PARSE_VALUE == val) {
value = strdup(coll_ml_config_yytext);
if (NULL == value) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
/* Now we need to see the newline */
val = coll_ml_config_yylex();
if (COLL_ML_CONFIG_PARSE_NEWLINE != val &&
COLL_ML_CONFIG_PARSE_DONE != val) {
ML_ERROR(("Line %d, expected new line after %s",
coll_ml_config_yynewlines,
key_buffer));
free(value);
return OMPI_ERROR;
}
}
/* If we did not get EOL or EOF, something is wrong */
else if (COLL_ML_CONFIG_PARSE_DONE != val &&
COLL_ML_CONFIG_PARSE_NEWLINE != val) {
ML_ERROR(("Line %d, expected new line or end of line",
coll_ml_config_yynewlines));
return OMPI_ERROR;
} else {
ML_ERROR(("Line %d malformed", coll_ml_config_yynewlines));
return OMPI_ERROR;
}
/* Line parsing is done, read the values */
if (!strcasecmp(key_buffer, "algorithm")) {
ret = parse_algorithm_key(section, value);
} else if (!strcasecmp(key_buffer, "threshold")) {
ret = parse_threshold_key(section, value);
} else if (!strcasecmp(key_buffer, "hierarchy")) {
ret = parse_hierarchy_key(section, value);
} else if (!strcasecmp(key_buffer, "fragmentation")) {
ret = parse_fragmentation_key(section, value);
/* Failed to parse the key */
} else {
ML_ERROR(("Line %d, unknown key %s",
coll_ml_config_yynewlines, key_buffer));
}
/* All done */
free(value);
return ret;
}
/**************************************************************************/
/*
* Parse a single file
*/
static int parse_file(char *filename)
{
int val;
int ret = OMPI_SUCCESS;
bool first_section = true, first_coll = true;
coll_config_t coll_config;
memset (&coll_config, 0, sizeof (coll_config));
reset_collective(&coll_config);
/* Open the file */
coll_ml_config_yyin = fopen(filename, "r");
if (NULL == coll_ml_config_yyin) {
ML_ERROR(("Failed to open config file %s", filename));
ret = OMPI_ERR_NOT_FOUND;
goto cleanup;
}
/* Do the parsing */
coll_ml_config_parse_done = false;
coll_ml_config_yynewlines = 1;
coll_ml_config_init_buffer(coll_ml_config_yyin);
while (!coll_ml_config_parse_done) {
val = coll_ml_config_yylex();
switch (val) {
case COLL_ML_CONFIG_PARSE_DONE:
case COLL_ML_CONFIG_PARSE_NEWLINE:
break;
case COLL_ML_CONFIG_PARSE_COLLECTIVE:
/* dump all the information to last section that was defined */
if (!first_coll) {
ret = save_settings(&coll_config);
if (OMPI_SUCCESS != ret) {
ML_ERROR(("Error in syntax for collective %s", coll_config.coll_name));
goto cleanup;
}
}
/* reset collective config */
reset_collective(&coll_config);
first_coll = false;
first_section = true;
ret = set_collective_name(&coll_config);
if (OMPI_SUCCESS != ret) {
goto cleanup;
}
break;
case COLL_ML_CONFIG_PARSE_SECTION:
if (ML_UNDEFINED == coll_config.coll_id) {
ML_ERROR(("Collective section wasn't defined !"));
ret = OMPI_ERROR;
goto cleanup;
}
if (!first_section) {
/* dump all the information to last section that was defined */
ret = save_settings(&coll_config);
if (OMPI_SUCCESS != ret) {
ML_ERROR(("Error in syntax for collective %s section %s", coll_config.coll_name,
coll_config.section.section_name));
goto cleanup;
}
}
first_section = false;
/* reset all section values */
reset_section(&coll_config.section);
/* set new section name */
ret = set_section_name(&coll_config.section);
if (OMPI_SUCCESS != ret) {
goto cleanup;
}
break;
case COLL_ML_CONFIG_PARSE_SINGLE_WORD:
if (ML_UNDEFINED == coll_config.coll_id ||
ML_UNDEFINED == coll_config.section.section_id) {
ML_ERROR(("Collective section or sub-section was not defined !"));
ret = OMPI_ERROR;
goto cleanup;
} else {
parse_line(&coll_config.section);
}
break;
default:
/* anything else is an error */
ML_ERROR(("Unexpected token!"));
ret = OMPI_ERROR;
goto cleanup;
break;
}
}
save_settings(&coll_config);
fclose(coll_ml_config_yyin);
coll_ml_config_yylex_destroy ();
ret = OMPI_SUCCESS;
cleanup:
reset_collective(&coll_config);
if (NULL != key_buffer) {
free(key_buffer);
key_buffer = NULL;
key_buffer_len = 0;
}
return ret;
}
int mca_coll_ml_config_file_init(void)
{
return parse_file(mca_coll_ml_component.config_file_name);
}
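For reference, the parser above reads a file of collective sections, each containing one or more message-size sub-sections with algorithm/threshold/hierarchy/fragmentation keys. A minimal sketch of such a file follows, assuming bracketed collective names and angle-bracketed sub-section names; the exact delimiters and the legal hierarchy names come from the accompanying flex lexer and hierarchy table, so treat every name below as illustrative only.

[ALLREDUCE]
<SMALL_MSG>
algorithm = ML_SMALL_DATA_ALLREDUCE
hierarchy = full_hr
threshold = 256
fragmentation = disable
<LARGE_MSG>
algorithm = ML_LARGE_DATA_ALLREDUCE
threshold = unlimited
fragmentation = enable

Unknown keys are rejected in parse_line(), and settings are only saved once both a collective name and a sub-section name have been seen (save_settings() returns an error otherwise).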

View file

@@ -1,23 +0,0 @@
#ifndef COLL_ML_CONFIG_H_
#define COLL_ML_CONFIG_H_
#include "opal_config.h"
#include <stdio.h>
BEGIN_C_DECLS
#define ML_UNDEFINED -1
struct per_collective_configuration_t {
int topology_id;
int threshold;
int algorithm_id;
int fragmentation_enabled;
};
typedef struct per_collective_configuration_t per_collective_configuration_t;
void mca_coll_ml_reset_config(per_collective_configuration_t *config);
int mca_coll_ml_config_file_init(void);
END_C_DECLS
#endif

View file

@@ -1,131 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file */
#include "ompi_config.h"
#include "ompi/constants.h"
#include "opal/threads/mutex.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/bcol/bcol.h"
#include "opal/sys/atomic.h"
#include "ompi/mca/coll/ml/coll_ml.h"
#include "ompi/mca/coll/ml/coll_ml_inlines.h"
#include "ompi/mca/coll/ml/coll_ml_allocation.h"
#include "coll_ml_colls.h"
#include <unistd.h>
#include <sys/uio.h>
/* This routine re-orders and packs user data. The assumption is that
* there is per-process data, the amount of data is the same for all
* ranks, and the user data is contiguous.
*/
int mca_coll_ml_pack_reorder_contiguous_data(mca_coll_ml_collective_operation_progress_t *coll_op)
{
int i, rank;
void *user_buf, *library_buf;
size_t bytes_per_proc;
mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *)
coll_op->coll_module;
mca_coll_ml_topology_t *topo_info = coll_op->coll_schedule->topo_info;
ptrdiff_t ptr_dif;
/* get the offset into each process's data. The assumption is that
* we are manipulating the same amount of data for each process.
*/
/* figure out how much data per-proc to copy */
bytes_per_proc=coll_op->fragment_data.per_rank_fragment_size;
/* loop over all the ranks in the communicator */
for( i=0 ; i < ompi_comm_size(ml_module->comm) ; i++ ) {
/* look up the rank of the i'th element in the sorted list */
rank = topo_info->sort_list[i];
/* get the pointer to user data */
user_buf=(void *)coll_op->full_message.src_user_addr;
/* compute offset into the user buffer */
/* offset for data already processed */
ptr_dif=rank*coll_op->full_message.n_bytes_per_proc_total+
coll_op->fragment_data.offset_into_user_buffer_per_proc;
user_buf=(void *) ((char *)user_buf+ptr_dif);
/* get the pointer to the ML buffer */
library_buf= (void *)
((char *)coll_op->variable_fn_params.src_desc->data_addr+i*bytes_per_proc);
/* copy the data */
memcpy(library_buf, user_buf, bytes_per_proc);
}
return OMPI_SUCCESS;
}
/* This routine re-orders and packs user data. The assumption is that
* there is per-process data, the amount of data is the same for all
* ranks, and the user data is non-contiguous (described by a send
* count and extent).
*/
int mca_coll_ml_pack_reorder_noncontiguous_data(mca_coll_ml_collective_operation_progress_t *coll_op)
{
int i, rank;
void *user_buf, *library_buf;
size_t bytes_per_proc;
ptrdiff_t ptr_dif;
mca_coll_ml_module_t *ml_module = (mca_coll_ml_module_t *)
coll_op->coll_module;
mca_coll_ml_topology_t *topo_info = coll_op->coll_schedule->topo_info;
/* get the offset into each process's data. The assumption is that
* we are manipulating the same amount of data for each process.
*/
/* figure out how much data per-proc to copy */
bytes_per_proc = coll_op->fragment_data.per_rank_fragment_size;
/* loop over all the ranks in the communicator */
for(i = 0; i < ompi_comm_size(ml_module->comm); i++ ) {
/* look up the rank of the i'th element in the sorted list */
rank = topo_info->sort_list[i];
/* get the pointer to user data */
user_buf=(void *)coll_op->full_message.src_user_addr;
/* compute offset into the user buffer */
/* offset for data already processed */
ptr_dif=rank*coll_op->full_message.send_count*
coll_op->full_message.send_extent+
coll_op->fragment_data.offset_into_user_buffer_per_proc;
user_buf=(void *) ((char *)user_buf+ptr_dif);
/* get the pointer to the ML buffer */
library_buf= (void *)
((char *)coll_op->variable_fn_params.src_desc->data_addr+i*bytes_per_proc);
/* copy the data */
memcpy(library_buf, user_buf, bytes_per_proc);
}
return OMPI_SUCCESS;
}
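As a hedged illustration of the address arithmetic shared by the two packing routines above: for each slot i the destination offset in the ML buffer is contiguous (i * bytes_per_proc), while the source offset in the user buffer is derived from the sorted rank plus the amount of data already fragmented. The sketch below restates that computation with standalone placeholder names; they are not fields taken verbatim from the structures above.

/* Illustrative only: copy one per-process slice into ML-buffer slot i. */
size_t user_offset    = (size_t) rank * per_proc_stride   /* n_bytes_per_proc_total, or send_count * send_extent */
                      + offset_already_fragmented;        /* offset_into_user_buffer_per_proc */
size_t library_offset = (size_t) i * bytes_per_proc;
memcpy((char *) library_base + library_offset,
       (char *) user_base + user_offset,
       bytes_per_proc);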

View file

@@ -1,139 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file */
#include "ompi_config.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "opal/util/output.h"
#include "opal/class/opal_list.h"
#include "opal/class/opal_object.h"
#include "ompi/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/threads/mutex.h"
#include "opal/sys/atomic.h"
#include "ompi/op/op.h"
#include "ompi/constants.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/bcol/bcol.h"
#include "ompi/mca/coll/base/base.h"
#include "ompi/mca/coll/ml/coll_ml.h"
#include "ompi/mca/coll/ml/coll_ml_inlines.h"
#include "ompi/patterns/comm/coll_ops.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/bcol/base/base.h"
#include "coll_ml_custom_utils.h"
/*
* Local types
*/
struct avail_coll_t {
opal_list_item_t super;
int ac_priority;
mca_coll_base_module_2_1_0_t *ac_module;
};
typedef struct avail_coll_t avail_coll_t;
/*
* Stuff for the OBJ interface.
*
* If topo_index == COLL_ML_TOPO_MAX the check loops over all possible
* topologies, otherwise it looks only in the topology that was specified.
*/
int mca_coll_ml_check_if_bcol_is_used(const char *bcol_name, const mca_coll_ml_module_t *ml_module,
int topo_index)
{
int i, rc, hier, *ranks_in_comm,
is_used = 0,
comm_size = ompi_comm_size(ml_module->comm);
int n_hier, tp , max_tp;
const mca_coll_ml_topology_t *topo_info;
ranks_in_comm = (int *) malloc(comm_size * sizeof(int));
if (OPAL_UNLIKELY(NULL == ranks_in_comm)) {
ML_ERROR(("Memory allocation failed."));
ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_NO_MEM);
/* not reached, but returning here silences a clang warning */
return OMPI_ERR_OUT_OF_RESOURCE;
}
for (i = 0; i < comm_size; ++i) {
ranks_in_comm[i] = i;
}
if (COLL_ML_TOPO_MAX == topo_index) {
tp = 0;
max_tp = COLL_ML_TOPO_MAX;
} else {
tp = topo_index;
max_tp = topo_index + 1;
}
for (; tp < max_tp; tp++) {
topo_info = &ml_module->topo_list[tp];
n_hier = topo_info->n_levels;
for (hier = 0; hier < n_hier; ++hier) {
hierarchy_pairs *pair = &topo_info->component_pairs[hier];
mca_bcol_base_component_t *b_cm = pair->bcol_component;
if(0 == strcmp(bcol_name,
b_cm->bcol_version.mca_component_name)) {
is_used = 1;
break;
}
}
}
rc = comm_allreduce_pml(&is_used, &is_used, 1, MPI_INT,
ompi_comm_rank(ml_module->comm), MPI_MAX,
comm_size, ranks_in_comm, ml_module->comm);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
ML_ERROR(("comm_allreduce_pml failed."));
ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_OP);
}
free(ranks_in_comm);
return is_used;
}
/* Unlike the function above, this one only checks whether the user requested
* the bcol component, not whether it is actually used in any topology. */
int mca_coll_ml_check_if_bcol_is_requested(const char *component_name)
{
mca_base_component_list_item_t *bcol_comp;
ML_VERBOSE(10, ("Loop over bcol components"));
OPAL_LIST_FOREACH(bcol_comp, &mca_bcol_base_components_in_use, mca_base_component_list_item_t) {
if(0 == strcmp(component_name,
((mca_bcol_base_component_2_0_0_t *)
bcol_comp->cli_component)->bcol_version.mca_component_name)) {
return true;
}
}
/* the component was not requested */
return false;
}
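A hedged usage sketch for the two checks above: callers typically gate component-specific setup on whether a given bcol is requested by the user and actually instantiated in some topology. The bcol name and the surrounding condition below are illustrative assumptions, not code taken from this file.

/* Hypothetical caller: enable an optimization only when the "basesmuma"
 * bcol is both requested and present in at least one topology. */
if (mca_coll_ml_check_if_bcol_is_requested("basesmuma") &&
    mca_coll_ml_check_if_bcol_is_used("basesmuma", ml_module, COLL_ML_TOPO_MAX)) {
    /* ... shared-memory specific setup ... */
}

Note that mca_coll_ml_check_if_bcol_is_used() performs an allreduce over the communicator, so all ranks must call it collectively.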

View file

@@ -1,28 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file */
#ifndef MCA_COLL_ML_CUSTOM_UTILS_H
#define MCA_COLL_ML_CUSTOM_UTILS_H
#include "ompi_config.h"
#include "coll_ml.h"
/* the function is used to check if the bcol name is used in this ml module */
int mca_coll_ml_check_if_bcol_is_used(const char *bcol_name, const mca_coll_ml_module_t *ml_module,
int topo_index);
/* The function is used to check if the bcol component was REQUESTED by user */
int mca_coll_ml_check_if_bcol_is_requested(const char *component_name);
END_C_DECLS
#endif /* MCA_COLL_ML_CUSTOM_UTILS_H */

View file

@@ -1,60 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "coll_ml.h"
#include "coll_ml_inlines.h"
static inline void mca_coll_ml_fragment_constructor(mca_coll_ml_fragment_t *frag)
{
frag->fn_args = NULL;
}
static inline void mca_coll_ml_fragment_destructor(mca_coll_ml_fragment_t *frag)
{
if (frag->fn_args) {
free(frag->fn_args);
frag->fn_args = NULL;
}
}
static inline void mca_coll_ml_descriptor_constructor(mca_coll_ml_descriptor_t *descriptor)
{
OBJ_CONSTRUCT(&(descriptor->fragment),mca_coll_ml_fragment_t);
/* this fragment is always associated with this message descriptor */
descriptor->fragment.full_msg_descriptor = descriptor;
}
static inline void mca_coll_ml_descriptor_destructor(mca_coll_ml_descriptor_t *descriptor)
{
OBJ_DESTRUCT(&(descriptor->fragment));
}
OBJ_CLASS_INSTANCE(
mca_coll_ml_fragment_t,
opal_list_item_t,
mca_coll_ml_fragment_constructor,
mca_coll_ml_fragment_destructor);
OBJ_CLASS_INSTANCE(
mca_coll_ml_descriptor_t,
ompi_request_t,
mca_coll_ml_descriptor_constructor,
mca_coll_ml_descriptor_destructor);
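The OBJ_CLASS_INSTANCE declarations above register these constructor/destructor pairs with OPAL's object system; instances are then managed through the usual reference-counting macros. A minimal lifecycle sketch, assuming standard OPAL semantics:

/* Illustrative lifecycle only. */
mca_coll_ml_descriptor_t *desc = OBJ_NEW(mca_coll_ml_descriptor_t);  /* runs the constructor */
/* ... desc->fragment.full_msg_descriptor already points back at desc ... */
OBJ_RELEASE(desc);  /* runs the destructor when the reference count reaches zero */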

View file

@@ -1,132 +0,0 @@
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file */
#ifndef MCA_COLL_ML_FUNCTIONS_H
#define MCA_COLL_ML_FUNCTIONS_H
#include "ompi_config.h"
BEGIN_C_DECLS
#define ML_MEMSYNC -100
enum {
ML_BARRIER_DEFAULT
};
/* small data algorithm */
/* broadcast functions */
enum {
/* small data algorithm */
ML_BCAST_SMALL_DATA_KNOWN,
/* small data - dynamic decision making supported */
ML_BCAST_SMALL_DATA_UNKNOWN,
/* Sequential algorithm */
ML_BCAST_SMALL_DATA_SEQUENTIAL,
ML_BCAST_LARGE_DATA_KNOWN,
ML_BCAST_LARGE_DATA_UNKNOWN,
ML_BCAST_LARGE_DATA_SEQUENTIAL,
/* marker - all routines above this are expected to be used in
* selection logic that is based on size of the data */
ML_N_DATASIZE_BINS,
/* number of functions - this count also includes the marker entries */
ML_NUM_BCAST_FUNCTIONS
};
/* scatter functions */
enum {
/* small data algorithm */
ML_SCATTER_SMALL_DATA_KNOWN,
/* marker - all routines above this are expected to be used in
* selection logic that is based on size of the data */
ML_SCATTER_N_DATASIZE_BINS,
/* small data - dynamic decision making supported */
ML_SCATTER_SMALL_DATA_UNKNOWN,
/* Sequential algorithm */
ML_SCATTER_SMALL_DATA_SEQUENTIAL,
/* number of functions - this count also includes the marker entries */
ML_NUM_SCATTER_FUNCTIONS
};
/* Allreduce functions */
enum {
/* small data algorithm */
ML_SMALL_DATA_ALLREDUCE,
/* Large data algorithm */
ML_LARGE_DATA_ALLREDUCE,
/* If some of the bcols do not support
all possible types, use these extra algorithms */
/* small data algorithm */
ML_SMALL_DATA_EXTRA_TOPO_ALLREDUCE,
/* large data algorithm */
ML_LARGE_DATA_EXTRA_TOPO_ALLREDUCE,
/* number of functions */
ML_NUM_ALLREDUCE_FUNCTIONS
};
/* Reduce functions */
enum {
/* small data algorithm */
ML_SMALL_DATA_REDUCE,
/* Large data algorithm */
ML_LARGE_DATA_REDUCE,
/* number of functions */
ML_NUM_REDUCE_FUNCTIONS
};
/* Alltoall functions */
enum {
/* small data algorithm */
ML_SMALL_DATA_ALLTOALL,
/* large all to all */
ML_LARGE_DATA_ALLTOALL,
/* number of functions */
ML_NUM_ALLTOALL_FUNCTIONS
};
/* Allgather functions */
enum {
/* small data */
ML_SMALL_DATA_ALLGATHER,
/* large data */
ML_LARGE_DATA_ALLGATHER,
/* number of functions */
ML_NUM_ALLGATHER_FUNCTIONS
};
/* gather functions */
enum {
/* small data */
ML_SMALL_DATA_GATHER,
/* large data */
ML_LARGE_DATA_GATHER,
/* number of functions */
ML_NUM_GATHER_FUNCTIONS
};
END_C_DECLS
#endif /* MCA_COLL_ML_FUNCTIONS_H */
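The *_SMALL_DATA_* / *_LARGE_DATA_* values above serve as indices into per-collective function tables, and selection logic picks an index by comparing the message size against a configured threshold (such as the one parsed from the config file earlier in this change). A hedged sketch of that pattern; the table and threshold names here are assumptions, not declarations from this header:

/* Illustrative selection only. */
int fn_index = (msg_size <= small_msg_threshold)
                   ? ML_BCAST_SMALL_DATA_KNOWN
                   : ML_BCAST_LARGE_DATA_KNOWN;
/* bcast_functions[fn_index] would then be invoked (hypothetical table name). */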

Some files were not shown because too many files changed in this diff.