
Merge pull request #725 from bosilca/treematch

Add a new topo module: Treematch
Jeff Squyres 2015-07-31 15:17:54 -04:00
Parents: 8649a9f6ef e239de581b
Commit: 047eccef8d
34 changed files, 8400 additions and 1 deletion

README (5 changed lines)

@@ -1,7 +1,7 @@
Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
University Research and Technology
Corporation. All rights reserved.
Copyright (c) 2004-2007 The University of Tennessee and The University
Copyright (c) 2004-2015 The University of Tennessee and The University
of Tennessee Research Foundation. All rights
reserved.
Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
@@ -436,6 +436,9 @@ General Run-Time Support Notes
MPI Functionality and Features
------------------------------
- Rank reordering support is available using the TreeMatch library. It is activated
for the graph and dist_graph topologies.
- All MPI-3 functionality is supported.
- When using MPI deprecated functions, some compilers will emit
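The README addition above mentions that rank reordering is activated for the graph and dist_graph topologies. As an illustration only (not part of this commit), here is a minimal sketch of an application call that exercises the dist_graph path handled by this new component; the ring neighborhood and weights are hypothetical:

/* Hypothetical sketch (not from this commit): create a distributed graph
 * topology with reorder=1 so that the selected topo component -- treematch
 * here -- may remap ranks according to the declared communication pattern. */
#include <mpi.h>

int main(int argc, char *argv[])
{
    MPI_Comm newcomm;
    int rank, size;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    /* Simple ring: each rank exchanges data with its two neighbors. */
    int neighbors[2] = { (rank + size - 1) % size, (rank + 1) % size };
    int weights[2]   = { 1, 1 };

    MPI_Dist_graph_create_adjacent(MPI_COMM_WORLD,
                                   2, neighbors, weights,  /* sources */
                                   2, neighbors, weights,  /* destinations */
                                   MPI_INFO_NULL,
                                   1 /* reorder */, &newcomm);

    /* ... communicate on newcomm, where ranks may have been permuted ... */
    MPI_Comm_free(&newcomm);
    MPI_Finalize();
    return 0;
}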

ompi/mca/topo/treematch/Makefile.am (new file, 62 lines)

@@ -0,0 +1,62 @@
#
# Copyright (c) 2011-2015 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2011-2015 INRIA. All rights reserved.
# Copyright (c) 2011-2015 Université Bordeaux 1
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
if topo_treematch_local
extra_treematch_files = treematch/tm_bucket.h \
treematch/tm_hwloc.h treematch/tm_mapping.h \
treematch/tm_timings.h treematch/tm_tree.h \
treematch/tm_kpartitioning.h treematch/uthash.h\
treematch/IntConstantInitializedVector.h \
treematch/tm_mt.h \
treematch/tm_thread_pool.h treematch/tm_verbose.h \
treematch/tm_malloc.h \
treematch/IntConstantInitializedVector.c \
treematch/tm_mt.c \
treematch/tm_thread_pool.c treematch/tm_verbose.c \
treematch/tm_malloc.c \
treematch/tm_mapping.c treematch/tm_timings.c \
treematch/tm_bucket.c treematch/tm_tree.c \
treematch/tm_hwloc.c treematch/tm_kpartitioning.c
endif
sources = \
topo_treematch.h \
topo_treematch_module.c \
topo_treematch_component.c \
topo_treematch_dist_graph_create.c $(extra_treematch_files)
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_ompi_topo_treematch_DSO
lib =
lib_sources =
component = mca_topo_treematch.la
component_sources = $(sources)
else
lib = libmca_topo_treematch.la
lib_sources = $(sources)
component =
component_sources =
endif
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component)
mca_topo_treematch_la_SOURCES = $(component_sources)
mca_topo_treematch_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(lib)
libmca_topo_treematch_la_SOURCES = $(lib_sources)
libmca_topo_treematch_la_LDFLAGS = -module -avoid-version

ompi/mca/topo/treematch/configure.m4 (new file, 87 lines)

@@ -0,0 +1,87 @@
# -*- shell-script -*-
#
# Copyright (c) 2011-2015 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2011-2015 INRIA. All rights reserved.
# Copyright (c) 2011-2015 Universite Bordeaux 1
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_ompi_topo_treematch_CONFIG([action-if-can-compile],
# [action-if-cant-compile])
# -------------------------------------------
AC_DEFUN([MCA_ompi_topo_treematch_CONFIG], [
AC_REQUIRE([MCA_opal_hwloc_CONFIG_REQUIRE])
AC_ARG_WITH([treematch],
[AC_HELP_STRING([--with-treematch(=DIR)],
[Build TreeMatch topology support, optionally adding DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries])],
[],
[with_treematch=yes])
AC_ARG_WITH([treematch-include],
[AC_HELP_STRING([--with-treematch-include(=DIR)],
["Search for TreeMatch headers in DIR"])])
AC_ARG_WITH([treematch-libdir],
[AC_HELP_STRING([--with-treematch-libdir(=DIR)],
["Search for TreeMatch libraries in DIR"])])
treematch_files_local="no"
ompi_check_treematch_dir=$srcdir
ompi_check_treematch_libdir=""
ompi_check_treematch_happy="no"
AS_IF([test "x$with_treematch" != xno],
[AC_MSG_CHECKING([TreeMatch headers])
AS_IF([test "x$with_treematch_include" = x],
[AS_IF([test "x$with_treematch" = xyes],
[treematch_files_local="yes"
with_treematch_include=$OMPI_TOP_SRCDIR/ompi/mca/topo/treematch/treematch],
[with_treematch_include=$with_treematch/include])])
AS_IF([test -f $with_treematch_include/tm_tree.h],
[AS_IF([test "x$with_treematch" = xyes],
[AC_MSG_RESULT([in the source])],
[AC_MSG_RESULT([user provided])])
opal_check_treematch_dir=$with_treematch_include
ompi_check_treematch_happy="yes"],
[AC_MSG_ERROR([missing tm_tree.h (${with_treematch}:${with_treematch_include})])])])
AS_IF([test "$ompi_check_treematch_happy" = "yes"],
[AC_MSG_CHECKING([TreeMatch library])
OPAL_CHECK_WITHDIR([treematch], [$with_treematch_include], [tm_tree.h])
AS_IF([test "x$with_treematch_libdir" = x],
[AS_IF([test "x$with_treematch" != xyes],
[with_treematch_libdir=$with_treematch/lib],
[with_treematch_libdir=$OMPI_TOP_SRCDIR/ompi/mca/topo/treematch/treematch])])
AS_IF([test "x$treematch_files_local" = xno],
[OPAL_CHECK_WITHDIR([treematch-libdir], [$with_treematch_libdir], [libtreematch.*])
AS_IF([test "x$with_treematch" != xno -a "x$with_treematch" != xyes],
[AS_IF([test ! -z "$with_treematch" -a "$with_treematch" != "yes"],
[ompi_check_treematch_dir="$with_treematch"])
AS_IF([test ! -z "$with_treematch_libdir" -a "$with_treematch_libdir" != "yes"],
[ompi_check_treematch_libdir="$with_treematch_libdir"])
OPAL_CHECK_PACKAGE([topo_treematch],
[tm_tree.h],
[treematch],
[build_tree],
[],
[$with_treematch_include],
[$with_treematch_libdir],
[ompi_check_treematch_happy="yes"],
[ompi_check_treematch_happy="no"])],
[ompi_check_treematch_happy="no"])])])
AS_IF([test "$ompi_check_treematch_happy" = "yes"],
[$1],
[AS_IF([test ! -z "$with_treematch" -a "$with_treematch" != "no"],
[AC_MSG_ERROR([TreeMatch support requested but not found. Aborting])])
$2])
AC_CONFIG_FILES([ompi/mca/topo/treematch/Makefile])
AM_CONDITIONAL(topo_treematch_local,
[test "x$treematch_files_local" = "xyes"])
])
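For reference (paths are hypothetical), this macro supports three usage patterns: a bare --with-treematch (also the default) builds against the TreeMatch sources bundled under ompi/mca/topo/treematch/treematch; --with-treematch=DIR looks for tm_tree.h in DIR/include and the TreeMatch library in DIR/lib; and --with-treematch-include=DIR together with --with-treematch-libdir=DIR point at the headers and the library separately.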

ompi/mca/topo/treematch/topo_treematch.h (new file, 80 lines)

@@ -0,0 +1,80 @@
/*
* Copyright (c) 2011-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2011-2015 INRIA. All rights reserved.
* Copyright (c) 2011-2015 Bordeaux Polytechnic Institute
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_TOPO_TREEMATCH_H
#define MCA_TOPO_TREEMATCH_H
#include "ompi_config.h"
#include "ompi/mca/topo/topo.h"
/*
* ******************************************************************
* ******** functions which provide MCA interface compliance *******
* ******************************************************************
* These functions are:
* - mca_topo_treematch_module_open
* - mca_topo_treematch_module_close
* - mca_topo_treematch_module_query
* - mca_topo_treematch_module_finalize
* These functions are always found on the mca_topo_treematch_module
* structure. They are the "meta" functions to ensure smooth operation.
* ******************************************************************
*/
BEGIN_C_DECLS
/*
* Public component instance
*/
typedef struct mca_topo_treematch_component_2_2_0_t {
mca_topo_base_component_2_2_0_t super;
int reorder_mode;
} mca_topo_treematch_component_2_2_0_t;
OMPI_MODULE_DECLSPEC extern mca_topo_treematch_component_2_2_0_t
mca_topo_treematch_component;
/*
* A unique module class for the module so that we can both cache
* module-specific information on the module and have a
* module-specific constructor and destructor.
*/
typedef struct {
mca_topo_base_module_t super;
/* Modules can add their own information here */
} mca_topo_treematch_module_t;
OBJ_CLASS_DECLARATION(mca_topo_treematch_module_t);
/*
* Module functions
*/
int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* module,
ompi_communicator_t *comm_old,
int n, int nodes[],
int degrees[], int targets[],
int weights[],
struct ompi_info_t *info, int reorder,
ompi_communicator_t **newcomm);
/*
* ******************************************************************
* ************ functions implemented in this module end ************
* ******************************************************************
*/
END_C_DECLS
#endif /* MCA_TOPO_TREEMATCH_H */

ompi/mca/topo/treematch/topo_treematch_component.c (new file, 100 lines)

@@ -0,0 +1,100 @@
/*
* Copyright (c) 2011-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2011-2015 INRIA. All rights reserved.
* Copyright (c) 2011-2015 Université Bordeaux 1
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/mca/topo/treematch/topo_treematch.h"
/*
* Public string showing the topo treematch module version number
*/
const char *mca_topo_treematch_component_version_string =
"Open MPI treematch topology MCA component version" OMPI_VERSION;
/*
* Local functions
*/
static int init_query(bool enable_progress_threads, bool enable_mpi_threads);
static struct mca_topo_base_module_t *
comm_query(const ompi_communicator_t *comm, int *priority, uint32_t type);
static int mca_topo_treematch_component_register(void);
/*
* Public component structure
*/
mca_topo_treematch_component_2_2_0_t mca_topo_treematch_component =
{
{
{
MCA_TOPO_BASE_VERSION_2_2_0,
"treematch",
OMPI_MAJOR_VERSION,
OMPI_MINOR_VERSION,
OMPI_RELEASE_VERSION,
NULL, /* component open */
NULL, /* component close */
NULL, /* component query */
mca_topo_treematch_component_register, /* component register */
},
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
init_query,
comm_query
},
0 /* reorder: by default centralized */
};
static int init_query(bool enable_progress_threads, bool enable_mpi_threads)
{
if(NULL == opal_hwloc_topology) {
return OPAL_ERR_NOT_SUPPORTED;
}
return OMPI_SUCCESS;
}
static struct mca_topo_base_module_t *
comm_query(const ompi_communicator_t *comm, int *priority, uint32_t type)
{
mca_topo_treematch_module_t *treematch;
if( OMPI_COMM_DIST_GRAPH != type ) {
return NULL;
}
treematch = OBJ_NEW(mca_topo_treematch_module_t);
if (NULL == treematch) {
return NULL;
}
treematch->super.topo.dist_graph.dist_graph_create = mca_topo_treematch_dist_graph_create;
/* This component has a very low priority -- it's treematch, after all! */
*priority = 42;
treematch->super.type = OMPI_COMM_DIST_GRAPH;
return &(treematch->super);
}
static int mca_topo_treematch_component_register(void)
{
(void)mca_base_component_var_register(&mca_topo_treematch_component.super.topoc_version,
"reorder_mode", "If set the reordering will be done in a partially distributed way (default=0). If partially-distributed only local knowledge will be used, possibly leading to less accurate reordering.", MCA_BASE_VAR_TYPE_INT,
NULL, 0, 0, OPAL_INFO_LVL_2,
MCA_BASE_VAR_SCOPE_READONLY, &mca_topo_treematch_component.reorder_mode);
return OMPI_SUCCESS;
}
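Usage note (assuming Open MPI's usual <framework>_<component>_<variable> naming for registered MCA parameters): the variable registered above should be reachable as topo_treematch_reorder_mode, e.g. mpirun --mca topo_treematch_reorder_mode 1 ./app to request the partially distributed reordering instead of the default centralized one.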

ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c (new file, 907 lines)

@@ -0,0 +1,907 @@
/*
* Copyright (c) 2011-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2011-2015 INRIA. All rights reserved.
* Copyright (c) 2012-2015 Bordeaux Polytechnic Institute
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "opal/constants.h"
#if defined(OPAL_HAVE_HWLOC)
#include "opal/mca/hwloc/hwloc.h"
#endif /* defined(OPAL_HAVE_HWLOC) */
#include "ompi/mca/topo/treematch/topo_treematch.h"
#include "ompi/mca/topo/treematch/treematch/tm_mapping.h"
#include "ompi/mca/topo/base/base.h"
#include "ompi/communicator/communicator.h"
#include "ompi/info/info.h"
#include "ompi/mca/pml/pml.h"
#include "opal/mca/dstore/dstore.h"
#define ERR_EXIT(ERR) \
do { free(local_pattern); \
return (ERR); } \
while(0);
#define FALLBACK() \
do { free(nodes_roots); \
free(local_procs); \
hwloc_bitmap_free(set); \
goto fallback; } \
while(0);
#define MY_STRING_SIZE 64
/*#define __DEBUG__ 1 */
static int check_oversubscribing(int rank,
int num_nodes,
int num_objs_in_node,
int num_procs_in_node,
int *nodes_roots,
int *local_procs,
ompi_communicator_t *comm_old)
{
int oversubscribed = 0;
int local_oversub = 0;
int err;
if (rank == local_procs[0])
if(num_objs_in_node < num_procs_in_node)
local_oversub = 1;
if (rank == 0) {
MPI_Request *reqs = (MPI_Request *)calloc(num_nodes-1, sizeof(MPI_Request));
int *oversub = (int *)calloc(num_nodes, sizeof(int));
int i;
oversub[0] = local_oversub;
for(i = 1; i < num_nodes; i++)
if (OMPI_SUCCESS != ( err = MCA_PML_CALL(irecv(&oversub[i], 1, MPI_INT,
nodes_roots[i], 111, comm_old, &reqs[i-1]))))
return err;
if (OMPI_SUCCESS != ( err = ompi_request_wait_all(num_nodes-1,
reqs, MPI_STATUSES_IGNORE)))
return err;
for(i = 0; i < num_nodes; i++)
oversubscribed += oversub[i];
free(oversub);
free(reqs);
} else {
if (rank == local_procs[0])
if (OMPI_SUCCESS != (err = MCA_PML_CALL(send(&local_oversub, 1, MPI_INT, 0,
111, MCA_PML_BASE_SEND_STANDARD, comm_old))))
return err;
}
if (OMPI_SUCCESS != (err = comm_old->c_coll.coll_bcast(&oversubscribed, 1,
MPI_INT, 0, comm_old,
comm_old->c_coll.coll_bcast_module)))
return err;
return oversubscribed;
}
int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
ompi_communicator_t *comm_old,
int n, int nodes[],
int degrees[], int targets[],
int weights[],
struct ompi_info_t *info, int reorder,
ompi_communicator_t **newcomm)
{
int err;
if (OMPI_SUCCESS != (err = mca_topo_base_dist_graph_distribute(topo_module, comm_old,n,nodes,
degrees,targets,weights,
&(topo_module->mtc.dist_graph))))
return err;
if(!reorder) { /* No reorder. Create a new communicator, then */
/* jump out to attach the dist_graph and return */
fallback:
if( OMPI_SUCCESS == (err = ompi_comm_create(comm_old,
comm_old->c_local_group,
newcomm))){
/* Attach the dist_graph to the newly created communicator */
(*newcomm)->c_flags |= OMPI_COMM_DIST_GRAPH;
(*newcomm)->c_topo = topo_module;
(*newcomm)->c_topo->reorder = reorder;
}
return err;
} else { /* reorder == yes */
mca_topo_base_comm_dist_graph_2_2_0_t *topo = NULL;
ompi_proc_t *proc = NULL;
MPI_Request *reqs = NULL;
hwloc_cpuset_t set;
hwloc_obj_t object,root_obj;
hwloc_obj_t *tracker = NULL;
double *local_pattern = NULL;
int *vpids, *colors = NULL;
int *local_procs = NULL;
int *nodes_roots = NULL;
int *localrank_to_objnum = NULL;
int depth, effective_depth, obj_rank = -1;
int num_objs_in_node = 0;
int num_pus_in_node = 0;
int numlevels = 0;
int num_nodes = 0;
int num_procs_in_node = 0;
int rank, size;
int hwloc_err;
int oversubscribing_objs = 0;
int i, j, idx;
uint32_t val, *pval;
topo = topo_module->mtc.dist_graph;
rank = ompi_comm_rank(comm_old);
size = ompi_comm_size(comm_old);
#ifdef __DEBUG__
fprintf(stdout,"Process rank is : %i\n",rank);
#endif
/* Determine the number of local procs */
/* and the number of ext procs */
for(i = 0 ; i < size ; i++){
proc = ompi_group_peer_lookup(comm_old->c_local_group, i);
if (( i == rank ) ||
(OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)))
num_procs_in_node++;
}
/* Get the ranks of the local procs in comm_old */
local_procs = (int *)malloc(num_procs_in_node * sizeof(int));
for(i = idx = 0 ; i < size ; i++){
proc = ompi_group_peer_lookup(comm_old->c_local_group, i);
if (( i == rank ) ||
(OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)))
local_procs[idx++] = i;
}
vpids = (int *)malloc(size * sizeof(int));
colors = (int *)malloc(size * sizeof(int));
for(i = 0; i < size ; i++) {
proc = ompi_group_peer_lookup(comm_old->c_local_group, i);
pval = &val;
OPAL_MODEX_RECV_VALUE(err, OPAL_DSTORE_NODEID, &(proc->super), &pval, OPAL_UINT32);
if( OPAL_SUCCESS != err ) {
opal_output(0, "Unable to extract peer %s nodeid from the modex.\n",
OMPI_NAME_PRINT(&(proc->super)));
vpids[i] = colors[i] = -1;
continue;
}
vpids[i] = colors[i] = (int)val;
}
#ifdef __DEBUG__
fprintf(stdout,"Process rank (2) is : %i \n",rank);
if ( 0 == rank ){
fprintf(stdout,"local_procs : ");
for(i = 0; i < num_procs_in_node ; i++)
fprintf(stdout," [%i:%i] ",i,local_procs[i]);
fprintf(stdout,"\n");
fprintf(stdout,"Vpids : ");
for(i = 0; i < size ; i++)
fprintf(stdout," [%i:%i] ",i,vpids[i]);
fprintf(stdout,"\n");
}
#endif
/* clean-up dupes in the array */
for(i = 0; i < size ; i++)
if ( -1 == vpids[i] )
continue;
else
for(j = i+1 ; j < size ; j++)
if( vpids[j] != -1 )
if( vpids[i] == vpids[j] )
vpids[j] = -1;
/* compute number of nodes */
for(i = 0; i < size ; i++)
if( vpids[i] != -1 )
num_nodes++;
/* compute local roots ranks in comm_old */
/* Only the global root needs to do this */
if(0 == rank) {
nodes_roots = (int *)calloc(num_nodes,sizeof(int));
for(i = idx = 0; i < size ; i++)
if( vpids[i] != -1 )
nodes_roots[idx++] = i;
#ifdef __DEBUG__
fprintf(stdout,"num nodes is %i\n",num_nodes);
fprintf(stdout,"Root nodes are :\n");
for(i = 0; i < num_nodes ; i++)
fprintf(stdout," [root %i : %i] ",i,nodes_roots[i]);
fprintf(stdout,"\n");
#endif
}
free(vpids);
/* Then, we need to know if the processes are bound. */
/* We assume that all processes are in the same state: */
/* either all bound or none bound. */
hwloc_err = hwloc_topology_init(&opal_hwloc_topology);
if (-1 == hwloc_err) goto fallback;
hwloc_err = hwloc_topology_load(opal_hwloc_topology);
if (-1 == hwloc_err) goto fallback;
root_obj = hwloc_get_root_obj(opal_hwloc_topology);
if (NULL == root_obj) goto fallback;
/* if cpubind returns an error, it will be full anyway */
set = hwloc_bitmap_alloc_full();
hwloc_get_cpubind(opal_hwloc_topology,set,0);
num_pus_in_node = hwloc_get_nbobjs_by_type(opal_hwloc_topology, HWLOC_OBJ_PU);
if(hwloc_bitmap_isincluded(root_obj->cpuset,set)){
/* processes are not bound on the machine */
#ifdef __DEBUG__
if (0 == rank)
fprintf(stdout,">>>>>>>>>>>>> Process Not bound <<<<<<<<<<<<<<<\n");
#endif /* __DEBUG__ */
/* we try to bind to cores or above objects if enough are present */
/* Not sure that cores are present in ALL nodes */
depth = hwloc_get_type_or_above_depth(opal_hwloc_topology,HWLOC_OBJ_CORE);
num_objs_in_node = hwloc_get_nbobjs_by_depth(opal_hwloc_topology,depth);
/* Check for oversubscribing */
oversubscribing_objs = check_oversubscribing(rank,num_nodes,
num_objs_in_node,num_procs_in_node,
nodes_roots,local_procs,comm_old);
if(oversubscribing_objs) {
#ifdef __DEBUG__
fprintf(stdout,"Oversubscribing OBJ/CORES resources => Trying to use PUs \n");
#endif
int oversubscribed_pus = check_oversubscribing(rank,num_nodes,
num_pus_in_node,num_procs_in_node,
nodes_roots,local_procs,comm_old);
if (oversubscribed_pus){
#ifdef __DEBUG__
fprintf(stdout,"Oversubscribing PUs resources => Rank Reordering Impossible \n");
#endif
FALLBACK();
} else {
obj_rank = ompi_process_info.my_local_rank%num_pus_in_node;
effective_depth = hwloc_topology_get_depth(opal_hwloc_topology) - 1;
num_objs_in_node = num_pus_in_node;
#ifdef __DEBUG__
fprintf(stdout,"Process not bound : binding on PU#%i \n",obj_rank);
#endif
}
} else {
obj_rank = ompi_process_info.my_local_rank%num_objs_in_node;
effective_depth = depth;
object = hwloc_get_obj_by_depth(opal_hwloc_topology,effective_depth,obj_rank);
if( NULL == object) FALLBACK();
hwloc_bitmap_copy(set,object->cpuset);
hwloc_bitmap_singlify(set); /* we don't want the process to move */
hwloc_err = hwloc_set_cpubind(opal_hwloc_topology,set,0);
if( -1 == hwloc_err) FALLBACK();
#ifdef __DEBUG__
fprintf(stdout,"Process not bound : binding on OBJ#%i \n",obj_rank);
#endif
}
} else { /* the processes are already bound */
object = hwloc_get_obj_covering_cpuset(opal_hwloc_topology,set);
obj_rank = object->logical_index;
effective_depth = object->depth;
num_objs_in_node = hwloc_get_nbobjs_by_depth(opal_hwloc_topology, effective_depth);
/* Check for oversubscribing */
oversubscribing_objs = check_oversubscribing(rank,num_nodes,
num_objs_in_node,num_procs_in_node,
nodes_roots,local_procs,comm_old);
if(oversubscribing_objs) {
#ifdef __DEBUG__
fprintf(stdout,"Oversubscribing OBJ/CORES resources => Rank Reordering Impossible\n");
#endif
FALLBACK();
}
#ifdef __DEBUG__
fprintf(stdout,"Process %i bound on OBJ #%i \n",rank,obj_rank);
fprintf(stdout,"=====> Num obj in node : %i | num pus in node : %i\n",num_objs_in_node,num_pus_in_node);
#endif
}
reqs = (MPI_Request *)calloc(num_procs_in_node-1,sizeof(MPI_Request));
if( rank == local_procs[0] ) {
/* we need to find the right elements of the hierarchy */
/* and remove the unneeded elements */
/* Only local masters need to do this */
int array_size = effective_depth + 1;
int *myhierarchy = (int *)calloc(array_size,sizeof(int));
for (i = 0; i < array_size ; i++)
myhierarchy[i] = hwloc_get_nbobjs_by_depth(opal_hwloc_topology,i);
numlevels = 1;
for (i = 1; i < array_size; i++)
if ((myhierarchy[i] != 0) && (myhierarchy[i] != myhierarchy[i-1]))
numlevels++;
tracker = (hwloc_obj_t *)calloc(numlevels,sizeof(hwloc_obj_t));
idx = 0;
tracker[idx++] = root_obj;
i = 1;
while (i < array_size){
if ( myhierarchy[i] != myhierarchy[i-1]) {
j = i;
while(myhierarchy[j] == myhierarchy[i])
if (++j > effective_depth)
break;
tracker[idx++] = hwloc_get_obj_by_depth(opal_hwloc_topology,j-1,0);
i = j;
} else i++;
}
free(myhierarchy);
#ifdef __DEBUG__
fprintf(stdout,">>>>>>>>>>>>>>>>>>>>> Effective depth is : %i (total depth %i)| num_levels %i\n",
effective_depth,hwloc_topology_get_depth(opal_hwloc_topology),numlevels);
for(i = 0 ; i < numlevels ; i++)
fprintf(stdout,"tracker[%i] : arity %i | depth %i\n",i,tracker[i]->arity,tracker[i]->depth);
#endif
/* get the obj number */
localrank_to_objnum = (int *)calloc(num_procs_in_node,sizeof(int));
localrank_to_objnum[0] = obj_rank;
for(i = 1; i < num_procs_in_node; i++) {
if (OMPI_SUCCESS != ( err = MCA_PML_CALL(irecv(&localrank_to_objnum[i],1,MPI_INT,
local_procs[i],111, comm_old,&reqs[i-1]))))
return err;
}
if (OMPI_SUCCESS != ( err = ompi_request_wait_all(num_procs_in_node-1,
reqs,MPI_STATUSES_IGNORE)))
return err;
} else {
/* sending my core number to my local master on the node */
if (OMPI_SUCCESS != (err = MCA_PML_CALL(send(&obj_rank, 1, MPI_INT, local_procs[0],
111, MCA_PML_BASE_SEND_STANDARD, comm_old))))
return err;
}
free(reqs);
/* Centralized Reordering */
if (0 == mca_topo_treematch_component.reorder_mode) {
int *k = NULL;
int *obj_mapping = NULL;
int newrank = -1;
int num_objs_total = 0;
/* Gather comm pattern
* If weights have been provided, take them into account. Otherwise rely
* solely on HWLOC information.
*/
if(0 == rank) {
fprintf(stderr,"========== Centralized Reordering ========= \n");
local_pattern = (double *)calloc(size*size,sizeof(double));
if( true == topo->weighted ) {
for(i = 0; i < topo->indegree ; i++)
local_pattern[topo->in[i]] += topo->inw[i];
for(i = 0; i < topo->outdegree ; i++)
local_pattern[topo->out[i]] += topo->outw[i];
if (OMPI_SUCCESS != (err = comm_old->c_coll.coll_gather(MPI_IN_PLACE, size, MPI_DOUBLE,
local_pattern, size, MPI_DOUBLE,
0, comm_old,
comm_old->c_coll.coll_gather_module)))
return err;
}
} else {
local_pattern = (double *)calloc(size,sizeof(double));
if( true == topo->weighted ) {
for(i = 0; i < topo->indegree ; i++)
local_pattern[topo->in[i]] += topo->inw[i];
for(i = 0; i < topo->outdegree ; i++)
local_pattern[topo->out[i]] += topo->outw[i];
if (OMPI_SUCCESS != (err = comm_old->c_coll.coll_gather(local_pattern, size, MPI_DOUBLE,
NULL,0,0,
0, comm_old,
comm_old->c_coll.coll_gather_module)))
return err;
}
}
if( rank == local_procs[0]) {
tm_topology_t *tm_topology = NULL;
tm_topology_t *tm_opt_topology = NULL;
int *obj_to_rank_in_comm = NULL;
int *hierarchies = NULL;
int hierarchy[MAX_LEVELS+1];
int min;
/* create a table that derives the rank in comm_old from the object number */
obj_to_rank_in_comm = (int *)malloc(num_objs_in_node*sizeof(int));
for(i = 0 ; i < num_objs_in_node ; i++)
obj_to_rank_in_comm[i] = -1;
for(i = 0 ; i < num_objs_in_node ; i++) {
object = hwloc_get_obj_by_depth(opal_hwloc_topology,effective_depth,i);
for( j = 0; j < num_procs_in_node ; j++ )
if(localrank_to_objnum[j] == (int)(object->logical_index))
break;
if(j == num_procs_in_node)
obj_to_rank_in_comm[i] = -1;
else {
int k;
for(k = 0; k < size ; k++)
if (k == local_procs[j])
break;
obj_to_rank_in_comm[i] = k;
}
}
/* the global master gathers info from local_masters */
if ( 0 == rank ) {
if ( num_nodes > 1 ) {
int *objs_per_node = NULL ;
int *displs = NULL;
objs_per_node = (int *)calloc(num_nodes,sizeof(int));
reqs = (MPI_Request *)calloc(num_nodes-1,sizeof(MPI_Request));
objs_per_node[0] = num_objs_in_node;
for(i = 1; i < num_nodes ; i++)
if (OMPI_SUCCESS != ( err = MCA_PML_CALL(irecv(objs_per_node + i, 1, MPI_INT,
nodes_roots[i],111,comm_old,&reqs[i-1]))))
ERR_EXIT(err);
if (OMPI_SUCCESS != ( err = ompi_request_wait_all(num_nodes - 1,
reqs,MPI_STATUSES_IGNORE)))
ERR_EXIT(err);
for(i = 0; i < num_nodes; i++)
num_objs_total += objs_per_node[i];
obj_mapping = (int *)calloc(num_objs_total,sizeof(int));
displs = (int *)calloc(num_objs_total,sizeof(int));
displs[0] = 0;
for(i = 1; i < num_nodes ; i++)
displs[i] = displs[i-1] + objs_per_node[i-1];
memset(reqs,0,(num_nodes-1)*sizeof(MPI_Request));
memcpy(obj_mapping,obj_to_rank_in_comm,objs_per_node[0]*sizeof(int));
for(i = 1; i < num_nodes ; i++)
if (OMPI_SUCCESS != ( err = MCA_PML_CALL(irecv(obj_mapping + displs[i], objs_per_node[i], MPI_INT,
nodes_roots[i],111,comm_old,&reqs[i-1]))))
ERR_EXIT(err);
if (OMPI_SUCCESS != ( err = ompi_request_wait_all(num_nodes - 1,
reqs,MPI_STATUSES_IGNORE)))
ERR_EXIT(err);
free(displs);
free(objs_per_node);
} else {
/* if num_nodes == 1, then it's easy to get the obj mapping */
num_objs_total = num_objs_in_node;
obj_mapping = (int *)calloc(num_objs_total,sizeof(int));
memcpy(obj_mapping,obj_to_rank_in_comm,num_objs_total*sizeof(int));
}
#ifdef __DEBUG__
fprintf(stdout,"Obj mapping : ");
for(i = 0 ; i < num_objs_total ; i++)
fprintf(stdout," [%i:%i] ",i,obj_mapping[i]);
fprintf(stdout,"\n");
#endif
} else {
if ( num_nodes > 1 ) {
if (OMPI_SUCCESS != (err = MCA_PML_CALL(send(&num_objs_in_node, 1, MPI_INT,
0, 111, MCA_PML_BASE_SEND_STANDARD, comm_old))))
ERR_EXIT(err);
if (OMPI_SUCCESS != (err = MCA_PML_CALL(send(obj_to_rank_in_comm, num_objs_in_node, MPI_INT,
0, 111, MCA_PML_BASE_SEND_STANDARD, comm_old))))
ERR_EXIT(err);
}
}
free(obj_to_rank_in_comm);
for(i = 0 ; i < (MAX_LEVELS+1) ; i++)
hierarchy[i] = -1;
hierarchy[0] = numlevels;
assert(numlevels < MAX_LEVELS);
for(i = 0 ; i < hierarchy[0] ; i++)
hierarchy[i+1] = tracker[i]->arity;
if( 0 == rank ) {
hierarchies = (int *)malloc(num_nodes*(MAX_LEVELS+1)*sizeof(int));
for(i = 0 ; i < num_nodes*(MAX_LEVELS+1) ; i++)
hierarchies[i] = -1;
}
/* gather hierarchies iff more than 1 node! */
if ( num_nodes > 1 ) {
if(rank != 0) {
if (OMPI_SUCCESS != (err = MCA_PML_CALL(send(hierarchy,(MAX_LEVELS+1), MPI_INT, 0,
111, MCA_PML_BASE_SEND_STANDARD, comm_old))))
ERR_EXIT(err);
} else {
memset(reqs,0,(num_nodes-1)*sizeof(MPI_Request));
for(i = 1; i < num_nodes ; i++)
if (OMPI_SUCCESS != ( err = MCA_PML_CALL(irecv(hierarchies+i*(MAX_LEVELS+1),(MAX_LEVELS+1),MPI_INT,
nodes_roots[i],111,comm_old,&reqs[i-1])))){
free(hierarchies);
ERR_EXIT(err);
}
if (OMPI_SUCCESS != ( err = ompi_request_wait_all(num_nodes - 1,
reqs,MPI_STATUSES_IGNORE))) {
free(hierarchies);
ERR_EXIT(err);
}
free(reqs);
}
}
if ( 0 == rank ) {
tree_t *comm_tree = NULL;
double **comm_pattern = NULL;
int *matching = NULL;
memcpy(hierarchies,hierarchy,(MAX_LEVELS+1)*sizeof(int));
#ifdef __DEBUG__
fprintf(stdout,"hierarchies : ");
for(i = 0 ; i < num_nodes*(MAX_LEVELS+1) ; i++)
fprintf(stdout," [%i] ",hierarchies[i]);
fprintf(stdout,"\n");
#endif
tm_topology = (tm_topology_t *)malloc(sizeof(tm_topology_t));
tm_topology->nb_levels = hierarchies[0];
/* extract min depth */
for(i = 1 ; i < num_nodes ; i++)
if (hierarchies[i*(MAX_LEVELS+1)] < tm_topology->nb_levels)
tm_topology->nb_levels = hierarchies[i*(MAX_LEVELS+1)];
/* Collapse levels in hierarchies that are too deep (i.e. > tm_topology->nb_levels) */
for(i = 0; i < num_nodes ; i++) {
int *base_ptr = hierarchies + i*(MAX_LEVELS+1) ;
int suppl = *base_ptr - tm_topology->nb_levels;
for(j = 1 ; j <= suppl ; j++)
*(base_ptr + tm_topology->nb_levels) *= *(base_ptr + tm_topology->nb_levels + j);
}
if( num_nodes > 1){
/* We aggregate all topos => +1 level!*/
tm_topology->nb_levels += 1;
tm_topology->arity = (int *)calloc(tm_topology->nb_levels,sizeof(int));
tm_topology->arity[0] = num_nodes;
for(i = 0; i < (tm_topology->nb_levels - 1); i++) {
min = *(hierarchies + 1 + i);
for(j = 1; j < num_nodes ; j++)
if( hierarchies[j*(MAX_LEVELS+1) + 1 + i] < min)
min = hierarchies[j*(MAX_LEVELS+1) + 1 + i];
tm_topology->arity[i+1] = min;
}
}else{
tm_topology->arity = (int *)calloc(tm_topology->nb_levels,sizeof(int));
for(i = 0; i < tm_topology->nb_levels; i++)
tm_topology->arity[i] = hierarchies[i+1];
}
free(hierarchies);
/* compute the number of processing elements */
tm_topology->nb_nodes = (int *)calloc(tm_topology->nb_levels,sizeof(int));
tm_topology->nb_nodes[0] = 1;
for(i = 1 ; i < tm_topology->nb_levels; i++)
tm_topology->nb_nodes[i] = tm_topology->nb_nodes[i-1]*tm_topology->arity[i-1];
comm_pattern = (double **)malloc(size*sizeof(double *));
for(i = 0 ; i < size ; i++)
comm_pattern[i] = local_pattern + i*size;
/* matrix needs to be symmetric */
for( i = 0 ; i < size ; i++)
for(j = i ; j < size ; j++) {
comm_pattern[i][j] += comm_pattern[j][i];
comm_pattern[j][i] = comm_pattern[i][j];
}
for( i = 0 ; i < size ; i++)
for(j = 0 ; j < size ; j++)
comm_pattern[i][j] /= 2;
#ifdef __DEBUG__
fprintf(stdout,"==== COMM PATTERN ====\n");
for( i = 0 ; i < size ; i++){
for(j = 0 ; j < size ; j++)
fprintf(stdout," %f ",comm_pattern[i][j]);
fprintf(stdout,"\n");
}
#endif
/* Build process id tab */
tm_topology->node_id = (int **)calloc(tm_topology->nb_levels,sizeof(int*));
for(i = 0; i < tm_topology->nb_levels ; i++) {
tm_topology->node_id[i] = (int *)calloc(tm_topology->nb_nodes[i],sizeof(int));
for (j = 0; j < tm_topology->nb_nodes[i] ; j++)
tm_topology->node_id[i][j] = obj_mapping[j];
}
#ifdef __DEBUG__
for(i = 0; i < tm_topology->nb_levels ; i++) {
fprintf(stdout,"tm topo node_id for level [%i] : ",i);
for(j = 0 ; j < tm_topology->nb_nodes[i] ; j++)
fprintf(stdout," [%i:%i] ",j,obj_mapping[j]);
fprintf(stdout,"\n");
}
display_topology(tm_topology);
#endif
k = (int *)calloc(num_objs_total,sizeof(int));
matching = (int *)calloc(size,sizeof(int));
tm_opt_topology = optimize_topology(tm_topology);
comm_tree = build_tree_from_topology(tm_opt_topology,comm_pattern,size,NULL,NULL);
map_topology_simple(tm_opt_topology,comm_tree,matching,size,k);
#ifdef __DEBUG__
fprintf(stdout,"====> nb levels : %i\n",tm_topology->nb_levels);
fprintf(stdout,"Rank permutation sigma/k : ");
for(i = 0 ; i < num_objs_total ; i++)
fprintf(stdout," [%i:%i] ",i,k[i]);
fprintf(stdout,"\n");
fprintf(stdout,"Matching : ");
for(i = 0 ; i < size ; i++)
fprintf(stdout," [%i:%i] ",i,matching[i]);
fprintf(stdout,"\n");
#endif
free(comm_pattern);
free(comm_tree);
free(matching);
free(obj_mapping);
for(i = 0 ; i < tm_topology->nb_levels ; i++)
free(tm_topology->node_id[i]);
free(tm_topology->node_id);
free(tm_topology->nb_nodes);
free(tm_topology->arity);
free(tm_topology);
FREE_topology(tm_opt_topology);
}
}
/* Todo : Bcast + group creation */
/* scatter the ranks */
if (OMPI_SUCCESS != (err = comm_old->c_coll.coll_scatter(k, 1, MPI_INT,
&newrank, 1, MPI_INT,
0, comm_old,comm_old->c_coll.coll_scatter_module)))
ERR_EXIT(err);
if ( 0 == rank )
free(k);
/* this needs to be optimized but will do for now */
if (OMPI_SUCCESS != (err = ompi_comm_split(comm_old, 0, newrank,newcomm, false)))
ERR_EXIT(err);
/* end of TODO */
/* Attach the dist_graph to the newly created communicator */
(*newcomm)->c_flags |= OMPI_COMM_DIST_GRAPH;
(*newcomm)->c_topo = topo_module;
(*newcomm)->c_topo->reorder = reorder;
} else { /* partially distributed reordering */
ompi_communicator_t *localcomm = NULL;
int *matching = (int *)calloc(num_procs_in_node,sizeof(int));
int *lrank_to_grank = (int *)calloc(num_procs_in_node,sizeof(int));
int *grank_to_lrank = (int *)calloc(size,sizeof(int));
hwloc_obj_t object;
opal_hwloc_locality_t locality;
char set_as_string[64];
opal_value_t kv;
if (OMPI_SUCCESS != (err = ompi_comm_split(comm_old,colors[rank],ompi_process_info.my_local_rank,&localcomm, false)))
return err;
for(i = 0 ; i < num_procs_in_node ; i++)
lrank_to_grank[i] = -1;
lrank_to_grank[ompi_process_info.my_local_rank] = rank;
for(i = 0 ; i < size ; i++)
grank_to_lrank[i] = -1;
if (OMPI_SUCCESS != (err = localcomm->c_coll.coll_allgather(&rank,1,MPI_INT,
lrank_to_grank,1,MPI_INT,
localcomm,
localcomm->c_coll.coll_allgather_module)))
return err;
for(i = 0 ; i < num_procs_in_node ; i++)
grank_to_lrank[lrank_to_grank[i]] = i;
if (rank == local_procs[0]){
tm_topology_t *tm_topology = NULL;
tm_topology_t *tm_opt_topology = NULL;
tree_t *comm_tree = NULL;
double **comm_pattern = NULL;
#ifdef __DEBUG__
fprintf(stderr,"========== Partially Distributed Reordering ========= \n");
#endif
local_pattern = (double *)calloc(num_procs_in_node*num_procs_in_node,sizeof(double));
for(i = 0 ; i < num_procs_in_node*num_procs_in_node ; i++)
local_pattern[i] = 0.0;
if( true == topo->weighted ) {
for(i = 0; i < topo->indegree ; i++)
if (grank_to_lrank[topo->in[i]] != -1)
local_pattern[grank_to_lrank[topo->in[i]]] += topo->inw[i];
for(i = 0; i < topo->outdegree ; i++)
if (grank_to_lrank[topo->out[i]] != -1)
local_pattern[grank_to_lrank[topo->out[i]]] += topo->outw[i];
if (OMPI_SUCCESS != (err = localcomm->c_coll.coll_gather(MPI_IN_PLACE, num_procs_in_node, MPI_DOUBLE,
local_pattern, num_procs_in_node, MPI_DOUBLE,
0,localcomm,
localcomm->c_coll.coll_gather_module)))
ERR_EXIT(err);
}
comm_pattern = (double **)malloc(num_procs_in_node*sizeof(double *));
for(i = 0 ; i < num_procs_in_node ; i++){
comm_pattern[i] = (double *)calloc(num_procs_in_node,sizeof(double));
memcpy((void *)comm_pattern[i],(void *)(local_pattern + i*num_procs_in_node),num_procs_in_node*sizeof(double));
}
/* Matrix needs to be symmetric */
for( i = 0 ; i < num_procs_in_node ; i++)
for(j = i ; j < num_procs_in_node ; j++){
comm_pattern[i][j] += comm_pattern[j][i];
comm_pattern[j][i] = comm_pattern[i][j];
}
for( i = 0 ; i < num_procs_in_node ; i++)
for(j = 0 ; j < num_procs_in_node ; j++)
comm_pattern[i][j] /= 2;
#ifdef __DEBUG__
fprintf(stdout,"========== COMM PATTERN ============= \n");
for(i = 0 ; i < num_procs_in_node ; i++){
fprintf(stdout," %i : ",i);
for(j = 0; j < num_procs_in_node ; j++)
fprintf(stdout," %f ",comm_pattern[i][j]);
fprintf(stdout,"\n");
}
fprintf(stdout,"======================= \n");
#endif
tm_topology = (tm_topology_t *)malloc(sizeof(tm_topology_t));
tm_topology->nb_levels = numlevels;
tm_topology->arity = (int *)calloc(tm_topology->nb_levels,sizeof(int));
tm_topology->nb_nodes = (int *)calloc(tm_topology->nb_levels,sizeof(int));
tm_topology->node_id = (int **)malloc(tm_topology->nb_levels*sizeof(int *));
for(i = 0 ; i < tm_topology->nb_levels ; i++){
int nb_objs = hwloc_get_nbobjs_by_depth(opal_hwloc_topology,tracker[i]->depth);
tm_topology->nb_nodes[i] = nb_objs;
tm_topology->node_id[i] = (int*)malloc(sizeof(int)*nb_objs);
tm_topology->arity[i] = tracker[i]->arity;
for(j = 0 ; j < nb_objs ; j++)
tm_topology->node_id[i][j] = -1;
for(j = 0 ; j < nb_objs ; j++)
if ( j < num_procs_in_node )
tm_topology->node_id[i][j] = localrank_to_objnum[j];
}
#ifdef __DEBUG__
fprintf(stdout,"Levels in topo : %i | num procs in node : %i\n",tm_topology->nb_levels,num_procs_in_node);
for(i = 0; i < tm_topology->nb_levels ; i++){
fprintf(stdout,"Nb objs for level %i : %i | arity %i\n ",i,tm_topology->nb_nodes[i],tm_topology->arity[i]);
for(j = 0; j < tm_topology->nb_nodes[i] ; j++)
fprintf(stdout,"Obj id : %i |",tm_topology->node_id[i][j]);
fprintf(stdout,"\n");
}
display_topology(tm_topology);
#endif
tm_opt_topology = optimize_topology(tm_topology);
comm_tree = build_tree_from_topology(tm_opt_topology,comm_pattern,num_procs_in_node,NULL,NULL);
map_topology_simple(tm_opt_topology,comm_tree,matching,num_procs_in_node,NULL);
#ifdef __DEBUG__
fprintf(stdout,"Matching :");
for(i = 0 ; i < num_procs_in_node ; i++)
fprintf(stdout," %i ",matching[i]);
fprintf(stdout,"\n");
#endif
for(i = 0 ; i < num_procs_in_node ; i++)
free(comm_pattern[i]);
free(comm_pattern);
for(i = 0; i < tm_topology->nb_levels ; i++)
free(tm_topology->node_id[i]);
free(tm_topology->node_id);
free(tm_topology->nb_nodes);
free(tm_topology->arity);
free(tm_topology);
FREE_topology(tm_opt_topology);
} else {
local_pattern = (double *)calloc(num_procs_in_node,sizeof(double));
for(i = 0 ; i < num_procs_in_node ; i++)
local_pattern[i] = 0.0;
if( true == topo->weighted ) {
for(i = 0; i < topo->indegree ; i++)
if (grank_to_lrank[topo->in[i]] != -1)
local_pattern[grank_to_lrank[topo->in[i]]] += topo->inw[i];
for(i = 0; i < topo->outdegree ; i++)
if (grank_to_lrank[topo->out[i]] != -1)
local_pattern[grank_to_lrank[topo->out[i]]] += topo->outw[i];
if (OMPI_SUCCESS != (err = localcomm->c_coll.coll_gather(local_pattern, num_procs_in_node, MPI_DOUBLE,
NULL,0,0,
0,localcomm,
localcomm->c_coll.coll_gather_module)))
ERR_EXIT(err);
}
}
if (OMPI_SUCCESS != (err = localcomm->c_coll.coll_bcast(matching, num_procs_in_node,
MPI_INT,0,localcomm,
localcomm->c_coll.coll_bcast_module)))
ERR_EXIT(err);
object = hwloc_get_obj_by_depth(opal_hwloc_topology,
effective_depth,matching[ompi_process_info.my_local_rank]);
if( NULL == object) goto fallback;
hwloc_bitmap_copy(set,object->cpuset);
hwloc_bitmap_singlify(set);
hwloc_err = hwloc_set_cpubind(opal_hwloc_topology,set,0);
if( -1 == hwloc_err) goto fallback;
/* Report new binding to ORTE/OPAL */
/* hwloc_bitmap_list_asprintf(&orte_process_info.cpuset,set); */
err = hwloc_bitmap_snprintf (set_as_string,64,set);
#ifdef __DEBUG__
fprintf(stdout,"Bitmap str size : %i\n",err);
#endif
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(OPAL_DSTORE_CPUSET);
kv.type = OPAL_STRING;
kv.data.string = strdup(set_as_string);
(void)opal_dstore.store(opal_dstore_internal, (opal_process_name_t*)ORTE_PROC_MY_NAME, &kv);
OBJ_DESTRUCT(&kv);
locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology,
orte_process_info.cpuset,set_as_string);
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(OPAL_DSTORE_LOCALITY);
kv.type = OPAL_UINT16;
kv.data.uint16 = locality;
(void)opal_dstore.store(opal_dstore_internal, (opal_process_name_t*)ORTE_PROC_MY_NAME, &kv);
OBJ_DESTRUCT(&kv);
if( OMPI_SUCCESS != (err = ompi_comm_create(comm_old,
comm_old->c_local_group,
newcomm))){
ERR_EXIT(err);
} else {
/* Attach the dist_graph to the newly created communicator */
(*newcomm)->c_flags |= OMPI_COMM_DIST_GRAPH;
(*newcomm)->c_topo = topo_module;
(*newcomm)->c_topo->reorder = reorder;
}
free(matching);
free(grank_to_lrank);
free(lrank_to_grank);
} /* distributed reordering end */
if(rank == local_procs[0])
free(tracker);
free(nodes_roots);
free(local_procs);
free(local_pattern);
free(localrank_to_objnum);
free(colors);
hwloc_bitmap_free(set);
} /* reorder == yes */
return err;
}
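To summarize the flow implemented above: in the centralized mode (reorder_mode == 0), rank 0 gathers the full size x size communication matrix and the per-node hardware hierarchies, lets TreeMatch compute a permutation over all processing elements (optimize_topology, build_tree_from_topology, map_topology_simple), scatters one entry of that permutation to each rank, and uses it as the key of ompi_comm_split() to build the reordered communicator. In the partially distributed mode, each node's local master solves the same problem with only the on-node portion of the pattern, and the processes are re-bound to the matched hardware objects (hwloc_set_cpubind), with the new cpuset and locality pushed to the dstore before the new communicator is created.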

ompi/mca/topo/treematch/topo_treematch_module.c (new file, 45 lines)

@@ -0,0 +1,45 @@
/*
* Copyright (c) 2011-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2011-2015 INRIA. All rights reserved.
* Copyright (c) 2011-2015 Université Bordeaux 1
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <stdio.h>
#include "mpi.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/topo/topo.h"
#include "ompi/mca/topo/base/base.h"
#include "ompi/mca/topo/treematch/topo_treematch.h"
/*
* Local functions
*/
static void treematch_module_constructor(mca_topo_treematch_module_t *u);
static void treematch_module_destructor(mca_topo_treematch_module_t *u);
OBJ_CLASS_INSTANCE(mca_topo_treematch_module_t, mca_topo_base_module_t,
treematch_module_constructor, treematch_module_destructor);
static void treematch_module_constructor(mca_topo_treematch_module_t *u)
{
mca_topo_base_module_t *m = &(u->super);
memset(&m->topo, 0, sizeof(m->topo));
}
static void treematch_module_destructor(mca_topo_treematch_module_t *u)
{
/* Do whatever is necessary to clean up / destroy the module */
}

ompi/mca/topo/treematch/treematch/COPYING (new file, 8 lines)

@@ -0,0 +1,8 @@
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
3. Neither the name of Inria nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

ompi/mca/topo/treematch/treematch/IntConstantInitializedVector.c (new file, 61 lines)

@@ -0,0 +1,61 @@
#include <stdlib.h>
#include <stdio.h>
#include "IntConstantInitializedVector.h"
int intCIV_isInitialized(int_CIVector * v, int i)
{
if(v->top == 0)
return 0;
if(v->from[i] >= 0)
if(v->from[i] < v->top && v->to[v->from[i]] == i)
return 1;
return 0;
}
void intCIV_init(int_CIVector * v, int size, int init_value)
{
v->init_value = init_value;
v->size = size;
v->top = 0;
v->to = malloc(sizeof(int)*size);
v->from = malloc(sizeof(int)*size);
v->vec = malloc(sizeof(int)*size);
}
void intCIV_exit(int_CIVector * v)
{
free(v->to);
free(v->from);
free(v->vec);
}
int intCIV_set(int_CIVector * v, int i, int val)
{
if(v == NULL)
return -1;
if(i < 0 || i >= v->size)
return -1;
if(!intCIV_isInitialized(v,i))
{
v->from[i] = v->top;
v->to[v->top] = i;
v->top++;
}
v->vec[i] = val;
return 0;
}
int intCIV_get(int_CIVector * v, int i)
{
if(v == NULL)
return -1;
if(i < 0 || i >= v->size)
return -1;
if(intCIV_isInitialized(v,i))
return v->vec[i];
return v->init_value;
}

ompi/mca/topo/treematch/treematch/IntConstantInitializedVector.h (new file, 16 lines)

@@ -0,0 +1,16 @@
#ifndef INTEGER_CONSTANT_INITIALIZED_VECTOR
#define INTEGER_CONSTANT_INITIALIZED_VECTOR
typedef struct int_CIVector_
{
int init_value, size, top, *to, *from, *vec;
} int_CIVector;
int intCIV_isInitialized(int_CIVector * v, int i);
void intCIV_init(int_CIVector * v, int size, int init_value);
void intCIV_exit(int_CIVector * v);
int intCIV_set(int_CIVector * v, int i, int val);
int intCIV_get(int_CIVector * v, int i);
#endif /*INTEGER_CONSTANT_INITIALIZED_VECTOR*/
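A minimal usage sketch (hypothetical, not part of this commit) of the constant-initialized vector declared above: reads return init_value until an index has been explicitly set, and only the touched entries are tracked through the to/from cross-references, so the backing arrays never need to be cleared.

#include <assert.h>
#include "IntConstantInitializedVector.h"

int main(void)
{
    int_CIVector v;

    intCIV_init(&v, 1000, -1);          /* logical size 1000, default value -1 */
    assert(intCIV_get(&v, 42) == -1);   /* never set: reads back init_value */
    intCIV_set(&v, 42, 7);
    assert(intCIV_get(&v, 42) == 7);    /* set entries return their stored value */
    intCIV_exit(&v);
    return 0;
}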

ompi/mca/topo/treematch/treematch/LICENSE (new file, 515 lines)

@@ -0,0 +1,515 @@
CeCILL-B FREE SOFTWARE LICENSE AGREEMENT
Notice
This Agreement is a Free Software license agreement that is the result
of discussions between its authors in order to ensure compliance with
the two main principles guiding its drafting:
* firstly, compliance with the principles governing the distribution
of Free Software: access to source code, broad rights granted to
users,
* secondly, the election of a governing law, French law, with which
it is conformant, both as regards the law of torts and
intellectual property law, and the protection that it offers to
both authors and holders of the economic rights over software.
The authors of the CeCILL-B (for Ce[a] C[nrs] I[nria] L[ogiciel] L[ibre])
license are:
Commissariat à l'Energie Atomique - CEA, a public scientific, technical
and industrial research establishment, having its principal place of
business at 25 rue Leblanc, immeuble Le Ponant D, 75015 Paris, France.
Centre National de la Recherche Scientifique - CNRS, a public scientific
and technological establishment, having its principal place of business
at 3 rue Michel-Ange, 75794 Paris cedex 16, France.
Institut National de Recherche en Informatique et en Automatique -
INRIA, a public scientific and technological establishment, having its
principal place of business at Domaine de Voluceau, Rocquencourt, BP
105, 78153 Le Chesnay cedex, France.
Preamble
This Agreement is an open source software license intended to give users
significant freedom to modify and redistribute the software licensed
hereunder.
The exercising of this freedom is conditional upon a strong obligation
of giving credits for everybody that distributes a software
incorporating a software ruled by the current license so as all
contributions to be properly identified and acknowledged.
In consideration of access to the source code and the rights to copy,
modify and redistribute granted by the license, users are provided only
with a limited warranty and the software's author, the holder of the
economic rights, and the successive licensors only have limited liability.
In this respect, the risks associated with loading, using, modifying
and/or developing or reproducing the software by the user are brought to
the user's attention, given its Free Software status, which may make it
complicated to use, with the result that its use is reserved for
developers and experienced professionals having in-depth computer
knowledge. Users are therefore encouraged to load and test the
suitability of the software as regards their requirements in conditions
enabling the security of their systems and/or data to be ensured and,
more generally, to use and operate it in the same conditions of
security. This Agreement may be freely reproduced and published,
provided it is not altered, and that no provisions are either added or
removed herefrom.
This Agreement may apply to any or all software for which the holder of
the economic rights decides to submit the use thereof to its provisions.
Article 1 - DEFINITIONS
For the purpose of this Agreement, when the following expressions
commence with a capital letter, they shall have the following meaning:
Agreement: means this license agreement, and its possible subsequent
versions and annexes.
Software: means the software in its Object Code and/or Source Code form
and, where applicable, its documentation, "as is" when the Licensee
accepts the Agreement.
Initial Software: means the Software in its Source Code and possibly its
Object Code form and, where applicable, its documentation, "as is" when
it is first distributed under the terms and conditions of the Agreement.
Modified Software: means the Software modified by at least one
Contribution.
Source Code: means all the Software's instructions and program lines to
which access is required so as to modify the Software.
Object Code: means the binary files originating from the compilation of
the Source Code.
Holder: means the holder(s) of the economic rights over the Initial
Software.
Licensee: means the Software user(s) having accepted the Agreement.
Contributor: means a Licensee having made at least one Contribution.
Licensor: means the Holder, or any other individual or legal entity, who
distributes the Software under the Agreement.
Contribution: means any or all modifications, corrections, translations,
adaptations and/or new functions integrated into the Software by any or
all Contributors, as well as any or all Internal Modules.
Module: means a set of sources files including their documentation that
enables supplementary functions or services in addition to those offered
by the Software.
External Module: means any or all Modules, not derived from the
Software, so that this Module and the Software run in separate address
spaces, with one calling the other when they are run.
Internal Module: means any or all Module, connected to the Software so
that they both execute in the same address space.
Parties: mean both the Licensee and the Licensor.
These expressions may be used both in singular and plural form.
Article 2 - PURPOSE
The purpose of the Agreement is the grant by the Licensor to the
Licensee of a non-exclusive, transferable and worldwide license for the
Software as set forth in Article 5 hereinafter for the whole term of the
protection granted by the rights over said Software.
Article 3 - ACCEPTANCE
3.1 The Licensee shall be deemed as having accepted the terms and
conditions of this Agreement upon the occurrence of the first of the
following events:
* (i) loading the Software by any or all means, notably, by
downloading from a remote server, or by loading from a physical
medium;
* (ii) the first time the Licensee exercises any of the rights
granted hereunder.
3.2 One copy of the Agreement, containing a notice relating to the
characteristics of the Software, to the limited warranty, and to the
fact that its use is restricted to experienced users has been provided
to the Licensee prior to its acceptance as set forth in Article 3.1
hereinabove, and the Licensee hereby acknowledges that it has read and
understood it.
Article 4 - EFFECTIVE DATE AND TERM
4.1 EFFECTIVE DATE
The Agreement shall become effective on the date when it is accepted by
the Licensee as set forth in Article 3.1.
4.2 TERM
The Agreement shall remain in force for the entire legal term of
protection of the economic rights over the Software.
Article 5 - SCOPE OF RIGHTS GRANTED
The Licensor hereby grants to the Licensee, who accepts, the following
rights over the Software for any or all use, and for the term of the
Agreement, on the basis of the terms and conditions set forth hereinafter.
Besides, if the Licensor owns or comes to own one or more patents
protecting all or part of the functions of the Software or of its
components, the Licensor undertakes not to enforce the rights granted by
these patents against successive Licensees using, exploiting or
modifying the Software. If these patents are transferred, the Licensor
undertakes to have the transferees subscribe to the obligations set
forth in this paragraph.
5.1 RIGHT OF USE
The Licensee is authorized to use the Software, without any limitation
as to its fields of application, with it being hereinafter specified
that this comprises:
1. permanent or temporary reproduction of all or part of the Software
by any or all means and in any or all form.
2. loading, displaying, running, or storing the Software on any or
all medium.
3. entitlement to observe, study or test its operation so as to
determine the ideas and principles behind any or all constituent
elements of said Software. This shall apply when the Licensee
carries out any or all loading, displaying, running, transmission
or storage operation as regards the Software, that it is entitled
to carry out hereunder.
5.2 ENTITLEMENT TO MAKE CONTRIBUTIONS
The right to make Contributions includes the right to translate, adapt,
arrange, or make any or all modifications to the Software, and the right
to reproduce the resulting software.
The Licensee is authorized to make any or all Contributions to the
Software provided that it includes an explicit notice that it is the
author of said Contribution and indicates the date of the creation thereof.
5.3 RIGHT OF DISTRIBUTION
In particular, the right of distribution includes the right to publish,
transmit and communicate the Software to the general public on any or
all medium, and by any or all means, and the right to market, either in
consideration of a fee, or free of charge, one or more copies of the
Software by any means.
The Licensee is further authorized to distribute copies of the modified
or unmodified Software to third parties according to the terms and
conditions set forth hereinafter.
5.3.1 DISTRIBUTION OF SOFTWARE WITHOUT MODIFICATION
The Licensee is authorized to distribute true copies of the Software in
Source Code or Object Code form, provided that said distribution
complies with all the provisions of the Agreement and is accompanied by:
1. a copy of the Agreement,
2. a notice relating to the limitation of both the Licensor's
warranty and liability as set forth in Articles 8 and 9,
and that, in the event that only the Object Code of the Software is
redistributed, the Licensee allows effective access to the full Source
Code of the Software at a minimum during the entire period of its
distribution of the Software, it being understood that the additional
cost of acquiring the Source Code shall not exceed the cost of
transferring the data.
5.3.2 DISTRIBUTION OF MODIFIED SOFTWARE
If the Licensee makes any Contribution to the Software, the resulting
Modified Software may be distributed under a license agreement other
than this Agreement subject to compliance with the provisions of Article
5.3.4.
5.3.3 DISTRIBUTION OF EXTERNAL MODULES
When the Licensee has developed an External Module, the terms and
conditions of this Agreement do not apply to said External Module, that
may be distributed under a separate license agreement.
5.3.4 CREDITS
Any Licensee who may distribute a Modified Software hereby expressly
agrees to:
1. indicate in the related documentation that it is based on the
Software licensed hereunder, and reproduce the intellectual
property notice for the Software,
2. ensure that written indications of the Software intended use,
intellectual property notice and license hereunder are included in
easily accessible format from the Modified Software interface,
3. mention, on a freely accessible website describing the Modified
Software, at least throughout the distribution term thereof, that
it is based on the Software licensed hereunder, and reproduce the
Software intellectual property notice,
4. where it is distributed to a third party that may distribute a
Modified Software without having to make its source code
available, make its best efforts to ensure that said third party
agrees to comply with the obligations set forth in this Article .
If the Software, whether or not modified, is distributed with an
External Module designed for use in connection with the Software, the
Licensee shall submit said External Module to the foregoing obligations.
5.3.5 COMPATIBILITY WITH THE CeCILL AND CeCILL-C LICENSES
Where a Modified Software contains a Contribution subject to the CeCILL
license, the provisions set forth in Article 5.3.4 shall be optional.
A Modified Software may be distributed under the CeCILL-C license. In
such a case the provisions set forth in Article 5.3.4 shall be optional.
Article 6 - INTELLECTUAL PROPERTY
6.1 OVER THE INITIAL SOFTWARE
The Holder owns the economic rights over the Initial Software. Any or
all use of the Initial Software is subject to compliance with the terms
and conditions under which the Holder has elected to distribute its work
and no one shall be entitled to modify the terms and conditions for the
distribution of said Initial Software.
The Holder undertakes that the Initial Software will remain ruled at
least by this Agreement, for the duration set forth in Article 4.2.
6.2 OVER THE CONTRIBUTIONS
The Licensee who develops a Contribution is the owner of the
intellectual property rights over this Contribution as defined by
applicable law.
6.3 OVER THE EXTERNAL MODULES
The Licensee who develops an External Module is the owner of the
intellectual property rights over this External Module as defined by
applicable law and is free to choose the type of agreement that shall
govern its distribution.
6.4 JOINT PROVISIONS
The Licensee expressly undertakes:
1. not to remove, or modify, in any manner, the intellectual property
notices attached to the Software;
2. to reproduce said notices, in an identical manner, in the copies
of the Software modified or not.
The Licensee undertakes not to directly or indirectly infringe the
intellectual property rights of the Holder and/or Contributors on the
Software and to take, where applicable, vis-à-vis its staff, any and all
measures required to ensure respect of said intellectual property rights
of the Holder and/or Contributors.
Article 7 - RELATED SERVICES
7.1 Under no circumstances shall the Agreement oblige the Licensor to
provide technical assistance or maintenance services for the Software.
However, the Licensor is entitled to offer this type of services. The
terms and conditions of such technical assistance, and/or such
maintenance, shall be set forth in a separate instrument. Only the
Licensor offering said maintenance and/or technical assistance services
shall incur liability therefor.
7.2 Similarly, any Licensor is entitled to offer to its licensees, under
its sole responsibility, a warranty, that shall only be binding upon
itself, for the redistribution of the Software and/or the Modified
Software, under terms and conditions that it is free to decide. Said
warranty, and the financial terms and conditions of its application,
shall be subject of a separate instrument executed between the Licensor
and the Licensee.
Article 8 - LIABILITY
8.1 Subject to the provisions of Article 8.2, the Licensee shall be
entitled to claim compensation for any direct loss it may have suffered
from the Software as a result of a fault on the part of the relevant
Licensor, subject to providing evidence thereof.
8.2 The Licensor's liability is limited to the commitments made under
this Agreement and shall not be incurred as a result of in particular:
(i) loss due to the Licensee's total or partial failure to fulfill its
obligations, (ii) direct or consequential loss that is suffered by the
Licensee due to the use or performance of the Software, and (iii) more
generally, any consequential loss. In particular the Parties expressly
agree that any or all pecuniary or business loss (i.e. loss of data,
loss of profits, operating loss, loss of customers or orders,
opportunity cost, any disturbance to business activities) or any or all
legal proceedings instituted against the Licensee by a third party,
shall constitute consequential loss and shall not provide entitlement to
any or all compensation from the Licensor.
Article 9 - WARRANTY
9.1 The Licensee acknowledges that the scientific and technical
state-of-the-art when the Software was distributed did not enable all
possible uses to be tested and verified, nor for the presence of
possible defects to be detected. In this respect, the Licensee's
attention has been drawn to the risks associated with loading, using,
modifying and/or developing and reproducing the Software which are
reserved for experienced users.
The Licensee shall be responsible for verifying, by any or all means,
the suitability of the product for its requirements, its good working
order, and for ensuring that it shall not cause damage to either persons
or properties.
9.2 The Licensor hereby represents, in good faith, that it is entitled
to grant all the rights over the Software (including in particular the
rights set forth in Article 5).
9.3 The Licensee acknowledges that the Software is supplied "as is" by
the Licensor without any other express or tacit warranty, other than
that provided for in Article 9.2 and, in particular, without any warranty
as to its commercial value, its secured, safe, innovative or relevant
nature.
Specifically, the Licensor does not warrant that the Software is free
from any error, that it will operate without interruption, that it will
be compatible with the Licensee's own equipment and software
configuration, nor that it will meet the Licensee's requirements.
9.4 The Licensor does not either expressly or tacitly warrant that the
Software does not infringe any third party intellectual property right
relating to a patent, software or any other property right. Therefore,
the Licensor disclaims any and all liability towards the Licensee
arising out of any or all proceedings for infringement that may be
instituted in respect of the use, modification and redistribution of the
Software. Nevertheless, should such proceedings be instituted against
the Licensee, the Licensor shall provide it with technical and legal
assistance for its defense. Such technical and legal assistance shall be
decided on a case-by-case basis between the relevant Licensor and the
Licensee pursuant to a memorandum of understanding. The Licensor
disclaims any and all liability as regards the Licensee's use of the
name of the Software. No warranty is given as regards the existence of
prior rights over the name of the Software or as regards the existence
of a trademark.
Article 10 - TERMINATION
10.1 In the event of a breach by the Licensee of its obligations
hereunder, the Licensor may automatically terminate this Agreement
thirty (30) days after notice has been sent to the Licensee and has
remained ineffective.
10.2 A Licensee whose Agreement is terminated shall no longer be
authorized to use, modify or distribute the Software. However, any
licenses that it may have granted prior to termination of the Agreement
shall remain valid subject to their having been granted in compliance
with the terms and conditions hereof.
Article 11 - MISCELLANEOUS
11.1 EXCUSABLE EVENTS
Neither Party shall be liable for any or all delay, or failure to
perform the Agreement, that may be attributable to an event of force
majeure, an act of God or an outside cause, such as defective
functioning or interruptions of the electricity or telecommunications
networks, network paralysis following a virus attack, intervention by
government authorities, natural disasters, water damage, earthquakes,
fire, explosions, strikes and labor unrest, war, etc.
11.2 Any failure by either Party, on one or more occasions, to invoke
one or more of the provisions hereof, shall under no circumstances be
interpreted as being a waiver by the interested Party of its right to
invoke said provision(s) subsequently.
11.3 The Agreement cancels and replaces any or all previous agreements,
whether written or oral, between the Parties and having the same
purpose, and constitutes the entirety of the agreement between said
Parties concerning said purpose. No supplement or modification to the
terms and conditions hereof shall be effective as between the Parties
unless it is made in writing and signed by their duly authorized
representatives.
11.4 In the event that one or more of the provisions hereof were to
conflict with a current or future applicable act or legislative text,
said act or legislative text shall prevail, and the Parties shall make
the necessary amendments so as to comply with said act or legislative
text. All other provisions shall remain effective. Similarly, invalidity
of a provision of the Agreement, for any reason whatsoever, shall not
cause the Agreement as a whole to be invalid.
11.5 LANGUAGE
The Agreement is drafted in both French and English and both versions
are deemed authentic.
Article 12 - NEW VERSIONS OF THE AGREEMENT
12.1 Any person is authorized to duplicate and distribute copies of this
Agreement.
12.2 So as to ensure coherence, the wording of this Agreement is
protected and may only be modified by the authors of the License, who
reserve the right to periodically publish updates or new versions of the
Agreement, each with a separate number. These subsequent versions may
address new issues encountered by Free Software.
12.3 Any Software distributed under a given version of the Agreement may
only be subsequently distributed under the same version of the Agreement
or a subsequent version.
Article 13 - GOVERNING LAW AND JURISDICTION
13.1 The Agreement is governed by French law. The Parties agree to
endeavor to seek an amicable solution to any disagreements or disputes
that may arise during the performance of the Agreement.
13.2 Failing an amicable solution within two (2) months as from their
occurrence, and unless emergency proceedings are necessary, the
disagreements or disputes shall be referred to the Paris Courts having
jurisdiction, by the more diligent Party.
Version 1.0 dated 2006-09-05.

56
ompi/mca/topo/treematch/treematch/tgt_map.c Обычный файл
Просмотреть файл

@ -0,0 +1,56 @@
#include <stdio.h>
#include <string.h>
#include <ctype.h>
//#include "tm_hwloc.h"
#include "tm_tree.h"
#include "tm_mapping.h"
#include "tm_timings.h"
int main(int argc, char**argv){
tree_t *comm_tree=NULL;
double **comm,**arch;
tm_topology_t *topology;
int nb_processes,nb_cores;
int *sol,*k;
if(argc<3){
fprintf(stderr,"Usage: %s <Architecture tgt> <communication partern file>\n",argv[0]);
return -1;
}
topology=tgt_to_tm(argv[1],&arch);
optimize_topology(&topology);
nb_processes=build_comm(argv[2],&comm);
sol=(int*)MALLOC(sizeof(int)*nb_processes);
nb_cores=nb_processing_units(topology);
k=(int*)MALLOC(sizeof(int)*nb_cores);
// TreeMatchMapping(nb_processes,nb_cores,comm,sol);
if(nb_processes>nb_cores){
fprintf(stderr,"Error: to many processes (%d) for this topology (%d nodes)\n",nb_processes,nb_cores);
exit(-1);
}
TIC;
comm_tree=build_tree_from_topology(topology,comm,nb_processes,NULL,NULL);
map_topology_simple(topology,comm_tree,sol,k);
double duration=TOC;
printf("mapping duration: %f\n",duration);
printf("TreeMatch: ");
print_sol_inv(nb_processes,sol,comm,arch);
//print_1D_tab(k,nb_cores);
// display_other_heuristics(topology,nb_processes,comm,arch);
//display_tab(arch,nb_cores);
FREE_topology(topology);
//FREE_tree(comm_tree);
FREE(sol);
FREE(comm);
FREE(arch);
return 0;
}

Просмотреть файл

@ -0,0 +1,31 @@
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "tm_hwloc.h"
#include "tm_tree.h"
#include "tm_mapping.h"
#include "tm_timings.h"
int main(int argc, char**argv){
tm_topology_t *topology;
int nb_cores;
double **arch;
if(argc<2){
fprintf(stderr,"Usage: %s <Architecture tgt>\n",argv[0]);
return -1;
}
topology=tgt_to_tm(argv[1],&arch);
nb_cores=nb_nodes(topology);
display_tab(arch,nb_cores);
FREE_topology(topology);
FREE(arch);
return 0;
}

669
ompi/mca/topo/treematch/treematch/tm_bucket.c Обычный файл
Просмотреть файл

@ -0,0 +1,669 @@
#include <stdio.h>
#include <float.h>
#include <math.h>
#include <assert.h>
#include "tm_tree.h"
#include "tm_bucket.h"
#include "tm_timings.h"
#include "tm_verbose.h"
#include "tm_thread_pool.h"
#include "tm_mt.h"
#ifdef _WIN32
#include <windows.h>
#include <winbase.h>
#endif
#ifndef __CHARMC__
#define __CHARMC__ 0
#endif
#if __CHARMC__
#include "converse.h"
#else
static int ilog2(int val)
{
int i = 0;
for( ; val != 0; val >>= 1, i++ );
return i;
}
#define CmiLog2(VAL) ilog2((int)(VAL))
#endif
static int verbose_level = ERROR;
bucket_list_t global_bl;
int tab_cmp(const void*,const void*);
int old_bucket_id(int,int,bucket_list_t);
int bucket_id(int,int,bucket_list_t);
void display_bucket(bucket_t *);
void check_bucket(bucket_t *,double **,double, double);
void display_pivots(bucket_list_t);
void display_bucket_list(bucket_list_t);
void add_to_bucket(int,int,int,bucket_list_t);
void dfs(int,int,int,double *,double *,int,int);
void built_pivot_tree(bucket_list_t);
void fill_buckets(bucket_list_t);
int is_power_of_2(int);
void partial_sort(bucket_list_t *,double **,int);
void next_bucket_elem(bucket_list_t,int *,int *);
int add_edge_3(tree_t *,tree_t *,int,int,int *);
void FREE_bucket(bucket_t *);
void FREE_tab_bucket(bucket_t **,int);
void FREE_bucket_list(bucket_list_t);
void partial_update_val (int nb_args, void **args);
int tab_cmp(const void* x1,const void* x2)
{
int *e1 = NULL,*e2 = NULL,i1,i2,j1,j2;
double **tab = NULL;
bucket_list_t bl;
bl = global_bl;
e1 = ((int *)x1);
e2 = ((int *)x2);
tab = bl->tab;
i1 = e1[0];
j1 = e1[1];
i2 = e2[0];
j2 = e2[1];
if(tab[i1][j1]==tab[i2][j2]){
if(i1==i2){
return (j1 > j2) ? -1 : 1;
}else{
return (i1 > i2) ? -1 : 1;
}
}
return (tab[i1][j1] > tab[i2][j2]) ? -1 : 1;
}
int old_bucket_id(int i,int j,bucket_list_t bucket_list)
{
double *pivot = NULL,val;
int n,sup,inf,p;
pivot = bucket_list->pivot;
n = bucket_list->nb_buckets;
val = bucket_list->tab[i][j];
inf = -1;
sup = n;
while( (sup - inf) > 1){
p = (sup + inf)/2;
/* printf("%f [%d,%d,%d]=%f\n",val,inf,p,sup,pivot[p]); */
if( val < pivot[p] ){
inf = p;
if( inf == sup )
inf--;
} else {
sup = p;
if( sup == inf )
sup++;
}
}
/*exit(-1);*/
return sup;
}
int bucket_id(int i,int j,bucket_list_t bucket_list)
{
double *pivot_tree = NULL,val;
int p,k;
pivot_tree = bucket_list->pivot_tree;
val = bucket_list->tab[i][j];
p = 1;
for( k = 0 ; k < bucket_list->max_depth ; k++){
if( val > pivot_tree[p] )
p = p*2;
else
p = p*2 + 1;
}
return (int)pivot_tree[p];
}
void display_bucket(bucket_t *b)
{
printf("\tb.bucket=%p\n",(void *)b->bucket);
printf("\tb.bucket_len=%d\n",(int)b->bucket_len);
printf("\tb.nb_elem=%d\n",(int)b->nb_elem);
}
void check_bucket(bucket_t *b,double **tab,double inf, double sup)
{
int i,j,k;
for( k = 0 ; k < b->nb_elem ; k++ ){
i = b->bucket[k].i;
j = b->bucket[k].j;
if((tab[i][j] < inf) || (tab[i][j] > sup)){
if(verbose_level >= CRITICAL)
printf("[%d] (%d,%d):%f not in [%f,%f]\n",k,i,j,tab[i][j],inf,sup);
exit(-1);
}
}
}
void display_pivots(bucket_list_t bucket_list)
{
int i;
for( i = 0 ; i < bucket_list->nb_buckets-1 ; i++)
printf("pivot[%d]=%f\n",i,bucket_list->pivot[i]);
printf("\n");
}
void display_bucket_list(bucket_list_t bucket_list)
{
int i;
double inf,sup;
/*display_pivots(bucket_list);*/
for(i = 0 ; i < bucket_list->nb_buckets ; i++){
inf = bucket_list->pivot[i];
sup = bucket_list->pivot[i-1];
if( i == 0 )
sup=DBL_MAX;
if( i == bucket_list->nb_buckets - 1 )
inf = 0;
if(verbose_level >= DEBUG){
printf("Bucket %d:\n",i);
display_bucket(bucket_list->bucket_tab[i]);
printf("\n");
}
check_bucket(bucket_list->bucket_tab[i],bucket_list->tab,inf,sup);
}
}
void add_to_bucket(int id,int i,int j,bucket_list_t bucket_list)
{
bucket_t *bucket = NULL;
int N,n,size;
bucket = bucket_list->bucket_tab[id];
/* display_bucket(bucket);*/
if( bucket->bucket_len == bucket->nb_elem ){
N = bucket_list->N;
n = bucket_list->nb_buckets;
size = N*N/n;
/* display_bucket(bucket);*/
bucket->bucket = (coord*)realloc(bucket->bucket,sizeof(coord)*(size + bucket->bucket_len));
bucket->bucket_len += size;
if(verbose_level >= DEBUG){
printf("MALLOC/realloc: %d\n",id);
printf("(%d,%d)\n",i,j);
display_bucket(bucket);
printf("\n");
}
}
bucket->bucket[bucket->nb_elem].i=i;
bucket->bucket[bucket->nb_elem].j=j;
bucket->nb_elem++;
/* printf("\n"); */
/* exit(-1); */
}
void dfs(int i,int inf,int sup,double *pivot,double *pivot_tree,int depth,int max_depth)
{
int p;
if( depth == max_depth )
return;
p = (inf + sup)/2;
pivot_tree[i] = pivot[p-1];
dfs(2*i,inf,p-1,pivot,pivot_tree,depth+1,max_depth);
dfs(2*i+1,p+1,sup,pivot,pivot_tree,depth+1,max_depth);
}
void built_pivot_tree(bucket_list_t bucket_list)
{
double *pivot_tree = NULL,*pivot = NULL;
int n,i,k;
pivot = bucket_list->pivot;
n = bucket_list->nb_buckets;
pivot_tree = (double*)MALLOC(sizeof(double)*2*n);
bucket_list->max_depth = (int)CmiLog2(n) - 1;
dfs(1,1,n-1,pivot,pivot_tree,0,bucket_list->max_depth);
k = 0;
pivot_tree[0] = -1;
for( i = n ; i < 2*n ; i++)
pivot_tree[i] = k++;
bucket_list->pivot_tree = pivot_tree;
if(verbose_level >= DEBUG){
for(i=0;i<2*n;i++)
printf("%d:%f\t",i,pivot_tree[i]);
printf("\n");
}
}
void fill_buckets(bucket_list_t bucket_list)
{
int N,i,j,id;
N = bucket_list->N;
for( i = 0 ; i < N ; i++ )
for( j = i+1 ; j < N ; j++ ){
id = bucket_id(i,j,bucket_list);
add_to_bucket(id,i,j,bucket_list);
}
}
int is_power_of_2(int val)
{
int n = 1;
do{
if( n == val)
return 1;
n <<= 1;
}while( n > 0);
return 0;
}
void partial_sort(bucket_list_t *bl,double **tab,int N)
{
double *pivot = NULL;
int *sample = NULL;
int i,j,k,n,id;
bucket_list_t bucket_list;
int nb_buckets, nb_bits;
/* after these operations, nb_buckets is a power of 2 integer close to log2(N)*/
nb_buckets = (int)floor(CmiLog2(N));
nb_bits = (int)ceil(CmiLog2(nb_buckets));
nb_buckets = nb_buckets >> (nb_bits-1);
nb_buckets = nb_buckets << (nb_bits-1);
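/* worked example (illustrative, assuming the local ilog2() above):
   N = 1000 -> CmiLog2(N) = 10, so nb_buckets starts at 10;
   nb_bits = CmiLog2(10) = 4; then 10 >> 3 == 1 and 1 << 3 == 8,
   hence nb_buckets = 8, a power of 2 close to log2(1000) */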
/* check the result*/
if(!is_power_of_2(nb_buckets)){
if(verbose_level >= ERROR)
fprintf(stderr,"Error! Paramater nb_buckets is: %d and should be a power of 2\n",nb_buckets);
exit(-1);
}
bucket_list = (bucket_list_t)MALLOC(sizeof(_bucket_list_t));
bucket_list->tab = tab;
bucket_list->N = N;
n = pow(nb_buckets,2);
if(verbose_level >= INFO)
printf("N=%d, n=%d\n",N,n);
sample = (int*)MALLOC(2*sizeof(int)*n);
for( k = 0 ; k < n ; k++ ){
i = genrand_int32()%(N-2)+1;
if( i == N-2 )
j = N-1;
else
j = genrand_int32()%(N-i-2)+i+1;
if(verbose_level >= DEBUG)
printf("i=%d, j=%d\n",i,j);
assert( i != j );
assert( i < j );
assert( i < N );
assert( j < N );
sample[2*k] = i;
sample[2*k+1] = j;
}
/* printf("k=%d\n",k); */
global_bl = bucket_list;
qsort(sample,n,2*sizeof(int),tab_cmp);
if(verbose_level >= DEBUG)
for(k=0;k<n;k++){
i=sample[2*k];
j=sample[2*k+1];
printf("%f\n",tab[i][j]);
}
pivot = (double*)MALLOC(sizeof(double)*nb_buckets-1);
id = 1;
for( k = 1 ; k < nb_buckets ; k++ ){
/* fprintf(stderr,"k=%d, id=%d\n",k,id); */
i = sample[2*(id-1)];
j = sample[2*(id-1)+1];
id *= 2;
/* i=sample[k*N/nb_buckets]/N;
j=sample[k*N/nb_buckets]%N;*/
pivot[k-1] = tab[i][j];
/* printf("pivot[%d]=%f\n",k-1,tab[i][j]); */
}
bucket_list->pivot = pivot;
bucket_list->nb_buckets = nb_buckets;
built_pivot_tree(bucket_list);
bucket_list->bucket_tab = (bucket_t**)MALLOC(nb_buckets*sizeof(bucket_t*));
for( i = 0 ; i < nb_buckets ; i++ )
bucket_list->bucket_tab[i] = (bucket_t*)CALLOC(1,sizeof(bucket_t));
fill_buckets(bucket_list);
/* display_bucket_list(bucket_list); */
bucket_list->cur_bucket = 0;
bucket_list->bucket_indice = 0;
FREE(sample);
*bl = bucket_list;
}
void next_bucket_elem(bucket_list_t bucket_list,int *i,int *j)
{
bucket_t *bucket = bucket_list->bucket_tab[bucket_list->cur_bucket];
/* display_bucket_list(bucket_list);
printf("nb_elem: %d, indice: %d, bucket_id: %d\n",(int)bucket->nb_elem,bucket_list->bucket_indice,bucket_list->cur_bucket);
*/
while( bucket->nb_elem <= bucket_list->bucket_indice ){
bucket_list->bucket_indice = 0;
bucket_list->cur_bucket++;
bucket = bucket_list->bucket_tab[bucket_list->cur_bucket];
if(verbose_level >= DEBUG){
printf("### From bucket %d to bucket %d\n",bucket_list->cur_bucket-1,bucket_list->cur_bucket);
printf("nb_elem: %d, indice: %d, bucket_id: %d\n",(int)bucket->nb_elem,bucket_list->bucket_indice,bucket_list->cur_bucket);
}
}
if(!bucket->sorted){
global_bl = bucket_list;
qsort(bucket->bucket,bucket->nb_elem,2*sizeof(int),tab_cmp);
bucket->sorted = 1;
}
*i = bucket->bucket[bucket_list->bucket_indice].i;
*j = bucket->bucket[bucket_list->bucket_indice].j;
bucket_list->bucket_indice++;
}
int add_edge_3(tree_t *tab_node, tree_t *parent,int i,int j,int *nb_groups)
{
/* printf("%d <-> %d ?\n",tab_node[i].id,tab_node[j].id); */
if((!tab_node[i].parent) && (!tab_node[j].parent)){
if(parent){
parent->child[0] = &tab_node[i];
parent->child[1] = &tab_node[j];
tab_node[i].parent = parent;
tab_node[j].parent = parent;
if(verbose_level >= DEBUG)
printf("%d: %d-%d\n",*nb_groups,parent->child[0]->id,parent->child[1]->id);
return 1;
}
return 0;
}
if( tab_node[i].parent && (!tab_node[j].parent) ){
parent = tab_node[i].parent;
if(!parent->child[2]){
parent->child[2] = &tab_node[j];
tab_node[j].parent = parent;
if(verbose_level >= DEBUG)
printf("%d: %d-%d-%d\n",*nb_groups,parent->child[0]->id,parent->child[1]->id,parent->child[2]->id);
(*nb_groups)++;
}
return 0;
}
if(tab_node[j].parent && (!tab_node[i].parent)){
parent = tab_node[j].parent;
if(!parent->child[2]){
parent->child[2] = &tab_node[i];
tab_node[i].parent = parent;
if(verbose_level >= DEBUG)
printf("%d: %d-%d-%d\n",*nb_groups,parent->child[0]->id,parent->child[1]->id,parent->child[2]->id);
(*nb_groups)++;
}
return 0;
}
return 0;
}
int try_add_edge(tree_t *tab_node, tree_t *parent,int arity,int i,int j,int *nb_groups)
{
assert( i != j );
switch(arity){
case 2:
if(tab_node[i].parent)
return 0;
if(tab_node[j].parent)
return 0;
parent->child[0] = &tab_node[i];
parent->child[1] = &tab_node[j];
tab_node[i].parent = parent;
tab_node[j].parent = parent;
(*nb_groups)++;
return 1;
case 3:
return add_edge_3(tab_node,parent,i,j,nb_groups);
default:
if(verbose_level >= ERROR)
fprintf(stderr,"Cannot handle arity %d\n",parent->arity);
exit(-1);
}
}
void FREE_bucket(bucket_t *bucket)
{
FREE(bucket->bucket);
FREE(bucket);
}
void FREE_tab_bucket(bucket_t **bucket_tab,int N)
{
int i;
for( i = 0 ; i < N ; i++ )
FREE_bucket(bucket_tab[i]);
FREE(bucket_tab);
}
void FREE_bucket_list(bucket_list_t bucket_list)
{
/* Do not FREE the tab field it is used elsewhere */
FREE_tab_bucket(bucket_list->bucket_tab,bucket_list->nb_buckets);
FREE(bucket_list->pivot);
FREE(bucket_list->pivot_tree);
FREE(bucket_list);
}
void partial_update_val (int nb_args, void **args){
int inf = *(int*)args[0];
int sup = *(int*)args[1];
affinity_mat_t *aff_mat = (affinity_mat_t*)args[2];
tree_t *new_tab_node = (tree_t*)args[3];
double *res=(double*)args[4];
int l;
if(nb_args != 6){
if(verbose_level >= ERROR)
fprintf(stderr,"Wrong number of args in %s: %d\n",__FUNCTION__, nb_args);
exit(-1);
}
for( l = inf ; l < sup ; l++ ){
update_val(aff_mat,&new_tab_node[l]);
*res += new_tab_node[l].val;
}
}
void bucket_grouping(affinity_mat_t *aff_mat,tree_t *tab_node, tree_t *new_tab_node,
int arity,int M)
{
bucket_list_t bucket_list;
double duration,val = 0;
int l,i,j,nb_groups;
double gr1_1=0;
double gr1_2=0;
double gr1, gr2, gr3;
int N = aff_mat->order;
double **mat = aff_mat->mat;
verbose_level = get_verbose_level();
if(verbose_level >= INFO )
printf("starting sort of N=%d elements\n",N);
TIC;
partial_sort(&bucket_list,mat,N);
duration = TOC;
if(verbose_level >= INFO)
printf("Partial sorting=%fs\n",duration);
if(verbose_level >= DEBUG)
display_pivots(bucket_list);
TIC;
l = 0;
i = 0;
nb_groups = 0;
TIC;
if(verbose_level >= INFO){
while( l < M ){
TIC;
next_bucket_elem(bucket_list,&i,&j);
if(verbose_level >= DEBUG)
printf("elem[%d][%d]=%f ",i,j,mat[i][j]);
gr1_1 += TOC;
TIC;
if(try_add_edge(tab_node,&new_tab_node[l],arity,i,j,&nb_groups)){
l++;
}
gr1_2 += TOC;
}
}else{
while( l < M ){
next_bucket_elem(bucket_list,&i,&j);
if(try_add_edge(tab_node,&new_tab_node[l],arity,i,j,&nb_groups)){
l++;
}
}
}
gr1=TOC;
if(verbose_level >= INFO)
printf("Grouping phase 1=%fs (%fs+%fs) \n",gr1, gr1_1, gr1_2);
if(verbose_level >= DEBUG)
printf("l=%d,nb_groups=%d\n",l,nb_groups);
TIC;
while( nb_groups < M ){
next_bucket_elem(bucket_list,&i,&j);
try_add_edge(tab_node,NULL,arity,i,j,&nb_groups);
}
gr2=TOC;
if(verbose_level >= INFO)
printf("Grouping phase 2=%fs\n",gr2);
if(verbose_level >= DEBUG)
printf("l=%d,nb_groups=%d\n",l,nb_groups);
TIC;
if(M>512){ /* perform this part in parallel*/
int id;
int nb_threads;
work_t **works;
int *inf;
int *sup;
double *tab_val;
nb_threads = get_nb_threads();
works = (work_t**)MALLOC(sizeof(work_t*)*nb_threads);
inf = (int*)MALLOC(sizeof(int)*nb_threads);
sup = (int*)MALLOC(sizeof(int)*nb_threads);
tab_val = (double*)CALLOC(nb_threads,sizeof(double));
for(id=0;id<nb_threads;id++){
void **args=(void**)MALLOC(sizeof(void*)*5);
inf[id]=id*M/nb_threads;
sup[id]=(id+1)*M/nb_threads;
if(id == nb_threads-1) sup[id]=M;
args[0]=(void*)(inf+id);
args[1]=(void*)(sup+id);
args[2]=(void*)aff_mat;
args[3]=(void*)new_tab_node;
args[4]=(void*)(tab_val+id);
works[id]= create_work(5,args,partial_update_val);
if(verbose_level >= DEBUG)
printf("Executing %p\n",(void *)works[id]);
submit_work( works[id], id);
}
for(id=0;id<nb_threads;id++){
wait_work_completion(works[id]);
val+=tab_val[id];
FREE(works[id]->args);
}
FREE(inf);
FREE(sup);
FREE(tab_val);
FREE(works);
}else{
for( l = 0 ; l < M ; l++ ){
update_val(aff_mat,&new_tab_node[l]);
val += new_tab_node[l].val;
}
}
gr3=TOC;
if(verbose_level >= INFO)
printf("Grouping phase 3=%fs\n",gr3);
/* printf("val=%f\n",val);exit(-1); */
duration = TOC;
if(verbose_level >= INFO)
printf("Grouping =%fs\n",duration);
if(verbose_level >= DEBUG){
printf("Bucket: %d, indice:%d\n",bucket_list->cur_bucket,bucket_list->bucket_indice);
printf("val=%f\n",val);
}
FREE_bucket_list(bucket_list);
/* exit(-1); */
/* display_grouping(new_tab_node,M,arity,val); */
}

Просмотреть файл

@ -0,0 +1,34 @@
#ifndef __BUCKET_H__
#define __BUCKET_H__
typedef struct{
int i;
int j;
}coord;
typedef struct{
coord * bucket; /* store i,j */
int bucket_len; /* allocated size in the heap */
int nb_elem; /* number of useful elements (nb_elem should be lower than bucket_len) */
int sorted;
}bucket_t;
typedef struct{
bucket_t **bucket_tab;
int nb_buckets;
double **tab;
int N;/* length of tab */
/* For iterating over the buckets */
int cur_bucket;
int bucket_indice;
double *pivot;
double *pivot_tree;
int max_depth;
}_bucket_list_t;
typedef _bucket_list_t *bucket_list_t;
void bucket_grouping(affinity_mat_t *aff_mat,tree_t *tab_node, tree_t *new_tab_node,
int arity,int M);
int try_add_edge(tree_t *tab_node, tree_t *parent,int arity,int i,int j,int *nb_groups);
#endif

280
ompi/mca/topo/treematch/treematch/tm_hwloc.c Обычный файл
Просмотреть файл

@ -0,0 +1,280 @@
#include <hwloc.h>
#include <hwloc/helper.h>
#include "tm_tree.h"
#include "tm_mapping.h"
#include <ctype.h>
#include "tm_verbose.h"
double ** tm_topology_to_arch(tm_topology_t *topology,double *cost);
tm_topology_t * tgt_to_tm(char *filename,double **pcost);
int topo_nb_proc(hwloc_topology_t topology,int N);
double ** topology_to_arch(hwloc_topology_t topology);
int symetric(hwloc_topology_t topology);
tm_topology_t* hwloc_to_tm(char *filename,double **pcost);
tm_topology_t* get_local_topo_with_hwloc(void);
/* transform a tgt scotch file into a topology file*/
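/* illustrative input (hypothetical values): a line such as
      tleaf 3 2 100 4 50 8 10
   is parsed below as a 4-level topology (2*4*8 = 64 processing units)
   whose internal levels have arities 2, 4 and 8 and per-level costs
   100, 50 and 10; the bottom-up aggregation below then turns the
   costs into 160, 60 and 10 */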
tm_topology_t * tgt_to_tm(char *filename, double **pcost)
{
tm_topology_t *topology = NULL;
FILE *pf = NULL;
char line[1024];
char *s = NULL;
double *cost = NULL;
int i;
pf = fopen(filename,"r");
if(!pf){
if(get_verbose_level() >= CRITICAL)
fprintf(stderr,"Cannot open %s\n",filename);
exit(-1);
}
if(get_verbose_level() >= INFO)
printf("Reading TGT file: %s\n",filename);
fgets(line,1024,pf);
s = strstr(line,"tleaf");
if(!s){
if(get_verbose_level() >= CRITICAL)
fprintf(stderr,"Syntax error! %s is not a tleaf file\n",filename);
exit(-1);
}
s += 5;
while(isspace(*s))
s++;
topology = (tm_topology_t*)MALLOC(sizeof(tm_topology_t));
topology->nb_levels = atoi(strtok(s," "))+1;
topology->arity = (int*)MALLOC(sizeof(int)*topology->nb_levels);
cost = (double*)CALLOC(topology->nb_levels,sizeof(double));
for( i = 0 ; i < topology->nb_levels-1 ; i++ ){
topology->arity[i] = atoi(strtok(NULL," "));
cost[i] = atoi(strtok(NULL," "));
}
topology->arity[topology->nb_levels-1] = 0;
/* cost[topology->nb_levels-1]=0; */
/*aggregate costs*/
for( i = topology->nb_levels-2 ; i >= 0 ; i-- )
cost[i] += cost[i+1];
build_synthetic_proc_id(topology);
*pcost = cost;
/* FREE(cost); */
/*
topology->arity[0]=nb_proc;
topology->nb_levels=decompose((int)ceil((1.0*nb_obj)/nb_proc),1,topology->arity);
printf("levels=%d\n",topology->nb_levels);
*/
if(get_verbose_level() >= INFO)
printf("Topology built from %s!\n",filename);
return topology;
}
int topo_nb_proc(hwloc_topology_t topology,int N)
{
hwloc_obj_t *objs = NULL;
int nb_proc;
objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*N);
objs[0] = hwloc_get_next_obj_by_type(topology,HWLOC_OBJ_PU,NULL);
nb_proc = 1 + hwloc_get_closest_objs(topology,objs[0],objs+1,N-1);
FREE(objs);
return nb_proc;
}
double ** topology_to_arch(hwloc_topology_t topology)
{
int nb_proc,i,j;
hwloc_obj_t obj_proc1,obj_proc2,obj_res;
double **arch = NULL;
nb_proc = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU);
arch = (double**)MALLOC(sizeof(double*)*nb_proc);
for( i = 0 ; i < nb_proc ; i++ ){
obj_proc1 = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,i);
arch[obj_proc1->os_index] = (double*)MALLOC(sizeof(double)*nb_proc);
for( j = 0 ; j < nb_proc ; j++ ){
obj_proc2 = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,j);
obj_res = hwloc_get_common_ancestor_obj(topology,obj_proc1,obj_proc2);
/* printf("arch[%d][%d] <- %ld\n",obj_proc1->os_index,obj_proc2->os_index,*((long int*)(obj_res->userdatab))); */
arch[obj_proc1->os_index][obj_proc2->os_index]=speed(obj_res->depth+1);
}
}
return arch;
}
int symetric(hwloc_topology_t topology)
{
int depth,i,topodepth = hwloc_topology_get_depth(topology);
unsigned int arity;
hwloc_obj_t obj;
for ( depth = 0; depth < topodepth-1 ; depth++ ) {
int N = hwloc_get_nbobjs_by_depth(topology, depth);
obj = hwloc_get_next_obj_by_depth (topology,depth,NULL);
arity = obj->arity;
/* printf("Depth=%d, N=%d, Arity:%d\n",depth,N,arity); */
for (i = 1; i < N; i++ ){
obj = hwloc_get_next_obj_by_depth (topology,depth,obj);
if( obj->arity != arity){
/* printf("[%d]: obj->arity=%d, arity=%d\n",i,obj->arity,arity); */
return 0;
}
}
}
return 1;
}
tm_topology_t* hwloc_to_tm(char *filename,double **pcost)
{
hwloc_topology_t topology;
tm_topology_t *res = NULL;
hwloc_obj_t *objs = NULL;
unsigned topodepth,depth;
int nb_nodes,i;
double *cost;
int err;
/* Build the topology */
hwloc_topology_init(&topology);
err = hwloc_topology_set_xml(topology,filename);
if(err == -1){
if(get_verbose_level() >= CRITICAL)
fprintf(stderr,"Error: %s is a bad xml topology file!\n",filename);
exit(-1);
}
hwloc_topology_ignore_all_keep_structure(topology);
hwloc_topology_load(topology);
/* Test if symetric */
if(!symetric(topology)){
if(get_verbose_level() >= CRITICAL)
fprintf(stderr,"%s not symetric!\n",filename);
exit(-1);
}
/* work on depth */
topodepth = hwloc_topology_get_depth(topology);
res = (tm_topology_t*)MALLOC(sizeof(tm_topology_t));
res->nb_levels = topodepth;
res->node_id = (int**)MALLOC(sizeof(int*)*res->nb_levels);
res->nb_nodes = (int*)MALLOC(sizeof(int)*res->nb_levels);
res->arity = (int*)MALLOC(sizeof(int)*res->nb_levels);
if(get_verbose_level() >= INFO)
printf("topodepth = %d\n",topodepth);
/* Build TreeMatch topology */
for( depth = 0 ; depth < topodepth ; depth++ ){
nb_nodes = hwloc_get_nbobjs_by_depth(topology, depth);
res->nb_nodes[depth] = nb_nodes;
res->node_id[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);
objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes);
objs[0] = hwloc_get_next_obj_by_depth(topology,depth,NULL);
hwloc_get_closest_objs(topology,objs[0],objs+1,nb_nodes-1);
res->arity[depth] = objs[0]->arity;
if(get_verbose_level() >= INFO)
printf("%d(%d):",res->arity[depth],nb_nodes);
/* Build process id tab */
for (i = 0; i < nb_nodes; i++){
res->node_id[depth][i] = objs[i]->os_index;
/* if(depth==topodepth-1) */
}
FREE(objs);
}
cost = (double*)CALLOC(res->nb_levels,sizeof(double));
for(i=0; i<res->nb_levels; i++){
cost[i] = speed(i);
}
*pcost = cost;
/* Destroy topology object. */
hwloc_topology_destroy(topology);
if(get_verbose_level() >= INFO)
printf("\n");
return res;
}
tm_topology_t* get_local_topo_with_hwloc(void)
{
hwloc_topology_t topology;
tm_topology_t *res = NULL;
hwloc_obj_t *objs = NULL;
unsigned topodepth,depth;
int nb_nodes,i;
/* Build the topology */
hwloc_topology_init(&topology);
hwloc_topology_ignore_all_keep_structure(topology);
hwloc_topology_load(topology);
/* Test if symetric */
if(!symetric(topology)){
if(get_verbose_level() >= CRITICAL)
fprintf(stderr,"Local toplogy not symetric!\n");
exit(-1);
}
/* work on depth */
topodepth = hwloc_topology_get_depth(topology);
res = (tm_topology_t*)MALLOC(sizeof(tm_topology_t));
res->nb_levels = topodepth;
res->node_id = (int**)MALLOC(sizeof(int*)*res->nb_levels);
res->nb_nodes = (int*)MALLOC(sizeof(int)*res->nb_levels);
res->arity = (int*)MALLOC(sizeof(int)*res->nb_levels);
/* Build TreeMatch topology */
for( depth = 0 ; depth < topodepth ; depth++ ){
nb_nodes = hwloc_get_nbobjs_by_depth(topology, depth);
res->nb_nodes[depth] = nb_nodes;
res->node_id[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);
objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes);
objs[0] = hwloc_get_next_obj_by_depth(topology,depth,NULL);
hwloc_get_closest_objs(topology,objs[0],objs+1,nb_nodes-1);
res->arity[depth] = objs[0]->arity;
/* printf("%d:",res->arity[depth]); */
/* Build process id tab */
for (i = 0; i < nb_nodes; i++){
res->node_id[depth][i] = objs[i]->os_index;
/* if(depth==topodepth-1) */
}
FREE(objs);
}
/* Destroy HWLOC topology object. */
hwloc_topology_destroy(topology);
/* printf("\n"); */
return res;
}

Просмотреть файл

@ -0,0 +1,7 @@
#include <hwloc.h>
#include "tm_tree.h"
void hwloc_topology_tag(hwloc_topology_t topology);
tm_topology_t* hwloc_to_tm(char *filename,double **pcost);
tm_topology_t * tgt_to_tm(char *filename,double **pcost);
tm_topology_t* get_local_topo_with_hwloc(void);

Просмотреть файл

@ -0,0 +1,505 @@
#include "tm_mapping.h"
#include "tm_mt.h"
#include "tm_kpartitioning.h"
#include <stdlib.h>
#include <stdio.h>
#define USE_KL_KPART 0
#if USE_KL_KPART
#include "k-partitioning.h"
#endif /* USE_KL_KPART */
#define KL_KPART_GREEDY_TRIALS 0
static int verbose_level = ERROR;
#define MAX_TRIALS 10
#define USE_KL_STRATEGY 1
#define MIN(a,b) ((a)<(b)?(a):(b))
int fill_tab(int **,int *,int,int,int,int);
void complete_com_mat(double ***,int,int);
void complete_obj_weight(double **,int,int);
void allocate_vertex(int,int *,com_mat_t *,int,int *,int);
double eval_cost(int *, com_mat_t *);
int *kpartition_greedy(int, com_mat_t *,int,int *,int);
constraint_t *split_constraints (int *,int,int,tm_topology_t *,int);
com_mat_t **split_com_mat(com_mat_t *,int,int,int *);
int **split_vertices(int *,int,int,int *);
void FREE_tab_com_mat(com_mat_t **,int);
void FREE_tab_local_vertices(int **,int);
void FREE_const_tab(constraint_t *,int);
void kpartition_build_level_topology(tree_t *,com_mat_t *,int,int,tm_topology_t *,
int *,int *,int,double *,double *);
void allocate_vertex(int u, int *res, com_mat_t *com_mat, int n, int *size, int max_size)
{
int i,best_part=0;
double cost, best_cost = -1;
/*printf("\n");
print_1D_tab(res,n);*/
if(u>=com_mat->n){
for( i = 0 ; i < n ; i++)
if (( res[i] != -1 ) && ( size[res[i]] < max_size )){
best_part = res[i];
break;
}
}else{
for( i = 0 ; i < n ; i++){
if (( res[i] != -1 ) && ( size[res[i]] < max_size )){
cost = (((i)<com_mat->n)) ?com_mat->comm[u][i]:0;
if (( cost > best_cost)){
best_cost = cost;
best_part = res[i];
}
}
}
}
/* printf("size[%d]: %d\n",best_part, size[best_part]);*/
/* printf("putting(%.2f): %d -> %d\n",best_cost, u, best_part); */
res[u] = best_part;
size[best_part]++;
}
double eval_cost(int *partition, com_mat_t *com_mat)
{
double cost = 0;
int i,j;
for( i = 0 ; i < com_mat->n ; i++ )
for( j = i+1 ; j < com_mat->n ; j++ )
if(partition[i] != partition[j])
cost += com_mat->comm[i][j];
return cost;
}
int *kpartition_greedy(int k, com_mat_t *com_mat, int n, int *constraints, int nb_constraints)
{
int *res = NULL, *best_res=NULL, *size = NULL;
int i,j,nb_trials;
int max_size, max_val;
double cost, best_cost = -1;
int start, end;
int dumb_id, nb_dumb;
for( nb_trials = 0 ; nb_trials < MAX_TRIALS ; nb_trials++ ){
res = (int *)MALLOC(sizeof(int)*n);
for ( i = 0 ; i < n ; i ++ )
res[i] = -1;
size = (int *)CALLOC(k,sizeof(int));
max_size = n/k;
/*printf("Constraints: ");print_1D_tab(constraints,nb_constraints);*/
/* put "dumb" vertices in the correct partition if there are any*/
if (nb_constraints){
start = 0;
dumb_id = n-1;
for( i = 0 ; i < k ; i ++){
max_val = (i+1)* (n/k);
end = start;
while( end < nb_constraints){
if(constraints[end] >= max_val)
break;
end++;
}
/* now end - start is the number of constraints for the ith subtree,
hence the number of dumb vertices is the difference between the
number of leaves of the subtree (n/k) and the number of constraints
*/
nb_dumb = n/k - (end-start);
/*printf("max_val: %d, nb_dumb=%d, start=%d, end=%d, size=%d\n",max_val, nb_dumb, start, end, n/k);*/
/* dumb vertices are the ones with the highest indices:
put them in the ith partition*/
for( j = 0; j < nb_dumb; j ++ ){
res[dumb_id] = i;
dumb_id--;
}
/* increase the size of the ith partition accordingly*/
size[i] += nb_dumb;
start=end;
}
}
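/* illustrative trace (hypothetical values): n = 8, k = 2 and
   constraints = {0,1,2,5}; subtree 0 covers leaves [0,4) and holds
   3 constraints, so nb_dumb = 1 and vertex 7 is pinned to partition 0;
   subtree 1 covers [4,8) and holds 1 constraint, so nb_dumb = 3 and
   vertices 6, 5 and 4 are pinned to partition 1 */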
/*printf("After dumb vertices mapping: ");print_1D_tab(res,n);*/
/* choose k initial "true" vertices at random and put them in a different partition */
for ( i = 0 ; i < k ; i ++ ){
/* if the partition is full of dumb vertices go to next partition*/
if(size[i] >= max_size)
continue;
/* find a vertex not already partitioned*/
do{
/* call the mersenne twister PRNG of tm_mt.c*/
j = genrand_int32() % n;
} while ( res[j] != -1 );
/* allocate and update size of partition*/
res[j] = i;
/* printf("random: %d -> %d\n",j,i); */
size[i]++;
}
/* allocate each unallocated vertex to the partition that maximizes the communication*/
for( i = 0 ; i < n ; i ++)
if( res[i] == -1)
allocate_vertex(i, res, com_mat, n, size, max_size);
cost = eval_cost(res,com_mat);
/*print_1D_tab(res,n);
printf("cost=%.2f\n",cost);*/
if((cost<best_cost) || (best_cost == -1)){
best_cost=cost;
FREE(best_res);
best_res=res;
}else
FREE(res);
FREE(size);
}
/*print_1D_tab(best_res,n);
printf("best_cost=%.2f\n",best_cost);
*/
return best_res;
}
int *kpartition(int k, com_mat_t *com_mat, int n, int *constraints, int nb_constraints)
{
int *res= NULL;
if( n%k != 0){
if(verbose_level >= ERROR)
fprintf(stderr,"Error: Cannot partition %d elements in %d parts\n",n,k);
return NULL;
}
/* if(USE_KL_KPART) */
/* res = kPartitioning(comm, n, k, constraints, nb_constraints, KL_KPART_GREEDY_TRIALS); */
/* else */
res = kpartition_greedy(k, com_mat, n, constraints, nb_constraints);
return res;
}
constraint_t *split_constraints (int *constraints, int nb_constraints, int k, tm_topology_t *topology, int depth)
{
constraint_t *const_tab = NULL;
int nb_leaves, start, end;
int i;
const_tab = (constraint_t *)CALLOC(k,sizeof(constraint_t));
/* nb_leaves is the number of leaves of the current subtree
this will help to determine where to split constraints and how to shift values
*/
nb_leaves = compute_nb_leaves_from_level( depth + 1, topology );
/* split the constraints into k sub-constraints
each sub-constraint 'i' contains constraints with values in [i*nb_leaves,(i+1)*nb_leaves[
*/
start = 0;
for( i = 0; i < k; i++ ){
/*returns the index in constraints of the smallest value not yet copied;
end is used to compute the number of copied elements (end-start) and is used as the next starting index*/
end = fill_tab(&(const_tab[i].constraints), constraints, nb_constraints,start, (i+1) * nb_leaves, i * nb_leaves);
const_tab[i].length = end-start;
const_tab[i].id = i;
start = end;
}
return const_tab;
}
com_mat_t **split_com_mat(com_mat_t *com_mat, int n, int k, int *partition)
{
com_mat_t **res = NULL, *sub_com_mat;
double **sub_mat = NULL;
int *perm = NULL;
int cur_part, i, ii, j, jj, m = n/k, s;
res = (com_mat_t**)MALLOC(k*sizeof(com_mat_t *));
if(verbose_level >= DEBUG){
printf("Partition: "); print_1D_tab(partition,n);
display_tab(com_mat->comm,com_mat->n);
}
perm = (int*)MALLOC(sizeof(int)*m);
for( cur_part = 0 ; cur_part < k ; cur_part ++ ){
/* build perm such that submat[i][j] corresponds to com_mat[perm[i]][perm[j]] according to the partition*/
s = 0;
for( j = 0; j < com_mat->n; j ++) /* check only non zero elements of com_mat*/
if ( partition[j] == cur_part )
perm[s++] = j;
/* s is now the size of the non zero sub matrix for this partition*/
/* build a sub-matrix for partition cur_part*/
sub_mat = (double **) MALLOC(sizeof(double *) * s);
for( i = 0 ; i < s ; i++)
sub_mat[i] = (double *) MALLOC(sizeof(double ) * s);
/* build the sub_mat corresponding to the partition cur_part*/
for ( i = 0 ; i < s ; i ++){
ii = perm[i];
for( j = i ; j < s ; j ++){
jj = perm[j];
sub_mat[i][j] = com_mat->comm[ii][jj];
sub_mat[j][i] = sub_mat[i][j];
}
}
sub_com_mat = (com_mat_t *)malloc(sizeof(com_mat_t));
sub_com_mat -> n = s;
sub_com_mat -> comm = sub_mat;
/* printf("\n\npartition:%d\n",cur_part);display_tab(sub_mat,m);*/
/* assign the sub_mat to the result*/
res[cur_part] = sub_com_mat;
}
FREE(perm);
return res;
}
int **split_vertices( int *vertices, int n, int k, int *partition)
{
int **res = NULL, *sub_vertices = NULL;
int m = n/k;
int i, j, cur_part;
/*allocate results*/
res = (int**) MALLOC(sizeof(int*) * k);
if(verbose_level >= DEBUG){
printf("Partition: ");print_1D_tab(partition,n);
printf("Vertices id: ");print_1D_tab(vertices,n);
}
/*split the vertices tab of the partition cur_part to the sub_vertices tab*/
for( cur_part = 0; cur_part < k ; cur_part ++){
sub_vertices = (int*) MALLOC(sizeof(int) * m);
i = 0;
for( j = 0; j < n; j ++)
if ( partition[j] == cur_part )
sub_vertices[i++] = vertices[j];
res[cur_part] = sub_vertices;
if(verbose_level >= DEBUG){
printf("partition %d: ",cur_part);print_1D_tab(sub_vertices,m);
}
}
/*exit(-1);*/
return res;
}
void FREE_tab_com_mat(com_mat_t **mat,int k)
{
int i,j;
if( !mat )
return;
for ( i = 0 ; i < k ; i ++){
for ( j = 0 ; j < mat[i]->n ; j ++)
FREE( mat[i]->comm[j] );
FREE( mat[i]->comm );
}
FREE(mat);
}
void FREE_tab_local_vertices(int **mat, int k)
{
int i; /* m=n/k; */
if( !mat )
return;
for ( i = 0 ; i < k ; i ++){
FREE( mat[i] );
}
FREE(mat);
}
void FREE_const_tab(constraint_t *const_tab, int k)
{
int i;
if( !const_tab )
return;
for(i = 0; i < k; i++){
if(const_tab[i].length)
FREE(const_tab[i].constraints);
}
FREE(const_tab);
}
void kpartition_build_level_topology(tree_t *cur_node, com_mat_t *com_mat, int N, int depth,
tm_topology_t *topology, int *local_vertices,
int *constraints, int nb_constraints,
double *obj_weight, double *comm_speed)
{
com_mat_t **tab_com_mat = NULL; /* table of communication matrices: we will have k such matrices, one for each subtree */
int k = topology->arity[depth];
tree_t **tab_child = NULL;
int *partition = NULL;
int **tab_local_vertices = NULL;
constraint_t *const_tab = NULL;
int i;
verbose_level = get_verbose_level();
/* if we are at the bottom of the tree set cur_node
and return*/
if ( depth == topology->nb_levels - 1 ){
if(verbose_level>=DEBUG)
printf("id : %d, com_mat= %p\n",local_vertices[0], (void *)com_mat->comm);
set_node(cur_node,NULL, 0, NULL, local_vertices[0], 0, NULL, depth);
return;
}
/* partition the com_matrix in k partitions*/
partition = kpartition(topology->arity[depth], com_mat, N, constraints, nb_constraints);
/* split the communication matrix in k parts according to the partition just found above */
tab_com_mat = split_com_mat( com_mat, N, k, partition);
/* split the local vertices in k parts according to the partition just found above */
tab_local_vertices = split_vertices( local_vertices, N, k, partition);
/* construct a tab of constraints of size k: one for each partition*/
const_tab = split_constraints (constraints, nb_constraints, k, topology, depth);
/* create the table of k nodes of the resulting sub-tree */
tab_child = (tree_t **) CALLOC (k,sizeof(tree_t));
for( i = 0 ; i < k ; i++){
tab_child[i] = (tree_t *) MALLOC(sizeof(tree_t));
}
/* for each child, proceed recursively*/
for( i = 0 ; i < k ; i++){
tab_child[i]->id = i;
kpartition_build_level_topology ( tab_child[i], tab_com_mat[i], N/k, depth + 1,
topology, tab_local_vertices[i],
const_tab[i].constraints, const_tab[i].length,
obj_weight, comm_speed);
tab_child[i]->parent = cur_node;
}
/* link the node with its child */
set_node( cur_node, tab_child, k, NULL, cur_node->id, 0, NULL, depth);
/* FREE local data*/
FREE(partition);
FREE_tab_com_mat(tab_com_mat,k);
FREE_tab_local_vertices(tab_local_vertices,k);
FREE_const_tab(const_tab,k);
}
tree_t *kpartition_build_tree_from_topology(tm_topology_t *topology,double **comm,int N, int *constraints, int nb_constraints, double *obj_weight, double *com_speed)
{
int depth,i, K;
tree_t *root = NULL;
int *local_vertices = NULL;
int nb_cores;
com_mat_t com_mat;
verbose_level = get_verbose_level();
if(verbose_level>=INFO)
printf("Number of constraints: %d, N=%d\n", nb_constraints, N);
nb_cores=nb_processing_units(topology);
if((constraints == NULL) && (nb_constraints != 0)){
if(verbose_level>=ERROR)
fprintf(stderr,"size of constraint table not zero while constraint tab is NULL\n");
return NULL;
}
if((constraints != NULL) && (nb_constraints > nb_cores)){
if(verbose_level>=ERROR)
fprintf(stderr,"size of constraint table (%d) is greater than the number of cores (%d)\n", nb_constraints, nb_cores);
return NULL;
}
depth = 0;
/* if we have more cores than processes add new dumb processes to the com matrix*/
if((K=nb_cores - N)>0){
/* add K elements to the object weight*/
complete_obj_weight(&obj_weight,N,K);
/* display_tab(tab,N+K);*/
} else if( K < 0){
if(verbose_level>=ERROR)
fprintf(stderr,"Not enough cores!\n");
return NULL;
}
com_mat.comm = comm;
com_mat.n = N;
/*
local_vertices is the array of vertices that can be used.
The first min(N,nb_constraints) elements are numbered from 0 upward,
the last ones have value -1.
The values of this array will be used to number the leaves of the tree_t tree
that starts at "root".
min(N,nb_constraints) is used to tackle the case where there are fewer processes than constraints.
*/
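/* illustration (hypothetical sizes): with N = 3 processes, nb_cores = 4
   (hence K = 1) and nb_constraints = 3, the loops below produce
   local_vertices = {0, 1, 2, -1} */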
local_vertices = (int*) MALLOC (sizeof(int) * (K+N));
for( i = 0 ; i < MIN(N,nb_constraints) ; i++)
local_vertices[i] = i;
for( i = MIN(N,nb_constraints) ;i < N + K ; i++)
local_vertices[i] = -1;
/* we assume all objects have the same arity*/
/* assign the root of the tree*/
root = (tree_t*) MALLOC (sizeof(tree_t));
/*build the tree downward from the root*/
kpartition_build_level_topology(root, &com_mat, N+K, depth, topology, local_vertices,
constraints, nb_constraints, obj_weight, com_speed);
/*print_1D_tab(local_vertices,K+N);*/
if(verbose_level>=INFO)
printf("Build (bottom-up) tree done!\n");
FREE(local_vertices);
/* tell the system it is a constraint tree, this is useful for freeing pointers*/
root->constraint = 1;
return root;
}

Просмотреть файл

@ -0,0 +1,9 @@
typedef struct _com_mat_t{
double **comm;
int n; /*comm is of size n by n; the other elements are zeroes*/
} com_mat_t;
int *kpartition(int, com_mat_t*, int, int *, int);
tree_t * kpartition_build_tree_from_topology(tm_topology_t *topology,double **com_mat,int N, int *constraints, int nb_constraints, double *obj_weight, double *com_speed);

157
ompi/mca/topo/treematch/treematch/tm_malloc.c Обычный файл
Просмотреть файл

@ -0,0 +1,157 @@
#include "uthash.h"
#include <stdio.h>
#include "tm_verbose.h"
#include "tm_malloc.h"
#define EXTRA_BYTE 100
typedef signed char byte;
/* static int verbose_level = ERROR;*/
typedef struct _hash_t {
void *key; /* we'll use this field as the key */
size_t size;
UT_hash_handle hh; /* makes this structure hashable */
}hash_t;
static hash_t *size_hash = NULL;
static char extra_data[EXTRA_BYTE];
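/* overview (comment added for clarity, not part of the original sources):
   my_malloc and my_calloc pad every allocation with EXTRA_BYTE guard bytes
   on each side, fill both guards with the fixed pseudo-random pattern kept
   in extra_data, and record the real block size in size_hash; my_free later
   compares both guard zones against extra_data to detect buffer under- and
   overflows, and my_mem_check reports any block that was never freed */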
static void save_size(void *ptr, size_t size);
static size_t retreive_size(void *someaddr);
static void init_extra_data(void);
void save_size(void *ptr, size_t size) {
hash_t *elem;
elem = (hash_t*) malloc(sizeof(hash_t));
elem -> key = ptr;
elem -> size = size;
if(get_verbose_level() >= DEBUG)
printf("Storing (%p,%ld)\n",ptr,size);
HASH_ADD_PTR( size_hash, key, elem );
}
size_t retreive_size(void *someaddr){
size_t res;
hash_t *elem = NULL;
HASH_FIND_PTR(size_hash, &someaddr, elem);
if(!elem){
fprintf(stderr,"cannot find ptr %p to free!\n",someaddr);
return 0;
}
res = elem->size;
if(get_verbose_level()>=DEBUG)
printf("Retreiving (%p,%ld)\n",someaddr, res);
HASH_DEL( size_hash, elem);
return res;
}
void my_mem_check(void){
hash_t *s;
int nb_errors = 0;
for(s=size_hash; s != NULL; s=s->hh.next) {
if(get_verbose_level()>=ERROR)
printf("pointer %p of size %ld has not been freed!\n", s->key, s->size);
nb_errors ++;
}
if(get_verbose_level() >= INFO)
printf ("Number of errors in managing memory: %d\n",nb_errors);
}
void init_extra_data(void){
static int done = 0;
int i;
if(done)
return;
srandom(0);
for( i = 0 ; i < EXTRA_BYTE; i++)
extra_data[i] = (char) random() % 256;
done = 1;
}
void *my_malloc(size_t size, char *file, int line){
byte *ptr;
init_extra_data();
size+=2*EXTRA_BYTE;
ptr = malloc(size);
if(get_verbose_level()>=DEBUG)
printf("my_malloc of size %ld: %p (%s: %d)\n",size-2*EXTRA_BYTE,ptr,file,line);
save_size(ptr,size);
memcpy(ptr, extra_data, EXTRA_BYTE);
memcpy(ptr + size - EXTRA_BYTE, extra_data, EXTRA_BYTE);
if(get_verbose_level()>=DEBUG)
printf("my_malloc returning: %p\n",ptr+EXTRA_BYTE);
return (void *)(ptr + EXTRA_BYTE);
}
void *my_calloc(size_t count, size_t size, char *file, int line){
byte *ptr;
size_t full_size;
init_extra_data();
full_size = count * size + 2 * EXTRA_BYTE;
ptr = malloc(full_size);
bzero(ptr,full_size);
save_size(ptr, full_size);
if(get_verbose_level()>=DEBUG)
printf("my_calloc of size %ld: %p (%s: %d)\n",full_size-2*EXTRA_BYTE,ptr, file, line);
memcpy(ptr, extra_data, EXTRA_BYTE);
memcpy(ptr + full_size - EXTRA_BYTE, extra_data, EXTRA_BYTE);
if(get_verbose_level()>=DEBUG)
printf("my_calloc returning: %p\n",ptr+EXTRA_BYTE);
return (void *)(ptr+EXTRA_BYTE);
}
void my_free(void *ptr){
byte *original_ptr = ((byte *)ptr) - EXTRA_BYTE;
size_t size;
if(!ptr)
return;
size = retreive_size(original_ptr);
if((bcmp(original_ptr ,extra_data, EXTRA_BYTE)) && ((get_verbose_level()>=ERROR))){
fprintf(stderr,"cannot find special string ***before*** %p!\n",ptr);
fprintf(stderr,"memory is probably corrupted here!\n");
}
if((bcmp(original_ptr + size -EXTRA_BYTE ,extra_data, EXTRA_BYTE)) && ((get_verbose_level()>=ERROR))){
fprintf(stderr,"cannot find special string ***after*** %p!\n",ptr);
fprintf(stderr,"memory is probably corrupted here!\n");
}
if(get_verbose_level()>=DEBUG)
printf("my_free freeing: %p\n",original_ptr);
free(original_ptr);
}

Просмотреть файл

@ -0,0 +1,5 @@
#include <stdlib.h>
void *my_malloc(size_t size, char *, int);
void *my_calloc(size_t count, size_t size, char *, int);
void my_free(void *ptr);
void my_mem_check(void);

1368
ompi/mca/topo/treematch/treematch/tm_mapping.c Обычный файл

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,43 @@
#include "tm_tree.h"
#include "tm_hwloc.h"
#include "tm_timings.h"
#include "tm_verbose.h"
int build_comm(char *filename,double ***pcomm);
void TreeMatchMapping(int nb_obj, int nb_proc,double **comm_mat, double * obj_weigth, double *com_speed, int d, int *sol);
/*Map topology to cores:
sigma_i is such that process i is mapped on core sigma_i
k_i is such that core i executes process k_i
size of sigma is the number of processes (nb_objs)
size of k is the number of cores/nodes (nb_proc)
We must have number of processes <= number of cores
k_i =-1 if no process is mapped on core i
*/
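/* small example (hypothetical values): with nb_objs = 3 processes and
   nb_proc = 4 cores, sigma = {2, 0, 3} means process 0 runs on core 2,
   process 1 on core 0 and process 2 on core 3; the corresponding k is
   {1, -1, 0, 2} since core 1 executes no process */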
void map_topology_simple(tm_topology_t *topology,tree_t *comm_tree, int *sigma, int nb_processes, int *k);
int nb_processing_units(tm_topology_t *topology);
void free_topology(tm_topology_t *topology);
void display_other_heuristics(tm_topology_t *topology,int N,double **comm,int TGT_flag, int *constraints, double *cost);
void print_1D_tab(int *tab,int N);
void build_synthetic_proc_id(tm_topology_t *topology);
void display_topology(tm_topology_t *topology);
tm_topology_t *build_synthetic_topology(int *arity, int nb_levels, int *core_numbering, int nb_core_per_node);
tm_topology_t *optimize_topology(tm_topology_t *topology);
double print_sol_inv(int N,int *Value,double **comm, double *cost, tm_topology_t *topology);
double print_sol(int N,int *Value,double **comm, double *cost, tm_topology_t *topology);
int build_binding_constraints(char *filename, int **ptab);
void canonize_constraints(tm_topology_t *topology, int *constraints, int **canonical, int n, int **perm, int *m);
int compute_nb_leaves_from_level(int depth,tm_topology_t *topology);
void FREE_topology(tm_topology_t *);
/* used to split a constraint into sub-constraints according to the tree*/
typedef struct _constraint{
int *constraints; /* the subconstraints*/
int length; /*length of *constraints*/
int id; /* id of the corresponding subtree*/
}constraint_t;

198
ompi/mca/topo/treematch/treematch/tm_mt.c Обычный файл
Просмотреть файл

@ -0,0 +1,198 @@
/*
A C-program for MT19937, with improved initialization 2002/1/26.
This is an optimized version that amortizes the shift/reload cost,
by Eric Landry 2004-03-15.
Before using, initialize the state by using init_genrand(seed) or
init_by_array(init_key, key_length).
Copyright (C) 1997--2004, Makoto Matsumoto, Takuji Nishimura, and
Eric Landry; All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
3. The names of its contributors may not be used to endorse or
promote products derived from this software without specific
prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Any feedback is very welcome.
http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
Reference: M. Matsumoto and T. Nishimura, "Mersenne Twister:
A 623-Dimensionally Equidistributed Uniform Pseudo-Random Number
Generator", ACM Transactions on Modeling and Computer Simulation,
Vol. 8, No. 1, January 1998, pp 3--30.
*/
#include "tm_mt.h"
/* Period parameters */
#define N 624
#define M 397
#define MATRIX_A 0x9908b0dfUL /* constant vector a */
#define UPPER_MASK 0x80000000UL /* most significant w-r bits */
#define LOWER_MASK 0x7fffffffUL /* least significant r bits */
static unsigned long x[N]; /* the array for the state vector */
static unsigned long *p0, *p1, *pm;
/*
initialize with a seed
See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier.
In the previous versions, MSBs of the seed affect only MSBs of
the state.
2002-01-09 modified by Makoto Matsumoto
*/
void
init_genrand(unsigned long s)
{
int i;
x[0] = s & 0xffffffffUL;
for (i = 1; i < N; ++i) {
x[i] = (1812433253UL * (x[i - 1] ^ (x[i - 1] >> 30)) + i)
& 0xffffffffUL; /* for >32 bit machines */
}
p0 = x;
p1 = x + 1;
pm = x + M;
}
/*
initialize by an array with array-length
init_key is the array for initializing keys
key_length is its length
2004-02-26 slight change for C++
*/
void
init_by_array(unsigned long init_key[], int key_length)
{
int i, j, k;
init_genrand(19650218UL);
i = 1;
j = 0;
for (k = (N > key_length ? N : key_length); k; --k) {
/* non linear */
x[i] = ((x[i] ^ ((x[i - 1] ^ (x[i - 1] >> 30)) * 1664525UL))
+ init_key[j] + j) & 0xffffffffUL; /* for WORDSIZE > 32 machines */
if (++i >= N) {
x[0] = x[N - 1];
i = 1;
}
if (++j >= key_length) {
j = 0;
}
}
for (k = N - 1; k; --k) {
/* non linear */
x[i] = ((x[i] ^ ((x[i - 1] ^ (x[i - 1] >> 30)) * 1566083941UL)) - i)
& 0xffffffffUL; /* for WORDSIZE > 32 machines */
if (++i >= N) {
x[0] = x[N - 1];
i = 1;
}
}
x[0] = 0x80000000UL; /* MSB is 1; assuring non-zero initial array */
}
/* generates a random number on the interval [0,0xffffffff] */
unsigned long
genrand_int32(void)
{
unsigned long y;
if (!p0) {
/* Default seed */
init_genrand(5489UL);
}
/* Twisted feedback */
y = *p0 = *pm++ ^ (((*p0 & UPPER_MASK) | (*p1 & LOWER_MASK)) >> 1)
^ (-(*p1 & 1) & MATRIX_A);
p0 = p1++;
if (pm == x + N) {
pm = x;
}
if (p1 == x + N) {
p1 = x;
}
/* Temper */
y ^= y >> 11;
y ^= y << 7 & 0x9d2c5680UL;
y ^= y << 15 & 0xefc60000UL;
y ^= y >> 18;
return y;
}
/* generates a random number on the interval [0,0x7fffffff] */
long
genrand_int31(void)
{
return (long) (genrand_int32() >> 1);
}
/* generates a random number on the real interval [0,1] */
double
genrand_real1(void)
{
return genrand_int32() * (1.0 / 4294967295.0);
/* divided by 2^32-1 */
}
/* generates a random number on the real interval [0,1) */
double
genrand_real2(void)
{
return genrand_int32() * (1.0 / 4294967296.0);
/* divided by 2^32 */
}
/* generates a random number on the real interval (0,1) */
double
genrand_real3(void)
{
return (((double) genrand_int32()) + 0.5) * (1.0 / 4294967296.0);
/* divided by 2^32 */
}
/* generates a 53-bit random number on the real interval [0,1) */
double
genrand_res53(void)
{
unsigned long a = genrand_int32() >> 5, b = genrand_int32() >> 6;
return (a * 67108864.0 + b) * (1.0 / 9007199254740992.0);
}
/* 2002-01-09 These real versions are due to Isaku Wada */

11
ompi/mca/topo/treematch/treematch/tm_mt.h Обычный файл
Просмотреть файл

@ -0,0 +1,11 @@
void init_genrand(unsigned long s);
void init_by_array(unsigned long init_key[], int key_length);
/* generates a random number on the interval [0,0xffffffff] */
unsigned long genrand_int32(void);
/* generates a random number on the interval [0,0x7fffffff] */
long genrand_int31(void);
double genrand_real1(void);
double genrand_real2(void);
double genrand_real3(void);
double genrand_res53(void);
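A minimal usage sketch (not part of the original sources; the helper names
random_index and example are hypothetical) showing how the rest of TreeMatch
drives this PRNG, e.g. to pick random indices in tm_bucket.c and
tm_kpartitioning.c:

#include "tm_mt.h"

/* draw a pseudo-random index in [0, n-1]; assumes n > 0 */
static int random_index(int n)
{
  return (int)(genrand_int32() % (unsigned long)n);
}

int example(void)
{
  init_genrand(5489UL); /* optional: genrand_int32() seeds itself otherwise */
  return random_index(10);
}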

Просмотреть файл

@ -0,0 +1,349 @@
#include <pthread.h>
#include "tm_thread_pool.h"
#include "tm_verbose.h"
#include <hwloc.h>
#include "tm_verbose.h"
#include "tm_tree.h"
#include <errno.h>
static int verbose_level = ERROR;
static thread_pool_t *pool = NULL;
static thread_pool_t *get_thread_pool(void);
static void execute_work(work_t *work);
static int bind_myself_to_core(hwloc_topology_t topology, int id);
static void *thread_loop(void *arg);
static void add_work(pthread_mutex_t *list_lock, pthread_cond_t *cond_var, work_t *working_list, work_t *work);
static thread_pool_t *create_threads(void);
static void f1 (int nb_args, void **args);
static void f2 (int nb_args, void **args);
static void destroy_work(work_t *work);
void f1 (int nb_args, void **args){
int a, b;
a = *(int*)args[0];
b = *(int*)args[1];
printf("nb_args=%d, a=%d, b=%d\n",nb_args,a,b);
}
void f2 (int nb_args, void **args){
int n, *tab;
int *res;
int i,j;
n = *(int*)args[0];
tab = (int*)args[1];
res=(int*)args[2];
for(j=0;j<1000000;j++){
*res=0;
for (i=0;i<n;i++)
*res+=tab[i];
}
printf("done: %d!\n",nb_args);
}
void execute_work(work_t *work){
work->task(work->nb_args, work->args);
}
int bind_myself_to_core(hwloc_topology_t topology, int id){
hwloc_cpuset_t cpuset;
hwloc_obj_t obj;
char *str;
int binding_res;
int depth = hwloc_topology_get_depth(topology);
/* printf("depth=%d\n",depth); */
/* Get my core. */
obj = hwloc_get_obj_by_depth(topology, depth-1, id);
if (obj) {
/* Get a copy of its cpuset that we may modify. */
cpuset = hwloc_bitmap_dup(obj->cpuset);
/* Get only one logical processor (in case the core is
SMT/hyperthreaded). */
hwloc_bitmap_singlify(cpuset);
/*hwloc_bitmap_asprintf(&str, cpuset);
printf("Binding thread %d to cpuset %s\n", id,str);
FREE(str);
*/
/* And try to bind ourself there. */
binding_res = hwloc_set_cpubind(topology, cpuset, HWLOC_CPUBIND_THREAD);
if (binding_res == -1){
int error = errno;
hwloc_bitmap_asprintf(&str, obj->cpuset);
if(verbose_level>=WARNING)
fprintf(stderr,"%d Couldn't bind to cpuset %s: %s\n", id, str, strerror(error));
FREE(str);
return 0;
}
/* FREE our cpuset copy */
hwloc_bitmap_free(cpuset);
return 1;
}else{
if(verbose_level>=WARNING)
fprintf(stderr,"No valid object for core id %d!\n",id);
return 0;
}
}
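/* Worker loop: each thread binds itself to core 'id', then waits on its
condition variable until a work item is linked after the head of its working
list. A work item whose task is NULL is the termination request; any other
item is executed, marked done, and its work_done condition is signaled. */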
void *thread_loop(void *arg){
local_thread_t *local=(local_thread_t*)arg;
int id = local->id;
hwloc_topology_t topology= local->topology;
work_t *start_working_list = local ->working_list;
pthread_cond_t *cond_var = local->cond_var;
pthread_mutex_t *list_lock = local->list_lock;
work_t *work;
int *ret = (int *)MALLOC(sizeof(int));
bind_myself_to_core(topology,id);
while(1){
pthread_mutex_lock(list_lock);
while(start_working_list->next == NULL) {
pthread_cond_wait(cond_var, list_lock);
}
work = start_working_list->next;
start_working_list->next = work-> next;
pthread_mutex_unlock(list_lock);
if(!work->task){
*ret = 0;
pthread_exit(ret);
}
execute_work(work);
pthread_mutex_lock(&work->mutex);
work->done=1;
pthread_mutex_unlock(&work->mutex);
pthread_cond_signal(&work->work_done);
}
}
void add_work(pthread_mutex_t *list_lock, pthread_cond_t *cond_var, work_t *working_list, work_t *work){
work_t *elem = working_list;
pthread_mutex_lock(list_lock);
while(elem->next!=NULL){
elem=elem->next;
}
elem->next=work;
work -> next = NULL;
work -> done = 0;
pthread_cond_signal(cond_var);
pthread_mutex_unlock(list_lock);
}
void wait_work_completion(work_t *work){
pthread_mutex_lock(&work->mutex);
while(!work->done)
pthread_cond_wait(&work->work_done, &work->mutex);
/* release the mutex so destroy_work() can later destroy it safely */
pthread_mutex_unlock(&work->mutex);
}
int submit_work(work_t *work, int thread_id){
if( (thread_id>=0) && (thread_id< pool->nb_threads)){
add_work(&pool->list_lock[thread_id], &pool->cond_var[thread_id], &pool->working_list[thread_id], work);
return 1;
}
return 0;
}
thread_pool_t *create_threads(){
hwloc_topology_t topology;
int i;
local_thread_t *local;
int nb_cores;
int depth;
verbose_level = get_verbose_level();
/*Get number of cores: set 1 thread per core*/
/* Allocate and initialize topology object. */
hwloc_topology_init(&topology);
/* Only keep relevant levels
hwloc_topology_ignore_all_keep_structure(topology);*/
/* Perform the topology detection. */
hwloc_topology_load(topology);
depth = hwloc_topology_get_depth(topology);
if (depth == -1 ) {
if(verbose_level>=CRITICAL)
fprintf(stderr,"Error: topology with unknown depth\n");
exit(-1);
}
/* at the deepest level (depth-1) the objects are PUs/cores on which we can execute things */
nb_cores = hwloc_get_nbobjs_by_depth(topology, depth-1);
pool = (thread_pool_t*) MALLOC(sizeof(thread_pool_t));
pool -> topology = topology;
pool -> nb_threads = nb_cores;
pool -> thread_list = (pthread_t*)MALLOC(sizeof(pthread_t)*nb_cores);
pool -> working_list = (work_t*)CALLOC(nb_cores,sizeof(work_t));
pool -> cond_var = (pthread_cond_t*)MALLOC(sizeof(pthread_cond_t)*nb_cores);
pool -> list_lock = (pthread_mutex_t*)MALLOC(sizeof(pthread_mutex_t)*nb_cores);
local=(local_thread_t*)MALLOC(sizeof(local_thread_t)*nb_cores);
pool->local = local;
for (i=0;i<nb_cores;i++){
local[i].topology = topology;
local[i].id = i;
local[i].working_list = &pool->working_list[i];
pthread_cond_init(pool->cond_var +i, NULL);
local[i].cond_var = pool->cond_var +i;
pthread_mutex_init(pool->list_lock +i, NULL);
local[i].list_lock = pool->list_lock+i;
if (pthread_create (pool->thread_list+i, NULL, thread_loop, local+i) < 0) {
if(verbose_level>=CRITICAL)
fprintf(stderr, "pthread_create error for exec thread %d\n",i);
return NULL;
}
}
return pool;
}
thread_pool_t *get_thread_pool(void){
if (pool == NULL)
return create_threads();
return pool;
}
void terminate_thread_pool(){
int id;
int *ret=NULL;
work_t work;
if(pool){
work.task=NULL;
for (id=0;id<pool->nb_threads;id++){
submit_work(&work,id);
}
for (id=0;id<pool->nb_threads;id++){
pthread_join(pool->thread_list[id],(void **) &ret);
pthread_cond_destroy(pool->cond_var +id);
pthread_mutex_destroy(pool->list_lock +id);
if (pool->working_list[id].next != NULL)
if(verbose_level >= WARNING)
fprintf(stderr,"Working list of thread %d not empty!\n",id);
}
hwloc_topology_destroy(pool->topology);
FREE(pool -> thread_list);
FREE(pool -> working_list);
FREE(pool -> cond_var);
FREE(pool -> list_lock);
FREE(pool -> local);
FREE(pool);
pool = NULL;
}
}
int get_nb_threads(){
pool = get_thread_pool();
return pool -> nb_threads;
}
work_t *create_work(int nb_args, void **args, void (*task) (int, void **)){
work_t *work;
work = MALLOC(sizeof(work_t));
work -> nb_args = nb_args;
work -> args = args;
work -> task = task;
work -> done = 0;
pthread_cond_init (&work->work_done, NULL);
pthread_mutex_init(&work->mutex, NULL);
if( verbose_level >= DEBUG)
printf("work %p created\n",(void *)work);
return work;
}
void destroy_work(work_t *work){
pthread_cond_destroy(&work->work_done);
pthread_mutex_destroy(&work->mutex);
FREE(work);
}
int test_main(void){
int a=3, c;
int b=-5;
void *args1[3];
void *args2[3];
int tab[100];
int i,res;
work_t *work1,*work2,*work3,*work4;
int nb_threads = get_nb_threads();
printf("nb_threads= %d\n", nb_threads);
args1[0] = &a;
args1[1] = &b;
work1 = create_work(2,args1,f1);
for (i=0;i<100;i++)
tab[i]=i;
c=100;
args2[0] = &c;
args2[1] = tab;
args2[2] = &res;
work2 = create_work(3, args2, f2);
work3 = create_work(4, args2, f2);
work4 = create_work(5, args2, f2);
submit_work(work1,0);
submit_work(work2,1);
submit_work(work3,1);
submit_work(work4,1);
terminate_thread_pool();
wait_work_completion(work1);
wait_work_completion(work2);
wait_work_completion(work3);
wait_work_completion(work4);
printf("res=%d\n",res);
destroy_work(work1);
destroy_work(work2);
destroy_work(work3);
destroy_work(work4);
return 0;
}

45
ompi/mca/topo/treematch/treematch/tm_thread_pool.h Normal file
View File

@ -0,0 +1,45 @@
#ifndef THREAD_POOL_H
#define THREAD_POOL_H
#include <pthread.h>
#include <hwloc.h>
typedef struct _work_t{
int nb_args;
void (*task)(int nb_args, void **args);
void **args;
struct _work_t *next;
pthread_cond_t work_done;
pthread_mutex_t mutex;
int done;
}work_t;
typedef struct {
int id;
hwloc_topology_t topology;
work_t *working_list;
pthread_cond_t *cond_var;
pthread_mutex_t *list_lock;
}local_thread_t;
typedef struct _thread_pool_t{
int nb_threads;
pthread_t *thread_list;
work_t *working_list;
pthread_cond_t *cond_var;
pthread_mutex_t *list_lock;
local_thread_t *local;
hwloc_topology_t topology;
}thread_pool_t;
int get_nb_threads(void);
int submit_work(work_t *work, int thread_id);
void wait_work_completion(work_t *work);
void terminate_thread_pool(void);
work_t *create_work(int nb_args, void **args, void (*task)(int, void **));
int test_main(void);
#endif /* THREAD_POOL_H */

32
ompi/mca/topo/treematch/treematch/tm_timings.c Normal file
View File

@ -0,0 +1,32 @@
#include "tm_timings.h"
static CLOCK_T time_tab[MAX_CLOCK];
static int clock_num = -1;
void get_time(void)
{
clock_num++;
if(clock_num>MAX_CLOCK-1)
return;
CLOCK(time_tab[clock_num]);
}
double time_diff(void)
{
CLOCK_T t2,t1;
if(clock_num>MAX_CLOCK-1){
clock_num--;
return -1.0;
}
if(clock_num < 0){
return -1.0;
}
CLOCK(t2);
t1=time_tab[clock_num--];
return CLOCK_DIFF(t2,t1);
}

47
ompi/mca/topo/treematch/treematch/tm_timings.h Normal file
View File

@ -0,0 +1,47 @@
#ifndef TIMINGS_H
#define TIMINGS_H
#include <stdio.h>
#ifndef _WIN32
#include <sys/time.h>
#else
#include <sys/timeb.h>
#endif
#include <stdlib.h>
#include <unistd.h>
#define MAX_CLOCK 1000
#ifndef _WIN32
typedef struct timeval CLOCK_T;
#define CLOCK(c) gettimeofday(&c,(struct timezone *)NULL)
#define CLOCK_DIFF(c1,c2) \
((double)(c1.tv_sec-c2.tv_sec)+(double)(c1.tv_usec-c2.tv_usec)/1e+6)
#define CLOCK_DISPLAY(c) fprintf(stderr,"%d.%d",(int)c.tv_sec,(int)c.tv_usec)
#else /* for windows */
#ifdef __CYGWIN__
typedef struct timeb CLOCK_T;
#else
typedef struct _timeb CLOCK_T;
#endif
#define CLOCK(c) _ftime(&c)
#define CLOCK_DIFF(c1,c2) \
((double)(c1.time-c2.time)+(double)(c1.millitm-c2.millitm)/1e+3)
#define CLOCK_DISPLAY(c) fprintf(stderr,"%d.%d",(int)c.time,(int)c.millitm*1e+3)
#endif
double time_diff(void);
void get_time(void);
#define TIC get_time()
#define TOC time_diff()
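/* Timestamps are kept on a small internal stack (see tm_timings.c), so TIC/TOC
pairs may be nested up to MAX_CLOCK levels. Typical use (do_work() standing for
any code to time):
TIC;
do_work();
printf("%f seconds\n", TOC);
*/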
#endif /*TIMINGS_H*/

1648
ompi/mca/topo/treematch/treematch/tm_tree.c Normal file

File diff suppressed because it is too large. Load Diff

94
ompi/mca/topo/treematch/treematch/tm_tree.h Normal file
View File

@ -0,0 +1,94 @@
#ifndef __TREE_H__
#define __TREE_H__
#include <stdlib.h>
typedef struct _node_info_t{
int submit_date;
int job_id;
int finish_date;
} job_info_t;
typedef struct _tree_t{
int constraint; /* tells if the tree has been constructed with constraints on the nodes or not. Useful for freeing it. Needs to be set on the root only */
struct _tree_t **child;
struct _tree_t *parent;
struct _tree_t *tab_child; /*the pointer to be freed*/
double val;
int arity;
int depth;
int id;
int uniq;
int dumb; /* 1 if the node belongs to a dumb tree: hence has to be freed separately*/
job_info_t *job_info;
}tree_t;
/* Maximum number of levels in the tree*/
#define MAX_LEVELS 100
typedef struct {
int *arity; /* arity of the nodes of each level*/
int nb_levels; /*number of levels of the tree. Levels are numbered from top to bottom starting at 0*/
int *nb_nodes; /*nb of nodes of each level*/
int *nb_free_nodes; /*nb of available nodes of each level*/
int **node_id; /*ID of the nodes of the tree for each level*/
int **free_nodes; /*IDs of the free (available) nodes of the tree for each level*/
}tm_topology_t;
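/* Illustrative example: a machine made of 2 sockets with 4 cores each could be
described with nb_levels = 3 and nb_nodes = {1, 2, 8} (the root at level 0, the
sockets at level 1, the cores at level 2), with arity[0] = 2 and arity[1] = 4. */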
typedef struct {
double ** mat;
double * sum_row;
int order;
} affinity_mat_t;
tree_t * build_tree(double **tab,int N);
tree_t * build_tree_from_topology(tm_topology_t *topology,double **tab,int N, double *obj_weight, double *comm_speed);
void map_tree(tree_t *,tree_t*);
void display_tab(double **tab,int N);
double speed(int depth);
void set_node(tree_t *node,tree_t ** child, int arity,tree_t *parent,int id,double val,tree_t *deb_tab_child, int depth);
void free_constraint_tree(tree_t *tree);
void free_tree(tree_t *tree);
void free_tab_double(double**tab,int N);
void free_tab_int(int**tab,int N);
void update_val(affinity_mat_t *aff_mat,tree_t *parent);
void FREE_tree(tree_t *tree);
void FREE_tab_double(double**,int);
typedef struct _group_list_t{
struct _group_list_t *next;
tree_t **tab;
double val;
double sum_neighbour;
double wg;
}group_list_t;
typedef struct{
int i;
int j;
double val;
}adjacency_t;
/* for debugging malloc */
/* #define __DEBUG_MY_MALLOC__ */
#undef __DEBUG_MY_MALLOC__
#ifdef __DEBUG_MY_MALLOC__
#include "tm_malloc.h"
#define MALLOC(x) my_malloc(x,__FILE__,__LINE__)
#define CALLOC(x,y) my_calloc(x,y,__FILE__,__LINE__)
#define FREE my_free
#define MEM_CHECK my_mem_check
#else
#define MALLOC malloc
#define CALLOC calloc
#define FREE free
#define MEM_CHECK my_mem_check
#endif
#endif

11
ompi/mca/topo/treematch/treematch/tm_verbose.c Normal file
View File

@ -0,0 +1,11 @@
#include "tm_verbose.h"
static unsigned int verbose_level = ERROR;
void set_verbose_level(unsigned int level){
verbose_level = level;
}
unsigned int get_verbose_level(){
return verbose_level;
}

11
ompi/mca/topo/treematch/treematch/tm_verbose.h Normal file
View File

@ -0,0 +1,11 @@
#define NONE 0
#define CRITICAL 1
#define ERROR 2
#define WARNING 3
#define INFO 4
#define DEBUG 5
void set_verbose_level(unsigned int level);
unsigned int get_verbose_level(void);
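/* A message of severity S is emitted when get_verbose_level() >= S; e.g. after
set_verbose_level(INFO), CRITICAL, ERROR, WARNING and INFO messages are printed
but DEBUG ones are not. */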

905
ompi/mca/topo/treematch/treematch/uthash.h Normal file
View File

@ -0,0 +1,905 @@
/*
Copyright (c) 2003-2011, Troy D. Hanson http://uthash.sourceforge.net
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef UTHASH_H
#define UTHASH_H
#include <string.h> /* memcmp,strlen */
#include <stddef.h> /* ptrdiff_t */
#include <stdlib.h> /* exit() */
/* These macros use decltype or the earlier __typeof GNU extension.
As decltype is only available in newer compilers (VS2010 or gcc 4.3+
when compiling c++ source) this code uses whatever method is needed
or, for VS2008 where neither is available, uses casting workarounds. */
#ifdef _MSC_VER /* MS compiler */
#if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */
#define DECLTYPE(x) (decltype(x))
#else /* VS2008 or older (or VS2010 in C mode) */
#define NO_DECLTYPE
#define DECLTYPE(x)
#endif
#else /* GNU, Sun and other compilers */
#define DECLTYPE(x) (__typeof(x))
#endif
#ifdef NO_DECLTYPE
#define DECLTYPE_ASSIGN(dst,src) \
do { \
char **_da_dst = (char**)(&(dst)); \
*_da_dst = (char*)(src); \
} while(0)
#else
#define DECLTYPE_ASSIGN(dst,src) \
do { \
(dst) = DECLTYPE(dst)(src); \
} while(0)
#endif
/* a number of the hash functions use uint32_t, which isn't defined on win32 */
#ifdef _MSC_VER
typedef unsigned int uint32_t;
typedef unsigned char uint8_t;
#else
#include <inttypes.h> /* uint32_t */
#endif
#define UTHASH_VERSION 1.9.4
#define uthash_fatal(msg) exit(-1) /* fatal error (out of memory,etc) */
#define uthash_malloc(sz) malloc(sz) /* malloc fcn */
#define uthash_free(ptr,sz) free(ptr) /* free fcn */
#define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */
#define uthash_expand_fyi(tbl) /* can be defined to log expands */
/* initial number of buckets */
#define HASH_INITIAL_NUM_BUCKETS 32 /* initial number of buckets */
#define HASH_INITIAL_NUM_BUCKETS_LOG2 5 /* lg2 of initial number of buckets */
#define HASH_BKT_CAPACITY_THRESH 10 /* expand when bucket count reaches */
/* calculate the element whose hash handle address is hhp */
#define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho)))
#define HASH_FIND(hh,head,keyptr,keylen,out) \
do { \
unsigned _hf_bkt,_hf_hashv; \
out=NULL; \
if (head) { \
HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt); \
if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv)) { \
HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], \
keyptr,keylen,out); \
} \
} \
} while (0)
#ifdef HASH_BLOOM
#define HASH_BLOOM_BITLEN (1ULL << HASH_BLOOM)
#define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8) + ((HASH_BLOOM_BITLEN%8) ? 1:0)
#define HASH_BLOOM_MAKE(tbl) \
do { \
(tbl)->bloom_nbits = HASH_BLOOM; \
(tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \
if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \
memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \
(tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \
} while (0);
#define HASH_BLOOM_FREE(tbl) \
do { \
uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \
} while (0);
#define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8] |= (1U << ((idx)%8)))
#define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8] & (1U << ((idx)%8)))
#define HASH_BLOOM_ADD(tbl,hashv) \
HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))
#define HASH_BLOOM_TEST(tbl,hashv) \
HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))
#else
#define HASH_BLOOM_MAKE(tbl)
#define HASH_BLOOM_FREE(tbl)
#define HASH_BLOOM_ADD(tbl,hashv)
#define HASH_BLOOM_TEST(tbl,hashv) (1)
#endif
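/* For example, compiling with -DHASH_BLOOM=16 attaches a 2^16-bit (8 KB) bloom
 * filter to each hash table; HASH_FIND can then skip the bucket search whenever
 * the filter proves a key is absent. */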
#define HASH_MAKE_TABLE(hh,head) \
do { \
(head)->hh.tbl = (UT_hash_table*)uthash_malloc( \
sizeof(UT_hash_table)); \
if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \
memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \
(head)->hh.tbl->tail = &((head)->hh); \
(head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \
(head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \
(head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \
(head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \
HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \
if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \
memset((head)->hh.tbl->buckets, 0, \
HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \
HASH_BLOOM_MAKE((head)->hh.tbl); \
(head)->hh.tbl->signature = HASH_SIGNATURE; \
} while(0)
#define HASH_ADD(hh,head,fieldname,keylen_in,add) \
HASH_ADD_KEYPTR(hh,head,&((add)->fieldname),keylen_in,add)
#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \
do { \
unsigned _ha_bkt; \
(add)->hh.next = NULL; \
(add)->hh.key = (char*)keyptr; \
(add)->hh.keylen = keylen_in; \
if (!(head)) { \
head = (add); \
(head)->hh.prev = NULL; \
HASH_MAKE_TABLE(hh,head); \
} else { \
(head)->hh.tbl->tail->next = (add); \
(add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \
(head)->hh.tbl->tail = &((add)->hh); \
} \
(head)->hh.tbl->num_items++; \
(add)->hh.tbl = (head)->hh.tbl; \
HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets, \
(add)->hh.hashv, _ha_bkt); \
HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh); \
HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv); \
HASH_EMIT_KEY(hh,head,keyptr,keylen_in); \
HASH_FSCK(hh,head); \
} while(0)
#define HASH_TO_BKT( hashv, num_bkts, bkt ) \
do { \
bkt = ((hashv) & ((num_bkts) - 1)); \
} while(0)
/* delete "delptr" from the hash table.
* "the usual" patch-up process for the app-order doubly-linked-list.
* The use of _hd_hh_del below deserves special explanation.
* These used to be expressed using (delptr) but that led to a bug
* if someone used the same symbol for the head and deletee, like
* HASH_DELETE(hh,users,users);
* We want that to work, but by changing the head (users) below
* we were forfeiting our ability to further refer to the deletee (users)
* in the patch-up process. Solution: use scratch space to
* copy the deletee pointer, then the latter references are via that
* scratch pointer rather than through the repointed (users) symbol.
*/
#define HASH_DELETE(hh,head,delptr) \
do { \
unsigned _hd_bkt; \
struct UT_hash_handle *_hd_hh_del; \
if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \
uthash_free((head)->hh.tbl->buckets, \
(head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
HASH_BLOOM_FREE((head)->hh.tbl); \
uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
head = NULL; \
} else { \
_hd_hh_del = &((delptr)->hh); \
if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \
(head)->hh.tbl->tail = \
(UT_hash_handle*)((char*)((delptr)->hh.prev) + \
(head)->hh.tbl->hho); \
} \
if ((delptr)->hh.prev) { \
((UT_hash_handle*)((char*)((delptr)->hh.prev) + \
(head)->hh.tbl->hho))->next = (delptr)->hh.next; \
} else { \
DECLTYPE_ASSIGN(head,(delptr)->hh.next); \
} \
if (_hd_hh_del->next) { \
((UT_hash_handle*)((char*)_hd_hh_del->next + \
(head)->hh.tbl->hho))->prev = \
_hd_hh_del->prev; \
} \
HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \
HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \
(head)->hh.tbl->num_items--; \
} \
HASH_FSCK(hh,head); \
} while (0)
/* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */
#define HASH_FIND_STR(head,findstr,out) \
HASH_FIND(hh,head,findstr,strlen(findstr),out)
#define HASH_ADD_STR(head,strfield,add) \
HASH_ADD(hh,head,strfield,strlen(add->strfield),add)
#define HASH_FIND_INT(head,findint,out) \
HASH_FIND(hh,head,findint,sizeof(int),out)
#define HASH_ADD_INT(head,intfield,add) \
HASH_ADD(hh,head,intfield,sizeof(int),add)
#define HASH_FIND_PTR(head,findptr,out) \
HASH_FIND(hh,head,findptr,sizeof(void *),out)
#define HASH_ADD_PTR(head,ptrfield,add) \
HASH_ADD(hh,head,ptrfield,sizeof(void *),add)
#define HASH_DEL(head,delptr) \
HASH_DELETE(hh,head,delptr)
/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined.
* This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined.
*/
#ifdef HASH_DEBUG
#define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0)
#define HASH_FSCK(hh,head) \
do { \
unsigned _bkt_i; \
unsigned _count, _bkt_count; \
char *_prev; \
struct UT_hash_handle *_thh; \
if (head) { \
_count = 0; \
for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \
_bkt_count = 0; \
_thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \
_prev = NULL; \
while (_thh) { \
if (_prev != (char*)(_thh->hh_prev)) { \
HASH_OOPS("invalid hh_prev %p, actual %p\n", \
_thh->hh_prev, _prev ); \
} \
_bkt_count++; \
_prev = (char*)(_thh); \
_thh = _thh->hh_next; \
} \
_count += _bkt_count; \
if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \
HASH_OOPS("invalid bucket count %d, actual %d\n", \
(head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \
} \
} \
if (_count != (head)->hh.tbl->num_items) { \
HASH_OOPS("invalid hh item count %d, actual %d\n", \
(head)->hh.tbl->num_items, _count ); \
} \
/* traverse hh in app order; check next/prev integrity, count */ \
_count = 0; \
_prev = NULL; \
_thh = &(head)->hh; \
while (_thh) { \
_count++; \
if (_prev !=(char*)(_thh->prev)) { \
HASH_OOPS("invalid prev %p, actual %p\n", \
_thh->prev, _prev ); \
} \
_prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \
_thh = ( _thh->next ? (UT_hash_handle*)((char*)(_thh->next) + \
(head)->hh.tbl->hho) : NULL ); \
} \
if (_count != (head)->hh.tbl->num_items) { \
HASH_OOPS("invalid app item count %d, actual %d\n", \
(head)->hh.tbl->num_items, _count ); \
} \
} \
} while (0)
#else
#define HASH_FSCK(hh,head)
#endif
/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to
* the descriptor to which this macro is defined for tuning the hash function.
* The app can #include <unistd.h> to get the prototype for write(2). */
#ifdef HASH_EMIT_KEYS
#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \
do { \
unsigned _klen = fieldlen; \
write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \
write(HASH_EMIT_KEYS, keyptr, fieldlen); \
} while (0)
#else
#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
#endif
/* default to Jenkins's hash unless overridden, e.g. -DHASH_FUNCTION=HASH_SAX */
#ifdef HASH_FUNCTION
#define HASH_FCN HASH_FUNCTION
#else
#define HASH_FCN HASH_JEN
#endif
/* The Bernstein hash function, used in Perl prior to v5.6 */
#define HASH_BER(key,keylen,num_bkts,hashv,bkt) \
do { \
unsigned _hb_keylen=keylen; \
char *_hb_key=(char*)(key); \
(hashv) = 0; \
while (_hb_keylen--) { (hashv) = ((hashv) * 33) + *_hb_key++; } \
bkt = (hashv) & (num_bkts-1); \
} while (0)
/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at
* http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */
#define HASH_SAX(key,keylen,num_bkts,hashv,bkt) \
do { \
unsigned _sx_i; \
char *_hs_key=(char*)(key); \
hashv = 0; \
for(_sx_i=0; _sx_i < keylen; _sx_i++) \
hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \
bkt = hashv & (num_bkts-1); \
} while (0)
#define HASH_FNV(key,keylen,num_bkts,hashv,bkt) \
do { \
unsigned _fn_i; \
char *_hf_key=(char*)(key); \
hashv = 2166136261UL; \
for(_fn_i=0; _fn_i < keylen; _fn_i++) \
hashv = (hashv * 16777619) ^ _hf_key[_fn_i]; \
bkt = hashv & (num_bkts-1); \
} while(0);
#define HASH_OAT(key,keylen,num_bkts,hashv,bkt) \
do { \
unsigned _ho_i; \
char *_ho_key=(char*)(key); \
hashv = 0; \
for(_ho_i=0; _ho_i < keylen; _ho_i++) { \
hashv += _ho_key[_ho_i]; \
hashv += (hashv << 10); \
hashv ^= (hashv >> 6); \
} \
hashv += (hashv << 3); \
hashv ^= (hashv >> 11); \
hashv += (hashv << 15); \
bkt = hashv & (num_bkts-1); \
} while(0)
#define HASH_JEN_MIX(a,b,c) \
do { \
a -= b; a -= c; a ^= ( c >> 13 ); \
b -= c; b -= a; b ^= ( a << 8 ); \
c -= a; c -= b; c ^= ( b >> 13 ); \
a -= b; a -= c; a ^= ( c >> 12 ); \
b -= c; b -= a; b ^= ( a << 16 ); \
c -= a; c -= b; c ^= ( b >> 5 ); \
a -= b; a -= c; a ^= ( c >> 3 ); \
b -= c; b -= a; b ^= ( a << 10 ); \
c -= a; c -= b; c ^= ( b >> 15 ); \
} while (0)
#define HASH_JEN(key,keylen,num_bkts,hashv,bkt) \
do { \
unsigned _hj_i,_hj_j,_hj_k; \
char *_hj_key=(char*)(key); \
hashv = 0xfeedbeef; \
_hj_i = _hj_j = 0x9e3779b9; \
_hj_k = keylen; \
while (_hj_k >= 12) { \
_hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \
+ ( (unsigned)_hj_key[2] << 16 ) \
+ ( (unsigned)_hj_key[3] << 24 ) ); \
_hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \
+ ( (unsigned)_hj_key[6] << 16 ) \
+ ( (unsigned)_hj_key[7] << 24 ) ); \
hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \
+ ( (unsigned)_hj_key[10] << 16 ) \
+ ( (unsigned)_hj_key[11] << 24 ) ); \
\
HASH_JEN_MIX(_hj_i, _hj_j, hashv); \
\
_hj_key += 12; \
_hj_k -= 12; \
} \
hashv += keylen; \
switch ( _hj_k ) { \
case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); \
case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); \
case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); \
case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); \
case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); \
case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); \
case 5: _hj_j += _hj_key[4]; \
case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); \
case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); \
case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); \
case 1: _hj_i += _hj_key[0]; \
} \
HASH_JEN_MIX(_hj_i, _hj_j, hashv); \
bkt = hashv & (num_bkts-1); \
} while(0)
/* The Paul Hsieh hash function */
#undef get16bits
#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
|| defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
#define get16bits(d) (*((const uint16_t *) (d)))
#endif
#if !defined (get16bits)
#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \
+(uint32_t)(((const uint8_t *)(d))[0]) )
#endif
#define HASH_SFH(key,keylen,num_bkts,hashv,bkt) \
do { \
char *_sfh_key=(char*)(key); \
uint32_t _sfh_tmp, _sfh_len = keylen; \
\
int _sfh_rem = _sfh_len & 3; \
_sfh_len >>= 2; \
hashv = 0xcafebabe; \
\
/* Main loop */ \
for (;_sfh_len > 0; _sfh_len--) { \
hashv += get16bits (_sfh_key); \
_sfh_tmp = (get16bits (_sfh_key+2) << 11) ^ hashv; \
hashv = (hashv << 16) ^ _sfh_tmp; \
_sfh_key += 2*sizeof (uint16_t); \
hashv += hashv >> 11; \
} \
\
/* Handle end cases */ \
switch (_sfh_rem) { \
case 3: hashv += get16bits (_sfh_key); \
hashv ^= hashv << 16; \
hashv ^= _sfh_key[sizeof (uint16_t)] << 18; \
hashv += hashv >> 11; \
break; \
case 2: hashv += get16bits (_sfh_key); \
hashv ^= hashv << 11; \
hashv += hashv >> 17; \
break; \
case 1: hashv += *_sfh_key; \
hashv ^= hashv << 10; \
hashv += hashv >> 1; \
} \
\
/* Force "avalanching" of final 127 bits */ \
hashv ^= hashv << 3; \
hashv += hashv >> 5; \
hashv ^= hashv << 4; \
hashv += hashv >> 17; \
hashv ^= hashv << 25; \
hashv += hashv >> 6; \
bkt = hashv & (num_bkts-1); \
} while(0);
#ifdef HASH_USING_NO_STRICT_ALIASING
/* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads.
* For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error.
* MurmurHash uses the faster approach only on CPU's where we know it's safe.
*
* Note the preprocessor built-in defines can be emitted using:
*
* gcc -m64 -dM -E - < /dev/null (on gcc)
* cc -## a.c (where a.c is a simple test file) (Sun Studio)
*/
#if (defined(__i386__) || defined(__x86_64__))
#define MUR_GETBLOCK(p,i) p[i]
#else /* non intel */
#define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 0x3) == 0)
#define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 0x3) == 1)
#define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 0x3) == 2)
#define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 0x3) == 3)
#define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL))
#if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__))
#define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24))
#define MUR_TWO_TWO(p) ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16))
#define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >> 8))
#else /* assume little endian non-intel */
#define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24))
#define MUR_TWO_TWO(p) ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16))
#define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) << 8))
#endif
#define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) : \
(MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \
(MUR_PLUS2_ALIGNED(p) ? MUR_TWO_TWO(p) : \
MUR_ONE_THREE(p))))
#endif
#define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
#define MUR_FMIX(_h) \
do { \
_h ^= _h >> 16; \
_h *= 0x85ebca6b; \
_h ^= _h >> 13; \
_h *= 0xc2b2ae35l; \
_h ^= _h >> 16; \
} while(0)
#define HASH_MUR(key,keylen,num_bkts,hashv,bkt) \
do { \
const uint8_t *_mur_data = (const uint8_t*)(key); \
const int _mur_nblocks = (keylen) / 4; \
uint32_t _mur_h1 = 0xf88D5353; \
uint32_t _mur_c1 = 0xcc9e2d51; \
uint32_t _mur_c2 = 0x1b873593; \
const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+_mur_nblocks*4); \
int _mur_i; \
for(_mur_i = -_mur_nblocks; _mur_i; _mur_i++) { \
uint32_t _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i); \
_mur_k1 *= _mur_c1; \
_mur_k1 = MUR_ROTL32(_mur_k1,15); \
_mur_k1 *= _mur_c2; \
\
_mur_h1 ^= _mur_k1; \
_mur_h1 = MUR_ROTL32(_mur_h1,13); \
_mur_h1 = _mur_h1*5+0xe6546b64; \
} \
const uint8_t *_mur_tail = (const uint8_t*)(_mur_data + _mur_nblocks*4); \
uint32_t _mur_k1=0; \
switch((keylen) & 3) { \
case 3: _mur_k1 ^= _mur_tail[2] << 16; \
case 2: _mur_k1 ^= _mur_tail[1] << 8; \
case 1: _mur_k1 ^= _mur_tail[0]; \
_mur_k1 *= _mur_c1; \
_mur_k1 = MUR_ROTL32(_mur_k1,15); \
_mur_k1 *= _mur_c2; \
_mur_h1 ^= _mur_k1; \
} \
_mur_h1 ^= (keylen); \
MUR_FMIX(_mur_h1); \
hashv = _mur_h1; \
bkt = hashv & (num_bkts-1); \
} while(0)
#endif /* HASH_USING_NO_STRICT_ALIASING */
/* key comparison function; return 0 if keys equal */
#define HASH_KEYCMP(a,b,len) memcmp(a,b,len)
/* iterate over items in a known bucket to find desired item */
#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \
do { \
if (head.hh_head) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,head.hh_head)); \
else out=NULL; \
while (out) { \
if (out->hh.keylen == keylen_in) { \
if ((HASH_KEYCMP(out->hh.key,keyptr,keylen_in)) == 0) break; \
} \
if (out->hh.hh_next) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,out->hh.hh_next)); \
else out = NULL; \
} \
} while(0)
/* add an item to a bucket */
#define HASH_ADD_TO_BKT(head,addhh) \
do { \
head.count++; \
(addhh)->hh_next = head.hh_head; \
(addhh)->hh_prev = NULL; \
if (head.hh_head) { (head).hh_head->hh_prev = (addhh); } \
(head).hh_head=addhh; \
if (head.count >= ((head.expand_mult+1) * HASH_BKT_CAPACITY_THRESH) \
&& (addhh)->tbl->noexpand != 1) { \
HASH_EXPAND_BUCKETS((addhh)->tbl); \
} \
} while(0)
/* remove an item from a given bucket */
#define HASH_DEL_IN_BKT(hh,head,hh_del) \
(head).count--; \
if ((head).hh_head == hh_del) { \
(head).hh_head = hh_del->hh_next; \
} \
if (hh_del->hh_prev) { \
hh_del->hh_prev->hh_next = hh_del->hh_next; \
} \
if (hh_del->hh_next) { \
hh_del->hh_next->hh_prev = hh_del->hh_prev; \
}
/* Bucket expansion has the effect of doubling the number of buckets
* and redistributing the items into the new buckets. Ideally the
* items will distribute more or less evenly into the new buckets
* (the extent to which this is true is a measure of the quality of
* the hash function as it applies to the key domain).
*
* With the items distributed into more buckets, the chain length
* (item count) in each bucket is reduced. Thus by expanding buckets
* the hash keeps a bound on the chain length. This bounded chain
* length is the essence of how a hash provides constant time lookup.
*
* The calculation of tbl->ideal_chain_maxlen below deserves some
* explanation. First, keep in mind that we're calculating the ideal
* maximum chain length based on the *new* (doubled) bucket count.
* In fractions this is just n/b (n=number of items,b=new num buckets).
* Since the ideal chain length is an integer, we want to calculate
* ceil(n/b). We don't depend on floating point arithmetic in this
* hash, so to calculate ceil(n/b) with integers we could write
*
* ceil(n/b) = (n/b) + ((n%b)?1:0)
*
* and in fact a previous version of this hash did just that.
* But now we have improved things a bit by recognizing that b is
* always a power of two. We keep its base 2 log handy (call it lb),
* so now we can write this with a bit shift and logical AND:
*
* ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
*
*/
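/* Worked example: with tbl->num_items = 37 and 16 buckets after doubling
 * (so log2_num_buckets+1 = 4), ideal_chain_maxlen = (37>>4) + ((37 & 15) ? 1 : 0)
 * = 2 + 1 = 3, which is indeed ceil(37/16). */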
#define HASH_EXPAND_BUCKETS(tbl) \
do { \
unsigned _he_bkt; \
unsigned _he_bkt_i; \
struct UT_hash_handle *_he_thh, *_he_hh_nxt; \
UT_hash_bucket *_he_new_buckets, *_he_newbkt; \
_he_new_buckets = (UT_hash_bucket*)uthash_malloc( \
2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \
memset(_he_new_buckets, 0, \
2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
tbl->ideal_chain_maxlen = \
(tbl->num_items >> (tbl->log2_num_buckets+1)) + \
((tbl->num_items & ((tbl->num_buckets*2)-1)) ? 1 : 0); \
tbl->nonideal_items = 0; \
for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \
{ \
_he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \
while (_he_thh) { \
_he_hh_nxt = _he_thh->hh_next; \
HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2, _he_bkt); \
_he_newbkt = &(_he_new_buckets[ _he_bkt ]); \
if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \
tbl->nonideal_items++; \
_he_newbkt->expand_mult = _he_newbkt->count / \
tbl->ideal_chain_maxlen; \
} \
_he_thh->hh_prev = NULL; \
_he_thh->hh_next = _he_newbkt->hh_head; \
if (_he_newbkt->hh_head) _he_newbkt->hh_head->hh_prev = \
_he_thh; \
_he_newbkt->hh_head = _he_thh; \
_he_thh = _he_hh_nxt; \
} \
} \
uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
tbl->num_buckets *= 2; \
tbl->log2_num_buckets++; \
tbl->buckets = _he_new_buckets; \
tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \
(tbl->ineff_expands+1) : 0; \
if (tbl->ineff_expands > 1) { \
tbl->noexpand=1; \
uthash_noexpand_fyi(tbl); \
} \
uthash_expand_fyi(tbl); \
} while(0)
/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */
/* Note that HASH_SORT assumes the hash handle name to be hh.
* HASH_SRT was added to allow the hash handle name to be passed in. */
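/* Illustrative use (user_t and by_id are hypothetical):
 *   int by_id(user_t *a, user_t *b) { return (a->id > b->id) - (a->id < b->id); }
 *   HASH_SORT(users, by_id);
 */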
#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn)
#define HASH_SRT(hh,head,cmpfcn) \
do { \
unsigned _hs_i; \
unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \
struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \
if (head) { \
_hs_insize = 1; \
_hs_looping = 1; \
_hs_list = &((head)->hh); \
while (_hs_looping) { \
_hs_p = _hs_list; \
_hs_list = NULL; \
_hs_tail = NULL; \
_hs_nmerges = 0; \
while (_hs_p) { \
_hs_nmerges++; \
_hs_q = _hs_p; \
_hs_psize = 0; \
for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { \
_hs_psize++; \
_hs_q = (UT_hash_handle*)((_hs_q->next) ? \
((void*)((char*)(_hs_q->next) + \
(head)->hh.tbl->hho)) : NULL); \
if (! (_hs_q) ) break; \
} \
_hs_qsize = _hs_insize; \
while ((_hs_psize > 0) || ((_hs_qsize > 0) && _hs_q )) { \
if (_hs_psize == 0) { \
_hs_e = _hs_q; \
_hs_q = (UT_hash_handle*)((_hs_q->next) ? \
((void*)((char*)(_hs_q->next) + \
(head)->hh.tbl->hho)) : NULL); \
_hs_qsize--; \
} else if ( (_hs_qsize == 0) || !(_hs_q) ) { \
_hs_e = _hs_p; \
_hs_p = (UT_hash_handle*)((_hs_p->next) ? \
((void*)((char*)(_hs_p->next) + \
(head)->hh.tbl->hho)) : NULL); \
_hs_psize--; \
} else if (( \
cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \
DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \
) <= 0) { \
_hs_e = _hs_p; \
_hs_p = (UT_hash_handle*)((_hs_p->next) ? \
((void*)((char*)(_hs_p->next) + \
(head)->hh.tbl->hho)) : NULL); \
_hs_psize--; \
} else { \
_hs_e = _hs_q; \
_hs_q = (UT_hash_handle*)((_hs_q->next) ? \
((void*)((char*)(_hs_q->next) + \
(head)->hh.tbl->hho)) : NULL); \
_hs_qsize--; \
} \
if ( _hs_tail ) { \
_hs_tail->next = ((_hs_e) ? \
ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \
} else { \
_hs_list = _hs_e; \
} \
_hs_e->prev = ((_hs_tail) ? \
ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \
_hs_tail = _hs_e; \
} \
_hs_p = _hs_q; \
} \
_hs_tail->next = NULL; \
if ( _hs_nmerges <= 1 ) { \
_hs_looping=0; \
(head)->hh.tbl->tail = _hs_tail; \
DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \
} \
_hs_insize *= 2; \
} \
HASH_FSCK(hh,head); \
} \
} while (0)
/* This function selects items from one hash into another hash.
* The end result is that the selected items have dual presence
* in both hashes. There is no copy of the items made; rather
 * they are added into the new hash through a secondary hash
 * handle that must be present in the structure. */
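/* Illustrative use (item_t, items, selected and is_even are hypothetical): an
 * item_t carrying two handles, hh and hh2, can sit in both hashes at once:
 *   HASH_SELECT(hh2, selected, hh, items, is_even);
 * where int is_even(void *elt) returns non-zero for the items to select. */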
#define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \
do { \
unsigned _src_bkt, _dst_bkt; \
void *_last_elt=NULL, *_elt; \
UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \
ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \
if (src) { \
for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \
for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \
_src_hh; \
_src_hh = _src_hh->hh_next) { \
_elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \
if (cond(_elt)) { \
_dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \
_dst_hh->key = _src_hh->key; \
_dst_hh->keylen = _src_hh->keylen; \
_dst_hh->hashv = _src_hh->hashv; \
_dst_hh->prev = _last_elt; \
_dst_hh->next = NULL; \
if (_last_elt_hh) { _last_elt_hh->next = _elt; } \
if (!dst) { \
DECLTYPE_ASSIGN(dst,_elt); \
HASH_MAKE_TABLE(hh_dst,dst); \
} else { \
_dst_hh->tbl = (dst)->hh_dst.tbl; \
} \
HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \
HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \
(dst)->hh_dst.tbl->num_items++; \
_last_elt = _elt; \
_last_elt_hh = _dst_hh; \
} \
} \
} \
} \
HASH_FSCK(hh_dst,dst); \
} while (0)
#define HASH_CLEAR(hh,head) \
do { \
if (head) { \
uthash_free((head)->hh.tbl->buckets, \
(head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \
HASH_BLOOM_FREE((head)->hh.tbl); \
uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
(head)=NULL; \
} \
} while(0)
#ifdef NO_DECLTYPE
#define HASH_ITER(hh,head,el,tmp) \
for((el)=(head), (*(char**)(&(tmp)))=(char*)((head)?(head)->hh.next:NULL); \
el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL))
#else
#define HASH_ITER(hh,head,el,tmp) \
for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL); \
el; (el)=(tmp),(tmp)=DECLTYPE(el)((tmp)?(tmp)->hh.next:NULL))
#endif
/* obtain a count of items in the hash */
#define HASH_COUNT(head) HASH_CNT(hh,head)
#define HASH_CNT(hh,head) ((head)?((head)->hh.tbl->num_items):0)
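/* Illustrative deletion-safe traversal (item_t, head, cur and tmp are hypothetical):
 *   item_t *cur, *tmp;
 *   HASH_ITER(hh, head, cur, tmp) { HASH_DEL(head, cur); free(cur); }
 * HASH_ITER keeps a lookahead pointer (tmp), so the current item may be deleted. */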
typedef struct UT_hash_bucket {
struct UT_hash_handle *hh_head;
unsigned count;
/* expand_mult is normally set to 0. In this situation, the max chain length
* threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If
* the bucket's chain exceeds this length, bucket expansion is triggered).
* However, setting expand_mult to a non-zero value delays bucket expansion
* (that would be triggered by additions to this particular bucket)
* until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH.
* (The multiplier is simply expand_mult+1). The whole idea of this
* multiplier is to reduce bucket expansions, since they are expensive, in
* situations where we know that a particular bucket tends to be overused.
* It is better to let its chain length grow to a longer yet-still-bounded
* value, than to do an O(n) bucket expansion too often.
*/
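/* e.g. with expand_mult == 2 an expansion for this bucket is not triggered
 * until its chain reaches 3*HASH_BKT_CAPACITY_THRESH (30) entries. */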
unsigned expand_mult;
} UT_hash_bucket;
/* random signature used only to find hash tables in external analysis */
#define HASH_SIGNATURE 0xa0111fe1
#define HASH_BLOOM_SIGNATURE 0xb12220f2
typedef struct UT_hash_table {
UT_hash_bucket *buckets;
unsigned num_buckets, log2_num_buckets;
unsigned num_items;
struct UT_hash_handle *tail; /* tail hh in app order, for fast append */
ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element) */
/* in an ideal situation (all buckets used equally), no bucket would have
* more than ceil(#items/#buckets) items. that's the ideal chain length. */
unsigned ideal_chain_maxlen;
/* nonideal_items is the number of items in the hash whose chain position
* exceeds the ideal chain maxlen. these items pay the penalty for an uneven
* hash distribution; reaching them in a chain traversal takes >ideal steps */
unsigned nonideal_items;
/* ineffective expands occur when a bucket doubling was performed, but
* afterward, more than half the items in the hash had nonideal chain
* positions. If this happens on two consecutive expansions we inhibit any
* further expansion, as it's not helping; this happens when the hash
* function isn't a good fit for the key domain. When expansion is inhibited
* the hash will still work, albeit no longer in constant time. */
unsigned ineff_expands, noexpand;
uint32_t signature; /* used only to find hash tables in external analysis */
#ifdef HASH_BLOOM
uint32_t bloom_sig; /* used only to test bloom exists in external analysis */
uint8_t *bloom_bv;
char bloom_nbits;
#endif
} UT_hash_table;
typedef struct UT_hash_handle {
struct UT_hash_table *tbl;
void *prev; /* prev element in app order */
void *next; /* next element in app order */
struct UT_hash_handle *hh_prev; /* previous hh in bucket order */
struct UT_hash_handle *hh_next; /* next hh in bucket order */
void *key; /* ptr to enclosing struct's key */
unsigned keylen; /* enclosing struct's key len */
unsigned hashv; /* result of hash-fcn(key) */
} UT_hash_handle;
#endif /* UTHASH_H */