From 1a34224948cc7f377777b8b1a6b44d7e105779d1 Mon Sep 17 00:00:00 2001
From: Gilles Gouaillardet <gilles@rist.or.jp>
Date: Thu, 20 Jul 2017 17:39:16 +0900
Subject: [PATCH 1/4] hwloc: do not set the HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM
 flag

Signed-off-by: Gilles Gouaillardet <gilles@rist.or.jp>
---
 opal/mca/hwloc/base/hwloc_base_dt.c           | 1 -
 opal/mca/hwloc/base/hwloc_base_util.c         | 4 +---
 orte/mca/ess/singleton/ess_singleton_module.c | 2 +-
 orte/test/system/opal_hwloc.c                 | 1 -
 4 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/opal/mca/hwloc/base/hwloc_base_dt.c b/opal/mca/hwloc/base/hwloc_base_dt.c
index 10ab99688a..200ac90c0a 100644
--- a/opal/mca/hwloc/base/hwloc_base_dt.c
+++ b/opal/mca/hwloc/base/hwloc_base_dt.c
@@ -107,7 +107,6 @@ int opal_hwloc_unpack(opal_buffer_t *buffer, void *dest,
          * explicitly set a flag so hwloc sets things up correctly
          */
         if (0 != hwloc_topology_set_flags(t, (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM |
-                                              HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM |
                                               HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) {
             rc = OPAL_ERROR;
             hwloc_topology_destroy(t);
diff --git a/opal/mca/hwloc/base/hwloc_base_util.c b/opal/mca/hwloc/base/hwloc_base_util.c
index cd75ce6111..3f1dfc0dc1 100644
--- a/opal/mca/hwloc/base/hwloc_base_util.c
+++ b/opal/mca/hwloc/base/hwloc_base_util.c
@@ -305,8 +305,7 @@ int opal_hwloc_base_get_topology(void)
     } else if (NULL == opal_hwloc_base_topo_file) {
         if (0 != hwloc_topology_init(&opal_hwloc_topology) ||
             0 != hwloc_topology_set_flags(opal_hwloc_topology,
-                                          (HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM |
-                                           HWLOC_TOPOLOGY_FLAG_IO_DEVICES)) ||
+                                          HWLOC_TOPOLOGY_FLAG_IO_DEVICES) ||
             0 != hwloc_topology_load(opal_hwloc_topology)) {
             return OPAL_ERR_NOT_SUPPORTED;
         }
@@ -356,7 +355,6 @@ int opal_hwloc_base_set_topology(char *topofile)
      */
     if (0 != hwloc_topology_set_flags(opal_hwloc_topology,
                                       (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM |
-                                       HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM |
                                        HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) {
         hwloc_topology_destroy(opal_hwloc_topology);
         return OPAL_ERR_NOT_SUPPORTED;
diff --git a/orte/mca/ess/singleton/ess_singleton_module.c b/orte/mca/ess/singleton/ess_singleton_module.c
index 78cf662e68..e3e2fc81bd 100644
--- a/orte/mca/ess/singleton/ess_singleton_module.c
+++ b/orte/mca/ess/singleton/ess_singleton_module.c
@@ -15,7 +15,7 @@
  * Copyright (c) 2013-2017 Intel, Inc.  All rights reserved.
  * Copyright (c) 2015      Los Alamos National Security, LLC. All rights
  *                         reserved.
- * Copyright (c) 2016      Research Organization for Information Science
+ * Copyright (c) 2016-2017 Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
  * $COPYRIGHT$
  *
diff --git a/orte/test/system/opal_hwloc.c b/orte/test/system/opal_hwloc.c
index f07cbf2bf3..ae2f7f5b40 100644
--- a/orte/test/system/opal_hwloc.c
+++ b/orte/test/system/opal_hwloc.c
@@ -74,7 +74,6 @@ int main(int argc, char* argv[])
      */
     if (0 != hwloc_topology_set_flags(my_topology,
                                       (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM |
-                                       HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM |
                                        HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) {
         hwloc_topology_destroy(my_topology);
         return OPAL_ERR_NOT_SUPPORTED;

From 9f29f3bff4464633550c914d836688a7b0db45aa Mon Sep 17 00:00:00 2001
From: Gilles Gouaillardet <gilles@rist.or.jp>
Date: Wed, 19 Jul 2017 15:23:52 +0900
Subject: [PATCH 2/4] hwloc: since WHOLE_SYSTEM is no longer used, remove useless

checks related to offline and disallowed elements

Signed-off-by: Gilles Gouaillardet <gilles@rist.or.jp>
---
 ompi/mpiext/affinity/c/mpiext_affinity_str.c |   4 +-
 opal/mca/btl/smcuda/btl_smcuda.c             |   9 +-
 opal/mca/hwloc/base/base.h                   |   7 +-
 opal/mca/hwloc/base/hwloc_base_dt.c          |  17 --
 opal/mca/hwloc/base/hwloc_base_util.c        | 256 ++-----------------
 orte/mca/ess/base/ess_base_fns.c             |  11 +-
 orte/mca/plm/base/plm_base_launch_support.c  |   2 -
 orte/mca/ras/simulator/ras_sim_module.c      |   9 +-
 orte/mca/rmaps/base/rmaps_base_binding.c     |  14 +-
 orte/mca/rmaps/ppr/rmaps_ppr.c               |  13 +-
 orte/orted/orted_main.c                      |   8 +-
 11 files changed, 48 insertions(+), 302 deletions(-)

diff --git a/ompi/mpiext/affinity/c/mpiext_affinity_str.c b/ompi/mpiext/affinity/c/mpiext_affinity_str.c
index bc6412da66..9ea81fce4a 100644
--- a/ompi/mpiext/affinity/c/mpiext_affinity_str.c
+++ b/ompi/mpiext/affinity/c/mpiext_affinity_str.c
@@ -131,7 +131,7 @@ static int get_rsrc_current_binding(char str[OMPI_AFFINITY_STRING_MAX])
 
     /* get our root object */
     root = hwloc_get_root_obj(opal_hwloc_topology);
-    rootset = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, root);
+    rootset = root->cpuset;
 
     /* get our bindings */
     boundset = hwloc_bitmap_alloc();
@@ -324,7 +324,7 @@ static int get_layout_current_binding(char str[OMPI_AFFINITY_STRING_MAX])
 
     /* get our root object */
     root = hwloc_get_root_obj(opal_hwloc_topology);
-    rootset = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, root);
+    rootset = root->cpuset;
 
     /* get our bindings */
     boundset = hwloc_bitmap_alloc();
diff --git a/opal/mca/btl/smcuda/btl_smcuda.c b/opal/mca/btl/smcuda/btl_smcuda.c
index 086f776e66..03d3a6a116 100644
--- a/opal/mca/btl/smcuda/btl_smcuda.c
+++ b/opal/mca/btl/smcuda/btl_smcuda.c
@@ -16,7 +16,7 @@
  *                         reserved.
  * Copyright (c) 2012-2015 NVIDIA Corporation.  All rights reserved.
  * Copyright (c) 2012      Oracle and/or its affiliates.  All rights reserved.
- * Copyright (c) 2014      Research Organization for Information Science
+ * Copyright (c) 2014-2017 Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
  * Copyright (c) 2015-2016 Intel, Inc.  All rights reserved.
  * $COPYRIGHT$
@@ -296,7 +296,6 @@ smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl,
             num_mem_nodes > 0 && NULL != opal_process_info.cpuset) {
             int numa=0, w;
             unsigned n_bound=0;
-            hwloc_cpuset_t avail;
             hwloc_obj_t obj;
 
             /* count the number of NUMA nodes to which we are bound */
@@ -306,10 +305,8 @@ smcuda_btl_first_time_init(mca_btl_smcuda_t *smcuda_btl,
                                                                    OPAL_HWLOC_AVAILABLE))) {
                     continue;
                 }
-                /* get that NUMA node's available cpus */
-                avail = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj);
-                /* see if we intersect */
-                if (hwloc_bitmap_intersects(avail, opal_hwloc_my_cpuset)) {
+                /* see if we intersect with that NUMA node's cpus */
+                if (hwloc_bitmap_intersects(obj->cpuset, opal_hwloc_my_cpuset)) {
                     n_bound++;
                     numa = w;
                 }
diff --git a/opal/mca/hwloc/base/base.h b/opal/mca/hwloc/base/base.h
index 0a9c482a74..2f3ab8c5e7 100644
--- a/opal/mca/hwloc/base/base.h
+++ b/opal/mca/hwloc/base/base.h
@@ -1,6 +1,8 @@
 /*
  * Copyright (c) 2011-2017 Cisco Systems, Inc.  All rights reserved
  * Copyright (c) 2013-2017 Intel, Inc.  All rights reserved.
+ * Copyright (c) 2017      Research Organization for Information Science
+ *                         and Technology (RIST). All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -132,9 +134,6 @@ typedef enum {
  */
 OPAL_DECLSPEC extern opal_hwloc_base_mbfa_t opal_hwloc_base_mbfa;
 
-/* some critical helper functions */
-OPAL_DECLSPEC int opal_hwloc_base_filter_cpus(hwloc_topology_t topo);
-
 /**
  * Discover / load the hwloc topology (i.e., call hwloc_topology_init() and
  * hwloc_topology_load()).
@@ -150,8 +149,6 @@ OPAL_DECLSPEC int opal_hwloc_base_set_topology(char *topofile);
  * Free the hwloc topology.
  */
 OPAL_DECLSPEC void opal_hwloc_base_free_topology(hwloc_topology_t topo);
-OPAL_DECLSPEC hwloc_cpuset_t opal_hwloc_base_get_available_cpus(hwloc_topology_t topo,
-                                                                hwloc_obj_t obj);
 OPAL_DECLSPEC unsigned int opal_hwloc_base_get_nbobjs_by_type(hwloc_topology_t topo,
                                                               hwloc_obj_type_t target,
                                                               unsigned cache_level,
diff --git a/opal/mca/hwloc/base/hwloc_base_dt.c b/opal/mca/hwloc/base/hwloc_base_dt.c
index 200ac90c0a..4f680788ec 100644
--- a/opal/mca/hwloc/base/hwloc_base_dt.c
+++ b/opal/mca/hwloc/base/hwloc_base_dt.c
@@ -136,11 +136,6 @@ int opal_hwloc_unpack(opal_buffer_t *buffer, void *dest,
             goto cleanup;
         }
 
-        /* filter the cpus thru any default cpu set */
-        if (OPAL_SUCCESS != (rc = opal_hwloc_base_filter_cpus(t))) {
-            goto cleanup;
-        }
-
         /* pass it back */
         tarray[i] = t;
 
@@ -268,18 +263,6 @@ static void print_hwloc_obj(char **output, char *prefix,
         free(tmp);
         tmp = tmp2;
     }
-    if (NULL != obj->online_cpuset) {
-        hwloc_bitmap_snprintf(string, OPAL_HWLOC_MAX_STRING, obj->online_cpuset);
-        asprintf(&tmp2, "%s%sOnline:  %s", tmp, pfx, string);
-        free(tmp);
-        tmp = tmp2;
-    }
-    if (NULL != obj->allowed_cpuset) {
-        hwloc_bitmap_snprintf(string, OPAL_HWLOC_MAX_STRING, obj->allowed_cpuset);
-        asprintf(&tmp2, "%s%sAllowed: %s", tmp, pfx, string);
-        free(tmp);
-        tmp = tmp2;
-    }
     if (HWLOC_OBJ_MACHINE == obj->type) {
         /* root level object - add support values */
         support = (struct hwloc_topology_support*)hwloc_topology_get_support(topo);
diff --git a/opal/mca/hwloc/base/hwloc_base_util.c b/opal/mca/hwloc/base/hwloc_base_util.c
index 3f1dfc0dc1..0c23af43f3 100644
--- a/opal/mca/hwloc/base/hwloc_base_util.c
+++ b/opal/mca/hwloc/base/hwloc_base_util.c
@@ -110,100 +110,6 @@ hwloc_obj_t opal_hwloc_base_get_pu(hwloc_topology_t topo,
     return obj;
 }
 
-/* determine the node-level available cpuset based on
- * online vs allowed vs user-specified cpus
- */
-int opal_hwloc_base_filter_cpus(hwloc_topology_t topo)
-{
-    hwloc_obj_t root, pu;
-    hwloc_cpuset_t avail = NULL, pucpus, res;
-    opal_hwloc_topo_data_t *sum;
-    opal_hwloc_obj_data_t *data;
-    char **ranges=NULL, **range=NULL;
-    int idx, cpu, start, end;
-
-    root = hwloc_get_root_obj(topo);
-
-    if (NULL == root->userdata) {
-        root->userdata = (void*)OBJ_NEW(opal_hwloc_topo_data_t);
-    }
-    sum = (opal_hwloc_topo_data_t*)root->userdata;
-
-    /* should only ever enter here once, but check anyway */
-    if (NULL != sum->available) {
-        return OPAL_SUCCESS;
-    }
-
-    /* process any specified default cpu set against this topology */
-    if (NULL == opal_hwloc_base_cpu_list) {
-        /* get the root available cpuset */
-        avail = hwloc_bitmap_alloc();
-        hwloc_bitmap_and(avail, root->online_cpuset, root->allowed_cpuset);
-        OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output,
-                             "hwloc:base: no cpus specified - using root available cpuset"));
-    } else {
-        OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output,
-                             "hwloc:base: filtering cpuset"));
-        /* find the specified logical cpus */
-        ranges = opal_argv_split(opal_hwloc_base_cpu_list, ',');
-        avail = hwloc_bitmap_alloc();
-        hwloc_bitmap_zero(avail);
-        res = hwloc_bitmap_alloc();
-        pucpus = hwloc_bitmap_alloc();
-        for (idx=0; idx < opal_argv_count(ranges); idx++) {
-            range = opal_argv_split(ranges[idx], '-');
-            switch (opal_argv_count(range)) {
-            case 1:
-                /* only one cpu given - get that object */
-                cpu = strtoul(range[0], NULL, 10);
-                if (NULL != (pu = opal_hwloc_base_get_pu(topo, cpu, OPAL_HWLOC_LOGICAL))) {
-                    hwloc_bitmap_and(pucpus, pu->online_cpuset, pu->allowed_cpuset);
-                    hwloc_bitmap_or(res, avail, pucpus);
-                    hwloc_bitmap_copy(avail, res);
-                    data = (opal_hwloc_obj_data_t*)pu->userdata;
-                    if (NULL == data) {
-                        pu->userdata = (void*)OBJ_NEW(opal_hwloc_obj_data_t);
-                        data = (opal_hwloc_obj_data_t*)pu->userdata;
-                    }
-                    data->npus++;
-                }
-                break;
-            case 2:
-                /* range given */
-                start = strtoul(range[0], NULL, 10);
-                end = strtoul(range[1], NULL, 10);
-                for (cpu=start; cpu <= end; cpu++) {
-                    if (NULL != (pu = opal_hwloc_base_get_pu(topo, cpu, OPAL_HWLOC_LOGICAL))) {
-                        hwloc_bitmap_and(pucpus, pu->online_cpuset, pu->allowed_cpuset);
-                        hwloc_bitmap_or(res, avail, pucpus);
-                        hwloc_bitmap_copy(avail, res);
-                        data = (opal_hwloc_obj_data_t*)pu->userdata;
-                        if (NULL == data) {
-                            pu->userdata = (void*)OBJ_NEW(opal_hwloc_obj_data_t);
-                            data = (opal_hwloc_obj_data_t*)pu->userdata;
-                        }
-                        data->npus++;
-                    }
-                }
-                break;
-            default:
-                break;
-            }
-            opal_argv_free(range);
-        }
-        if (NULL != ranges) {
-            opal_argv_free(ranges);
-        }
-        hwloc_bitmap_free(res);
-        hwloc_bitmap_free(pucpus);
-    }
-
-    /* cache this info */
-    sum->available = avail;
-
-    return OPAL_SUCCESS;
-}
-
 static void fill_cache_line_size(void)
 {
     int i = 0, cache_level = 2;
@@ -297,11 +203,6 @@ int opal_hwloc_base_get_topology(void)
             return OPAL_ERROR;
         }
         free(val);
-        /* filter the cpus thru any default cpu set */
-        if (OPAL_SUCCESS != (rc = opal_hwloc_base_filter_cpus(opal_hwloc_topology))) {
-            hwloc_topology_destroy(opal_hwloc_topology);
-            return rc;
-        }
     } else if (NULL == opal_hwloc_base_topo_file) {
         if (0 != hwloc_topology_init(&opal_hwloc_topology) ||
             0 != hwloc_topology_set_flags(opal_hwloc_topology,
@@ -309,9 +210,6 @@ int opal_hwloc_base_get_topology(void)
             0 != hwloc_topology_load(opal_hwloc_topology)) {
             return OPAL_ERR_NOT_SUPPORTED;
         }
-        if (OPAL_SUCCESS != (rc = opal_hwloc_base_filter_cpus(opal_hwloc_topology))) {
-            return rc;
-        }
     } else {
         if (OPAL_SUCCESS != (rc = opal_hwloc_base_set_topology(opal_hwloc_base_topo_file))) {
             return rc;
@@ -333,7 +231,6 @@ int opal_hwloc_base_get_topology(void)
 int opal_hwloc_base_set_topology(char *topofile)
 {
     struct hwloc_topology_support *support;
-    int rc;
 
      OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output,
                           "hwloc:base:set_topology %s", topofile));
@@ -375,12 +272,6 @@ int opal_hwloc_base_set_topology(char *topofile)
     support->cpubind->set_thisproc_cpubind = true;
     support->membind->set_thisproc_membind = true;
 
-    /* filter the cpus thru any default cpu set */
-    rc = opal_hwloc_base_filter_cpus(opal_hwloc_topology);
-    if (OPAL_SUCCESS != rc) {
-        return rc;
-    }
-
     /* fill opal_cache_line_size global with the smallest L1 cache
        line size */
     fill_cache_line_size();
@@ -432,7 +323,6 @@ void opal_hwloc_base_free_topology(hwloc_topology_t topo)
 void opal_hwloc_base_get_local_cpuset(void)
 {
     hwloc_obj_t root;
-    hwloc_cpuset_t base_cpus;
 
     if (NULL != opal_hwloc_topology) {
         if (NULL == opal_hwloc_my_cpuset) {
@@ -445,8 +335,7 @@ void opal_hwloc_base_get_local_cpuset(void)
                               HWLOC_CPUBIND_PROCESS) < 0) {
             /* we are not bound - use the root's available cpuset */
             root = hwloc_get_root_obj(opal_hwloc_topology);
-            base_cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, root);
-            hwloc_bitmap_copy(opal_hwloc_my_cpuset, base_cpus);
+            hwloc_bitmap_copy(opal_hwloc_my_cpuset, root->cpuset);
         }
     }
 }
@@ -474,72 +363,6 @@ int opal_hwloc_base_report_bind_failure(const char *file,
     return OPAL_SUCCESS;
 }
 
-hwloc_cpuset_t opal_hwloc_base_get_available_cpus(hwloc_topology_t topo,
-                                                  hwloc_obj_t obj)
-{
-    hwloc_obj_t root;
-    hwloc_cpuset_t avail, specd=NULL;
-    opal_hwloc_topo_data_t *rdata;
-    opal_hwloc_obj_data_t *data;
-
-    OPAL_OUTPUT_VERBOSE((10, opal_hwloc_base_framework.framework_output,
-                         "hwloc:base: get available cpus"));
-
-    /* get the node-level information */
-    root = hwloc_get_root_obj(topo);
-    rdata = (opal_hwloc_topo_data_t*)root->userdata;
-    /* bozo check */
-    if (NULL == rdata) {
-        rdata = OBJ_NEW(opal_hwloc_topo_data_t);
-        root->userdata = (void*)rdata;
-        OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output,
-                             "hwloc:base:get_available_cpus first time - filtering cpus"));
-    }
-
-    /* are we asking about the root object? */
-    if (obj == root) {
-        OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output,
-                             "hwloc:base:get_available_cpus root object"));
-        return rdata->available;
-    }
-
-    /* some hwloc object types don't have cpus */
-    if (NULL == obj->online_cpuset || NULL == obj->allowed_cpuset) {
-        return NULL;
-    }
-
-    /* see if we already have this info */
-    if (NULL == (data = (opal_hwloc_obj_data_t*)obj->userdata)) {
-        /* nope - create the object */
-        data = OBJ_NEW(opal_hwloc_obj_data_t);
-        obj->userdata = (void*)data;
-    }
-
-    /* do we have the cpuset */
-    if (NULL != data->available) {
-        return data->available;
-    }
-
-    /* find the available processors on this object */
-    avail = hwloc_bitmap_alloc();
-    hwloc_bitmap_and(avail, obj->online_cpuset, obj->allowed_cpuset);
-
-    /* filter this against the node-available processors */
-    if (NULL == rdata->available) {
-        hwloc_bitmap_free(avail);
-        return NULL;
-    }
-    specd = hwloc_bitmap_alloc();
-    hwloc_bitmap_and(specd, avail, rdata->available);
-
-    /* cache the info */
-    data->available = specd;
-
-    /* cleanup */
-    hwloc_bitmap_free(avail);
-    return specd;
-}
-
 static void df_search_cores(hwloc_obj_t obj, unsigned int *cnt)
 {
     unsigned k;
@@ -552,13 +375,6 @@ static void df_search_cores(hwloc_obj_t obj, unsigned int *cnt)
             obj->userdata = (void*)data;
         }
         if (NULL == opal_hwloc_base_cpu_list) {
-            if (!hwloc_bitmap_intersects(obj->cpuset, obj->allowed_cpuset)) {
-                /*
-                 * do not count not allowed cores (e.g. cores with zero allowed PU)
-                 * if SMT is enabled, do count cores with at least one allowed hwthread
-                 */
-                return;
-            }
             data->npus = 1;
         }
         *cnt += data->npus;
@@ -605,7 +421,6 @@ unsigned int opal_hwloc_base_get_npus(hwloc_topology_t topo,
 {
     opal_hwloc_obj_data_t *data;
     unsigned int cnt = 0;
-    hwloc_cpuset_t cpuset;
 
     data = (opal_hwloc_obj_data_t*)obj->userdata;
     if (NULL == data || !data->npus_calculated) {
@@ -629,12 +444,13 @@ unsigned int opal_hwloc_base_get_npus(hwloc_topology_t topo,
                 df_search_cores(obj, &cnt);
             }
         } else {
+            hwloc_cpuset_t cpuset;
 
             /* if we are treating cores as cpus, or the system can't detect
              * "cores", then get the available cpuset for this object - this will
              * create and store the data
              */
-            if (NULL == (cpuset = opal_hwloc_base_get_available_cpus(topo, obj))) {
+            if (NULL == (cpuset = obj->cpuset)) {
                 return 0;
             }
             /* count the number of bits that are set - there is
@@ -795,7 +611,7 @@ static hwloc_obj_t df_search(hwloc_topology_t topo,
             }
             /* see if we already know our available cpuset */
             if (NULL == data->available) {
-                data->available = opal_hwloc_base_get_available_cpus(topo, start);
+                data->available = hwloc_bitmap_dup(start->cpuset);
             }
             if (NULL != data->available && !hwloc_bitmap_iszero(data->available)) {
                 if (NULL != num_objs) {
@@ -1092,7 +908,6 @@ static int socket_to_cpu_set(char *cpus,
     int lower_range, upper_range;
     int socket_id;
     hwloc_obj_t obj;
-    hwloc_bitmap_t res;
 
     if ('*' == cpus[0]) {
         /* requesting cpumask for ALL sockets */
@@ -1100,8 +915,7 @@ static int socket_to_cpu_set(char *cpus,
         /* set to all available processors - essentially,
          * this specification equates to unbound
          */
-        res = opal_hwloc_base_get_available_cpus(topo, obj);
-        hwloc_bitmap_or(cpumask, cpumask, res);
+        hwloc_bitmap_or(cpumask, cpumask, obj->cpuset);
         return OPAL_SUCCESS;
     }
 
@@ -1112,8 +926,7 @@ static int socket_to_cpu_set(char *cpus,
         socket_id = atoi(range[0]);
         obj = opal_hwloc_base_get_obj_by_type(topo, HWLOC_OBJ_SOCKET, 0, socket_id, rtype);
         /* get the available cpus for this socket */
-        res = opal_hwloc_base_get_available_cpus(topo, obj);
-        hwloc_bitmap_or(cpumask, cpumask, res);
+        hwloc_bitmap_or(cpumask, cpumask, obj->cpuset);
         break;
 
     case 2:  /* range of sockets was given */
@@ -1122,10 +935,8 @@ static int socket_to_cpu_set(char *cpus,
         /* cycle across the range of sockets */
         for (socket_id=lower_range; socket_id<=upper_range; socket_id++) {
             obj = opal_hwloc_base_get_obj_by_type(topo, HWLOC_OBJ_SOCKET, 0, socket_id, rtype);
-            /* get the available cpus for this socket */
-            res = opal_hwloc_base_get_available_cpus(topo, obj);
-            /* set the corresponding bits in the bitmask */
-            hwloc_bitmap_or(cpumask, cpumask, res);
+            /* set the bits for this socket's available cpus in the bitmask */
+            hwloc_bitmap_or(cpumask, cpumask, obj->cpuset);
         }
         break;
     default:
@@ -1149,7 +960,6 @@ static int socket_core_to_cpu_set(char *socket_core_list,
     int lower_range, upper_range;
     int socket_id, core_id;
     hwloc_obj_t socket, core;
-    hwloc_cpuset_t res;
     unsigned int idx;
     hwloc_obj_type_t obj_type = HWLOC_OBJ_CORE;
 
@@ -1179,9 +989,8 @@ static int socket_core_to_cpu_set(char *socket_core_list,
             corestr = socket_core[i];
         }
         if ('*' == corestr[0]) {
-            /* set to all available cpus on this socket */
-            res = opal_hwloc_base_get_available_cpus(topo, socket);
-            hwloc_bitmap_or(cpumask, cpumask, res);
+            /* set to all cpus on this socket */
+            hwloc_bitmap_or(cpumask, cpumask, socket->cpuset);
             /* we are done - already assigned all cores! */
             rc = OPAL_SUCCESS;
             break;
@@ -1205,8 +1014,7 @@ static int socket_core_to_cpu_set(char *socket_core_list,
                         return OPAL_ERR_NOT_FOUND;
                     }
                     /* get the cpus */
-                    res = opal_hwloc_base_get_available_cpus(topo, core);
-                    hwloc_bitmap_or(cpumask, cpumask, res);
+                    hwloc_bitmap_or(cpumask, cpumask, core->cpuset);
                 }
                 opal_argv_free(list);
                 break;
@@ -1227,10 +1035,8 @@ static int socket_core_to_cpu_set(char *socket_core_list,
                         opal_argv_free(socket_core);
                         return OPAL_ERR_NOT_FOUND;
                     }
-                    /* get the cpus */
-                    res = opal_hwloc_base_get_available_cpus(topo, core);
-                    /* add them into the result */
-                    hwloc_bitmap_or(cpumask, cpumask, res);
+                    /* get the cpus and add them into the result */
+                    hwloc_bitmap_or(cpumask, cpumask, core->cpuset);
                 }
                 break;
 
@@ -1255,7 +1061,6 @@ int opal_hwloc_base_cpu_list_parse(const char *slot_str,
     char **item, **rngs;
     int rc, i, j, k;
     hwloc_obj_t pu;
-    hwloc_cpuset_t pucpus;
     char **range, **list;
     size_t range_cnt;
     int core_id, lower_range, upper_range;
@@ -1349,10 +1154,8 @@ int opal_hwloc_base_cpu_list_parse(const char *slot_str,
                             opal_argv_free(list);
                             return OPAL_ERR_SILENT;
                         }
-                        /* get the available cpus for that object */
-                        pucpus = opal_hwloc_base_get_available_cpus(topo, pu);
-                        /* set that in the mask */
-                        hwloc_bitmap_or(cpumask, cpumask, pucpus);
+                        /* get the cpus for that object and set them in the mask */
+                        hwloc_bitmap_or(cpumask, cpumask, pu->cpuset);
                     }
                     opal_argv_free(list);
                     break;
@@ -1368,10 +1171,8 @@ int opal_hwloc_base_cpu_list_parse(const char *slot_str,
                             opal_argv_free(rngs);
                             return OPAL_ERR_SILENT;
                         }
-                        /* get the available cpus for that object */
-                        pucpus = opal_hwloc_base_get_available_cpus(topo, pu);
-                        /* set that in the mask */
-                        hwloc_bitmap_or(cpumask, cpumask, pucpus);
+                        /* get the cpus for that object and set them in the mask */
+                        hwloc_bitmap_or(cpumask, cpumask, pu->cpuset);
                     }
                     break;
 
@@ -1396,7 +1197,6 @@ opal_hwloc_locality_t opal_hwloc_base_get_relative_locality(hwloc_topology_t top
     opal_hwloc_locality_t locality;
     hwloc_obj_t obj;
     unsigned depth, d, width, w;
-    hwloc_cpuset_t avail;
     bool shared;
     hwloc_obj_type_t type;
     int sect1, sect2;
@@ -1444,11 +1244,9 @@ opal_hwloc_locality_t opal_hwloc_base_get_relative_locality(hwloc_topology_t top
         for (w=0; w < width; w++) {
             /* get the object at this depth/index */
             obj = hwloc_get_obj_by_depth(topo, d, w);
-            /* get the available cpuset for this obj */
-            avail = opal_hwloc_base_get_available_cpus(topo, obj);
-            /* see if our locations intersect with it */
-            sect1 = hwloc_bitmap_intersects(avail, loc1);
-            sect2 = hwloc_bitmap_intersects(avail, loc2);
+            /* see if our locations intersect with the cpuset for this obj */
+            sect1 = hwloc_bitmap_intersects(obj->cpuset, loc1);
+            sect2 = hwloc_bitmap_intersects(obj->cpuset, loc2);
             /* if both intersect, then we share this level */
             if (sect1 && sect2) {
                 shared = true;
@@ -1864,9 +1662,7 @@ int opal_hwloc_base_cset2str(char *str, int len,
 
     /* if the cpuset includes all available cpus, then we are unbound */
     root = hwloc_get_root_obj(topo);
-    if (NULL == root->userdata) {
-        opal_hwloc_base_filter_cpus(topo);
-    } else {
+    if (NULL != root->userdata) {
         sum = (opal_hwloc_topo_data_t*)root->userdata;
         if (NULL == sum->available) {
            return OPAL_ERROR;
@@ -1934,9 +1730,7 @@ int opal_hwloc_base_cset2mapstr(char *str, int len,
 
     /* if the cpuset includes all available cpus, then we are unbound */
     root = hwloc_get_root_obj(topo);
-    if (NULL == root->userdata) {
-        opal_hwloc_base_filter_cpus(topo);
-    } else {
+    if (NULL != root->userdata) {
         sum = (opal_hwloc_topo_data_t*)root->userdata;
         if (NULL == sum->available) {
            return OPAL_ERROR;
@@ -2201,7 +1995,7 @@ char* opal_hwloc_base_get_locality_string(hwloc_topology_t topo,
     hwloc_obj_t obj;
     char *locality=NULL, *tmp, *t2;
     unsigned depth, d, width, w;
-    hwloc_cpuset_t cpuset, avail, result;
+    hwloc_cpuset_t cpuset, result;
     hwloc_obj_type_t type;
 
     /* if this proc is not bound, then there is no locality. We
@@ -2249,10 +2043,8 @@ char* opal_hwloc_base_get_locality_string(hwloc_topology_t topo,
         for (w=0; w < width; w++) {
             /* get the object at this depth/index */
             obj = hwloc_get_obj_by_depth(topo, d, w);
-            /* get the available cpuset for this obj */
-            avail = opal_hwloc_base_get_available_cpus(topo, obj);
             /* see if the location intersects with it */
-            if (hwloc_bitmap_intersects(avail, cpuset)) {
+            if (hwloc_bitmap_intersects(obj->cpuset, cpuset)) {
                 hwloc_bitmap_set(result, w);
             }
         }
diff --git a/orte/mca/ess/base/ess_base_fns.c b/orte/mca/ess/base/ess_base_fns.c
index f40814ef5b..ae29db2874 100644
--- a/orte/mca/ess/base/ess_base_fns.c
+++ b/orte/mca/ess/base/ess_base_fns.c
@@ -13,7 +13,7 @@
  * Copyright (c) 2011-2012 Los Alamos National Security, LLC.
  *                         All rights reserved.
  * Copyright (c) 2014-2017 Intel, Inc.  All rights reserved.
- * Copyright (c) 2014      Research Organization for Information Science
+ * Copyright (c) 2014-2017 Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
  * $COPYRIGHT$
  *
@@ -113,7 +113,7 @@ int orte_ess_base_proc_binding(void)
         support = (struct hwloc_topology_support*)hwloc_topology_get_support(opal_hwloc_topology);
         /* get our node object */
         node = hwloc_get_root_obj(opal_hwloc_topology);
-        nodeset = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, node);
+        nodeset = node->cpuset;
         /* get our bindings */
         cpus = hwloc_bitmap_alloc();
         if (hwloc_get_cpubind(opal_hwloc_topology, cpus, HWLOC_CPUBIND_PROCESS) < 0) {
@@ -191,14 +191,13 @@ int orte_ess_base_proc_binding(void)
                         error = "Getting hwthread object";
                         goto error;
                     }
-                    cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj);
+                    cpus = obj->cpuset;
                     if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) {
                         ret = ORTE_ERROR;
                         error = "Setting processor affinity failed";
                         goto error;
                     }
                     hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
-                    hwloc_bitmap_free(cpus);
                     OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
                                          "%s Process bound to hwthread",
                                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
@@ -212,7 +211,7 @@ int orte_ess_base_proc_binding(void)
                         error = "Getting core object";
                         goto error;
                     }
-                    cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj);
+                    cpus = obj->cpuset;
                     if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) {
                         error = "Setting processor affinity failed";
                         ret = ORTE_ERROR;
@@ -256,7 +255,7 @@ int orte_ess_base_proc_binding(void)
                                 continue;
                             }
                             /* this is the place! */
-                            cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj);
+                            cpus = obj->cpuset;
                             if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) {
                                 ret = ORTE_ERROR;
                                 error = "Setting processor affinity failed";
diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c
index 1f68cf4dbe..7af4521a24 100644
--- a/orte/mca/plm/base/plm_base_launch_support.c
+++ b/orte/mca/plm/base/plm_base_launch_support.c
@@ -944,8 +944,6 @@ void orte_plm_base_daemon_topology(int status, orte_process_name_t* sender,
         orted_failed_launch = true;
         goto CLEANUP;
     }
-    /* filter the topology as we'll need it that way later */
-    opal_hwloc_base_filter_cpus(topo);
     /* record the final topology */
     t->topo = topo;
 
diff --git a/orte/mca/ras/simulator/ras_sim_module.c b/orte/mca/ras/simulator/ras_sim_module.c
index fc8f74194e..9e71040713 100644
--- a/orte/mca/ras/simulator/ras_sim_module.c
+++ b/orte/mca/ras/simulator/ras_sim_module.c
@@ -1,7 +1,7 @@
 /*
  * Copyright (c) 2011-2017 Cisco Systems, Inc.  All rights reserved
  * Copyright (c) 2012      Los Alamos National Security, LLC. All rights reserved
- * Copyright (c) 2015      Research Organization for Information Science
+ * Copyright (c) 2015-2017 Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
  * Copyright (c) 2015-2017 Intel, Inc.  All rights reserved.
  *
@@ -205,13 +205,6 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes)
                 hwloc_topology_destroy(topo);
                 goto error_silent;
             }
-            if (OPAL_SUCCESS != opal_hwloc_base_filter_cpus(topo)) {
-                orte_show_help("help-ras-simulator.txt",
-                               "hwloc API fail", true,
-                               __FILE__, __LINE__, "opal_hwloc_base_filter_cpus");
-                hwloc_topology_destroy(topo);
-                goto error_silent;
-            }
             /* remove the hostname from the topology. Unfortunately, hwloc
              * decided to add the source hostname to the "topology", thus
              * rendering it unusable as a pure topological description. So
diff --git a/orte/mca/rmaps/base/rmaps_base_binding.c b/orte/mca/rmaps/base/rmaps_base_binding.c
index 0de8defa08..a524823e1a 100644
--- a/orte/mca/rmaps/base/rmaps_base_binding.c
+++ b/orte/mca/rmaps/base/rmaps_base_binding.c
@@ -13,7 +13,7 @@
  * Copyright (c) 2011-2012 Los Alamos National Security, LLC.
  *                         All rights reserved.
  * Copyright (c) 2013-2017 Intel, Inc.  All rights reserved.
- * Copyright (c) 2015      Research Organization for Information Science
+ * Copyright (c) 2015-2017 Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
  * $COPYRIGHT$
  *
@@ -133,7 +133,6 @@ static int bind_upwards(orte_job_t *jdata,
     orte_job_map_t *map;
     orte_proc_t *proc;
     hwloc_obj_t obj;
-    hwloc_cpuset_t cpus;
     unsigned int idx, ncpus;
     opal_hwloc_obj_data_t *data;
     hwloc_obj_t locale;
@@ -210,8 +209,7 @@ static int bind_upwards(orte_job_t *jdata,
                     }
                 }
                 /* bind it here */
-                cpus = opal_hwloc_base_get_available_cpus(node->topology->topo, obj);
-                hwloc_bitmap_list_asprintf(&cpu_bitmap, cpus);
+                hwloc_bitmap_list_asprintf(&cpu_bitmap, obj->cpuset);
                 orte_set_attribute(&proc->attributes, ORTE_PROC_CPU_BITMAP, ORTE_ATTR_GLOBAL, cpu_bitmap, OPAL_STRING);
                 /* record the location */
                 orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_BOUND, ORTE_ATTR_LOCAL, obj, OPAL_PTR);
@@ -250,7 +248,6 @@ static int bind_downwards(orte_job_t *jdata,
     orte_job_map_t *map;
     orte_proc_t *proc;
     hwloc_obj_t trg_obj, nxt_obj;
-    hwloc_cpuset_t cpus;
     unsigned int ncpus;
     opal_hwloc_obj_data_t *data;
     int total_cpus;
@@ -344,8 +341,7 @@ static int bind_downwards(orte_job_t *jdata,
                 }
             }
             /* bind the proc here */
-            cpus = opal_hwloc_base_get_available_cpus(node->topology->topo, trg_obj);
-            hwloc_bitmap_or(totalcpuset, totalcpuset, cpus);
+            hwloc_bitmap_or(totalcpuset, totalcpuset, trg_obj->cpuset);
             /* track total #cpus */
             total_cpus += ncpus;
             /* move to the next location, in case we need it */
@@ -395,7 +391,6 @@ static int bind_in_place(orte_job_t *jdata,
     orte_job_map_t *map;
     orte_node_t *node;
     orte_proc_t *proc;
-    hwloc_cpuset_t cpus;
     unsigned int idx, ncpus;
     struct hwloc_topology_support *support;
     opal_hwloc_obj_data_t *data;
@@ -566,8 +561,7 @@ static int bind_in_place(orte_job_t *jdata,
                                 ORTE_NAME_PRINT(&proc->name),
                                 hwloc_obj_type_string(locale->type), idx);
             /* bind the proc here */
-            cpus = opal_hwloc_base_get_available_cpus(node->topology->topo, locale);
-            hwloc_bitmap_list_asprintf(&cpu_bitmap, cpus);
+            hwloc_bitmap_list_asprintf(&cpu_bitmap, locale->cpuset);
             orte_set_attribute(&proc->attributes, ORTE_PROC_CPU_BITMAP, ORTE_ATTR_GLOBAL, cpu_bitmap, OPAL_STRING);
             /* update the location, in case it changed */
             orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_BOUND, ORTE_ATTR_LOCAL, locale, OPAL_PTR);
diff --git a/orte/mca/rmaps/ppr/rmaps_ppr.c b/orte/mca/rmaps/ppr/rmaps_ppr.c
index 6524337dfd..33495a0143 100644
--- a/orte/mca/rmaps/ppr/rmaps_ppr.c
+++ b/orte/mca/rmaps/ppr/rmaps_ppr.c
@@ -3,7 +3,7 @@
  * Copyright (c) 2011      Los Alamos National Security, LLC.
  *                         All rights reserved.
  * Copyright (c) 2014-2017 Intel, Inc.  All rights reserved.
- * Copyright (c) 2015      Research Organization for Information Science
+ * Copyright (c) 2015-2017 Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
  * $COPYRIGHT$
  *
@@ -441,7 +441,7 @@ static void prune(orte_jobid_t jobid,
     hwloc_obj_type_t lvl;
     unsigned cache_level = 0, k;
     int nprocs;
-    hwloc_cpuset_t avail, cpus, childcpus;
+    hwloc_cpuset_t avail;
     int n, limit, nmax, nunder, idx, idxmax = 0;
     orte_proc_t *proc, *pptr, *procmax;
     opal_hwloc_level_t ll;
@@ -492,7 +492,7 @@ static void prune(orte_jobid_t jobid,
                                               lvl, cache_level,
                                               i, OPAL_HWLOC_AVAILABLE);
         /* get the available cpuset */
-        avail = opal_hwloc_base_get_available_cpus(node->topology->topo, obj);
+        avail = obj->cpuset;
 
         /* look at the intersection of this object's cpuset and that
          * of each proc in the job/app - if they intersect, then count this proc
@@ -512,8 +512,7 @@ static void prune(orte_jobid_t jobid,
                 ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
                 return;
             }
-            cpus = opal_hwloc_base_get_available_cpus(node->topology->topo, locale);
-            if (hwloc_bitmap_intersects(avail, cpus)) {
+            if (hwloc_bitmap_intersects(avail, locale->cpuset)) {
                 nprocs++;
             }
         }
@@ -550,7 +549,6 @@ static void prune(orte_jobid_t jobid,
             /* find the child with the most procs underneath it */
             for (k=0; k < top->arity && limit < nprocs; k++) {
                 /* get this object's available cpuset */
-                childcpus = opal_hwloc_base_get_available_cpus(node->topology->topo, top->children[k]);
                 nunder = 0;
                 pptr = NULL;
                 for (n=0; n < node->procs->size; n++) {
@@ -566,8 +564,7 @@ static void prune(orte_jobid_t jobid,
                         ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
                         return;
                     }
-                    cpus = opal_hwloc_base_get_available_cpus(node->topology->topo, locale);
-                    if (hwloc_bitmap_intersects(childcpus, cpus)) {
+                    if (hwloc_bitmap_intersects(top->children[k]->cpuset, locale->cpuset)) {
                         nunder++;
                         if (NULL == pptr) {
                             /* save the location of the first proc under this object */
diff --git a/orte/orted/orted_main.c b/orte/orted/orted_main.c
index 53a271e440..99af5e7612 100644
--- a/orte/orted/orted_main.c
+++ b/orte/orted/orted_main.c
@@ -362,7 +362,7 @@ int orte_daemon(int argc, char *argv[])
     if (NULL != orte_daemon_cores) {
         char **cores=NULL, tmp[128];
         hwloc_obj_t pu;
-        hwloc_cpuset_t ours, pucpus, res;
+        hwloc_cpuset_t ours, res;
         int core;
 
         /* could be a collection of comma-delimited ranges, so
@@ -372,7 +372,6 @@ int orte_daemon(int argc, char *argv[])
         if (NULL != cores) {
             ours = hwloc_bitmap_alloc();
             hwloc_bitmap_zero(ours);
-            pucpus = hwloc_bitmap_alloc();
             res = hwloc_bitmap_alloc();
             for (i=0; NULL != cores[i]; i++) {
                 core = strtoul(cores[i], NULL, 10);
@@ -387,12 +386,10 @@ int orte_daemon(int argc, char *argv[])
                                    orte_daemon_cores);
                     ret = ORTE_ERR_NOT_SUPPORTED;
                     hwloc_bitmap_free(ours);
-                    hwloc_bitmap_free(pucpus);
                     hwloc_bitmap_free(res);
                     goto DONE;
                 }
-                hwloc_bitmap_and(pucpus, pu->online_cpuset, pu->allowed_cpuset);
-                hwloc_bitmap_or(res, ours, pucpus);
+                hwloc_bitmap_or(res, ours, pu->cpuset);
                 hwloc_bitmap_copy(ours, res);
             }
             /* if the result is all zeros, then don't bind */
@@ -406,7 +403,6 @@ int orte_daemon(int argc, char *argv[])
             }
             /* cleanup */
             hwloc_bitmap_free(ours);
-            hwloc_bitmap_free(pucpus);
             hwloc_bitmap_free(res);
             opal_argv_free(cores);
         }

From 60aa9cfcb619ab69dff8079a6bc10221b282c9f4 Mon Sep 17 00:00:00 2001
From: Gilles Gouaillardet <gilles@rist.or.jp>
Date: Thu, 20 Jul 2017 17:39:44 +0900
Subject: [PATCH 3/4] hwloc: add support for hwloc v2 API

Signed-off-by: Gilles Gouaillardet <gilles@rist.or.jp>
---
 ompi/mca/topo/treematch/treematch/tm_hwloc.c |   8 +
 opal/mca/btl/openib/btl_openib_component.c   |   6 +-
 opal/mca/hwloc/base/base.h                   |  23 +++
 opal/mca/hwloc/base/hwloc_base_dt.c          |   9 +-
 opal/mca/hwloc/base/hwloc_base_frame.c       |   8 +-
 opal/mca/hwloc/base/hwloc_base_util.c        | 152 ++++++++++++++++---
 opal/mca/hwloc/external/configure.m4         |  16 +-
 orte/mca/ess/base/ess_base_fns.c             |  11 +-
 orte/mca/ras/simulator/ras_sim_module.c      |   2 +-
 orte/mca/rmaps/base/rmaps_base_binding.c     |  30 ++--
 orte/mca/rmaps/base/rmaps_base_ranking.c     |  13 +-
 orte/mca/rmaps/round_robin/rmaps_rr.c        |  30 ++--
 orte/orted/pmix/pmix_server.c                |   2 +-
 orte/test/system/opal_hwloc.c                |   5 +-
 14 files changed, 233 insertions(+), 82 deletions(-)

diff --git a/ompi/mca/topo/treematch/treematch/tm_hwloc.c b/ompi/mca/topo/treematch/treematch/tm_hwloc.c
index 4a85588cb9..00e279e0cd 100644
--- a/ompi/mca/topo/treematch/treematch/tm_hwloc.c
+++ b/ompi/mca/topo/treematch/treematch/tm_hwloc.c
@@ -159,7 +159,11 @@ tm_topology_t* hwloc_to_tm(char *filename,double **pcost)
     exit(-1);
   }
 
+#if HWLOC_API_VERSION < 0x20000
   hwloc_topology_ignore_all_keep_structure(topology);
+#else
+#warning FIXME hwloc v2
+#endif
   hwloc_topology_load(topology);
 
 
@@ -229,7 +233,11 @@ tm_topology_t* get_local_topo_with_hwloc(void)
 
   /* Build the topology */
   hwloc_topology_init(&topology);
+#if HWLOC_API_VERSION < 0x20000
   hwloc_topology_ignore_all_keep_structure(topology);
+#else
+#warning FIXME hwloc v2
+#endif
   hwloc_topology_load(topology);
 
   /* Test if symetric */
diff --git a/opal/mca/btl/openib/btl_openib_component.c b/opal/mca/btl/openib/btl_openib_component.c
index c7cfb834eb..42e21e666f 100644
--- a/opal/mca/btl/openib/btl_openib_component.c
+++ b/opal/mca/btl/openib/btl_openib_component.c
@@ -19,7 +19,7 @@
  * Copyright (c) 2011-2015 NVIDIA Corporation.  All rights reserved.
  * Copyright (c) 2012      Oak Ridge National Laboratory.  All rights reserved
  * Copyright (c) 2013-2016 Intel, Inc.  All rights reserved.
- * Copyright (c) 2014-2016 Research Organization for Information Science
+ * Copyright (c) 2014-2017 Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
  * Copyright (c) 2014      Bull SAS.  All rights reserved.
  * $COPYRIGHT$
@@ -2331,6 +2331,7 @@ static float get_ib_dev_distance(struct ibv_device *dev)
        because we have no way of measuring. */
     float distance = 0;
 
+#if HWLOC_API_VERSION < 0x20000
     /* Override any distance logic so all devices are used */
     if (0 != mca_btl_openib_component.ignore_locality ||
         OPAL_SUCCESS != opal_hwloc_base_get_topology()) {
@@ -2475,6 +2476,9 @@ static float get_ib_dev_distance(struct ibv_device *dev)
     if (NULL != my_cpuset) {
         hwloc_bitmap_free(my_cpuset);
     }
+#else
+#warning FIXME get_ib_dev_distance is not implemented with hwloc v2
+#endif
 
     return distance;
 }
diff --git a/opal/mca/hwloc/base/base.h b/opal/mca/hwloc/base/base.h
index 2f3ab8c5e7..1413034866 100644
--- a/opal/mca/hwloc/base/base.h
+++ b/opal/mca/hwloc/base/base.h
@@ -19,6 +19,12 @@
 
 #include "opal/mca/hwloc/hwloc-internal.h"
 
+#if HWLOC_API_VERSION < 0x20000
+#define HWLOC_OBJ_L3CACHE HWLOC_OBJ_CACHE
+#define HWLOC_OBJ_L2CACHE HWLOC_OBJ_CACHE
+#define HWLOC_OBJ_L1CACHE HWLOC_OBJ_CACHE
+#endif
+
 /*
  * Global functions for MCA overall hwloc open and close
  */
@@ -83,6 +89,20 @@ OPAL_DECLSPEC extern char *opal_hwloc_base_topo_file;
         hwloc_bitmap_free(bind);                                        \
     } while(0);
 
+#if HWLOC_API_VERSION < 0x20000
+#define OPAL_HWLOC_MAKE_OBJ_CACHE(level, obj, cache_level)              \
+    do {                                                                \
+        obj = HWLOC_OBJ_CACHE;                                          \
+        cache_level = level;                                            \
+    } while(0)
+#else
+#define OPAL_HWLOC_MAKE_OBJ_CACHE(level, obj, cache_level)              \
+    do {                                                                \
+        obj = HWLOC_OBJ_L##level##CACHE;                                \
+        cache_level = 0;                                                \
+    } while(0)
+#endif
+
 OPAL_DECLSPEC opal_hwloc_locality_t opal_hwloc_base_get_relative_locality(hwloc_topology_t topo,
                                                                           char *cpuset1, char *cpuset2);
 
@@ -282,6 +302,9 @@ OPAL_DECLSPEC char* opal_hwloc_base_get_location(char *locality,
 
 OPAL_DECLSPEC opal_hwloc_locality_t opal_hwloc_compute_relative_locality(char *loc1, char *loc2);
 
+OPAL_DECLSPEC int opal_hwloc_base_topology_export_xmlbuffer(hwloc_topology_t topology, char **xmlpath, int *buflen);
+
+OPAL_DECLSPEC int opal_hwloc_base_topology_set_flags (hwloc_topology_t topology, unsigned long flags, bool io);
 END_C_DECLS
 
 #endif /* OPAL_HWLOC_BASE_H */
diff --git a/opal/mca/hwloc/base/hwloc_base_dt.c b/opal/mca/hwloc/base/hwloc_base_dt.c
index 4f680788ec..0840ee13f1 100644
--- a/opal/mca/hwloc/base/hwloc_base_dt.c
+++ b/opal/mca/hwloc/base/hwloc_base_dt.c
@@ -31,7 +31,7 @@ int opal_hwloc_pack(opal_buffer_t *buffer, const void *src,
         t = tarray[i];
 
         /* extract an xml-buffer representation of the tree */
-        if (0 != hwloc_topology_export_xmlbuffer(t, &xmlbuffer, &len)) {
+        if (0 != opal_hwloc_base_topology_export_xmlbuffer(t, &xmlbuffer, &len)) {
             return OPAL_ERROR;
         }
 
@@ -106,8 +106,7 @@ int opal_hwloc_unpack(opal_buffer_t *buffer, void *dest,
         /* since we are loading this from an external source, we have to
          * explicitly set a flag so hwloc sets things up correctly
          */
-        if (0 != hwloc_topology_set_flags(t, (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM |
-                                              HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) {
+        if (0 != opal_hwloc_base_topology_set_flags(t, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM, true)) {
             rc = OPAL_ERROR;
             hwloc_topology_destroy(t);
             goto cleanup;
@@ -191,10 +190,10 @@ int opal_hwloc_compare(const hwloc_topology_t topo1,
      * where we really need to do a tree-wise search so we only compare
      * the things we care about, and ignore stuff like MAC addresses
      */
-    if (0 != hwloc_topology_export_xmlbuffer(t1, &x1, &l1)) {
+    if (0 != opal_hwloc_base_topology_export_xmlbuffer(t1, &x1, &l1)) {
         return OPAL_EQUAL;
     }
-    if (0 != hwloc_topology_export_xmlbuffer(t2, &x2, &l2)) {
+    if (0 != opal_hwloc_base_topology_export_xmlbuffer(t2, &x2, &l2)) {
         free(x1);
         return OPAL_EQUAL;
     }
diff --git a/opal/mca/hwloc/base/hwloc_base_frame.c b/opal/mca/hwloc/base/hwloc_base_frame.c
index e27985d38e..538437fb0d 100644
--- a/opal/mca/hwloc/base/hwloc_base_frame.c
+++ b/opal/mca/hwloc/base/hwloc_base_frame.c
@@ -1,7 +1,7 @@
 /*
  * Copyright (c) 2011-2017 Cisco Systems, Inc.  All rights reserved
  * Copyright (c) 2013-2017 Intel, Inc.  All rights reserved.
- * Copyright (c) 2016      Research Organization for Information Science
+ * Copyright (c) 2016-2017 Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
  * $COPYRIGHT$
  *
@@ -50,9 +50,9 @@ hwloc_obj_type_t opal_hwloc_levels[] = {
     HWLOC_OBJ_MACHINE,
     HWLOC_OBJ_NODE,
     HWLOC_OBJ_SOCKET,
-    HWLOC_OBJ_CACHE,
-    HWLOC_OBJ_CACHE,
-    HWLOC_OBJ_CACHE,
+    HWLOC_OBJ_L3CACHE,
+    HWLOC_OBJ_L2CACHE,
+    HWLOC_OBJ_L1CACHE,
     HWLOC_OBJ_CORE,
     HWLOC_OBJ_PU
 };
diff --git a/opal/mca/hwloc/base/hwloc_base_util.c b/opal/mca/hwloc/base/hwloc_base_util.c
index 0c23af43f3..4cddabf3c4 100644
--- a/opal/mca/hwloc/base/hwloc_base_util.c
+++ b/opal/mca/hwloc/base/hwloc_base_util.c
@@ -114,6 +114,7 @@ static void fill_cache_line_size(void)
 {
     int i = 0, cache_level = 2;
     unsigned size;
+    unsigned int cache_object = HWLOC_OBJ_L2CACHE;
     hwloc_obj_t obj;
     bool found = false;
 
@@ -123,10 +124,11 @@ static void fill_cache_line_size(void)
         i=0;
         while (1) {
             obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology,
-                                                  HWLOC_OBJ_CACHE, cache_level,
+                                                  cache_object, cache_level,
                                                   i, OPAL_HWLOC_LOGICAL);
             if (NULL == obj) {
                 --cache_level;
+                cache_object = HWLOC_OBJ_L1CACHE;
                 break;
             } else {
                 if (NULL != obj->attr &&
@@ -188,10 +190,9 @@ int opal_hwloc_base_get_topology(void)
         /* since we are loading this from an external source, we have to
          * explicitly set a flag so hwloc sets things up correctly
          */
-        if (0 != hwloc_topology_set_flags(opal_hwloc_topology,
-                                         (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM |
-                                          HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM |
-                                          HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) {
+        if (0 != opal_hwloc_base_topology_set_flags(opal_hwloc_topology,
+                                                    HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM,
+                                                    true)) {
             hwloc_topology_destroy(opal_hwloc_topology);
             free(val);
             return OPAL_ERROR;
@@ -205,8 +206,7 @@ int opal_hwloc_base_get_topology(void)
         free(val);
     } else if (NULL == opal_hwloc_base_topo_file) {
         if (0 != hwloc_topology_init(&opal_hwloc_topology) ||
-            0 != hwloc_topology_set_flags(opal_hwloc_topology,
-                                          HWLOC_TOPOLOGY_FLAG_IO_DEVICES) ||
+            0 != opal_hwloc_base_topology_set_flags(opal_hwloc_topology, 0, true) ||
             0 != hwloc_topology_load(opal_hwloc_topology)) {
             return OPAL_ERR_NOT_SUPPORTED;
         }
@@ -250,9 +250,9 @@ int opal_hwloc_base_set_topology(char *topofile)
     /* since we are loading this from an external source, we have to
      * explicitly set a flag so hwloc sets things up correctly
      */
-    if (0 != hwloc_topology_set_flags(opal_hwloc_topology,
-                                      (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM |
-                                       HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) {
+    if (0 != opal_hwloc_base_topology_set_flags(opal_hwloc_topology,
+                                                HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM,
+                                                true)) {
         hwloc_topology_destroy(opal_hwloc_topology);
         return OPAL_ERR_NOT_SUPPORTED;
     }
@@ -502,10 +502,13 @@ unsigned int opal_hwloc_base_get_obj_idx(hwloc_topology_t topo,
         return data->idx;
     }
 
+#if HWLOC_API_VERSION < 0x20000
     /* determine the number of objects of this type */
     if (HWLOC_OBJ_CACHE == obj->type) {
         cache_level = obj->attr->cache.depth;
     }
+#endif
+
     nobjs = opal_hwloc_base_get_nbobjs_by_type(topo, obj->type, cache_level, rtype);
 
     OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output,
@@ -555,9 +558,11 @@ static hwloc_obj_t df_search(hwloc_topology_t topo,
     opal_hwloc_obj_data_t *data;
 
     if (target == start->type) {
+#if HWLOC_API_VERSION < 0x20000
         if (HWLOC_OBJ_CACHE == start->type && cache_level != start->attr->cache.depth) {
             goto notfound;
         }
+#endif
         if (OPAL_HWLOC_LOGICAL == rtype) {
             /* the hwloc tree is composed of LOGICAL objects, so the only
              * time we come here is when we are looking for logical caches
@@ -662,7 +667,11 @@ unsigned int opal_hwloc_base_get_nbobjs_by_type(hwloc_topology_t topo,
      * use the hwloc accessor to get it, unless it is a CACHE
      * as these are treated as special cases
      */
-    if (OPAL_HWLOC_LOGICAL == rtype && HWLOC_OBJ_CACHE != target) {
+    if (OPAL_HWLOC_LOGICAL == rtype
+#if HWLOC_API_VERSION < 0x20000
+        && HWLOC_OBJ_CACHE != target
+#endif
+       ) {
         /* we should not get an error back, but just in case... */
         if (0 > (rc = hwloc_get_nbobjs_by_type(topo, target))) {
             opal_output(0, "UNKNOWN HWLOC ERROR");
@@ -728,9 +737,11 @@ static hwloc_obj_t df_search_min_bound(hwloc_topology_t topo,
         if (0 == (k = opal_hwloc_base_get_npus(topo, start))) {
             goto notfound;
         }
+#if HWLOC_API_VERSION < 0x20000
         if (HWLOC_OBJ_CACHE == start->type && cache_level != start->attr->cache.depth) {
             goto notfound;
         }
+#endif
         /* see how many procs are bound to us */
         data = (opal_hwloc_obj_data_t*)start->userdata;
         if (NULL == data) {
@@ -793,10 +804,12 @@ hwloc_obj_t opal_hwloc_base_find_min_bound_target_under_obj(hwloc_topology_t top
         /* again, we have to treat caches differently as
          * the levels distinguish them
          */
+#if HWLOC_API_VERSION < 0x20000
         if (HWLOC_OBJ_CACHE == target &&
             cache_level < obj->attr->cache.depth) {
             goto moveon;
         }
+#endif
         return obj;
     }
 
@@ -809,16 +822,17 @@ hwloc_obj_t opal_hwloc_base_find_min_bound_target_under_obj(hwloc_topology_t top
     loc = df_search_min_bound(topo, obj, target, cache_level, &min_bound);
 
     if (NULL != loc) {
+#if HWLOC_API_VERSION < 0x20000
         if (HWLOC_OBJ_CACHE == target) {
             OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output,
                         "hwloc:base:min_bound_under_obj found min bound of %u on %s:%u:%u",
                         min_bound, hwloc_obj_type_string(target),
                         cache_level, loc->logical_index));
-        } else {
+        } else
+#endif
             OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output,
                         "hwloc:base:min_bound_under_obj found min bound of %u on %s:%u",
                         min_bound, hwloc_obj_type_string(target), loc->logical_index));
-        }
     }
 
     return loc;
@@ -845,7 +859,11 @@ hwloc_obj_t opal_hwloc_base_get_obj_by_type(hwloc_topology_t topo,
      * use the hwloc accessor to get it, unless it is a CACHE
      * as these are treated as special cases
      */
-    if (OPAL_HWLOC_LOGICAL == rtype && HWLOC_OBJ_CACHE != target) {
+    if (OPAL_HWLOC_LOGICAL == rtype
+#if HWLOC_API_VERSION < 0x20000
+        && HWLOC_OBJ_CACHE != target
+#endif
+       ) {
         return hwloc_get_obj_by_type(topo, target, instance);
     }
 
@@ -1230,7 +1248,13 @@ opal_hwloc_locality_t opal_hwloc_base_get_relative_locality(hwloc_topology_t top
         /* if it isn't one of interest, then ignore it */
         if (HWLOC_OBJ_NODE != type &&
             HWLOC_OBJ_SOCKET != type &&
+#if HWLOC_API_VERSION < 0x20000
             HWLOC_OBJ_CACHE != type &&
+#else
+            HWLOC_OBJ_L3CACHE != type &&
+            HWLOC_OBJ_L2CACHE != type &&
+            HWLOC_OBJ_L1CACHE != type &&
+#endif
             HWLOC_OBJ_CORE != type &&
             HWLOC_OBJ_PU != type) {
             continue;
@@ -1257,6 +1281,7 @@ opal_hwloc_locality_t opal_hwloc_base_get_relative_locality(hwloc_topology_t top
                 case HWLOC_OBJ_SOCKET:
                     locality |= OPAL_PROC_ON_SOCKET;
                     break;
+#if HWLOC_API_VERSION < 0x20000
                 case HWLOC_OBJ_CACHE:
                     if (3 == obj->attr->cache.depth) {
                         locality |= OPAL_PROC_ON_L3CACHE;
@@ -1266,6 +1291,17 @@ opal_hwloc_locality_t opal_hwloc_base_get_relative_locality(hwloc_topology_t top
                         locality |= OPAL_PROC_ON_L1CACHE;
                     }
                     break;
+#else
+                case HWLOC_OBJ_L3CACHE:
+                    locality |= OPAL_PROC_ON_L3CACHE;
+                    break;
+                case HWLOC_OBJ_L2CACHE:
+                    locality |= OPAL_PROC_ON_L2CACHE;
+                    break;
+                case HWLOC_OBJ_L1CACHE:
+                    locality |= OPAL_PROC_ON_L1CACHE;
+                    break;
+#endif
                 case HWLOC_OBJ_CORE:
                     locality |= OPAL_PROC_ON_CORE;
                     break;
@@ -1801,13 +1837,14 @@ static void sort_by_dist(hwloc_topology_t topo, char* device_name, opal_list_t *
 {
     hwloc_obj_t device_obj = NULL;
     hwloc_obj_t obj = NULL, root = NULL;
-    const struct hwloc_distances_s* distances;
+    struct hwloc_distances_s* distances;
     opal_rmaps_numa_node_t *numa_node;
     int close_node_index;
     float latency;
     unsigned int j;
     int depth;
     unsigned i;
+    unsigned distances_nr = 0;
 
     for (device_obj = hwloc_get_obj_by_type(topo, HWLOC_OBJ_OS_DEVICE, 0); device_obj; device_obj = hwloc_get_next_osdev(topo, device_obj)) {
         if (device_obj->attr->osdev.type == HWLOC_OBJ_OSDEV_OPENFABRICS
@@ -1828,6 +1865,7 @@ static void sort_by_dist(hwloc_topology_t topo, char* device_name, opal_list_t *
                 }
 
                 /* find distance matrix for all numa nodes */
+#if HWLOC_API_VERSION < 0x20000
                 distances = hwloc_get_whole_distance_matrix_by_type(topo, HWLOC_OBJ_NODE);
                 if (NULL ==  distances) {
                     /* we can try to find distances under group object. This info can be there. */
@@ -1864,6 +1902,22 @@ static void sort_by_dist(hwloc_topology_t topo, char* device_name, opal_list_t *
                     numa_node->dist_from_closed = latency;
                     opal_list_append(sorted_list, &numa_node->super);
                 }
+#else
+                if (0 != hwloc_distances_get_by_type(topo, HWLOC_OBJ_NODE, &distances_nr, &distances, 0, 0) || 0 == distances_nr) {
+                    opal_output_verbose(5, opal_hwloc_base_framework.framework_output,
+                            "hwloc:base:get_sorted_numa_list: There is no information about distances on the node.");
+                    return;
+                }
+                /* fill list of numa nodes */
+                for (j = 0; j < distances->nbobjs; j++) {
+                    latency = distances->values[close_node_index + distances->nbobjs * j];
+                    numa_node = OBJ_NEW(opal_rmaps_numa_node_t);
+                    numa_node->index = j;
+                    numa_node->dist_from_closed = latency;
+                    opal_list_append(sorted_list, &numa_node->super);
+                }
+                hwloc_distances_release(topo, distances);
+#endif
                 /* sort numa nodes by distance from the closest one to PCI */
                 opal_list_sort(sorted_list, dist_cmp_fn);
                 return;
@@ -1956,9 +2010,9 @@ char* opal_hwloc_base_get_topo_signature(hwloc_topology_t topo)
 
     nnuma = opal_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_NODE, 0, OPAL_HWLOC_AVAILABLE);
     nsocket = opal_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_SOCKET, 0, OPAL_HWLOC_AVAILABLE);
-    nl3 = opal_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_CACHE, 3, OPAL_HWLOC_AVAILABLE);
-    nl2 = opal_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_CACHE, 2, OPAL_HWLOC_AVAILABLE);
-    nl1 = opal_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_CACHE, 1, OPAL_HWLOC_AVAILABLE);
+    nl3 = opal_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_L3CACHE, 3, OPAL_HWLOC_AVAILABLE);
+    nl2 = opal_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_L2CACHE, 2, OPAL_HWLOC_AVAILABLE);
+    nl1 = opal_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_L1CACHE, 1, OPAL_HWLOC_AVAILABLE);
     ncore = opal_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_CORE, 0, OPAL_HWLOC_AVAILABLE);
     nhwt = opal_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_PU, 0, OPAL_HWLOC_AVAILABLE);
 
@@ -2025,7 +2079,13 @@ char* opal_hwloc_base_get_locality_string(hwloc_topology_t topo,
         /* if it isn't one of interest, then ignore it */
         if (HWLOC_OBJ_NODE != type &&
             HWLOC_OBJ_SOCKET != type &&
+#if HWLOC_API_VERSION < 0x20000
             HWLOC_OBJ_CACHE != type &&
+#else
+            HWLOC_OBJ_L1CACHE != type &&
+            HWLOC_OBJ_L2CACHE != type &&
+            HWLOC_OBJ_L3CACHE != type &&
+#endif
             HWLOC_OBJ_CORE != type &&
             HWLOC_OBJ_PU != type) {
             continue;
@@ -2067,6 +2127,7 @@ char* opal_hwloc_base_get_locality_string(hwloc_topology_t topo,
                     }
                     locality = t2;
                     break;
+#if HWLOC_API_VERSION < 0x20000
                 case HWLOC_OBJ_CACHE:
                     if (3 == obj->attr->cache.depth) {
                         asprintf(&t2, "%sL3%s:", (NULL == locality) ? "" : locality, tmp);
@@ -2091,6 +2152,29 @@ char* opal_hwloc_base_get_locality_string(hwloc_topology_t topo,
                         break;
                     }
                     break;
+#else
+                case HWLOC_OBJ_L3CACHE:
+                    asprintf(&t2, "%sL3%s:", (NULL == locality) ? "" : locality, tmp);
+                    if (NULL != locality) {
+                        free(locality);
+                    }
+                    locality = t2;
+                    break;
+                case HWLOC_OBJ_L2CACHE:
+                    asprintf(&t2, "%sL2%s:", (NULL == locality) ? "" : locality, tmp);
+                    if (NULL != locality) {
+                        free(locality);
+                    }
+                    locality = t2;
+                    break;
+                case HWLOC_OBJ_L1CACHE:
+                    asprintf(&t2, "%sL1%s:", (NULL == locality) ? "" : locality, tmp);
+                    if (NULL != locality) {
+                        free(locality);
+                    }
+                    locality = t2;
+                    break;
+#endif
                 case HWLOC_OBJ_CORE:
                     asprintf(&t2, "%sCR%s:", (NULL == locality) ? "" : locality, tmp);
                     if (NULL != locality) {
@@ -2141,6 +2225,7 @@ char* opal_hwloc_base_get_location(char *locality,
         case HWLOC_OBJ_SOCKET:
             srch = "SK";
             break;
+#if HWLOC_API_VERSION < 0x20000
         case HWLOC_OBJ_CACHE:
             if (3 == index) {
                 srch = "L3";
@@ -2150,6 +2235,17 @@ char* opal_hwloc_base_get_location(char *locality,
                 srch = "L0";
             }
             break;
+#else
+        case HWLOC_OBJ_L3CACHE:
+            srch = "L3";
+            break;
+        case HWLOC_OBJ_L2CACHE:
+            srch = "L2";
+            break;
+        case HWLOC_OBJ_L1CACHE:
+            srch = "L0";
+            break;
+#endif
         case HWLOC_OBJ_CORE:
             srch = "CR";
             break;
@@ -2235,3 +2331,23 @@ opal_hwloc_locality_t opal_hwloc_compute_relative_locality(char *loc1, char *loc
     hwloc_bitmap_free(bit2);
     return locality;
 }
+
+int opal_hwloc_base_topology_export_xmlbuffer(hwloc_topology_t topology, char **xmlpath, int *buflen) {
+#if HWLOC_API_VERSION < 0x20000
+    return hwloc_topology_export_xmlbuffer(topology, xmlpath, buflen);
+#else
+    return hwloc_topology_export_xmlbuffer(topology, xmlpath, buflen, 0);
+#endif
+}
+
+int opal_hwloc_base_topology_set_flags (hwloc_topology_t topology, unsigned long flags, bool io) {
+    if (io) {
+#if HWLOC_API_VERSION < 0x20000
+        flags |= HWLOC_TOPOLOGY_FLAG_IO_DEVICES;
+#else
+        int ret = hwloc_topology_set_io_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_IMPORTANT);
+        if (0 != ret) return ret;
+#endif
+    }
+    return hwloc_topology_set_flags(topology, flags);
+}
diff --git a/opal/mca/hwloc/external/configure.m4 b/opal/mca/hwloc/external/configure.m4
index 032eebce59..c7c3d02ed9 100644
--- a/opal/mca/hwloc/external/configure.m4
+++ b/opal/mca/hwloc/external/configure.m4
@@ -183,21 +183,7 @@ AC_DEFUN([MCA_opal_hwloc_external_CONFIG],[
                [AC_MSG_RESULT([yes])],
                [AC_MSG_RESULT([no])
                 AC_MSG_ERROR([Cannot continue])])
-           AC_MSG_CHECKING([if external hwloc version is lower than 2.0])
-           AS_IF([test "$opal_hwloc_dir" != ""],
-                 [opal_hwloc_external_CFLAGS_save=$CFLAGS
-                  CFLAGS="-I$opal_hwloc_dir/include $opal_hwloc_external_CFLAGS_save"])
-           AC_COMPILE_IFELSE(
-               [AC_LANG_PROGRAM([[#include <hwloc.h>]],
-                   [[
-#if HWLOC_API_VERSION >= 0x00020000
-#error "hwloc API version is greater or equal than 0x00020000"
-#endif
-                   ]])],
-               [AC_MSG_RESULT([yes])],
-               [AC_MSG_RESULT([no])
-                AC_MSG_ERROR([OMPI does not currently support hwloc v2 API
-Cannot continue])])
+
            AS_IF([test "$opal_hwloc_dir" != ""],
                  [CFLAGS=$opal_hwloc_external_CFLAGS_save])
 
diff --git a/orte/mca/ess/base/ess_base_fns.c b/orte/mca/ess/base/ess_base_fns.c
index ae29db2874..0a7a61b2fd 100644
--- a/orte/mca/ess/base/ess_base_fns.c
+++ b/orte/mca/ess/base/ess_base_fns.c
@@ -232,14 +232,11 @@ int orte_ess_base_proc_binding(void)
                         goto error;
                     }
                     if (OPAL_BIND_TO_L1CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
-                        target = HWLOC_OBJ_CACHE;
-                        cache_level = 1;
+                        OPAL_HWLOC_MAKE_OBJ_CACHE(1, target, cache_level);
                     } else if (OPAL_BIND_TO_L2CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
-                        target = HWLOC_OBJ_CACHE;
-                        cache_level = 2;
+                        OPAL_HWLOC_MAKE_OBJ_CACHE(2, target, cache_level);
                     } else if (OPAL_BIND_TO_L3CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
-                        target = HWLOC_OBJ_CACHE;
-                        cache_level = 3;
+                        OPAL_HWLOC_MAKE_OBJ_CACHE(3, target, cache_level);
                     } else if (OPAL_BIND_TO_SOCKET == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
                         target = HWLOC_OBJ_SOCKET;
                     } else if (OPAL_BIND_TO_NUMA == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
@@ -251,9 +248,11 @@ int orte_ess_base_proc_binding(void)
                     }
                     for (obj = obj->parent; NULL != obj; obj = obj->parent) {
                         if (target == obj->type) {
+#if HWLOC_API_VERSION < 0x20000
                             if (HWLOC_OBJ_CACHE == target && cache_level != obj->attr->cache.depth) {
                                 continue;
                             }
+#endif
                             /* this is the place! */
                             cpus = obj->cpuset;
                             if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) {
diff --git a/orte/mca/ras/simulator/ras_sim_module.c b/orte/mca/ras/simulator/ras_sim_module.c
index 9e71040713..dd7eea91c8 100644
--- a/orte/mca/ras/simulator/ras_sim_module.c
+++ b/orte/mca/ras/simulator/ras_sim_module.c
@@ -135,7 +135,7 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes)
             /* since we are loading this from an external source, we have to
              * explicitly set a flag so hwloc sets things up correctly
              */
-            if (0 != hwloc_topology_set_flags(topo, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM)) {
+            if (0 != opal_hwloc_base_topology_set_flags(topo, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM, false)) {
                 orte_show_help("help-ras-simulator.txt",
                                "hwloc API fail", true,
                                __FILE__, __LINE__, "hwloc_topology_set_flags");
diff --git a/orte/mca/rmaps/base/rmaps_base_binding.c b/orte/mca/rmaps/base/rmaps_base_binding.c
index a524823e1a..646921861f 100644
--- a/orte/mca/rmaps/base/rmaps_base_binding.c
+++ b/orte/mca/rmaps/base/rmaps_base_binding.c
@@ -171,9 +171,11 @@ static int bind_upwards(orte_job_t *jdata,
                                 hwloc_obj_type_string(target),
                                 hwloc_obj_type_string(obj->type));
             if (target == obj->type) {
+#if HWLOC_API_VERSION < 0x20000
                 if (HWLOC_OBJ_CACHE == target && cache_level != obj->attr->cache.depth) {
                     continue;
                 }
+#endif
                 /* get its index */
                 if (UINT_MAX == (idx = opal_hwloc_base_get_obj_idx(node->topology->topo, obj, OPAL_HWLOC_AVAILABLE))) {
                     ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
@@ -726,16 +728,13 @@ int orte_rmaps_base_compute_bindings(orte_job_t *jdata)
         hwb = HWLOC_OBJ_SOCKET;
         break;
     case OPAL_BIND_TO_L3CACHE:
-        hwb = HWLOC_OBJ_CACHE;
-        clvl = 3;
+        OPAL_HWLOC_MAKE_OBJ_CACHE(3, hwb, clvl);
         break;
     case OPAL_BIND_TO_L2CACHE:
-        hwb = HWLOC_OBJ_CACHE;
-        clvl = 2;
+        OPAL_HWLOC_MAKE_OBJ_CACHE(2, hwb, clvl);
         break;
     case OPAL_BIND_TO_L1CACHE:
-        hwb = HWLOC_OBJ_CACHE;
-        clvl = 1;
+        OPAL_HWLOC_MAKE_OBJ_CACHE(1, hwb, clvl);
         break;
     case OPAL_BIND_TO_CORE:
         hwb = HWLOC_OBJ_CORE;
@@ -763,16 +762,13 @@ int orte_rmaps_base_compute_bindings(orte_job_t *jdata)
         hwm = HWLOC_OBJ_SOCKET;
         break;
     case ORTE_MAPPING_BYL3CACHE:
-        hwm = HWLOC_OBJ_CACHE;
-        clvm = 3;
+        OPAL_HWLOC_MAKE_OBJ_CACHE(3, hwm, clvm);
         break;
     case ORTE_MAPPING_BYL2CACHE:
-        hwm = HWLOC_OBJ_CACHE;
-        clvm = 2;
+        OPAL_HWLOC_MAKE_OBJ_CACHE(2, hwm, clvm);
         break;
     case ORTE_MAPPING_BYL1CACHE:
-        hwm = HWLOC_OBJ_CACHE;
-        clvm = 1;
+        OPAL_HWLOC_MAKE_OBJ_CACHE(1, hwm, clvm);
         break;
     case ORTE_MAPPING_BYCORE:
         hwm = HWLOC_OBJ_CORE;
@@ -915,28 +911,30 @@ int orte_rmaps_base_compute_bindings(orte_job_t *jdata)
             }
         } else {
             /* determine the relative depth on this node */
+#if HWLOC_API_VERSION < 0x20000
             if (HWLOC_OBJ_CACHE == hwb) {
                 /* must use a unique function because blasted hwloc
                  * just doesn't deal with caches very well...sigh
                  */
                 bind_depth = hwloc_get_cache_type_depth(node->topology->topo, clvl, (hwloc_obj_cache_type_t)-1);
-            } else {
+            } else
+#endif
                 bind_depth = hwloc_get_type_depth(node->topology->topo, hwb);
-            }
             if (0 > bind_depth) {
                 /* didn't find such an object */
                 orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-objects",
                                true, hwloc_obj_type_string(hwb), node->name);
                 return ORTE_ERR_SILENT;
             }
+#if HWLOC_API_VERSION < 0x20000
             if (HWLOC_OBJ_CACHE == hwm) {
                 /* must use a unique function because blasted hwloc
                  * just doesn't deal with caches very well...sigh
                  */
                 map_depth = hwloc_get_cache_type_depth(node->topology->topo, clvm, (hwloc_obj_cache_type_t)-1);
-            } else {
+            } else
+#endif
                 map_depth = hwloc_get_type_depth(node->topology->topo, hwm);
-            }
             if (0 > map_depth) {
                 /* didn't find such an object */
                 orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-objects",
diff --git a/orte/mca/rmaps/base/rmaps_base_ranking.c b/orte/mca/rmaps/base/rmaps_base_ranking.c
index 8be87fa50e..6102f0cdf5 100644
--- a/orte/mca/rmaps/base/rmaps_base_ranking.c
+++ b/orte/mca/rmaps/base/rmaps_base_ranking.c
@@ -11,6 +11,8 @@
  *                         All rights reserved.
  * Copyright (c) 2011      Cisco Systems, Inc.  All rights reserved.
  * Copyright (c) 2014-2017 Intel, Inc.  All rights reserved.
+ * Copyright (c) 2017      Research Organization for Information Science
+ *                         and Technology (RIST). All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -466,6 +468,8 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
     orte_proc_t *proc, *pptr;
     int rc;
     bool one_found;
+    hwloc_obj_type_t target;
+    unsigned cache_level;
 
     map = jdata->map;
 
@@ -508,7 +512,8 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                             "mca:rmaps: computing ranks by L3cache for job %s",
                             ORTE_JOBID_PRINT(jdata->jobid));
-        if (ORTE_SUCCESS != (rc = rank_by(jdata, HWLOC_OBJ_CACHE, 3))) {
+        OPAL_HWLOC_MAKE_OBJ_CACHE(3, target, cache_level);
+        if (ORTE_SUCCESS != (rc = rank_by(jdata, target, cache_level))) {
             if (ORTE_ERR_NOT_SUPPORTED == rc &&
                 !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) {
                 ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
@@ -523,7 +528,8 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                             "mca:rmaps: computing ranks by L2cache for job %s",
                             ORTE_JOBID_PRINT(jdata->jobid));
-        if (ORTE_SUCCESS != (rc = rank_by(jdata, HWLOC_OBJ_CACHE, 2))) {
+        OPAL_HWLOC_MAKE_OBJ_CACHE(2, target, cache_level);
+        if (ORTE_SUCCESS != (rc = rank_by(jdata, target, cache_level))) {
             if (ORTE_ERR_NOT_SUPPORTED == rc &&
                 !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) {
                 ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
@@ -538,7 +544,8 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                             "mca:rmaps: computing ranks by L1cache for job %s",
                             ORTE_JOBID_PRINT(jdata->jobid));
-        if (ORTE_SUCCESS != (rc = rank_by(jdata, HWLOC_OBJ_CACHE, 1))) {
+        OPAL_HWLOC_MAKE_OBJ_CACHE(1, target, cache_level);
+        if (ORTE_SUCCESS != (rc = rank_by(jdata, target, cache_level))) {
             if (ORTE_ERR_NOT_SUPPORTED == rc &&
                 !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) {
                 ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
diff --git a/orte/mca/rmaps/round_robin/rmaps_rr.c b/orte/mca/rmaps/round_robin/rmaps_rr.c
index b268c4953e..ab1b3584b1 100644
--- a/orte/mca/rmaps/round_robin/rmaps_rr.c
+++ b/orte/mca/rmaps/round_robin/rmaps_rr.c
@@ -13,6 +13,8 @@
  * Copyright (c) 2011-2012 Los Alamos National Security, LLC.
  *                         All rights reserved.
  * Copyright (c) 2014-2017 Intel, Inc.  All rights reserved.
+ * Copyright (c) 2017      Research Organization for Information Science
+ *                         and Technology (RIST). All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -93,6 +95,8 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
 
     /* cycle through the app_contexts, mapping them sequentially */
     for(i=0; i < jdata->apps->size; i++) {
+        hwloc_obj_type_t target;
+        unsigned cache_level;
         if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
             continue;
         }
@@ -171,8 +175,9 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
                                           app->num_procs);
             }
         } else if (ORTE_MAPPING_BYL1CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
-            rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots,
-                                     app->num_procs, HWLOC_OBJ_CACHE, 1);
+            OPAL_HWLOC_MAKE_OBJ_CACHE(1, target, cache_level);
+            rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots, app->num_procs,
+                                     target, cache_level);
             if (ORTE_ERR_NOT_FOUND == rc) {
                 /* if the mapper couldn't map by this object because
                  * it isn't available, but the error allows us to try
@@ -183,8 +188,9 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
                                           app->num_procs);
             }
         } else if (ORTE_MAPPING_BYL2CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
-            rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots,
-                                     app->num_procs, HWLOC_OBJ_CACHE, 2);
+            OPAL_HWLOC_MAKE_OBJ_CACHE(2, target, cache_level);
+            rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots, app->num_procs,
+                                     target, cache_level);
             if (ORTE_ERR_NOT_FOUND == rc) {
                 /* if the mapper couldn't map by this object because
                  * it isn't available, but the error allows us to try
@@ -195,8 +201,9 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
                                           app->num_procs);
             }
         } else if (ORTE_MAPPING_BYL3CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
-            rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots,
-                                     app->num_procs, HWLOC_OBJ_CACHE, 3);
+            OPAL_HWLOC_MAKE_OBJ_CACHE(3, target, cache_level);
+            rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots, app->num_procs,
+                                     target, cache_level);
             if (ORTE_ERR_NOT_FOUND == rc) {
                 /* if the mapper couldn't map by this object because
                  * it isn't available, but the error allows us to try
@@ -272,6 +279,8 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
 static int orte_rmaps_rr_assign_locations(orte_job_t *jdata)
 {
     mca_base_component_t *c = &mca_rmaps_round_robin_component.base_version;
+    hwloc_obj_type_t target;
+    unsigned cache_level;
     int rc;
 
     if (NULL == jdata->map->last_mapper ||
@@ -316,7 +325,8 @@ static int orte_rmaps_rr_assign_locations(orte_job_t *jdata)
             rc = orte_rmaps_rr_assign_root_level(jdata);
         }
     } else if (ORTE_MAPPING_BYL1CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
-        rc = orte_rmaps_rr_assign_byobj(jdata, HWLOC_OBJ_CACHE, 1);
+        OPAL_HWLOC_MAKE_OBJ_CACHE(1, target, cache_level);
+        rc = orte_rmaps_rr_assign_byobj(jdata, target, cache_level);
         if (ORTE_ERR_NOT_FOUND == rc) {
             /* if the mapper couldn't map by this object because
              * it isn't available, but the error allows us to try
@@ -326,7 +336,8 @@ static int orte_rmaps_rr_assign_locations(orte_job_t *jdata)
             rc = orte_rmaps_rr_assign_root_level(jdata);
         }
     } else if (ORTE_MAPPING_BYL2CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
-        rc = orte_rmaps_rr_assign_byobj(jdata, HWLOC_OBJ_CACHE, 2);
+        OPAL_HWLOC_MAKE_OBJ_CACHE(2, target, cache_level);
+        rc = orte_rmaps_rr_assign_byobj(jdata, target, cache_level);
         if (ORTE_ERR_NOT_FOUND == rc) {
             /* if the mapper couldn't map by this object because
              * it isn't available, but the error allows us to try
@@ -336,7 +347,8 @@ static int orte_rmaps_rr_assign_locations(orte_job_t *jdata)
             rc = orte_rmaps_rr_assign_root_level(jdata);
         }
     } else if (ORTE_MAPPING_BYL3CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
-        rc = orte_rmaps_rr_assign_byobj(jdata, HWLOC_OBJ_CACHE, 3);
+        OPAL_HWLOC_MAKE_OBJ_CACHE(3, target, cache_level);
+        rc = orte_rmaps_rr_assign_byobj(jdata, target, cache_level);
         if (ORTE_ERR_NOT_FOUND == rc) {
             /* if the mapper couldn't map by this object because
              * it isn't available, but the error allows us to try
diff --git a/orte/orted/pmix/pmix_server.c b/orte/orted/pmix/pmix_server.c
index 2d7913b33d..cd705438e5 100644
--- a/orte/orted/pmix/pmix_server.c
+++ b/orte/orted/pmix/pmix_server.c
@@ -235,7 +235,7 @@ int pmix_server_init(void)
         int len;
         kv = OBJ_NEW(opal_value_t);
         kv->key = strdup(OPAL_PMIX_LOCAL_TOPO);
-        if (0 != hwloc_topology_export_xmlbuffer(opal_hwloc_topology, &xmlbuffer, &len)) {
+        if (0 != opal_hwloc_base_topology_export_xmlbuffer(opal_hwloc_topology, &xmlbuffer, &len)) {
             OBJ_RELEASE(kv);
             OBJ_DESTRUCT(&info);
             return ORTE_ERROR;
diff --git a/orte/test/system/opal_hwloc.c b/orte/test/system/opal_hwloc.c
index ae2f7f5b40..ce45c8697c 100644
--- a/orte/test/system/opal_hwloc.c
+++ b/orte/test/system/opal_hwloc.c
@@ -72,9 +72,8 @@ int main(int argc, char* argv[])
     /* since we are loading this from an external source, we have to
      * explicitly set a flag so hwloc sets things up correctly
      */
-    if (0 != hwloc_topology_set_flags(my_topology,
-                                      (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM |
-                                       HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) {
+    if (0 != opal_hwloc_base_topology_set_flags(my_topology,
+                                                HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM)) {
         hwloc_topology_destroy(my_topology);
         return OPAL_ERR_NOT_SUPPORTED;
     }

From 593e4ce63f22d32943de1915cd509261ccf20042 Mon Sep 17 00:00:00 2001
From: Gilles Gouaillardet <gilles@rist.or.jp>
Date: Mon, 10 Apr 2017 10:12:59 +0900
Subject: [PATCH 4/4] hwloc: add hwloc2x

The internal hwloc 2.x tree is used when configuring with --with-hwloc=future

Signed-off-by: Gilles Gouaillardet <gilles@rist.or.jp>
---
 opal/mca/hwloc/external/configure.m4          |    3 +-
 opal/mca/hwloc/hwloc1116/configure.m4         |    3 +-
 opal/mca/hwloc/hwloc2x/Makefile.am            |   39 +
 opal/mca/hwloc/hwloc2x/autogen.subdirs        |    1 +
 opal/mca/hwloc/hwloc2x/configure.m4           |  112 +
 opal/mca/hwloc/hwloc2x/hwloc/AUTHORS          |   29 +
 opal/mca/hwloc/hwloc2x/hwloc/COPYING          |   39 +
 opal/mca/hwloc/hwloc2x/hwloc/Makefile.am      |   89 +
 opal/mca/hwloc/hwloc2x/hwloc/NEWS             | 1482 +++++
 opal/mca/hwloc/hwloc2x/hwloc/README           |   65 +
 opal/mca/hwloc/hwloc2x/hwloc/VERSION          |   47 +
 opal/mca/hwloc/hwloc2x/hwloc/autogen.sh       |    2 +
 .../hwloc/hwloc2x/hwloc/config/distscript.sh  |  130 +
 .../hwloc/config/distscript_embedded.sh       |   13 +
 opal/mca/hwloc/hwloc2x/hwloc/config/hwloc.m4  | 1364 ++++
 .../hwloc/config/hwloc_check_attributes.m4    |  534 ++
 .../hwloc/config/hwloc_check_vendor.m4        |  246 +
 .../hwloc/config/hwloc_check_visibility.m4    |  131 +
 .../hwloc2x/hwloc/config/hwloc_components.m4  |   66 +
 .../hwloc2x/hwloc/config/hwloc_get_version.sh |   98 +
 .../hwloc2x/hwloc/config/hwloc_internal.m4    |  470 ++
 .../hwloc/hwloc2x/hwloc/config/hwloc_pkg.m4   |  207 +
 opal/mca/hwloc/hwloc2x/hwloc/config/netloc.m4 |  116 +
 opal/mca/hwloc/hwloc2x/hwloc/configure.ac     |  271 +
 .../hwloc2x/hwloc/contrib/hwloc-valgrind.supp |  161 +
 .../hwloc2x/hwloc/contrib/misc/Makefile.am    |    2 +
 .../hwloc2x/hwloc/contrib/systemd/Makefile.am |    2 +
 opal/mca/hwloc/hwloc2x/hwloc/doc/Makefile.am  |    2 +
 .../hwloc2x/hwloc/doc/doxygen-config.cfg.in   |    2 +
 .../hwloc2x/hwloc/doc/examples/Makefile.am    |    2 +
 opal/mca/hwloc/hwloc2x/hwloc/hwloc.pc.in      |   12 +
 .../mca/hwloc/hwloc2x/hwloc/hwloc/Makefile.am |  230 +
 opal/mca/hwloc/hwloc2x/hwloc/hwloc/base64.c   |  306 +
 opal/mca/hwloc/hwloc2x/hwloc/hwloc/bind.c     |  951 +++
 opal/mca/hwloc/hwloc2x/hwloc/hwloc/bitmap.c   | 1522 +++++
 .../hwloc/hwloc2x/hwloc/hwloc/components.c    |  784 +++
 opal/mca/hwloc/hwloc2x/hwloc/hwloc/diff.c     |  468 ++
 .../mca/hwloc/hwloc2x/hwloc/hwloc/distances.c |  927 +++
 opal/mca/hwloc/hwloc2x/hwloc/hwloc/dolib.c    |   47 +
 opal/mca/hwloc/hwloc2x/hwloc/hwloc/misc.c     |  166 +
 .../hwloc/hwloc2x/hwloc/hwloc/pci-common.c    |  954 +++
 .../hwloc/hwloc2x/hwloc/hwloc/topology-aix.c  |  875 +++
 .../hwloc/hwloc2x/hwloc/hwloc/topology-bgq.c  |  301 +
 .../hwloc/hwloc2x/hwloc/hwloc/topology-cuda.c |  170 +
 .../hwloc2x/hwloc/hwloc/topology-darwin.c     |  307 +
 .../hwloc/hwloc2x/hwloc/hwloc/topology-fake.c |   61 +
 .../hwloc2x/hwloc/hwloc/topology-freebsd.c    |  254 +
 .../hwloc/hwloc2x/hwloc/hwloc/topology-gl.c   |  185 +
 .../hwloc2x/hwloc/hwloc/topology-hardwired.c  |  223 +
 .../hwloc/hwloc2x/hwloc/hwloc/topology-hpux.c |  312 +
 .../hwloc2x/hwloc/hwloc/topology-linux.c      | 5790 +++++++++++++++++
 .../hwloc2x/hwloc/hwloc/topology-netbsd.c     |  213 +
 .../hwloc/hwloc2x/hwloc/hwloc/topology-noos.c |   57 +
 .../hwloc/hwloc2x/hwloc/hwloc/topology-nvml.c |  146 +
 .../hwloc2x/hwloc/hwloc/topology-opencl.c     |  203 +
 .../hwloc/hwloc2x/hwloc/hwloc/topology-pci.c  |  323 +
 .../hwloc/hwloc/topology-solaris-chiptype.c   |  346 +
 .../hwloc2x/hwloc/hwloc/topology-solaris.c    |  817 +++
 .../hwloc2x/hwloc/hwloc/topology-synthetic.c  | 1215 ++++
 .../hwloc2x/hwloc/hwloc/topology-windows.c    | 1171 ++++
 .../hwloc/hwloc2x/hwloc/hwloc/topology-x86.c  | 1437 ++++
 .../hwloc2x/hwloc/hwloc/topology-xml-libxml.c |  569 ++
 .../hwloc/hwloc/topology-xml-nolibxml.c       |  873 +++
 .../hwloc/hwloc2x/hwloc/hwloc/topology-xml.c  | 2398 +++++++
 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology.c | 3684 +++++++++++
 .../mca/hwloc/hwloc2x/hwloc/hwloc/traversal.c |  553 ++
 .../hwloc/hwloc2x/hwloc/include/Makefile.am   |   65 +
 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc.h  | 2184 +++++++
 .../hwloc/include/hwloc/autogen/config.h.in   |  201 +
 .../hwloc2x/hwloc/include/hwloc/bitmap.h      |  376 ++
 .../hwloc/hwloc2x/hwloc/include/hwloc/cuda.h  |  220 +
 .../hwloc2x/hwloc/include/hwloc/cudart.h      |  177 +
 .../hwloc2x/hwloc/include/hwloc/deprecated.h  |  216 +
 .../hwloc/hwloc2x/hwloc/include/hwloc/diff.h  |  284 +
 .../hwloc2x/hwloc/include/hwloc/distances.h   |  223 +
 .../hwloc2x/hwloc/include/hwloc/export.h      |  236 +
 .../hwloc/hwloc2x/hwloc/include/hwloc/gl.h    |  135 +
 .../hwloc2x/hwloc/include/hwloc/glibc-sched.h |  125 +
 .../hwloc2x/hwloc/include/hwloc/helper.h      | 1081 +++
 .../hwloc2x/hwloc/include/hwloc/inlines.h     |  140 +
 .../hwloc2x/hwloc/include/hwloc/intel-mic.h   |  134 +
 .../hwloc/include/hwloc/linux-libnuma.h       |  273 +
 .../hwloc/hwloc2x/hwloc/include/hwloc/linux.h |   79 +
 .../hwloc2x/hwloc/include/hwloc/myriexpress.h |  127 +
 .../hwloc/hwloc2x/hwloc/include/hwloc/nvml.h  |  181 +
 .../hwloc2x/hwloc/include/hwloc/opencl.h      |  196 +
 .../hwloc/include/hwloc/openfabrics-verbs.h   |  150 +
 .../hwloc2x/hwloc/include/hwloc/plugins.h     |  522 ++
 .../hwloc2x/hwloc/include/hwloc/rename.h      |  707 ++
 opal/mca/hwloc/hwloc2x/hwloc/include/netloc.h |   56 +
 .../hwloc2x/hwloc/include/netloc/utarray.h    |  237 +
 .../hwloc2x/hwloc/include/netloc/uthash.h     |  966 +++
 .../hwloc2x/hwloc/include/netlocscotch.h      |  122 +
 .../hwloc/include/private/autogen/config.h.in |  728 +++
 .../hwloc/include/private/components.h        |   43 +
 .../hwloc2x/hwloc/include/private/cpuid-x86.h |   86 +
 .../hwloc2x/hwloc/include/private/debug.h     |   82 +
 .../hwloc2x/hwloc/include/private/misc.h      |  439 ++
 .../hwloc2x/hwloc/include/private/netloc.h    |  578 ++
 .../hwloc2x/hwloc/include/private/private.h   |  388 ++
 .../hwloc/include/private/solaris-chiptype.h  |   59 +
 .../hwloc/hwloc2x/hwloc/include/private/xml.h |  102 +
 opal/mca/hwloc/hwloc2x/hwloc/netloc.pc.in     |   11 +
 .../hwloc/hwloc2x/hwloc/netloc/Makefile.am    |   87 +
 .../hwloc/hwloc2x/hwloc/netloc/architecture.c |  852 +++
 opal/mca/hwloc/hwloc2x/hwloc/netloc/edge.c    |   87 +
 opal/mca/hwloc/hwloc2x/hwloc/netloc/hwloc.c   |  288 +
 opal/mca/hwloc/hwloc2x/hwloc/netloc/mpicomm.c |  101 +
 opal/mca/hwloc/hwloc2x/hwloc/netloc/node.c    |  129 +
 opal/mca/hwloc/hwloc2x/hwloc/netloc/path.c    |   31 +
 .../hwloc2x/hwloc/netloc/physical_link.c      |   88 +
 opal/mca/hwloc/hwloc2x/hwloc/netloc/scotch.c  |  469 ++
 opal/mca/hwloc/hwloc2x/hwloc/netloc/support.c |   59 +
 .../mca/hwloc/hwloc2x/hwloc/netloc/topology.c |  598 ++
 .../hwloc/hwloc2x/hwloc/netlocscotch.pc.in    |   11 +
 .../mca/hwloc/hwloc2x/hwloc/tests/Makefile.am |    2 +
 .../hwloc2x/hwloc/tests/hwloc/Makefile.am     |    2 +
 .../hwloc/tests/hwloc/linux/Makefile.am       |    2 +
 .../tests/hwloc/linux/allowed/Makefile.am     |    2 +
 .../hwloc/linux/allowed/test-topology.sh.in   |    2 +
 .../tests/hwloc/linux/gather/Makefile.am      |    2 +
 .../linux/gather/test-gather-topology.sh.in   |    2 +
 .../tests/hwloc/linux/test-topology.sh.in     |    2 +
 .../hwloc/tests/hwloc/ports/Makefile.am       |    2 +
 .../hwloc/tests/hwloc/rename/Makefile.am      |    2 +
 .../hwloc2x/hwloc/tests/hwloc/wrapper.sh.in   |    2 +
 .../hwloc2x/hwloc/tests/hwloc/x86/Makefile.am |    2 +
 .../hwloc/tests/hwloc/x86/test-topology.sh.in |    2 +
 .../hwloc2x/hwloc/tests/hwloc/xml/Makefile.am |    2 +
 .../hwloc/tests/hwloc/xml/test-topology.sh.in |    2 +
 .../hwloc2x/hwloc/tests/netloc/Makefile.am    |    2 +
 .../hwloc2x/hwloc/tests/netloc/tests.sh.in    |    2 +
 .../mca/hwloc/hwloc2x/hwloc/utils/Makefile.am |    2 +
 .../hwloc2x/hwloc/utils/hwloc/Makefile.am     |    2 +
 .../hwloc/utils/hwloc/hwloc-compress-dir.in   |    2 +
 .../utils/hwloc/hwloc-gather-topology.in      |    2 +
 .../hwloc/utils/hwloc/test-fake-plugin.sh.in  |    2 +
 .../utils/hwloc/test-hwloc-annotate.sh.in     |    2 +
 .../hwloc/utils/hwloc/test-hwloc-calc.sh.in   |    2 +
 .../utils/hwloc/test-hwloc-compress-dir.sh.in |    2 +
 .../utils/hwloc/test-hwloc-diffpatch.sh.in    |    2 +
 .../utils/hwloc/test-hwloc-distrib.sh.in      |    2 +
 .../hwloc/test-hwloc-dump-hwdata/Makefile.am  |    2 +
 .../test-hwloc-dump-hwdata.sh.in              |    2 +
 .../hwloc/utils/hwloc/test-hwloc-info.sh.in   |    2 +
 .../hwloc2x/hwloc/utils/lstopo/Makefile.am    |    2 +
 .../hwloc/utils/lstopo/lstopo-windows.c       |    2 +
 .../hwloc/utils/lstopo/test-lstopo.sh.in      |    2 +
 .../hwloc/utils/netloc/draw/Makefile.am       |    2 +
 .../hwloc/utils/netloc/infiniband/Makefile.am |    2 +
 .../netloc/infiniband/netloc_ib_gather_raw.in |  477 ++
 .../hwloc/utils/netloc/mpi/Makefile.am        |    2 +
 opal/mca/hwloc/hwloc2x/hwloc2x.h              |   50 +
 opal/mca/hwloc/hwloc2x/hwloc2x_component.c    |   57 +
 opal/mca/hwloc/hwloc2x/owner.txt              |    7 +
 155 files changed, 49968 insertions(+), 2 deletions(-)
 create mode 100644 opal/mca/hwloc/hwloc2x/Makefile.am
 create mode 100644 opal/mca/hwloc/hwloc2x/autogen.subdirs
 create mode 100644 opal/mca/hwloc/hwloc2x/configure.m4
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/AUTHORS
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/COPYING
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/Makefile.am
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/NEWS
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/README
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/VERSION
 create mode 100755 opal/mca/hwloc/hwloc2x/hwloc/autogen.sh
 create mode 100755 opal/mca/hwloc/hwloc2x/hwloc/config/distscript.sh
 create mode 100755 opal/mca/hwloc/hwloc2x/hwloc/config/distscript_embedded.sh
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/config/hwloc.m4
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_attributes.m4
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_vendor.m4
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_visibility.m4
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_components.m4
 create mode 100755 opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_get_version.sh
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_internal.m4
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_pkg.m4
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/config/netloc.m4
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/configure.ac
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/contrib/hwloc-valgrind.supp
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/contrib/misc/Makefile.am
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/contrib/systemd/Makefile.am
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/doc/Makefile.am
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/doc/doxygen-config.cfg.in
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/doc/examples/Makefile.am
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc.pc.in
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/Makefile.am
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/base64.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/bind.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/bitmap.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/components.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/diff.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/distances.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/dolib.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/misc.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/pci-common.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-aix.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-bgq.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-cuda.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-darwin.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-fake.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-freebsd.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-gl.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-hardwired.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-hpux.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-linux.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-netbsd.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-noos.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-nvml.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-opencl.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-pci.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-solaris-chiptype.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-solaris.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-synthetic.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-windows.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-x86.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-xml-libxml.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-xml-nolibxml.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-xml.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/hwloc/traversal.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/Makefile.am
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/autogen/config.h.in
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/bitmap.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/cuda.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/cudart.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/deprecated.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/diff.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/distances.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/export.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/gl.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/glibc-sched.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/helper.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/inlines.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/intel-mic.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/linux-libnuma.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/linux.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/myriexpress.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/nvml.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/opencl.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/openfabrics-verbs.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/plugins.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/rename.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/netloc.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/netloc/utarray.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/netloc/uthash.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/netlocscotch.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/private/autogen/config.h.in
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/private/components.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/private/cpuid-x86.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/private/debug.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/private/misc.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/private/netloc.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/private/private.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/private/solaris-chiptype.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/include/private/xml.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc.pc.in
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc/Makefile.am
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc/architecture.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc/edge.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc/hwloc.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc/mpicomm.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc/node.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc/path.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc/physical_link.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc/scotch.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc/support.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netloc/topology.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/netlocscotch.pc.in
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/Makefile.am
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/Makefile.am
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/Makefile.am
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/allowed/Makefile.am
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/allowed/test-topology.sh.in
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/gather/Makefile.am
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/gather/test-gather-topology.sh.in
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/test-topology.sh.in
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/ports/Makefile.am
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/rename/Makefile.am
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/wrapper.sh.in
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/x86/Makefile.am
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/x86/test-topology.sh.in
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/xml/Makefile.am
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/xml/test-topology.sh.in
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/netloc/Makefile.am
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/tests/netloc/tests.sh.in
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/Makefile.am
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/Makefile.am
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/hwloc-compress-dir.in
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/hwloc-gather-topology.in
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-fake-plugin.sh.in
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-annotate.sh.in
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-calc.sh.in
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-compress-dir.sh.in
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-diffpatch.sh.in
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-distrib.sh.in
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-dump-hwdata/Makefile.am
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-dump-hwdata/test-hwloc-dump-hwdata.sh.in
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-info.sh.in
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/lstopo/Makefile.am
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/lstopo/lstopo-windows.c
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/lstopo/test-lstopo.sh.in
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/netloc/draw/Makefile.am
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/netloc/infiniband/Makefile.am
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/netloc/infiniband/netloc_ib_gather_raw.in
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc/utils/netloc/mpi/Makefile.am
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc2x.h
 create mode 100644 opal/mca/hwloc/hwloc2x/hwloc2x_component.c
 create mode 100644 opal/mca/hwloc/hwloc2x/owner.txt

diff --git a/opal/mca/hwloc/external/configure.m4 b/opal/mca/hwloc/external/configure.m4
index c7c3d02ed9..411d8ad1c1 100644
--- a/opal/mca/hwloc/external/configure.m4
+++ b/opal/mca/hwloc/external/configure.m4
@@ -103,7 +103,8 @@ AC_DEFUN([MCA_opal_hwloc_external_CONFIG],[
     AS_IF([test "$with_hwloc" = "external"], [opal_hwloc_external_want=yes])
     AS_IF([test "$with_hwloc" != "" && \
            test "$with_hwloc" != "no" && \
-           test "$with_hwloc" != "internal"], [opal_hwloc_external_want=yes])
+           test "$with_hwloc" != "internal" && \
+           test "$with_hwloc" != "future"], [opal_hwloc_external_want=yes])
     AS_IF([test "$with_hwloc" = "no"], [opal_hwloc_external_want=no])
 
     # If we still want external support, try it
diff --git a/opal/mca/hwloc/hwloc1116/configure.m4 b/opal/mca/hwloc/hwloc1116/configure.m4
index de1ff24ac3..fbb8bd24b1 100644
--- a/opal/mca/hwloc/hwloc1116/configure.m4
+++ b/opal/mca/hwloc/hwloc1116/configure.m4
@@ -88,7 +88,8 @@ AC_DEFUN([MCA_opal_hwloc_hwloc1116_CONFIG],[
 
     # Run the hwloc configuration - if no external hwloc, then set the prefixi
     # to minimize the chance that someone will use the internal symbols
-    AS_IF([test "$opal_hwloc_external" = "no"],
+    AS_IF([test "$opal_hwloc_external" = "no" &&
+           test "$with_hwloc" != "future"],
           [HWLOC_SET_SYMBOL_PREFIX([opal_hwloc1116_])])
 
     # save XML or graphical options
diff --git a/opal/mca/hwloc/hwloc2x/Makefile.am b/opal/mca/hwloc/hwloc2x/Makefile.am
new file mode 100644
index 0000000000..7a9a9da0b5
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/Makefile.am
@@ -0,0 +1,39 @@
+#
+# Copyright (c) 2011-2016 Cisco Systems, Inc.  All rights reserved.
+# Copyright (c) 2014-2015 Intel, Inc. All right reserved.
+# Copyright (c) 2016      Los Alamos National Security, LLC. All rights
+#                         reserved.
+# Copyright (c) 2017      Research Organization for Information Science
+#                         and Technology (RIST). All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+EXTRA_DIST = autogen.subdirs
+
+SUBDIRS = hwloc
+DIST_SUBDIRS=hwloc
+
+# Headers and sources
+headers = hwloc2x.h
+sources = hwloc2x_component.c
+
+libs = hwloc/hwloc/libhwloc_embedded.la
+
+# We only ever build this component statically
+noinst_LTLIBRARIES = libmca_hwloc_hwloc2x.la
+libmca_hwloc_hwloc2x_la_SOURCES = $(headers) $(sources)
+nodist_libmca_hwloc_hwloc2x_la_SOURCES = $(nodist_headers)
+libmca_hwloc_hwloc2x_la_LDFLAGS = -module -avoid-version
+libmca_hwloc_hwloc2x_la_LIBADD = $(libs)
+libmca_hwloc_hwloc2x_la_DEPENDENCIES = $(libs)
+
+# Conditionally install the header files
+if WANT_INSTALL_HEADERS
+opaldir = $(opalincludedir)/$(subdir)
+nobase_opal_HEADERS = $(headers)
+nobase_nodist_opal_HEADERS = $(nodist_headers)
+endif
diff --git a/opal/mca/hwloc/hwloc2x/autogen.subdirs b/opal/mca/hwloc/hwloc2x/autogen.subdirs
new file mode 100644
index 0000000000..beb596cf6a
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/autogen.subdirs
@@ -0,0 +1 @@
+hwloc
diff --git a/opal/mca/hwloc/hwloc2x/configure.m4 b/opal/mca/hwloc/hwloc2x/configure.m4
new file mode 100644
index 0000000000..1d0d127237
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/configure.m4
@@ -0,0 +1,112 @@
+# -*- shell-script -*-
+#
+# Copyright (c) 2009-2017 Cisco Systems, Inc.  All rights reserved
+# Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
+# Copyright (c) 2015-2017 Research Organization for Information Science
+#                         and Technology (RIST). All rights reserved.
+# Copyright (c) 2016      Los Alamos National Security, LLC. All rights
+#                         reserved.
+#
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+#
+# Priority
+#
+AC_DEFUN([MCA_opal_hwloc_hwloc2x_PRIORITY], [90])
+
+#
+# Force this component to compile in static-only mode
+#
+AC_DEFUN([MCA_opal_hwloc_hwloc2x_COMPILE_MODE], [
+    AC_MSG_CHECKING([for MCA component $2:$3 compile mode])
+    $4="static"
+    AC_MSG_RESULT([$$4])
+])
+
+# MCA_hwloc_hwloc2x_POST_CONFIG()
+# ---------------------------------
+AC_DEFUN([MCA_opal_hwloc_hwloc2x_POST_CONFIG],[
+    OPAL_VAR_SCOPE_PUSH([opal_hwloc_hwloc2x_basedir])
+
+    # If we won, then do all the rest of the setup
+    AS_IF([test "$1" = "1" && test "$opal_hwloc_hwloc2x_support" = "yes"],
+          [
+           # Set this variable so that the framework m4 knows what
+           # file to include in opal/mca/hwloc/hwloc-internal.h
+           opal_hwloc_hwloc2x_basedir=opal/mca/hwloc/hwloc2x
+           opal_hwloc_base_include="$opal_hwloc_hwloc2x_basedir/hwloc2x.h"
+
+           # Add some stuff to CPPFLAGS so that the rest of the source
+           # tree can be built
+           file=$opal_hwloc_hwloc2x_basedir/hwloc
+           CPPFLAGS="-I$OPAL_TOP_SRCDIR/$file/include $CPPFLAGS"
+           AS_IF([test "$OPAL_TOP_BUILDDIR" != "$OPAL_TOP_SRCDIR"],
+                 [CPPFLAGS="-I$OPAL_TOP_BUILDDIR/$file/include $CPPFLAGS"])
+           unset file
+          ])
+    OPAL_VAR_SCOPE_POP
+])dnl
+
+
+# MCA_hwloc_hwloc2x_CONFIG([action-if-found], [action-if-not-found])
+# --------------------------------------------------------------------
+AC_DEFUN([MCA_opal_hwloc_hwloc2x_CONFIG],[
+    # Hwloc needs to know if we have Verbs support
+    AC_REQUIRE([OPAL_CHECK_VERBS_DIR])
+
+    AC_CONFIG_FILES([opal/mca/hwloc/hwloc2x/Makefile])
+
+    OPAL_VAR_SCOPE_PUSH([HWLOC_VERSION opal_hwloc_hwloc2x_flags opal_hwloc_hwloc2x_save_CPPFLAGS opal_hwloc_hwloc2x_basedir opal_hwloc_hwloc2x_file opal_hwloc_future])
+
+    # default to this component not providing support
+    opal_hwloc_hwloc2x_basedir=opal/mca/hwloc/hwloc2x
+    opal_hwloc_hwloc2x_support=no
+
+    AS_IF([test "$with_hwloc" = "future"],
+          [opal_hwloc_future="yes"],
+          [opal_hwloc_future="no"])
+
+    opal_hwloc_hwloc2x_save_CPPFLAGS=$CPPFLAGS
+
+    # Run the hwloc configuration - if no external hwloc, then set the prefix
+    # to minimize the chance that someone will use the internal symbols
+    
+    opal_hwloc_hwloc2x_flags="--enable-embedded-mode --with-hwloc-symbol-prefix=opal_hwloc2x_ --disable-cairo --disable-plugins --enable-static --enable-xml"
+    AS_IF([test "$opal_check_cuda_happy" = "yes"],
+          [CPPFLAGS="$CPPFLAGS $opal_datatype_cuda_CPPFLAGS"
+           opal_hwloc_hwloc2x_flags="$opal_hwloc_hwloc2x_flags --enable-nvml CPPFLAGS=\"$CPPFLAGS\""],
+          [opal_hwloc_hwloc2x_flags="$opal_hwloc_hwloc2x_flags --disable-nvml"])
+
+    OPAL_CONFIG_SUBDIR([opal/mca/hwloc/hwloc2x/hwloc],
+                       [$opal_hwloc_hwloc2x_flags],
+                       [opal_hwloc_hwloc2x_support="yes"],
+                       [opal_hwloc_hwloc2x_support="no"])
+
+    CPPFLAGS=$opal_hwloc_hwloc2x_save_CPPFLAGS
+
+    # If we are not building the internal hwloc, then indicate that
+    # this component should not be built.  NOTE: we still did all the
+    # above configury so that all the proper GNU Autotools
+    # infrastructure is setup properly (e.g., w.r.t. SUBDIRS=hwloc in
+    # this directory's Makefile.am, we still need the Autotools "make
+    # distclean" infrastructure to work properly).
+    AS_IF([test "$opal_hwloc_future" != "yes"],
+          [AC_MSG_WARN([not using future hwloc; disqualifying this component])
+           opal_hwloc_hwloc2x_support=no])
+
+    # Done!
+    AS_IF([test "$opal_hwloc_hwloc2x_support" = "yes"],
+          [AC_DEFINE_UNQUOTED([HWLOC_SYM_PREFIX],[opal_hwloc2x_])
+           AC_DEFINE_UNQUOTED([HWLOC_SYM_PREFIX_CAPS], [OPAL_HWLOC2X_])
+           AC_DEFINE_UNQUOTED([HWLOC_SYM_TRANSFORM], [1])
+           AC_DEFINE([HAVE_DECL_HWLOC_OBJ_OSDEV_COPROC], [1])
+           $1],
+          [$2])
+
+    OPAL_VAR_SCOPE_POP
+])dnl
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/AUTHORS b/opal/mca/hwloc/hwloc2x/hwloc/AUTHORS
new file mode 100644
index 0000000000..740de337b2
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/AUTHORS
@@ -0,0 +1,29 @@
+netloc Authors
+==============
+
+The following cumulative list contains the names of most individuals who
+have committed code to the hwloc repository.
+
+Name                         Affiliation(s)
+---------------------------  --------------------
+Cédric Augonnet              University of Bordeaux
+Guillaume Beauchamp          Inria
+Ahmad Boissetri Binzagr      Inria
+Cyril Bordage                Inria
+Nicholas Buroker             UWL
+Jérôme Clet-Ortega           University of Bordeaux
+Ludovic Courtès              Inria
+Nathalie Furmento            CNRS
+Brice Goglin                 Inria
+Joshua Hursey                UWL
+Alexey Kardashevskiy         IBM
+Douglas MacFarland           UWL
+Antoine Rougier              intern from University of Bordeaux
+Jeff Squyres                 Cisco
+Samuel Thibault              University of Bordeaux
+
+Affiliation abbreviations:
+-------------------------
+Cisco = Cisco Systems, Inc.
+CNRS = Centre national de la recherche scientifique (France)
+UWL = University of Wisconsin-La Crosse
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/COPYING b/opal/mca/hwloc/hwloc2x/hwloc/COPYING
new file mode 100644
index 0000000000..e77516e180
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/COPYING
@@ -0,0 +1,39 @@
+Copyright © 2004-2006 The Trustees of Indiana University and Indiana University Research and Technology Corporation.  All rights reserved.
+Copyright © 2004-2005 The University of Tennessee and The University of Tennessee Research Foundation.  All rights reserved.
+Copyright © 2004-2005 High Performance Computing Center Stuttgart, University of Stuttgart.  All rights reserved.
+Copyright © 2004-2005 The Regents of the University of California. All rights reserved.
+Copyright © 2009      CNRS
+Copyright © 2009-2016 Inria.  All rights reserved.
+Copyright © 2009-2015 Université Bordeaux
+Copyright © 2009-2015 Cisco Systems, Inc.  All rights reserved.
+Copyright © 2009-2012 Oracle and/or its affiliates.  All rights reserved.
+Copyright © 2010      IBM
+Copyright © 2010      Jirka Hladky
+Copyright © 2012      Aleksej Saushev, The NetBSD Foundation
+Copyright © 2012      Blue Brain Project, EPFL. All rights reserved.
+Copyright © 2013-2014 University of Wisconsin-La Crosse. All rights reserved.
+Copyright © 2015      Research Organization for Information Science and Technology (RIST). All rights reserved.
+Copyright © 2015-2016 Intel, Inc.  All rights reserved.
+See COPYING in top-level directory.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+3. The name of the author may not be used to endorse or promote products
+   derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/Makefile.am b/opal/mca/hwloc/hwloc2x/hwloc/Makefile.am
new file mode 100644
index 0000000000..3aa78ab251
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/Makefile.am
@@ -0,0 +1,89 @@
+# Copyright © 2009-2016 Inria.  All rights reserved.
+# Copyright © 2009      Université Bordeaux
+# Copyright © 2009-2014 Cisco Systems, Inc.  All rights reserved.
+# See COPYING in top-level directory.
+
+# Note that the -I directory must *exactly* match what was specified
+# via AC_CONFIG_MACRO_DIR in configure.ac.
+ACLOCAL_AMFLAGS = -I ./config
+
+#
+# "make distcheck" requires that tarballs are able to be able to "make
+# dist", so we have to include config/distscript.sh.
+#
+EXTRA_DIST = \
+        README VERSION COPYING AUTHORS \
+        config/hwloc_get_version.sh \
+        config/distscript.sh
+
+SUBDIRS = include hwloc
+
+if BUILD_NETLOC
+SUBDIRS += netloc
+endif
+
+if HWLOC_BUILD_STANDALONE
+SUBDIRS += utils tests contrib/systemd contrib/misc
+# We need doc/ if HWLOC_BUILD_DOXYGEN, or during make install if HWLOC_INSTALL_DOXYGEN.
+# There's no INSTALL_SUBDIRS, so always enter doc/ and check HWLOC_BUILD/INSTALL_DOXYGEN there
+SUBDIRS += doc
+endif
+
+# Do not let automake automatically add the non-standalone dirs to the
+# distribution tarball if we're building in embedded mode.
+DIST_SUBDIRS = $(SUBDIRS)
+
+# Only install the pkg file if we're building in standalone mode (and not on Windows)
+if HWLOC_BUILD_STANDALONE
+pkgconfigdir = $(libdir)/pkgconfig
+pkgconfig_DATA = hwloc.pc
+if BUILD_NETLOC
+# JMS Need to compare hwloc.pc and netloc.pc -- I think netloc.pc is
+# missing some things.
+# pkgconfig_DATA += netloc.pc Disabled until the netloc API is public
+EXTRA_DIST += netloc.pc
+if BUILD_NETLOCSCOTCH
+pkgconfig_DATA += netlocscotch.pc
+endif BUILD_NETLOCSCOTCH
+endif BUILD_NETLOC
+endif HWLOC_BUILD_STANDALONE
+
+# Only install the valgrind suppressions file if we're building in
+# standalone mode
+if HWLOC_BUILD_STANDALONE
+dist_pkgdata_DATA = contrib/hwloc-valgrind.supp
+endif
+
+# Only install entire visual studio subdirectory if we're building in
+# standalone mode
+if HWLOC_BUILD_STANDALONE
+EXTRA_DIST += contrib/windows
+endif
+
+if HWLOC_BUILD_STANDALONE
+dist-hook:
+	sh "$(top_srcdir)/config/distscript.sh" "$(top_srcdir)" "$(distdir)" "$(HWLOC_VERSION)"
+endif HWLOC_BUILD_STANDALONE
+
+if HWLOC_BUILD_STANDALONE
+if HWLOC_HAVE_WINDOWS
+#
+# Winball specific rules
+#
+install-data-local:
+	sed -e 's/$$/'$$'\015'/ < $(srcdir)/README > $(DESTDIR)$(prefix)/README.txt
+	sed -e 's/$$/'$$'\015'/ < $(srcdir)/NEWS > $(DESTDIR)$(prefix)/NEWS.txt
+	sed -e 's/$$/'$$'\015'/ < $(srcdir)/COPYING > $(DESTDIR)$(prefix)/COPYING.txt
+uninstall-local:
+	rm -f $(DESTDIR)$(prefix)/README.txt $(DESTDIR)$(prefix)/NEWS.txt $(DESTDIR)$(prefix)/COPYING.txt
+endif HWLOC_HAVE_WINDOWS
+endif HWLOC_BUILD_STANDALONE
+
+#
+# Build the documentation and top-level README file
+#
+if HWLOC_BUILD_STANDALONE
+.PHONY: doc readme
+doc readme:
+	$(MAKE) -C doc
+endif HWLOC_BUILD_STANDALONE
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/NEWS b/opal/mca/hwloc/hwloc2x/hwloc/NEWS
new file mode 100644
index 0000000000..e1a0fffef5
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/NEWS
@@ -0,0 +1,1482 @@
+Copyright © 2009 CNRS
+Copyright © 2009-2017 Inria.  All rights reserved.
+Copyright © 2009-2013 Université Bordeaux
+Copyright © 2009-2011 Cisco Systems, Inc.  All rights reserved.
+
+$COPYRIGHT$
+
+Additional copyrights may follow
+
+$HEADER$
+
+===========================================================================
+
+This file contains the main features as well as overviews of specific
+bug fixes (and other actions) for each version of hwloc since version
+0.9 (as initially released as "libtopology", then re-branded to "hwloc"
+in v0.9.1).
+
+
+Version 2.0.0
+-------------
+* The ABI of the library has changed. For instance some hwloc_obj fields
+  were reordered.
+  - HWLOC_API_VERSION and hwloc_get_api_version() now give 0x00020000.
+  - See "How do I handle ABI breaks and API upgrades ?" in the FAQ
+    and https://github.com/open-mpi/hwloc/wiki/Upgrading-to-v2.0-API
+* Major changes
+  + Topologies always have at least one NUMA object. On non-NUMA machines,
+    a single NUMA object is added to describe the entire machine memory.
+    The NUMA level cannot be ignored anymore.
+  + The HWLOC_OBJ_CACHE type is replaced with 8 types HWLOC_OBJ_L[1-5]CACHE
+    and HWLOC_OBJ_L[1-3]ICACHE that remove the need to disambiguate levels
+    when looking for caches with _by_type() functions.
+    - New hwloc_obj_type_is_{,d,i}cache() functions may be used to check whether
+      a given type is a cache.
+  + Replace hwloc_topology_ignore*() functions with hwloc_topology_set_type_filter()
+    and hwloc_topology_set_all_types_filter().
+    - Contrary to hwloc_topology_ignore_{type,all}_keep_structure() which
+      removed individual objects, HWLOC_TYPE_FILTER_KEEP_STRUCTURE only removes
+      entire levels (so that topologies do not become too asymmetric).
+  + Remove HWLOC_TOPOLOGY_FLAG_ICACHES in favor of hwloc_topology_set_icache_types_filter()
+    with HWLOC_TYPE_FILTER_KEEP_ALL.
+  + Remove HWLOC_TOPOLOGY_FLAG_IO_DEVICES, _IO_BRIDGES and _WHOLE_IO in favor of
+    hwloc_topology_set_io_types_filter() with HWLOC_TYPE_FILTER_KEEP_ALL or
+    HWLOC_TYPE_FILTER_KEEP_IMPORTANT.
+  + hwloc_topology_restrict() doesn't remove objects that contain memory
+    by default anymore.
+    - The list of existing restrict flags was modified.
+  + XML export functions take an additional flags argument,
+    for instance for exporting XMLs that are compatible with hwloc 1.x.
+  + The distance API has been completely reworked. It is now described
+    in hwloc/distances.h.
+  + Add the experimental netloc subproject. It is enabled by default when
+    supported and can be disabled with --disable-netloc.
+    It currently brings command-line tools to gather and visualize the
+    topology of InfiniBand fabrics, and an API to convert such topologies
+    into Scotch architectures for process mapping.
+    See the documentation for details.
+  + Remove the online_cpuset from struct hwloc_obj. Offline PUs get unknown
+    topologies on Linux nowadays, and wrong topology on Solaris. Other OS
+    do not support them. And one cannot do much about them anyway. Just keep
+    them in complete_cpuset.
+  + Remove the custom interface for assembling the topologies of different
+    nodes as well as the hwloc-assembler tools.
+  + Remove Kerrighed support from the Linux backend.
+  + Remove Tru64 (OSF/1) support.
+    - Remove HWLOC_MEMBIND_REPLICATE which wasn't available anywhere else.
+* API
+  + Objects now have a "subtype" field that supersedes former "Type" and
+    "CoProcType" info attributes.
+  + The almost-unused "os_level" attribute has been removed from the
+    hwloc_obj structure.
+  + I/O and Misc objects are now stored in a dedicated children list, only
+    normal children with non-NULL cpusets and nodesets are in the main
+    children list.
+    - hwloc_get_next_child() may still be used to iterate over these 3 lists
+      of children at once.
+  + Replace hwloc_topology_insert_misc_object_by_cpuset() with
+    hwloc_topology_insert_group_object() to precisely specify the location
+    of an additional hierarchy level in the topology.
+  + Misc objects have their own level and depth to iterate over all of them.
+  + Misc objects may now only be inserted as a leaf object with
+    hwloc_topology_insert_misc_object() which deprecates
+    hwloc_topology_insert_misc_object_by_parent().
+  + hwloc_topology_set_fsroot() is removed, the environment variable
+    HWLOC_FSROOT may be used for the same remote testing/debugging purpose.
+  + hwloc_type_sscanf() deprecates the old hwloc_obj_type_sscanf().
+  + hwloc_type_sscanf_as_depth() is added to convert a type name into
+    a level depth.
+  + hwloc_type_name() deprecates the old hwloc_obj_type_string().
+  + Remove the deprecated hwloc_obj_snprintf(), hwloc_obj_type_of_string(),
+    hwloc_distribute[v]().
+  + hwloc_obj_cpuset_snprintf() is deprecated in favor of hwloc_bitmap_snprintf().
+  + Functions diff_load_xml*(), diff_export_xml*() and diff_destroy() in
+    hwloc/diff.h do not need a topology as first parameter anymore.
+  + hwloc_parse_cpumap_file() is superseded by hwloc_linux_read_path_as_cpumask()
+    in hwloc/linux.h.
+* Tools
+  - lstopo and hwloc-info have a new --filter option matching the new filtering API.
+  - hwloc-distances was removed and replaced with lstopo --distances.
+* Plugin API
+  + hwloc_fill_object_sets() is renamed into hwloc_obj_add_children_sets().
+* Misc
+  + Linux OS devices do not have to be attached through PCI anymore,
+    for instance enabling the discovery of NVDIMM block devices.
+  + Add a SectorSize attribute to block OS devices on Linux.
+  + Misc MemoryModule objects are only added when full I/O discovery is enabled
+    (WHOLE_IO topology flag).
+  + Do not set PCI devices and bridges name automatically. Vendor and device
+    names are already in info attributes.
+  + Exporting to synthetic now ignores I/O and Misc objects.
+  + XML and Synthetic export functions have moved to hwloc/export.h,
+    automatically included from hwloc.h.
+  + Separate OS device discovery from PCI discovery. Only the latter is disabled
+    with --disable-pci at configure time. Both may be disabled with --disable-io.
+  + The old `libpci' component name from hwloc 1.6 is not supported anymore,
+    only the `pci' name from hwloc 1.7 is now recognized.
+  + The `linuxpci' component is now renamed into `linuxio'.
+  + The HWLOC_PCI_<domain>_<bus>_LOCALCPUS environment variables are superseded
+    with a single HWLOC_PCI_LOCALITY where bus ranges may be specified.
+  + Add HWLOC_SYNTHETIC environment variable to enforce a synthetic topology
+    as if hwloc_topology_set_synthetic() had been called.
+  + HWLOC_COMPONENTS doesn't support xml or synthetic component attributes
+    anymore, they should be passed in HWLOC_XMLFILE or HWLOC_SYNTHETIC instead.
+  + HWLOC_COMPONENTS takes precedence over other environment variables
+    for selecting components.
+  + Remove the dependency on libnuma on Linux.
+
+
+Version 1.11.7
+--------------
+* Fix hwloc-bind --membind for CPU-less NUMA nodes (again).
+  Thanks to Gilles Gouaillardet for reporting the issue.
+* Fix a memory leak on IBM S/390 platforms running Linux.
+* Fix a memory leak when forcing the x86 backend first on amd64/topoext
+  platforms running Linux.
+* Command-line tools now support "hbm" instead of "numanode" for filtering
+  only high-bandwidth memory nodes when selecting locations.
+  + hwloc-bind also supports --hbm and --no-hbm for filtering only or
+    no HBM nodes.
+  Thanks to Nicolas Denoyelle for the suggestion.
+* Add --children and --descendants to hwloc-info for listing object
+  children or object descendants of a specific type.
+* Add --no-index, --index, --no-attrs, --attrs to disable/enable display
+  of index numbers or attributes in the graphical lstopo output.
+* Try to gather hwloc-dump-hwdata output from all possible locations
+  in hwloc-gather-topology.
+* Updates to the documentation of locations in hwloc(7) and
+  command-line tools manpages.
+
+
+Version 1.11.6
+--------------
+* Make the Linux discovery about twice faster, especially on the CPU side,
+  by trying to avoid sysfs file accesses as much as possible.
+* Add support for AMD Family 17h processors (Zen) SMT cores in the Linux
+  and x86 backends.
+* Add the HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES flag (and the
+  HWLOC_THISSYSTEM_ALLOWED_RESOURCES environment variable) for reading the
+  set of allowed resources from the local operating system even if the
+  topology was loaded from XML or synthetic.
+* Fix hwloc_bitmap_set/clr_range() for infinite ranges that do not
+  overlap currently defined ranges in the bitmap.
+* Don't reset the lstopo zoom scale when moving the X11 window.
+* lstopo now has --flags for manually setting topology flags.
+* hwloc_get_depth_type() returns HWLOC_TYPE_DEPTH_UNKNOWN for Misc objects.
+
+
+Version 1.11.5
+--------------
+* Add support for Knights Mill Xeon Phi, thanks to Piotr Luc for the patch.
+* Reenable distance gathering on Solaris, disabled by mistake since v1.0.
+  Thanks to TU Wien for the help.
+* Fix hwloc_get_*obj*_inside_cpuset() functions to ignore objects with
+  empty CPU sets, for instance, CPU-less NUMA nodes such as KNL MCDRAM.
+  Thanks to Nicolas Denoyelle for the report.
+* Fix XML import of multiple distance matrices.
+* Add a FAQ entry about "hwloc is only a structural model, it ignores
+  performance models, memory bandwidth, etc.?"
+
+
+Version 1.11.4
+--------------
+* Add MemoryMode and ClusterMode attributes in the Machine object on KNL.
+  Add doc/examples/get-knl-modes.c for an example of retrieving them.
+  Thanks to Grzegorz Andrejczuk.
+* Fix Linux build with -m32 with respect to libudev.
+  Thanks to Paul Hargrove for reporting the issue.
+* Fix build with Visual Studio 2015, thanks to Eloi Gaudry for reporting
+  the issue and providing the patch.
+* Don't forget to display OS device children in the graphical lstopo.
+* Fix a memory leak on Solaris, thanks to Bryon Gloden for the patch.
+* Properly handle realloc() failures, thanks to Bryon Gloden for reporting
+  the issue.
+* Fix lstopo crash in ascii/fig/windows outputs when some objects have a
+  lstopoStyle info attribute.
+
+
+Version 1.11.3
+--------------
+* Bug fixes
+  + Fix a memory leak on Linux S/390 hosts with books.
+  + Fix /proc/mounts parsing on Linux by using mntent.h.
+    Thanks to Nathan Hjelm for reporting the issue.
+  + Fix a x86 infinite loop on VMware due to the x2APIC feature being
+    advertised without actually being fully supported.
+    Thanks to Jianjun Wen for reporting the problem and testing the patch.
+  + Fix the return value of hwloc_alloc() on mmap() failure.
+    Thanks to Hugo Brunie for reporting the issue.
+  + Fix the return value of command-line tools in some error cases.
+  + Do not break individual thread bindings during x86 backend discovery in a
+    multithreaded process. Thanks to Farouk Mansouri for the report.
+  + Fix hwloc-bind --membind for CPU-less NUMA nodes.
+  + Fix some corner cases in the XML export/import of application userdata.
+* API Improvements
+  + Add HWLOC_MEMBIND_BYNODESET flag so that membind() functions accept
+    either cpusets or nodesets.
+  + Add hwloc_get_area_memlocation() to check where pages are actually
+    allocated. Only implemented on Linux for now.
+    - There's no _nodeset() variant, but the new flag HWLOC_MEMBIND_BYNODESET
+      is supported.
+  + Make hwloc_obj_type_sscanf() parse back everything that may be outputted
+    by hwloc_obj_type_snprintf().
+* Detection Improvements
+  + Allow the x86 backend to add missing cache levels, so that it completes
+    what the Solaris backend lacks.
+    Thanks to Ryan Zezeski for reporting the issue.
+  + Do not filter-out FibreChannel PCI adapters by default anymore.
+    Thanks to Matt Muggeridge for the report.
+  + Add support for CUDA compute capability 6.x.
+* Tools
+  + Add --support to hwloc-info to list supported features, just like with
+    hwloc_topology_get_support().
+    - Also add --objects and --topology to explicitly switch between the
+      default modes.
+  + Add --tid to let hwloc-bind operate on individual threads on Linux.
+  + Add --nodeset to let hwloc-bind report memory binding as NUMA node sets.
+  + hwloc-annotate and lstopo don't drop application userdata from XMLs anymore.
+    - Add --cu to hwloc-annotate to drop these application userdata.
+  + Make the hwloc-dump-hwdata dump directory configurable through configure
+    options such as --runstatedir or --localstatedir.
+* Misc Improvements
+  + Add systemd service template contrib/systemd/hwloc-dump-hwdata.service
+    for launching hwloc-dump-hwdata at boot on Linux.
+    Thanks to Grzegorz Andrejczuk.
+  + Add HWLOC_PLUGINS_BLACKLIST environment variable to prevent some plugins
+    from being loaded. Thanks to Alexandre Denis for the suggestion.
+  + Small improvements for various Windows build systems,
+    thanks to Jonathan L Peyton and Marco Atzeri.
+
+
+Version 1.11.2
+--------------
+* Improve support for Intel Knights Landing Xeon Phi on Linux:
+  + Group local NUMA nodes of normal memory (DDR) and high-bandwidth memory
+    (MCDRAM) together through "Cluster" groups so that the local MCDRAM is
+    easy to find.
+    - See "How do I find the local MCDRAM NUMA node on Intel Knights
+      Landing Xeon Phi?" in the documentation.
+    - For uniformity across all KNL configurations, always have a NUMA node
+      object even if the host is UMA.
+  + Fix the detection of the memory-side cache:
+    - Add the hwloc-dump-hwdata superuser utility to dump SMBIOS information
+      into /var/run/hwloc/ as root during boot, and load this dumped
+      information from the hwloc library at runtime.
+    - See "Why do I need hwloc-dump-hwdata for caches on Intel Knights
+      Landing Xeon Phi?" in the documentation.
+  Thanks to Grzegorz Andrejczuk for the patches and for the help.
+* The x86 and linux backends may now be combined for discovering CPUs
+  through x86 CPUID and memory from the Linux kernel.
+  This is useful for working around buggy CPU information reported by Linux
+  (for instance the AMD Bulldozer/Piledriver bug below).
+  Combination is enabled by passing HWLOC_COMPONENTS=x86 in the environment.
+* Fix L3 cache sharing on AMD Opteron 63xx (Piledriver) and 62xx (Bulldozer)
+  in the x86 backend. Thanks to many users who helped.
+* Fix the overzealous L3 cache sharing fix added to the x86 backend in 1.11.1
+  for AMD Opteron 61xx (Magny-Cours) processors.
+* The x86 backend may now add the info attribute Inclusive=0 or 1 to caches
+  it discovers, or to caches discovered by other backends earlier.
+  Thanks to Guillaume Beauchamp for the patch.
+* Fix the management of alloc_membind() allocation failures on AIX, HP-UX
+  and OSF/Tru64.
+* Fix spurious failures to load with ENOMEM on AIX in case of Misc objects
+  below PUs.
+* lstopo improvements in X11 and Windows graphical mode:
+  + Add + - f 1 shortcuts to manually zoom-in, zoom-out, reset the scale,
+    or fit the entire window.
+  + Display all keyboard shortcuts in the console.
+* Debug messages may be disabled at runtime by passing HWLOC_DEBUG_VERBOSE=0
+  in the environment when --enable-debug was passed to configure.
+* Add a FAQ entry "What are these Group objects in my topology?".
+
+
+Version 1.11.1
+--------------
+* Detection fixes
+  + Hardwire the topology of Fujitsu K-computer, FX10, FX100 servers to
+    workaround buggy Linux kernels.
+    Thanks to Takahiro Kawashima and Gilles Gouaillardet.
+  + Fix L3 cache information on AMD Opteron 61xx Magny-Cours processors
+    in the x86 backend. Thanks to Guillaume Beauchamp for the patch.
+  + Detect block devices directly attached to PCI without a controller,
+    for instance NVMe disks. Thanks to Barry M. Tannenbaum.
+  + Add the PCISlot attribute to all PCI functions instead of only the
+    first one.
+* Miscellaneous internal fixes
+  + Ignore PCI bridges that could fail assertions by reporting buggy
+    secondary-subordinate bus numbers
+    Thanks to George Bosilca for reporting the issue.
+  + Fix an overzealous assertion when inserting an intermediate Group object
+    while Groups are totally ignored.
+  + Fix a memory leak on Linux on AMD processors with dual-core compute units.
+    Thanks to Bob Benner.
+  + Fix a memory leak on failure to load a xml diff file.
+  + Fix some segfaults when inputting an invalid synthetic description.
+  + Fix a segfault when plugins fail to find core symbols.
+    Thanks to Guy Streeter.
+* Many fixes and improvements in the Windows backend:
+  + Fix the discovery of more than 32 processors and multiple processor
+    groups. Thanks to Barry M. Tannenbaum for the help.
+  + Add thread binding set support in case of multiple process groups.
+  + Add thread binding get support.
+  + Add get_last_cpu_location() support for the current thread.
+  + Disable the unsupported process binding in case of multiple processor
+    groups.
+  + Fix/update the Visual Studio support under contrib/windows.
+    Thanks to Eloi Gaudry for the help.
+* Tools fixes
+  + Fix a segfault when displaying logical indexes in the graphical lstopo.
+    Thanks to Guillaume Mercier for reporting the issue.
+  + Fix lstopo linking with X11 libraries, for instance on Mac OS X.
+    Thanks to Scott Atchley and Pierre Ramet for reporting the issue.
+  + hwloc-annotate, hwloc-diff and hwloc-patch do not drop unavailable
+    resources from the output anymore and those may be annotated as well.
+  + Command-line tools may now import XML from the standard input with -i -.xml
+  + Add missing documentation for the hwloc-info --no-icaches option.
+
+
+Version 1.11.0
+--------------
+* API
+  + Socket objects are renamed into Package to align with the terminology
+    used by processor vendors. The old HWLOC_OBJ_SOCKET type and "Socket"
+    name are still supported for backward compatibility.
+  + HWLOC_OBJ_NODE is replaced with HWLOC_OBJ_NUMANODE for clarification.
+    HWLOC_OBJ_NODE is still supported for backward compatibility.
+    "Node" and "NUMANode" strings are supported as in earlier releases.
+* Detection improvements
+  + Add support for Intel Knights Landing Xeon Phi.
+    Thanks to Grzegorz Andrejczuk and Lukasz Anaczkowski.
+  + Add Vendor, Model, Revision, SerialNumber, Type and LinuxDeviceID
+    info attributes to Block OS devices on Linux. Thanks to Vineet Pedaballe
+    for the help.
+    - Add --disable-libudev to avoid dependency on the libudev library.
+  + Add "MemoryModule" Misc objects with information about DIMMs, on Linux
+    when privileged and when I/O is enabled.
+    Thanks to Vineet Pedaballe for the help.
+  + Add a PCISlot attribute to PCI devices on Linux when supported to
+    identify the physical PCI slot where the board is plugged.
+  + Add CPUStepping info attribute on x86 processors,
+    thanks to Thomas Röhl for the suggestion.
+  + Ignore the device-tree on non-Power architectures to avoid buggy
+    detection on ARM. Thanks to Orion Poplawski for reporting the issue.
+  + Work-around buggy Xeon E5v3 BIOS reporting invalid PCI-NUMA affinity
+    for the PCI links on the second processor.
+  + Add support for CUDA compute capability 5.x, thanks Benjamin Worpitz.
+  + Many fixes to the x86 backend
+    - Add L1i and fix L2/L3 type on old AMD processors without topoext support.
+    - Fix Intel CPU family and model numbers when basic family isn't 6 or 15.
+    - Fix package IDs on recent AMD processors.
+    - Fix misc issues due to incomplete APIC IDs on x2APIC processors.
+    - Avoid buggy discovery on old SGI Altix UVs with non-unique APIC IDs.
+  + Gather total machine memory on NetBSD.
+* Tools
+  + lstopo
+    - Collapse identical PCI devices unless --no-collapse is given.
+      This avoids gigantic outputs when a PCI device contains dozens of
+      identical virtual functions.
+    - The ASCII art output is now called "ascii", for instance in
+      "lstopo -.ascii".
+      The former "txt" extension is retained for backward compatibility.
+    - Automatically scales graphical box width to the inner text in Cairo,
+      ASCII and Windows outputs.
+    - Add --rect to lstopo to force rectangular layout even for NUMA nodes.
+    - Add --restrict-flags to configure the behavior of --restrict.
+    - Objects may have a "Type" info attribute to specify a better type name
+      and display it in lstopo.
+    - Really export all verbose information to the given output file.
+  + hwloc-annotate
+    - May now operate on all types of objects, including I/O.
+    - May now insert Misc objects in the topology.
+    - Do not drop instruction caches and I/O devices from the output anymore.
+  + Fix lstopo path in hwloc-gather-topology after install.
+* Misc
+  + Fix hwloc/cudart.h for machines with multiple PCI domains,
+    thanks to Imre Kerr for reporting the problem.
+  + Fix PCI Bridge-specific depth attribute.
+  + Fix hwloc_bitmap_intersect() for two infinite bitmaps.
+  + Fix some corner cases in the building of levels on large NUMA machines
+    with non-uniform NUMA groups and I/Os.
+  + Improve the performance of object insertion by cpuset for large
+    topologies.
+  + Prefix verbose XML import errors with the source name.
+  + Improve pkg-config checks and error messages.
+  + Fix excluding after a component with an argument in the HWLOC_COMPONENTS
+    environment variable.
+* Documentation
+  + Fix the recommended way in documentation and examples to allocate memory
+    on some node, it should use HWLOC_MEMBIND_BIND.
+    Thanks to Nicolas Bouzat for reporting the issue.
+  + Add a "Miscellaneous objects" section in the documentation.
+  + Add a FAQ entry "What happens to my topology if I disable symmetric
+    multithreading, hyper-threading, etc. ?" to the documentation.
+
+
+Version 1.10.1
+--------------
+* Actually remove disallowed NUMA nodes from nodesets when the whole-system
+  flag isn't enabled.
+* Fix the gathering of PCI domains. Thanks to James Custer for reporting
+  the issue and providing a patch.
+* Fix the merging of identical parent and child in presence of Misc objects.
+  Thanks to Dave Love for reporting the issue.
+* Fix some misordering of children when merging with ignore_keep_structure()
+  in partially allowed topologies.
+* Fix an overzealous assertion in the debug code when running on a single-PU
+  host with I/O. Thanks to Thomas Van Doren for reporting the issue.
+* Don't forget to setup NUMA node object nodesets in x86 backend (for BSDs)
+  and OSF/Tru64 backend.
+* Fix cpuid-x86 build error with gcc -O3 on x86-32. Thanks to Thomas Van Doren
+  for reporting the issue.
+* Fix support for future very large caches in the x86 backend.
+* Fix vendor/device names for SR-IOV PCI devices on Linux.
+* Fix an unlikely crash in case of buggy hierarchical distance matrix.
+* Fix PU os_index on some AIX releases. Thanks to Hendryk Bockelmann and
+  Erik Schnetter for helping debugging.
+* Fix hwloc_bitmap_isincluded() in case of infinite sets.
+* Change hwloc-ls.desktop into a lstopo.desktop and only install it if
+  lstopo is built with Cairo/X11 support. It cannot work with a non-graphical
+  lstopo or hwloc-ls.
+* Add support for the renaming of Socket into Package in future releases.
+* Add support for the replacement of HWLOC_OBJ_NODE with HWLOC_OBJ_NUMANODE
+  in future releases.
+* Clarify the documentation of distance matrices in hwloc.h and in the manpage
+  of the hwloc-distances. Thanks to Dave Love for the suggestion.
+* Improve some error messages by displaying more information about the
+  hwloc library in use.
+* Document how to deal with the ABI break when upgrading to the upcoming 2.0
+  See "How do I handle ABI breaks and API upgrades ?" in the FAQ.
+
+
+Version 1.10.0
+--------------
+* API
+  + Add hwloc_topology_export_synthetic() to export a topology to a
+    synthetic string without using lstopo. See the Synthetic topologies
+    section in the documentation.
+  + Add hwloc_topology_set/get_userdata() to let the application save
+    a private pointer in the topology whenever it needs a way to find
+    its own object corresponding to a topology.
+  + Add hwloc_get_numanode_obj_by_os_index() and document that this function
+    as well as hwloc_get_pu_obj_by_os_index() are good at converting
+    nodesets and cpusets into objects.
+  + hwloc_distrib() does not ignore any objects anymore when there are
+    too many of them. They get merged with others instead.
+    Thanks to Tim Creech for reporting the issue.
+* Tools
+  + hwloc-bind --get <command-line> now executes the command after displaying
+    the binding instead of ignoring the command entirely.
+    Thanks to John Donners for the suggestion.
+  + Clarify that memory sizes shown in lstopo are local by default
+    unless specified (total memory added in the root object).
+* Synthetic topologies
+  + Synthetic topology descriptions may now specify attributes such as
+    memory sizes and OS indexes. See the Synthetic topologies section
+    in the documentation.
+  + lstopo now exports in this fully-detailed format by default.
+    The new option --export-synthetic-flags may be used to revert
+    back the old format.
+* Documentation
+  + Add the doc/examples/ subdirectory with several real-life examples,
+    including the already existing hwloc-hello.C for basics.
+    Thanks to Rob Aulwes for the suggestion.
+  + Improve the documentation of CPU and memory binding in the API.
+  + Add a FAQ entry about operating system errors, especially on AMD
+    platforms with buggy cache information.
+  + Add a FAQ entry about loading many topologies in a single program.
+* Misc
+  + Work around buggy Linux kernels reporting 2 sockets instead of
+    1 socket with 2 NUMA nodes for each Xeon E5 v3 (Haswell) processor.
+  + pciutils/libpci support is now removed since libpciaccess works
+    well and there's also a Linux-specific PCI backend. For the record,
+    pciutils was GPL and therefore disabled by default since v1.6.2.
+  + Add --disable-cpuid configure flag to work around buggy processor
+    simulators reporting invalid CPUID information.
+    Thanks for Andrew Friedley for reporting the issue.
+  + Fix a racy use of libltdl when manipulating multiple topologies in
+    different threads.
+    Thanks to Andra Hugo for reporting the issue and testing patches.
+  + Fix some build failures in private/misc.h.
+    Thanks to Pavan Balaji and Ralph Castain for the reports.
+  + Fix failures to detect X11/Xutil.h on some Solaris platforms.
+    Thanks to Siegmar Gross for reporting the failure.
+  + The plugin ABI has changed, this release will not load plugins
+    built against previous hwloc releases.
+
+
+Version 1.9.1
+-------------
+* Fix a crash when the PCI locality is invalid. Attach to the root object
+  instead. Thanks to Nicolas Denoyelle for reporting the issue.
+* Fix -f in lstopo manpage. Thanks to Jirka Hladky for reporting the issue.
+* Fix hwloc_obj_type_sscanf() and others when strncasecmp() is not properly
+  available. Thanks to Nick Papior Andersen for reporting the problem.
+* Mark Linux file descriptors as close-on-exec to avoid leaks on exec.
+* Fix some minor memory leaks.
+
+
+Version 1.9.0
+-------------
+* API
+  + Add hwloc_obj_type_sscanf() to extend hwloc_obj_type_of_string() with
+    type-specific attributes such as Cache/Group depth and Cache type.
+    hwloc_obj_type_of_string() is moved to hwloc/deprecated.h.
+  + Add hwloc_linux_get_tid_last_cpu_location() for retrieving the
+    last CPU where a Linux thread given by TID ran.
+  + Add hwloc_distrib() to extend the old hwloc_distribute[v]() functions.
+    hwloc_distribute[v]() is moved to hwloc/deprecated.h.
+  + Don't mix total and local memory when displaying verbose object attributes
+    with hwloc_obj_attr_snprintf() or in lstopo.
+* Backends
+  + Add CPUVendor, CPUModelNumber and CPUFamilyNumber info attributes for
+    x86, ia64 and Xeon Phi sockets on Linux, to extend the x86-specific
+    support added in v1.8.1. Requested by Ralph Castain.
+  + Add many CPU- and Platform-related info attributes on ARM and POWER
+    platforms, in the Machine and Socket objects.
+  + Add CUDA info attributes describing the number of multiprocessors and
+    cores and the size of the global, shared and L2 cache memories in CUDA
+    OS devices.
+  + Add OpenCL info attributes describing the number of compute units and
+    the global memory size in OpenCL OS devices.
+  + The synthetic backend now accepts extended types such as L2Cache, L1i or
+    Group3. lstopo also exports synthetic strings using these extended types.
+* Tools
+  + lstopo
+    - Do not overwrite output files by default anymore.
+      Pass -f or --force to enforce it.
+    - Display OpenCL, CUDA and Xeon Phi numbers of cores and memory sizes
+      in the graphical output.
+    - Fix export to stdout when specifying a Cairo-based output type
+      with --of.
+  + hwloc-ps
+    - Add -e or --get-last-cpu-location to report where processes/threads
+      run instead of where they are bound.
+    - Report locations as likely-more-useful objects such as Cores or Sockets
+      instead of Caches when possible.
+  + hwloc-bind
+    - Fix failure on Windows when not using --pid.
+    - Add -e as a synonym to --get-last-cpu-location.
+  + hwloc-distrib
+    - Add --reverse to distribute using last objects first and singlify
+      into last bits first. Thanks to Jirka Hladky for the suggestion.
+  + hwloc-info
+    - Report unified caches when looking for data or instruction cache
+      ancestor objects.
+* Misc
+  + Add experimental Visual Studio support under contrib/windows.
+    Thanks to Eloi Gaudry for his help and for providing the first draft.
+  + Fix some overzealous assertions and warnings about the ordering of
+    objects on a level with respect to cpusets. The ordering is only
+    guaranteed for complete cpusets (based on the first bit in sets).
+  + Fix some memory leaks when importing xml diffs and when exporting a
+    "too complex" entry.
+
+
+Version 1.8.1
+-------------
+* Fix the cpuid code on Windows 64bits so that the x86 backend gets
+  enabled as expected and can populate CPU information.
+  Thanks to Robin Scher for reporting the problem.
+* Add CPUVendor/CPUModelNumber/CPUFamilyNumber attributes when running
+  on x86 architecture. Thanks to Ralph Castain for the suggestion.
+* Work around buggy BIOS reporting duplicate NUMA nodes on Linux.
+  Thanks to Jeff Becker for reporting the problem and testing the patch.
+* Add a name to the lstopo graphical window. Thanks to Michael Prokop
+  for reporting the issue.
+
+
+Version 1.8.0
+-------------
+* New components
+  + Add the "linuxpci" component that always works on Linux even when
+    libpciaccess and libpci aren't available (and even with a modified
+    file-system root). By default the old "pci" component runs first
+    because "linuxpci" lacks device names (obj->name is always NULL).
+* API
+  + Add the topology difference API in hwloc/diff.h for manipulating
+    many similar topologies.
+  + Add hwloc_topology_dup() for duplicating an entire topology.
+  + hwloc.h and hwloc/helper.h have been reorganized to clarify the
+    documentation sections. The actual inline code has moved out of hwloc.h
+    into the new hwloc/inlines.h.
+  + Deprecated functions are now in hwloc/deprecated.h, and not in the
+    official documentation anymore.
+* Tools
+  + Add hwloc-diff and hwloc-patch tools together with the new diff API.
+  + Add hwloc-compress-dir to (de)compress an entire directory of XML files
+    using hwloc-diff and hwloc-patch.
+  + Object colors in the graphical output of lstopo may be changed by adding
+    a "lstopoStyle" info attribute. See CUSTOM COLORS in the lstopo(1) manpage
+    for details. Thanks to Jirka Hladky for discussing the idea.
+  + hwloc-gather-topology may now gather I/O-related files on Linux when
+    --io is given. Only the linuxpci component supports discovering I/O
+    objects from these extended tarballs.
+  + hwloc-annotate now supports --ri to remove/replace info attributes with
+    a given name.
+  + hwloc-info supports "root" and "all" special locations for dumping
+    information about the root object.
+  + lstopo now supports --append-legend to append custom lines of text
+    to the legend in the graphical output. Thanks to Jirka Hladky for
+    discussing the idea.
+  + hwloc-calc and friends have a more robust parsing of locations given
+    on the command-line and they report useful error messages about it.
+  + Add --whole-system to hwloc-bind, hwloc-calc, hwloc-distances and
+    hwloc-distrib, and add --restrict to hwloc-bind for uniformity among
+    tools.
+* Misc
+  + Calling hwloc_topology_load() or hwloc_topology_set_*() on an already
+    loaded topology now returns an error (deprecated since release 1.6.1).
+  + Fix the initialisation of cpusets and nodesets in Group objects added
+    when inserting PCI hostbridges.
+  + Never merge Group objects that were added explicitly by the user with
+    hwloc_custom_insert_group_object_by_parent().
+  + Add a sanity check during dynamic plugin loading to prevent some
+    crashes when hwloc is dynamically loaded by another plugin mechanism.
+  + Add --with-hwloc-plugins-path to specify the install/load directories
+    of plugins.
+  + Add the MICSerialNumber info attribute to the root object when running
+    hwloc inside a Xeon Phi to match the same attribute in the MIC OS device
+    when running in the host.
+
+
+Version 1.7.2
+-------------
+* Do not create invalid block OS devices on very old Linux kernels such
+  as RHEL4 2.6.9.
+* Fix PCI subvendor/device IDs.
+* Fix the management of Misc objects inserted by parent.
+  Thanks to Jirka Hladky for reporting the problem.
+* Add a Port<n>State info attribute to OpenFabrics OS devices.
+* Add a MICSerialNumber info attribute to Xeon PHI/MIC OS devices.
+* Improve verbose error messages when failing to load from XML.
+
+
+Version 1.7.1
+-------------
+* Fix a failed assertion in the distance grouping code when loading a XML
+  file that already contains some groups.
+  Thanks to Laercio Lima Pilla for reporting the problem.
+* Remove unexpected Group objects when loading XML topologies with I/O
+  objects and NUMA distances.
+  Thanks to Elena Elkina for reporting the problem and testing patches.
+* Fix PCI link speed discovery when using libpciaccess.
+* Fix invalid libpciaccess virtual function device/vendor IDs when using
+  SR-IOV PCI devices on Linux.
+* Fix GL component build with old NVCtrl releases.
+  Thanks to Jirka Hladky for reporting the problem.
+* Fix embedding breakage caused by libltdl.
+  Thanks to Pavan Balaji for reporting the problem.
+* Always use the system-wide libltdl instead of shipping one inside hwloc.
+* Document issues when enabling plugins while embedding hwloc in another
+  project, in the documentation section Embedding hwloc in Other Software.
+* Add a FAQ entry "How to get useful topology information on NetBSD?"
+  in the documentation.
+* Some fixes in the renaming code for embedding.
+* Miscellaneous minor build fixes.
+
+
+Version 1.7.0
+-------------
+* New operating system backends
+  + Add BlueGene/Q compute node kernel (CNK) support. See the FAQ in the
+    documentation for details. Thanks to Jeff Hammond, Christopher Samuel
+    and Erik Schnetter for their help.
+  + Add NetBSD support, thanks to Aleksej Saushev.
+* New I/O device discovery
+  + Add co-processor OS devices such as "mic0" for Intel Xeon Phi (MIC)
+    on Linux. Thanks to Jerome Vienne for helping.
+  + Add co-processor OS devices such as "cuda0" for NVIDIA CUDA-capable GPUs.
+  + Add co-processor OS devices such as "opencl0d0" for OpenCL GPU devices
+    on the AMD OpenCL implementation.
+  + Add GPU OS devices such as ":0.0" for NVIDIA X11 displays.
+  + Add GPU OS devices such as "nvml0" for NVIDIA GPUs.
+    Thanks to Marwan Abdellah and Stefan Eilemann for helping.
+  These new OS devices have some string info attributes such as CoProcType,
+  GPUModel, etc. to better identify them.
+  See the I/O Devices and Attributes documentation sections for details.
+* New components
+  + Add the "opencl", "cuda", "nvml" and "gl" components for I/O device
+    discovery.
+  + "nvml" also improves the discovery of NVIDIA GPU PCIe link speed.
+  All of these new components may be built as plugins. They may also be
+  disabled entirely by passing --disable-opencl/cuda/nvml/gl to configure.
+  See the I/O Devices, Components and Plugins, and FAQ documentation
+  sections for details.
+* API
+  + Add hwloc_topology_get_flags().
+  + Add hwloc/plugins.h for building external plugins.
+    See the Adding new discovery components and plugins section.
+* Interoperability
+  + Add hwloc/opencl.h, hwloc/nvml.h, hwloc/gl.h and hwloc/intel-mic.h
+    to retrieve the locality of OS devices that correspond to AMD OpenCL
+    GPU devices or indexes, to NVML devices or indexes, to NVIDIA X11
+    displays, or to Intel Xeon Phi (MIC) device indexes.
+  + Add new helpers in hwloc/cuda.h and hwloc/cudart.h to convert
+    between CUDA devices or indexes and hwloc OS devices.
+  + Add hwloc_ibv_get_device_osdev() and clarify the requirements
+    of the OpenFabrics Verbs helpers in hwloc/openfabrics-verbs.h.
+* Tools
+  + hwloc-info is not only a synonym of lstopo -s anymore, it also
+    dumps information about objects given on the command-line.
+* Documentation
+  + Add a section "Existing components and plugins".
+  + Add a list of common OS devices in section "Software devices".
+  + Add a new FAQ entry "Why is lstopo slow?" about lstopo slowness
+    issues because of GPUs.
+  + Clarify the documentation of inline helpers in hwloc/myriexpress.h
+    and hwloc/openfabrics-verbs.h.
+* Misc
+  + Improve cache detection on AIX.
+  + The HWLOC_COMPONENTS variable now excludes the components whose
+    names are prefixed with '-'.
+  + lstopo --ignore PU now works when displaying the topology in
+    graphical and textual mode (not when exporting to XML).
+  + Make sure I/O options always appear in lstopo usage, not only when
+    using pciutils/libpci.
+  + Remove some unneeded Linux specific includes from some interoperability
+    headers.
+  + Fix some inconsistencies in hwloc-distrib and hwloc-assembler-remote
+    manpages. Thanks to Guy Streeter for the report.
+  + Fix a memory leak on AIX when getting memory binding.
+  + Fix many small memory leaks on Linux.
+  + The `libpci' component is now called `pci' but the old name is still
+    accepted in the HWLOC_COMPONENTS variable for backward compatibility.
+
+
+Version 1.6.2
+-------------
+* Use libpciaccess instead of pciutils/libpci by default for I/O discovery.
+  pciutils/libpci is only used if --enable-libpci is given to configure
+  because its GPL license may taint hwloc. See the Installation section
+  in the documentation for details.
+* Fix get_cpubind on Solaris when bound to a single PU with
+  processor_bind(). Thanks to Eugene Loh for reporting the problem
+  and providing a patch.
+
+
+Version 1.6.1
+-------------
+* Fix some crash or buggy detection in the x86 backend when Linux
+  cgroups/cpusets restrict the available CPUs.
+* Fix the pkg-config output with --libs --static.
+  Thanks to Erik Schnetter for reporting one of the problems.
+* Fix the output of hwloc-calc -H --hierarchical when using logical
+  indexes in the output.
+* Calling hwloc_topology_load() multiple times on the same topology
+  is officially deprecated. hwloc will warn in such cases.
+* Add some documentation about existing plugins/components, package
+  dependencies, and I/O devices specification on the command-line.
+
+
+Version 1.6.0
+-------------
+* Major changes
+  + Reorganize the backend infrastructure to support dynamic selection
+    of components and dynamic loading of plugins. For details, see the
+    new documentation section Components and plugins.
+    - The HWLOC_COMPONENTS variable lets one replace the default discovery
+      components.
+    - Dynamic loading of plugins may be enabled with --enable-plugins
+      (except on AIX and Windows). It will build libxml2 and libpci
+      support as separated modules. This helps reduce the dependencies
+      of the core hwloc library when distributed as a binary package.
+* Backends
+  + Add CPUModel detection on Darwin and x86/FreeBSD.
+    Thanks to Robin Scher for providing ways to implement this.
+  + The x86 backend now adds CPUModel info attributes to socket objects
+    created by other backends that do not natively support this attribute.
+  + Fix detection on FreeBSD in case of cpuset restriction. Thanks to
+    Sebastian Kuzminsky for reporting the problem.
+* XML
+  + Add hwloc_topology_set_userdata_import/export_callback(),
+    hwloc_export_obj_userdata() and _userdata_base64() to let
+    applications specify how to save/restore the custom data they placed
+    in the userdata private pointer field of hwloc objects.
+* Tools
+  + Add hwloc-annotate program to add string info attributes to XML
+    topologies.
+  + Add --pid-cmd to hwloc-ps to append the output of a command to each
+    PID line. May be used for showing Open MPI process ranks, see the
+    hwloc-ps(1) manpage for details.
+  + hwloc-bind now exits with an error if binding fails; the executable
+    is not launched unless binding succeeded or --force was given.
+  + Add --quiet to hwloc-calc and hwloc-bind to hide non-fatal error
+    messages.
+  + Fix command-line pid support in windows tools.
+  + All programs accept --verbose as a synonym to -v.
+* Misc
+  + Fix some DIR descriptor leaks on Linux.
+  + Fix I/O device lists when some were filtered out after a XML import.
+  + Fix the removal of I/O objects when importing an I/O-enabled XML topology
+    without any I/O topology flag.
+  + When merging objects with HWLOC_IGNORE_TYPE_KEEP_STRUCTURE or
+    lstopo --merge, compare object types before deciding which one of two
+    identical objects to remove (e.g. keep sockets in favor of caches).
+  + Add some GUID- and LID-related info attributes to OpenFabrics
+    OS devices.
+  + Only add CPUType socket attributes on Solaris/Sparc. Other cases
+    don't report reliable information (Solaris/x86), and a replacement
+    is available as the Architecture string info in the Machine object.
+  + Add missing Backend string info on Solaris in most cases.
+  + Document object attributes and string infos in a new Attributes
+    section in the documentation.
+  + Add a section about Synthetic topologies in the documentation.
+
+
+Version 1.5.2 (some of these changes are in v1.6.2 but not in v1.6)
+-------------
+* Use libpciaccess instead of pciutils/libpci by default for I/O discovery.
+  pciutils/libpci is only used if --enable-libpci is given to configure
+  because its GPL license may taint hwloc. See the Installation section
+  in the documentation for details.
+* Fix get_cpubind on Solaris when bound to a single PU with
+  processor_bind(). Thanks to Eugene Loh for reporting the problem
+  and providing a patch.
+* Fix some DIR descriptor leaks on Linux.
+* Fix I/O device lists when some were filtered out after a XML import.
+* Add missing Backend string info on Solaris in most cases.
+* Fix the removal of I/O objects when importing an I/O-enabled XML topology
+  without any I/O topology flag.
+* Fix the output of hwloc-calc -H --hierarchical when using logical
+  indexes in the output.
+* Fix the pkg-config output with --libs --static.
+  Thanks to Erik Schnetter for reporting one of the problems.
+
+
+Version 1.5.1
+-------------
+* Fix block OS device detection on Linux kernel 3.3 and later.
+  Thanks to Guy Streeter for reporting the problem and testing the fix.
+* Fix the cpuid code in the x86 backend (for FreeBSD). Thanks to
+  Sebastian Kuzminsky for reporting problems and testing patches.
+* Fix 64bit detection on FreeBSD.
+* Fix some corner cases in the management of the thissystem flag with
+  respect to topology flags and environment variables.
+* Fix some corner cases in command-line parsing checks in hwloc-distrib
+  and hwloc-distances.
+* Make sure we do not miss some block OS devices on old Linux kernels
+  when a single PCI device has multiple IDE hosts/devices behind it.
+* Do not disable I/O devices or instruction caches in hwloc-assembler output.
+
+
+Version 1.5.0
+-------------
+* Backends
+  + Do not limit the number of processors to 1024 on Solaris anymore.
+  + Gather total machine memory on FreeBSD. Thanks to Cyril Roelandt.
+  + XML topology files do not depend on the locale anymore. Float numbers
+    such as NUMA distances or PCI link speeds now always use a dot as a
+    decimal separator.
+  + Add instruction caches detection on Linux, AIX, Windows and Darwin.
+  + Add get_last_cpu_location() support for the current thread on AIX.
+  + Support binding on AIX when threads or processes were bound with
+    bindprocessor(). Thanks to Hendryk Bockelmann for reporting the issue
+    and testing patches, and to Farid Parpia for explaining the binding
+    interfaces.
+  + Improve AMD topology detection in the x86 backend (for FreeBSD) using
+    the topoext feature.
+* API
+  + Increase HWLOC_API_VERSION to 0x00010500 so that API changes may be
+    detected at build-time.
+  + Add a cache type attribute describing Data, Instruction and Unified
+    caches. Caches with different types but same depth (for instance L1d
+    and L1i) are placed on different levels.
+  + Add hwloc_get_cache_type_depth() to retrieve the hwloc level depth
+    of the given cache depth and type, for instance L1i or L2.
+    It helps disambiguating the case where hwloc_get_type_depth() returns
+    HWLOC_TYPE_DEPTH_MULTIPLE.
+  + Instruction caches are ignored unless HWLOC_TOPOLOGY_FLAG_ICACHES is
+    passed to hwloc_topology_set_flags() before load.
+  + Add hwloc_ibv_get_device_osdev_by_name() OpenFabrics helper in
+    openfabrics-verbs.h to find the hwloc OS device object corresponding to
+    an OpenFabrics device.
+* Tools
+  + Add lstopo-no-graphics, a lstopo built without graphical support to
+    avoid dependencies on external libraries such as Cairo and X11. When
+    supported, graphical outputs are only available in the original lstopo
+    program.
+    - Packagers splitting lstopo and lstopo-no-graphics into different
+      packages are advised to use the alternatives system so that lstopo
+      points to the best available binary.
+  + Instruction caches are enabled in lstopo by default. Use --no-icaches
+    to disable them.
+  + Add -t/--threads to show threads in hwloc-ps.
+* Removal of obsolete components
+  + Remove the old cpuset interface (hwloc/cpuset.h) which is deprecated and
+    superseded by the bitmap API (hwloc/bitmap.h) since v1.1.
+    hwloc_cpuset and nodeset types are still defined, but all hwloc_cpuset_*
+    compatibility wrappers are now gone.
+  + Remove Linux libnuma conversion helpers for the deprecated and
+    broken nodemask_t interface.
+  + Remove support for "Proc" type name, it was superseded by "PU" in v1.0.
+  + Remove hwloc-mask symlinks, it was replaced by hwloc-calc in v1.0.
+* Misc
+  + Fix PCIe 3.0 link speed computation.
+  + Non-printable characters are dropped from strings during XML export.
+  + Fix importing of escaped characters with the minimalistic XML backend.
+  + Assert hwloc_is_thissystem() in several I/O related helpers.
+  + Fix some memory leaks in the x86 backend for FreeBSD.
+  + Minor fixes to ease native builds on Windows.
+  + Limit the number of retries when operating on all threads within a
+    process on Linux if the list of threads is heavily getting modified.
+
+
+Version 1.4.3
+-------------
+* This release is only meant to fix the pciutils license issue when upgrading
+  to hwloc v1.5 or later is not possible. It contains several other minor
+  fixes but ignores many of them that are only in v1.5 or later.
+* Use libpciaccess instead of pciutils/libpci by default for I/O discovery.
+  pciutils/libpci is only used if --enable-libpci is given to configure
+  because its GPL license may taint hwloc. See the Installation section
+  in the documentation for details.
+* Fix PCIe 3.0 link speed computation.
+* Fix importing of escaped characters with the minimalistic XML backend.
+* Fix a memory leak in the x86 backend.
+
+
+Version 1.4.2
+-------------
+* Fix build on Solaris 9 and earlier when fabsf() is not a compiler
+  built-in. Thanks to Igor Galić for reporting the problem.
+* Fix support for more than 32 processors on Windows. Thanks to Hartmut
+  Kaiser for reporting the problem.
+* Fix process-wide binding and cpulocation routines on Linux when some
+  threads disappear in the meantime. Thanks to Vlad Roubtsov for reporting
+  the issue.
+* Make installed scripts executable. Thanks to Jirka Hladky for reporting
+  the problem.
+* Fix libtool revision management when building for Windows. This fix was
+  also released as hwloc v1.4.1.1 Windows builds. Thanks to Hartmut Kaiser
+  for reporting the problem.
+* Fix the __hwloc_inline keyword in public headers when compiling with a
+  C++ compiler.
+* Add Port info attribute to network OS devices inside OpenFabrics PCI
+  devices so as to identify which interface corresponds to which port.
+* Document requirements for interoperability helpers: I/O devices discovery
+  is required for some of them; the topology must match the current host
+  for most of them.
+
+
+Version 1.4.1
+-------------
+* This release contains all changes from v1.3.2.
+* Fix hwloc_alloc_membind, thanks Karl Napf for reporting the issue.
+* Fix memory leaks in some get_membind() functions.
+* Fix helpers converting from Linux libnuma to hwloc (hwloc/linux-libnuma.h)
+  in case of out-of-order NUMA node ids.
+* Fix some overzealous assertions in the distance grouping code.
+* Workaround BIOS reporting empty I/O locality in CUDA and OpenFabrics
+  helpers on Linux. Thanks to Albert Solernou for reporting the problem.
+* Install a valgrind suppressions file hwloc-valgrind.supp (see the FAQ).
+* Fix memory binding documentation. Thanks to Karl Napf for reporting the
+  issues.
+
+
+Version 1.4.0 (does not contain all v1.3.2 changes)
+-------------
+* Major features
+  + Add "custom" interface and "assembler" tools to build multi-node
+    topology. See the Multi-node Topologies section in the documentation
+    for details.
+* Interface improvements
+  + Add symmetric_subtree object attribute to ease assumptions when consulting
+    regular symmetric topologies.
+  + Add a CPUModel and CPUType info attribute to Socket objects on Linux
+    and Solaris.
+  + Add hwloc_get_obj_index_inside_cpuset() to retrieve the "logical" index
+    of an object within a subtree of the topology.
+  + Add more NVIDIA CUDA helpers in cuda.h and cudart.h to find hwloc objects
+    corresponding to CUDA devices.
+* Discovery improvements
+  + Add a group object above partial distance matrices to make sure
+    the matrices are available in the final topology, except when this
+    new object would contradict the existing hierarchy.
+  + Grouping by distances now also works when loading from XML.
+  + Fix some corner cases in object insertion, for instance when dealing
+    with NUMA nodes without any CPU.
+* Backends
+  + Implement hwloc_get_area_membind() on Linux.
+  + Honor I/O topology flags when importing from XML.
+  + Further improve XML-related error checking and reporting.
+  + Hide synthetic topology error messages unless HWLOC_SYNTHETIC_VERBOSE=1.
+* Tools
+  + Add synthetic exporting of symmetric topologies to lstopo.
+  + lstopo --horiz and --vert can now be applied to some specific object types.
+  + lstopo -v -p now displays distance matrices with physical indexes.
+  + Add hwloc-distances utility to list distances.
+* Documentation
+  + Fix and/or document the behavior of most inline functions in hwloc/helper.h
+    when the topology contains some I/O or Misc objects.
+  + Backend documentation enhancements.
+* Bug fixes
+  + Fix missing last bit in hwloc_linux_get_thread_cpubind().
+    Thanks to Carolina Gómez-Tostón Gutiérrez for reporting the issue.
+  + Fix FreeBSD build without cpuid support.
+  + Fix several Windows build issues.
+  + Fix inline keyword definition in public headers.
+  + Fix dependencies in the embedded library.
+  + Improve visibility support detection. Thanks to Dave Love for providing
+    the patch.
+  + Remove references to internal symbols in the tools.
+
+
+Version 1.3.3
+-------------
+* This release is only meant to fix the pciutils license issue when upgrading
+  to hwloc v1.4 or later is not possible. It contains several other minor
+  fixes but ignores many of them that are only in v1.4 or later.
+* Use libpciaccess instead of pciutils/libpci by default for I/O discovery.
+  pciutils/libpci is only used if --enable-libpci is given to configure
+  because its GPL license may taint hwloc. See the Installation section
+  in the documentation for details.
+
+
+Version 1.3.2
+-------------
+* Fix missing last bit in hwloc_linux_get_thread_cpubind().
+  Thanks to Carolina Gómez-Tostón Gutiérrez for reporting the issue.
+* Fix build with -mcmodel=medium. Thanks to Devendar Bureddy for reporting
+  the issue.
+* Fix build with Solaris Studio 12 compiler when XML is disabled.
+  Thanks to Paul H. Hargrove for reporting the problem.
+* Fix installation with old GNU sed, for instance on Red Hat 8.
+  Thanks to Paul H. Hargrove for reporting the problem.
+* Fix PCI locality when Linux cgroups restrict the available CPUs.
+* Fix floating point issue when grouping by distance on mips64 architecture.
+  Thanks to Paul H. Hargrove for reporting the problem.
+* Fix conversion from/to Linux libnuma when some NUMA nodes have no memory.
+* Fix support for gccfss compilers with broken ffs() support. Thanks to
+  Paul H. Hargrove for reporting the problem and providing a patch.
+* Fix FreeBSD build without cpuid support.
+* Fix several Windows build issues.
+* Fix inline keyword definition in public headers.
+* Fix dependencies in the embedded library.
+* Detect when a compiler such as xlc may not report compile errors
+  properly, causing some configure checks to be wrong. Thanks to
+  Paul H. Hargrove for reporting the problem and providing a patch.
+* Improve visibility support detection. Thanks to Dave Love for providing
+  the patch.
+* Remove references to internal symbols in the tools.
+* Fix installation on systems with limited command-line size.
+  Thanks to Paul H. Hargrove for reporting the problem.
+* Further improve XML-related error checking and reporting.
+
+
+Version 1.3.1
+-------------
+* Fix pciutils detection with pkg-config when not installed in standard
+  directories.
+* Fix visibility options detection with the Solaris Studio compiler.
+  Thanks to Igor Galić and Terry Dontje for reporting the problems.
+* Fix support for old Linux sched.h headers such as those found
+  on Red Hat 8. Thanks to Paul H. Hargrove for reporting the problems.
+* Fix inline and attribute support for Solaris compilers. Thanks to
+  Dave Love for reporting the problems.
+* Print a short summary at the end of the configure output. Thanks to
+  Stefan Eilemann for the suggestion.
+* Add --disable-libnuma configure option to disable libnuma-based
+  memory binding support on Linux.  Thanks to Rayson Ho for the
+  suggestion.
+* Make hwloc's configure script properly obey $PKG_CONFIG.  Thanks to
+  Nathan Phillip Brink for raising the issue.
+* Silence some harmless pciutils warnings, thanks to Paul H. Hargrove
+  for reporting the problem.
+* Fix the documentation with respect to hwloc_pid_t and hwloc_thread_t
+  being either pid_t and pthread_t on Unix, or HANDLE on Windows.
+
+
+Version 1.3.0
+-------------
+* Major features
+  + Add I/O devices and bridges to the topology using the pciutils
+    library. Only enabled after setting the relevant flag with
+    hwloc_topology_set_flags() before hwloc_topology_load(). See the
+    I/O Devices section in the documentation for details.
+* Discovery improvements
+  + Add associativity to the cache attributes.
+  + Add support for s390/z11 "books" on Linux.
+  + Add the HWLOC_GROUPING_ACCURACY environment variable to relax
+    distance-based grouping constraints. See the Environment Variables
+    section in the documentation for details about grouping behavior
+    and configuration.
+  + Allow user-given distance matrices to remove or replace those
+    discovered by the OS backend.
+* XML improvements
+  + XML is now always supported: a minimalistic custom import/export
+    code is used when libxml2 is not available. It is only guaranteed
+    to read XML files generated by hwloc.
+  + hwloc_topology_export_xml() and export_xmlbuffer() now return an
+    integer.
+  + Add hwloc_free_xmlbuffer() to free the buffer allocated by
+    hwloc_topology_export_xmlbuffer().
+  + Hide XML topology error messages unless HWLOC_XML_VERBOSE=1.
+* Minor API updates
+  + Add hwloc_obj_add_info to customize object info attributes.
+* Tools
+  + lstopo now displays I/O devices by default. Several options are
+    added to configure the I/O discovery.
+  + hwloc-calc and hwloc-bind now accept I/O devices as input.
+  + Add --restrict option to hwloc-calc and hwloc-distribute.
+  + Add --sep option to change the output field separator in hwloc-calc.
+  + Add --whole-system option to hwloc-ps.
+
+
+Version 1.2.2
+-------------
+* Fix build on AIX 5.2, thanks Utpal Kumar Ray for the report.
+* Fix XML import of very large page sizes or counts on 32bits platform,
+  thanks to Karsten Hopp for the RedHat ticket.
+* Fix crash when administrator limitations such as Linux cgroup require
+  to restrict distance matrices. Thanks to Ake Sandgren for reporting the
+  problem.
+* Fix the removal of objects such as AMD Magny-Cours dual-node sockets
+  in case of administrator restrictions.
+* Improve error reporting and messages in case of wrong synthetic topology
+  description.
+* Several other minor internal fixes and documentation improvements.
+
+
+Version 1.2.1
+-------------
+* Improve support of AMD Bulldozer "Compute-Unit" modules by detecting
+  logical processors with different core IDs on Linux.
+* Fix hwloc-ps crash when listing processes from another Linux cpuset.
+  Thanks to Carl Smith for reporting the problem.
+* Fix build on AIX and Solaris. Thanks to Carl Smith and Andreas Kupries
+  for reporting the problems.
+* Fix cache size detection on Darwin. Thanks to Erkcan Özcan for reporting
+  the problem.
+* Make configure fail if --enable-xml or --enable-cairo is given and
+  proper support cannot be found. Thanks to Andreas Kupries for reporting
+  the XML problem.
+* Fix spurious L1 cache detection on AIX. Thanks to Hendryk Bockelmann
+  for reporting the problem.
+* Fix hwloc_get_last_cpu_location(THREAD) on Linux. Thanks to Gabriele
+  Fatigati for reporting the problem.
+* Fix object distance detection on Solaris.
+* Add pthread_self weak symbol to ease static linking.
+* Minor documentation fixes.
+
+
+Version 1.2.0
+-------------
+* Major features
+  + Expose latency matrices in the API as an array of distance structures
+    within objects. Add several helpers to find distances.
+  + Add hwloc_topology_set_distance_matrix() and environment variables
+    to provide a matrix of distances between a given set of objects.
+  + Add hwloc_get_last_cpu_location() and hwloc_get_proc_last_cpu_location()
+    to retrieve the processors where a process or thread recently ran.
+    - Add the corresponding --get-last-cpu-location option to hwloc-bind.
+  + Add hwloc_topology_restrict() to restrict an existing topology to a
+    given cpuset.
+    - Add the corresponding --restrict option to lstopo.
+* Minor API updates
+  + Add hwloc_bitmap_list_sscanf/snprintf/asprintf to convert between bitmaps
+    and strings such as 4-5,7-9,12,15-
+  + hwloc_bitmap_set/clr_range() now support infinite ranges.
+  + Clarify the difference between inserting Misc objects by cpuset or by
+    parent.
+  + hwloc_insert_misc_object_by_cpuset() now returns NULL in case of error.
+* Discovery improvements
+  + x86 backend (for freebsd): add x2APIC support
+  + Support standard device-tree phandle, to get better support on e.g. ARM
+    systems providing it.
+  + Detect cache size on AIX. Thanks Christopher and IBM.
+  + Improve grouping to support asymmetric topologies.
+* Tools
+  + Command-line tools now support "all" and "root" special locations
+    consisting in the entire topology, as well as type names with depth
+    attributes such as L2 or Group4.
+  + hwloc-calc improvements:
+    - Add --number-of/-N option to report the number of objects of a given
+      type or depth.
+    - -I is now equivalent to --intersect for listing the indexes of
+      objects of a given type or depth that intersects the input.
+    - Add -H to report the output as a hierarchical combination of types
+      and depths.
+  + Add --thissystem to lstopo.
+  + Add lstopo-win, a console-less lstopo variant on Windows.
+* Miscellaneous
+  + Remove C99 usage from code base.
+  + Rename hwloc-gather-topology.sh into hwloc-gather-topology
+  + Fix AMD cache discovery on freebsd when there is no L3 cache, thanks
+    Andriy Gapon for the fix.
+
+
+Version 1.1.2
+-------------
+* Fix a segfault in the distance-based grouping code when some objects
+  are not placed in any group. Thanks to Bernd Kallies for reporting
+  the problem and providing a patch.
+* Fix the command-line parsing of hwloc-bind --mempolicy interleave.
+  Thanks to Guy Streeter for reporting the problem.
+* Stop truncating the output in hwloc_obj_attr_snprintf() and in the
+  corresponding lstopo output. Thanks to Guy Streeter for reporting the
+  problem.
+* Fix object levels ordering in synthetic topologies.
+* Fix potential incoherency between device tree and kernel information,
+  when SMT is disabled on Power machines.
+* Fix and document the behavior of hwloc_topology_set_synthetic() in case
+  of invalid argument. Thanks to Guy Streeter for reporting the problem.
+* Add some verbose error message reporting when it looks like the OS
+  gives erroneous information.
+* Do not include unistd.h and stdint.h in public headers on Windows.
+* Move config.h files into their own subdirectories to avoid name
+  conflicts when AC_CONFIG_HEADERS adds -I's for them.
+* Remove the use of declaring variables inside "for" loops.
+* Some other minor fixes.
+* Many minor documentation fixes.
+
+
+Version 1.1.1
+-------------
+* Add hwloc_get_api_version() which returns the version of hwloc used
+  at runtime. Thanks to Guy Streeter for the suggestion.
+* Fix the number of hugepages reported for NUMA nodes on Linux.
+* Fix hwloc_bitmap_to_ulong() right after allocating the bitmap.
+  Thanks to Bernd Kallies for reporting the problem.
+* Fix hwloc_bitmap_from_ith_ulong() to properly zero the first ulong.
+  Thanks to Guy Streeter for reporting the problem.
+* Fix hwloc_get_membind_nodeset() on Linux.
+  Thanks to Bernd Kallies for reporting the problem and providing a patch.
+* Fix some file descriptor leaks in the Linux discovery.
+* Fix the minimum width of NUMA nodes, caches and the legend in the graphical
+  lstopo output. Thanks to Jirka Hladky for reporting the problem.
+* Various fixes to bitmap conversion from/to taskset-strings.
+* Fix and document snprintf functions behavior when the buffer size is too
+  small or zero. Thanks to Guy Streeter for reporting the problem.
+* Fix configure to avoid spurious enabling of the cpuid backend.
+  Thanks to Tim Anderson for reporting the problem.
+* Cleanup error management in hwloc-gather-topology.sh.
+  Thanks to Jirka Hladky for reporting the problem and providing a patch.
+* Add a manpage and usage for hwloc-gather-topology.sh on Linux.
+  Thanks to Jirka Hladky for providing a patch.
+* Memory binding documentation enhancements.
+
+
+Version 1.1.0
+-------------
+
+* API
+  + Increase HWLOC_API_VERSION to 0x00010100 so that API changes may be
+    detected at build-time.
+  + Add a memory binding interface.
+  + The cpuset API (hwloc/cpuset.h) is now deprecated. It is replaced by
+    the bitmap API (hwloc/bitmap.h) which offers the same features with more
+    generic names since it applies to CPU sets, node sets and more.
+    Backward compatibility with the cpuset API and ABI is still provided but
+    it will be removed in a future release.
+    Old types (hwloc_cpuset_t, ...) are still available as a way to clarify
+    what kind of hwloc_bitmap_t each API function manipulates.
+    Upgrading to the new API only requires to replace hwloc_cpuset_ function
+    calls with the corresponding hwloc_bitmap_ calls, with the following
+    renaming exceptions:
+    - hwloc_cpuset_cpu -> hwloc_bitmap_only
+    - hwloc_cpuset_all_but_cpu -> hwloc_bitmap_allbut
+    - hwloc_cpuset_from_string -> hwloc_bitmap_sscanf
+  + Add an `infos' array in each object to store couples of info names and
+    values. It enables generic storage of things like the old dmi board infos
+    that were previously stored in machine specific attributes.
+  + Add linesize cache attribute.
+* Features
+  + Bitmaps (and thus CPU sets and node sets) are dynamically (re-)allocated,
+    the maximal number of CPUs (HWLOC_NBMAXCPUS) has been removed.
+  + Improve the distance-based grouping code to better support irregular
+    distance matrices.
+  + Add support for device-tree to get cache information (useful on Power
+    architectures).
+* Helpers
+  + Add NVIDIA CUDA helpers in cuda.h and cudart.h to ease interoperability
+    with CUDA Runtime and Driver APIs.
+  + Add Myrinet Express helper in myriexpress.h to ease interoperability.
+* Tools
+  + lstopo now displays physical/OS indexes by default in graphical mode
+    (use -l to switch back to logical indexes). The textual output still uses
+    logical by default (use -p to switch to physical indexes).
+  + lstopo prefixes logical indexes with `L#' and physical indexes with `P#'.
+    Physical indexes are also printed as `P#N' instead of `phys=N' within
+    object attributes (in parentheses).
+  + Add a legend at the bottom of the lstopo graphical output, use --no-legend
+    to remove it.
+  + Add hwloc-ps to list process' bindings.
+  + Add --membind and --mempolicy options to hwloc-bind.
+  + Improve tools command-line options by adding a generic --input option
+    (and more) which replaces the old --xml, --synthetic and --fsys-root.
+  + Cleanup lstopo output configuration by adding --output-format.
+  + Add --intersect in hwloc-calc, and replace --objects with --largest.
+  + Add the ability to work on standard input in hwloc-calc.
+  + Add --from, --to and --at in hwloc-distrib.
+  + Add taskset-specific functions and command-line tools options to
+    manipulate CPU set strings in the format of the taskset program.
+  + Install hwloc-gather-topology.sh on Linux.
+
+
+Version 1.0.3
+-------------
+
+* Fix support for Linux cpuset when emulated by a cgroup mount point.
+* Remove unneeded runtime dependency on libibverbs.so in the library and
+  all utils programs.
+* Fix hwloc_cpuset_to_linux_libnuma_ulongs in case of non-linear OS-indexes
+  for NUMA nodes.
+* lstopo now displays physical/OS indexes by default in graphical mode
+  (use -l to switch back to logical indexes). The textual output still uses
+  logical by default (use -p to switch to physical indexes).
+
+
+Version 1.0.2
+-------------
+
+* Public headers can now be included directly from C++ programs.
+* Solaris fix for non-contiguous cpu numbers.  Thanks to Rolf vandeVaart for
+  reporting the issue.
+* Darwin 10.4 fix.  Thanks to Olivier Cessenat for reporting the issue.
+* Revert 1.0.1 patch that ignored sockets with unknown ID values since it
+  only slightly helped POWER7 machines with old Linux kernels while it
+  prevents recent kernels from getting the complete POWER7 topology.
+* Fix hwloc_get_common_ancestor_obj().
+* Remove arch-specific bits in public headers.
+* Some fixes in the lstopo graphical output.
+* Various man page clarifications and minor updates.
+
+
+Version 1.0.1
+-------------
+
+* Various Solaris fixes.  Thanks to Yannick Martin for reporting the issue.
+* Fix "non-native" builds on x86 platforms (e.g., when building 32
+  bit executables with compilers that natively build 64 bit).
+* Ignore sockets with unknown ID values (which fixes issues on POWER7
+  machines).  Thanks to Greg Bauer for reporting the issue.
+* Various man page clarifications and minor updates.
+* Fixed memory leaks in hwloc_setup_group_from_min_distance_clique().
+* Fix cache type filtering on MS Windows 7.  Thanks to Αλέξανδρος
+  Παπαδογιαννάκ for reporting the issue.
+* Fixed warnings when compiling with -DNDEBUG.
+
+
+Version 1.0.0
+-------------
+
+* The ABI of the library has changed.
+* Backend updates
+  + Add FreeBSD support.
+  + Add x86 cpuid based backend.
+  + Add Linux cgroup support to the Linux cpuset code.
+  + Support binding of entire multithreaded process on Linux.
+  + Fix and enable Group support in Windows.
+  + Cleanup XML export/import.
+* Objects
+  + HWLOC_OBJ_PROC is renamed into HWLOC_OBJ_PU for "Processing Unit",
+    its stringified type name is now "PU".
+  + Use new HWLOC_OBJ_GROUP objects instead of MISC when grouping
+    objects according to NUMA distances or arbitrary OS aggregation.
+  + Rework memory attributes.
+  + Add different cpusets in each object to specify processors that
+    are offline, unavailable, ...
+  + Cleanup the storage of object names and DMI infos.
+* Features
+  + Add support for looking up specific PID topology information.
+  + Add hwloc_topology_export_xml() to export the topology in a XML file.
+  + Add hwloc_topology_get_support() to retrieve the supported features
+    for the current topology context.
+  + Support non-SYSTEM object as the root of the tree, use MACHINE in
+    most common cases.
+  + Add hwloc_get_*cpubind() routines to retrieve the current binding
+    of processes and threads.
+* API
+  + Add HWLOC_API_VERSION to help detect the currently used API version.
+  + Add missing ending "e" to *compare* functions.
+  + Add several routines to emulate PLPA functions.
+  + Rename and rework the cpuset and/or/xor/not/clear operators to output
+    their result in a dedicated argument instead of modifying one input.
+  + Deprecate hwloc_obj_snprintf() in favor of hwloc_obj_type/attr_snprintf().
+  + Clarify the use of parent and ancestor in the API, do not use father.
+  + Replace hwloc_get_system_obj() with hwloc_get_root_obj().
+  + Return -1 instead of HWLOC_OBJ_TYPE_MAX in the API since the latter
+    isn't public.
+  + Relax constraints in hwloc_obj_type_of_string().
+  + Improve displaying of memory sizes.
+  + Add 0x prefix to cpuset strings.
+* Tools
+  + lstopo now displays logical indexes by default, use --physical to
+    revert back to OS/physical indexes.
+  + Add colors in the lstopo graphical outputs to distinguish between online,
+    offline, reserved, ... objects.
+  + Extend lstopo to show cpusets, filter objects by type, ...
+  + Renamed hwloc-mask into hwloc-calc which supports many new options.
+* Documentation
+  + Add a hwloc(7) manpage containing general information.
+  + Add documentation about how to switch from PLPA to hwloc.
+  + Cleanup the distributed documentation files.
+* Miscellaneous
+  + Many compilers warning fixes.
+  + Cleanup the ABI by using the visibility attribute.
+  + Add project embedding support.
+
+
+Version 0.9.4 (unreleased)
+--------------------------
+
+* Fix reseting colors to normal in lstopo -.txt output.
+* Fix Linux pthread_t binding error report.
+
+
+Version 0.9.3
+-------------
+
+* Fix autogen.sh to work with Autoconf 2.63.
+* Fix various crashes in particular conditions:
+  - xml files with root attributes
+  - offline CPUs
+  - partial sysfs support
+  - unparseable /proc/cpuinfo
+  - ignoring NUMA level while Misc level have been generated
+* Tweak documentation a bit
+* Do not require the pthread library for binding the current thread on Linux
+* Do not erroneously consider the sched_setaffinity prototype is the old version
+  when there is actually none.
+* Fix _syscall3 compilation on archs for which we do not have the
+  sched_setaffinity system call number.
+* Fix AIX binding.
+* Fix libraries dependencies: now only lstopo depends on libtermcap, fix
+  binutils-gold link
+* Have make check always build and run hwloc-hello.c
+* Do not limit size of a cpuset.
+
+
+Version 0.9.2
+-------------
+
+* Trivial documentation changes.
+
+
+Version 0.9.1
+-------------
+
+* Re-branded to "hwloc" and moved to the Open MPI project, relicensed under the
+  BSD license.
+* The prefix of all functions and tools is now hwloc, and some public
+  functions were also renamed for real.
+* Group NUMA nodes into Misc objects according to their physical distance
+  that may be reported by the OS/BIOS.
+  May be ignored by setting HWLOC_IGNORE_DISTANCES=1 in the environment.
+* Ignore offline CPUs on Solaris.
+* Improved binding support on AIX.
+* Add HP-UX support.
+* CPU sets are now allocated/freed dynamically.
+* Add command line options to tune the lstopo graphical output, add
+  semi-graphical textual output
+* Extend topobind to support multiple cpusets or objects on the command
+  line as topomask does.
+* Add an Infiniband-specific helper hwloc/openfabrics-verbs.h to retrieve
+  the physical location of IB devices.
+
+
+Version 0.9 (libtopology)
+-------------------------
+
+* First release.
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/README b/opal/mca/hwloc/hwloc2x/hwloc/README
new file mode 100644
index 0000000000..eadf3bc6a0
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/README
@@ -0,0 +1,65 @@
+Introduction
+
+The Hardware Locality (hwloc) software project aims at easing the process of
+discovering hardware resources in parallel architectures. It offers
+command-line tools and a C API for consulting these resources, their locality,
+attributes, and interconnection. hwloc primarily aims at helping
+high-performance computing (HPC) applications, but is also applicable to any
+project seeking to exploit code and/or data locality on modern computing
+platforms.
+
+hwloc is actually made of two subprojects distributed together:
+
+  * The original hwloc project for describing the internals of computing nodes.
+ It is described in details between sections Hardware Locality (hwloc)
+ Introduction and Network Locality (netloc).
+  * The network-oriented companion called netloc (Network Locality), described
+ in details starting at section Network Locality (netloc). Netloc may be
+ disabled, but the original hwloc cannot. Both hwloc and netloc APIs are
+ documented after these sections.
+
+Installation
+
+hwloc (http://www.open-mpi.org/projects/hwloc/) is available under the BSD
+license. It is hosted as a sub-project of the overall Open MPI project (http://
+www.open-mpi.org/). Note that hwloc does not require any functionality from
+Open MPI -- it is a wholly separate (and much smaller!) project and code base.
+It just happens to be hosted as part of the overall Open MPI project.
+
+Nightly development snapshots are available on the web site. Additionally, the
+code can be directly cloned from Git:
+
+shell$ git clone https://github.com/open-mpi/hwloc.git
+shell$ cd hwloc
+shell$ ./autogen.sh
+
+Note that GNU Autoconf >=2.63, Automake >=1.11 and Libtool >=2.2.6 are required
+when building from a Git clone.
+
+Installation by itself is the fairly common GNU-based process:
+
+shell$ ./configure --prefix=...
+shell$ make
+shell$ make install
+
+hwloc- and netloc-specific configure options and requirements are documented in
+sections hwloc Installation and Netloc Installation respectively.
+
+Also note that if you install supplemental libraries in non-standard locations,
+hwloc's configure script may not be able to find them without some help. You
+may need to specify additional CPPFLAGS, LDFLAGS, or PKG_CONFIG_PATH values on
+the configure command line.
+
+For example, if libpciaccess was installed into /opt/pciaccess, hwloc's
+configure script may not find it be default. Try adding PKG_CONFIG_PATH to the
+./configure command line, like this:
+
+./configure PKG_CONFIG_PATH=/opt/pciaccess/lib/pkgconfig ...
+
+Running the "lstopo" tool is a good way to check as a graphical output whether
+hwloc properly detected the architecture of your node. Netloc command-line
+tools can be used to display the network topology interconnecting your nodes.
+
+
+
+See https://www.open-mpi.org/projects/hwloc/doc/ for more hwloc documentation.
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/VERSION b/opal/mca/hwloc/hwloc2x/hwloc/VERSION
new file mode 100644
index 0000000000..d3bb6ed201
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/VERSION
@@ -0,0 +1,47 @@
+# This is the VERSION file for hwloc, describing the precise version
+# of hwloc in this distribution.  The various components of the version
+# number below are combined to form a single version number string.
+
+# major, minor, and release are generally combined in the form
+# <major>.<minor>.<release>.  If release is zero, then it is omitted.
+
+# Please update HWLOC_VERSION in contrib/windows/private_config.h too.
+
+major=2
+minor=0
+release=0
+
+# greek is used for alpha or beta release tags.  If it is non-empty,
+# it will be appended to the version number.  It does not have to be
+# numeric.  Common examples include a1 (alpha release 1), b1 (beta
+# release 1), sc2005 (Super Computing 2005 release).  The only
+# requirement is that it must be entirely printable ASCII characters
+# and have no white space.
+
+greek=a1
+
+# The date when this release was created
+
+date="Unreleased developer copy"
+
+# If snapshot=1, then use the value from snapshot_version as the
+# entire hwloc version (i.e., ignore major, minor, release, and
+# greek).  This is only set to 1 when making snapshot tarballs.
+snapshot=1
+snapshot_version=${major}.${minor}.${release}${greek}-git
+
+# The shared library version of hwloc's public library.  This version
+# is maintained in accordance with the "Library Interface Versions"
+# chapter from the GNU Libtool documentation.  Notes:
+
+# 1. Since version numbers are associated with *releases*, the version
+# number maintained on the hwloc git master (and developer branches)
+# is always 0:0:0.
+
+# 2. Version numbers are described in the Libtool current:revision:age
+# format.
+
+libhwloc_so_version=0:0:0
+libnetloc_so_version=0:0:0
+
+# Please also update the <TargetName> lines in contrib/windows/libhwloc.vcxproj
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/autogen.sh b/opal/mca/hwloc/hwloc2x/hwloc/autogen.sh
new file mode 100755
index 0000000000..df4280218e
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/autogen.sh
@@ -0,0 +1,2 @@
+:
+autoreconf ${autoreconf_args:-"-ivf"}
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/config/distscript.sh b/opal/mca/hwloc/hwloc2x/hwloc/config/distscript.sh
new file mode 100755
index 0000000000..d72a3fd305
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/config/distscript.sh
@@ -0,0 +1,130 @@
+#!/bin/sh -f
+#
+# Copyright © 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation.  All rights reserved.
+# Copyright © 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation.  All rights
+#                         reserved.
+# Copyright © 2004-2005 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart.  All rights reserved.
+# Copyright © 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# Copyright © 2010-2014   Inria.  All rights reserved.
+# Copyright © 2009-2014 Cisco Systems, Inc.  All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+builddir="`pwd`"
+
+srcdir=$1
+cd "$srcdir"
+srcdir=`pwd`
+cd "$builddir"
+
+distdir="$builddir/$2"
+HWLOC_VERSION=$3
+
+if test "$distdir" = ""; then
+    echo "Must supply relative distdir as argv[2] -- aborting"
+    exit 1
+elif test "$HWLOC_VERSION" = ""; then
+    echo "Must supply version as argv[3] -- aborting"
+    exit 1
+fi
+
+#========================================================================
+
+start=`date`
+cat <<EOF
+
+Creating hwloc distribution
+In directory: `pwd`
+Srcdir: $srcdir
+Builddir: $builddir
+Version: $HWLOC_VERSION
+Started: $start
+
+EOF
+
+umask 022
+
+if test ! -d "$distdir"; then
+    echo "*** ERROR: dist dir does not exist"
+    echo "*** ERROR:   $distdir"
+    exit 1
+fi
+
+if test ! -d $srcdir/doc/doxygen-doc; then
+    echo "*** The srcdir does not already have a doxygen-doc tree built."
+    echo "*** hwloc's config/distscript.sh requires the docs to be built"
+    echo "*** in the srcdir before executing 'make dist'."
+    exit 1
+fi
+
+# Trivial helper function
+doit() {
+    echo $*
+    eval $*
+}
+
+echo "*** Copying doxygen-doc tree to dist..."
+echo "*** Directory: srcdir: $srcdir, distdir: $distdir, pwd: `pwd`"
+doit mkdir -p $distdir/doc/doxygen-doc
+doit chmod -R a=rwx $distdir/doc/doxygen-doc
+doit rm -rf $distdir/doc/doxygen-doc
+
+# We want to copy the entire directory tree to the distdir.  In some
+# cases, doxygen-doc may be a sym link, so we want the copy to follow
+# the sym links.  It's a bit of a portability nightmare, so try a few
+# different ways...
+# This seems to work on OS X and Linux (but not Solaris)
+doit "tar c -C $srcdir -h -f - doc/doxygen-doc | tar x -C $distdir -f -"
+if test ! -d $distdir/doc/doxygen-doc; then
+    # This seems to work on Linux and Solaris
+    doit cp -rpf $srcdir/doc/doxygen-doc/ $distdir/doc
+fi
+if test ! -d $distdir/doc/doxygen-doc; then
+    # This seems to work on OS X (probably redundant, but we know it works)
+    doit cp -rpf $srcdir/doc/doxygen-doc $distdir/doc
+fi
+# If we still failed, just error out
+if test ! -d $distdir/doc/doxygen-doc; then
+    echo "ERROR: Cannot seem to copy a directory to the distdir :-("
+    exit 1
+fi
+
+echo "*** Copying new README"
+ls -lf $distdir/README
+doit cp -pf $srcdir/README $distdir
+
+#########################################################
+# VERY IMPORTANT: Now go into the new distribution tree #
+#########################################################
+cd "$distdir"
+echo "*** Now in distdir: $distdir"
+
+#
+# Remove all the latex source files from the distribution tree (the
+# PDFs are still there; we're just removing the latex source because
+# some of the filenames get really, really long...).
+#
+
+echo "*** Removing latex source from dist tree"
+doit rm -rf doc/doxygen-doc/latex
+
+#
+# All done
+#
+
+cat <<EOF
+*** hwloc version $HWLOC_VERSION distribution created
+
+Started: $start
+Ended:   `date`
+
+EOF
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/config/distscript_embedded.sh b/opal/mca/hwloc/hwloc2x/hwloc/config/distscript_embedded.sh
new file mode 100755
index 0000000000..b97147d15b
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/config/distscript_embedded.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+chmod u+w $2
+makefiles="$2/doc/Makefile.am $2/doc/examples/Makefile.am $2/doc/doxygen-config.cfg.in $2/utils/Makefile.am $2/utils/hwloc/Makefile.am $2/utils/lstopo/Makefile.am $2/utils/netloc/infiniband/Makefile.am $2/utils/netloc/draw/Makefile.am $2/utils/netloc/mpi/Makefile.am $2/tests/Makefile.am $2/tests/hwloc/Makefile.am $2/tests/hwloc/linux/Makefile.am $2/tests/hwloc/linux/allowed/Makefile.am $2/tests/hwloc/linux/gather/Makefile.am $2/tests/hwloc/x86/Makefile.am $2/tests/hwloc/xml/Makefile.am $2/tests/hwloc/ports/Makefile.am $2/tests/hwloc/rename/Makefile.am $2/tests/hwloc/linux/allowed/test-topology.sh.in $2/tests/hwloc/linux/gather/test-gather-topology.sh.in $2/tests/hwloc/linux/test-topology.sh.in $2/tests/hwloc/x86/test-topology.sh.in $2/tests/hwloc/xml/test-topology.sh.in $2/tests/hwloc/wrapper.sh.in $2/utils/hwloc/hwloc-compress-dir.in $2/utils/hwloc/hwloc-gather-topology.in $2/utils/hwloc/test-hwloc-annotate.sh.in $2/utils/hwloc/test-hwloc-calc.sh.in $2/utils/hwloc/test-hwloc-compress-dir.sh.in $2/utils/hwloc/test-hwloc-diffpatch.sh.in $2/utils/hwloc/test-hwloc-distrib.sh.in $2/utils/hwloc/test-hwloc-info.sh.in $2/utils/hwloc/test-fake-plugin.sh.in $2/utils/hwloc/test-hwloc-dump-hwdata/Makefile.am $2/utils/hwloc/test-hwloc-dump-hwdata/test-hwloc-dump-hwdata.sh.in $2/utils/lstopo/test-lstopo.sh.in $2/contrib/systemd/Makefile.am $2/contrib/misc/Makefile.am $2/tests/netloc/Makefile.am $2/tests/netloc/tests.sh.in $2/utils/lstopo/lstopo-windows.c"
+rm -f $makefiles
+for i in $makefiles; do
+    [ -d $(dirname $i) ] || mkdir -p $(dirname $i)
+    cat > $i << EOF
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
+EOF
+done
+
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc.m4 b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc.m4
new file mode 100644
index 0000000000..df4764a578
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc.m4
@@ -0,0 +1,1364 @@
+dnl -*- Autoconf -*-
+dnl
+dnl Copyright © 2009-2016 Inria.  All rights reserved.
+dnl Copyright © 2009-2012, 2015-2017 Université Bordeaux
+dnl Copyright © 2004-2005 The Trustees of Indiana University and Indiana
+dnl                         University Research and Technology
+dnl                         Corporation.  All rights reserved.
+dnl Copyright © 2004-2012 The Regents of the University of California.
+dnl                         All rights reserved.
+dnl Copyright © 2004-2008 High Performance Computing Center Stuttgart,
+dnl                         University of Stuttgart.  All rights reserved.
+dnl Copyright © 2006-2017 Cisco Systems, Inc.  All rights reserved.
+dnl Copyright © 2012  Blue Brain Project, BBP/EPFL. All rights reserved.
+dnl Copyright © 2012       Oracle and/or its affiliates.  All rights reserved.
+dnl See COPYING in top-level directory.
+
+# Main hwloc m4 macro, to be invoked by the user
+#
+# Expects three or four parameters:
+# 1. Configuration prefix
+# 2. What to do upon success
+# 3. What to do upon failure
+# 4. If non-empty, print the announcement banner
+#
+AC_DEFUN([HWLOC_SETUP_CORE],[
+    AC_REQUIRE([AC_USE_SYSTEM_EXTENSIONS])
+    AC_REQUIRE([AC_CANONICAL_TARGET])
+    AC_REQUIRE([AC_PROG_CC])
+
+    AS_IF([test "x$4" != "x"],
+          [cat <<EOF
+
+###
+### Configuring hwloc core
+###
+EOF])
+
+    # If no prefix was defined, set a good value
+    m4_ifval([$1],
+             [m4_define([hwloc_config_prefix],[$1/])],
+             [m4_define([hwloc_config_prefix], [])])
+
+    # Unless previously set to "standalone" mode, default to embedded
+    # mode
+    AS_IF([test "$hwloc_mode" = ""], [hwloc_mode=embedded])
+    AC_MSG_CHECKING([hwloc building mode])
+    AC_MSG_RESULT([$hwloc_mode])
+
+    # Get hwloc's absolute top builddir (which may not be the same as
+    # the real $top_builddir, because we may be building in embedded
+    # mode).
+    HWLOC_startdir=`pwd`
+    if test x"hwloc_config_prefix" != "x" -a ! -d "hwloc_config_prefix"; then
+        mkdir -p "hwloc_config_prefix"
+    fi
+    if test x"hwloc_config_prefix" != "x"; then
+        cd "hwloc_config_prefix"
+    fi
+    HWLOC_top_builddir=`pwd`
+    AC_SUBST(HWLOC_top_builddir)
+
+    # Get hwloc's absolute top srcdir (which may not be the same as
+    # the real $top_srcdir, because we may be building in embedded
+    # mode).  First, go back to the startdir in case the $srcdir is
+    # relative.
+
+    cd "$HWLOC_startdir"
+    cd "$srcdir"/hwloc_config_prefix
+    HWLOC_top_srcdir="`pwd`"
+    AC_SUBST(HWLOC_top_srcdir)
+
+    # Go back to where we started
+    cd "$HWLOC_startdir"
+
+    AC_MSG_NOTICE([hwloc builddir: $HWLOC_top_builddir])
+    AC_MSG_NOTICE([hwloc srcdir: $HWLOC_top_srcdir])
+    if test "$HWLOC_top_builddir" != "$HWLOC_top_srcdir"; then
+        AC_MSG_NOTICE([Detected VPATH build])
+    fi
+
+    # Get the version of hwloc that we are installing
+    AC_MSG_CHECKING([for hwloc version])
+    HWLOC_VERSION="`$HWLOC_top_srcdir/config/hwloc_get_version.sh $HWLOC_top_srcdir/VERSION`"
+    if test "$?" != "0"; then
+        AC_MSG_ERROR([Cannot continue])
+    fi
+    HWLOC_RELEASE_DATE="`$HWLOC_top_srcdir/config/hwloc_get_version.sh $HWLOC_top_srcdir/VERSION --release-date`"
+    AC_SUBST(HWLOC_VERSION)
+    AC_DEFINE_UNQUOTED([HWLOC_VERSION], ["$HWLOC_VERSION"],
+                       [The library version, always available, even in embedded mode, contrary to VERSION])
+    AC_SUBST(HWLOC_RELEASE_DATE)
+    AC_MSG_RESULT([$HWLOC_VERSION])
+
+    # Debug mode?
+    AC_MSG_CHECKING([if want hwloc maintainer support])
+    hwloc_debug=
+
+    # Unconditionally disable debug mode in embedded mode; if someone
+    # asks, we can add a configure-time option for it.  Disable it
+    # now, however, because --enable-debug is not even added as an
+    # option when configuring in embedded mode, and we wouldn't want
+    # to hijack the enclosing application's --enable-debug configure
+    # switch.
+    AS_IF([test "$hwloc_mode" = "embedded"],
+          [hwloc_debug=0
+           hwloc_debug_msg="disabled (embedded mode)"])
+    AS_IF([test "$hwloc_debug" = "" -a "$enable_debug" = "yes"],
+          [hwloc_debug=1
+           hwloc_debug_msg="enabled"])
+    AS_IF([test "$hwloc_debug" = ""],
+          [hwloc_debug=0
+           hwloc_debug_msg="disabled"])
+    # Grr; we use #ifndef for HWLOC_DEBUG!  :-(
+    AH_TEMPLATE(HWLOC_DEBUG, [Whether we are in debugging mode or not])
+    AS_IF([test "$hwloc_debug" = "1"], [AC_DEFINE([HWLOC_DEBUG])])
+    AC_MSG_RESULT([$hwloc_debug_msg])
+
+    # We need to set a path for header, etc files depending on whether
+    # we're standalone or embedded. this is taken care of by HWLOC_EMBEDDED.
+
+    AC_MSG_CHECKING([for hwloc directory prefix])
+    AC_MSG_RESULT(m4_ifval([$1], hwloc_config_prefix, [(none)]))
+
+    # Note that private/config.h *MUST* be listed first so that it
+    # becomes the "main" config header file.  Any AC-CONFIG-HEADERS
+    # after that (hwloc/config.h) will only have selective #defines
+    # replaced, not the entire file.
+    AC_CONFIG_HEADERS(hwloc_config_prefix[include/private/autogen/config.h])
+    AC_CONFIG_HEADERS(hwloc_config_prefix[include/hwloc/autogen/config.h])
+
+    # What prefix are we using?
+    AC_MSG_CHECKING([for hwloc symbol prefix])
+    AS_IF([test "$hwloc_symbol_prefix_value" = ""],
+          [AS_IF([test "$with_hwloc_symbol_prefix" = ""],
+                 [hwloc_symbol_prefix_value=hwloc_],
+                 [hwloc_symbol_prefix_value=$with_hwloc_symbol_prefix])])
+    AC_DEFINE_UNQUOTED(HWLOC_SYM_PREFIX, [$hwloc_symbol_prefix_value],
+                       [The hwloc symbol prefix])
+    # Ensure to [] escape the whole next line so that we can get the
+    # proper tr tokens
+    [hwloc_symbol_prefix_value_caps="`echo $hwloc_symbol_prefix_value | tr '[:lower:]' '[:upper:]'`"]
+    AC_DEFINE_UNQUOTED(HWLOC_SYM_PREFIX_CAPS, [$hwloc_symbol_prefix_value_caps],
+                       [The hwloc symbol prefix in all caps])
+    AC_MSG_RESULT([$hwloc_symbol_prefix_value])
+
+    # Give an easy #define to know if we need to transform all the
+    # hwloc names
+    AH_TEMPLATE([HWLOC_SYM_TRANSFORM], [Whether we need to re-define all the hwloc public symbols or not])
+    AS_IF([test "$hwloc_symbol_prefix_value" = "hwloc_"],
+          [AC_DEFINE([HWLOC_SYM_TRANSFORM], [0])],
+          [AC_DEFINE([HWLOC_SYM_TRANSFORM], [1])])
+
+    # GCC specifics.
+    if test "x$GCC" = "xyes"; then
+        HWLOC_GCC_CFLAGS="-Wall -Wmissing-prototypes -Wundef"
+        HWLOC_GCC_CFLAGS="$HWLOC_GCC_CFLAGS -Wpointer-arith -Wcast-align"
+    fi
+
+    # Enable system extensions for O_DIRECTORY, fdopen, fssl, etc.
+    AH_VERBATIM([USE_HPUX_SYSTEM_EXTENSIONS],
+[/* Enable extensions on HP-UX. */
+#ifndef _HPUX_SOURCE
+# undef _HPUX_SOURCE
+#endif
+])
+    AC_DEFINE([_HPUX_SOURCE], [1], [Are we building for HP-UX?])
+
+    AC_LANG_PUSH([C])
+
+    # Check to see if we're producing a 32 or 64 bit executable by
+    # checking the sizeof void*.  Note that AC CHECK_SIZEOF even works
+    # when cross compiling (!), according to the AC 2.64 docs.  This
+    # check is needed because on some systems, you can instruct the
+    # compiler to specifically build 32 or 64 bit executables -- even
+    # though the $target may indicate something different.
+    AC_CHECK_SIZEOF([void *])
+
+    #
+    # List of components to be built, either statically or dynamically.
+    # To be enlarged below.
+    #
+    hwloc_components="noos xml synthetic xml_nolibxml"
+
+    #
+    # Check OS support
+    #
+    AC_MSG_CHECKING([which OS support to include])
+    case ${target} in
+      powerpc64-bgq-linux*) # must be before Linux
+	AC_DEFINE(HWLOC_BGQ_SYS, 1, [Define to 1 on BlueGene/Q])
+	hwloc_bgq=yes
+	AC_MSG_RESULT([bgq])
+	hwloc_components="$hwloc_components bgq"
+	;;
+      *-*-linux*)
+        AC_DEFINE(HWLOC_LINUX_SYS, 1, [Define to 1 on Linux])
+        hwloc_linux=yes
+        AC_MSG_RESULT([Linux])
+        hwloc_components="$hwloc_components linux"
+        if test "x$enable_io" != xno; then
+	  hwloc_components="$hwloc_components linuxio"
+	  AC_DEFINE(HWLOC_HAVE_LINUXIO, 1, [Define to 1 if building the Linux I/O component])
+	  hwloc_linuxio_happy=yes
+	  if test x$enable_pci != xno; then
+	    AC_DEFINE(HWLOC_HAVE_LINUXPCI, 1, [Define to 1 if enabling Linux-specific PCI discovery in the Linux I/O component])
+	    hwloc_linuxpci_happy=yes
+	  fi
+	fi
+        ;;
+      *-*-irix*)
+        AC_DEFINE(HWLOC_IRIX_SYS, 1, [Define to 1 on Irix])
+        hwloc_irix=yes
+        AC_MSG_RESULT([IRIX])
+        # no irix component yet
+        ;;
+      *-*-darwin*)
+        AC_DEFINE(HWLOC_DARWIN_SYS, 1, [Define to 1 on Darwin])
+        hwloc_darwin=yes
+        AC_MSG_RESULT([Darwin])
+        hwloc_components="$hwloc_components darwin"
+        ;;
+      *-*-solaris*)
+        AC_DEFINE(HWLOC_SOLARIS_SYS, 1, [Define to 1 on Solaris])
+        hwloc_solaris=yes
+        AC_MSG_RESULT([Solaris])
+        hwloc_components="$hwloc_components solaris"
+        ;;
+      *-*-aix*)
+        AC_DEFINE(HWLOC_AIX_SYS, 1, [Define to 1 on AIX])
+        hwloc_aix=yes
+        AC_MSG_RESULT([AIX])
+        hwloc_components="$hwloc_components aix"
+        ;;
+      *-*-hpux*)
+        AC_DEFINE(HWLOC_HPUX_SYS, 1, [Define to 1 on HP-UX])
+        hwloc_hpux=yes
+        AC_MSG_RESULT([HP-UX])
+        hwloc_components="$hwloc_components hpux"
+        ;;
+      *-*-mingw*|*-*-cygwin*)
+        AC_DEFINE(HWLOC_WIN_SYS, 1, [Define to 1 on WINDOWS])
+        hwloc_windows=yes
+        AC_MSG_RESULT([Windows])
+        hwloc_components="$hwloc_components windows"
+        ;;
+      *-*-*freebsd*)
+        AC_DEFINE(HWLOC_FREEBSD_SYS, 1, [Define to 1 on *FREEBSD])
+        hwloc_freebsd=yes
+        AC_MSG_RESULT([FreeBSD])
+        hwloc_components="$hwloc_components freebsd"
+        ;;
+      *-*-*netbsd*)
+        AC_DEFINE(HWLOC_NETBSD_SYS, 1, [Define to 1 on *NETBSD])
+        hwloc_netbsd=yes
+        AC_MSG_RESULT([NetBSD])
+        hwloc_components="$hwloc_components netbsd"
+        ;;
+      *)
+        AC_MSG_RESULT([Unsupported! ($target)])
+        AC_DEFINE(HWLOC_UNSUPPORTED_SYS, 1, [Define to 1 on unsupported systems])
+        AC_MSG_WARN([***********************************************************])
+        AC_MSG_WARN([*** hwloc does not support this system.])
+        AC_MSG_WARN([*** hwloc will *attempt* to build (but it may not work).])
+        AC_MSG_WARN([*** hwloc run-time results may be reduced to showing just one processor,])
+        AC_MSG_WARN([*** and binding will not be supported.])
+        AC_MSG_WARN([*** You have been warned.])
+        AC_MSG_WARN([*** Pausing to give you time to read this message...])
+        AC_MSG_WARN([***********************************************************])
+        sleep 10
+        ;;
+    esac
+
+    #
+    # Check CPU support
+    #
+    AC_MSG_CHECKING([which CPU support to include])
+    case ${target} in
+      i*86-*-*|x86_64-*-*|amd64-*-*)
+        case ${ac_cv_sizeof_void_p} in
+          4)
+            AC_DEFINE(HWLOC_X86_32_ARCH, 1, [Define to 1 on x86_32])
+            hwloc_x86_32=yes
+	    HWLOC_MS_LIB_ARCH=X86
+            AC_MSG_RESULT([x86_32])
+            ;;
+          8)
+            AC_DEFINE(HWLOC_X86_64_ARCH, 1, [Define to 1 on x86_64])
+            hwloc_x86_64=yes
+	    HWLOC_MS_LIB_ARCH=X64
+            AC_MSG_RESULT([x86_64])
+            ;;
+          *)
+            AC_DEFINE(HWLOC_X86_64_ARCH, 1, [Define to 1 on x86_64])
+            hwloc_x86_64=yes
+	    HWLOC_MS_LIB_ARCH=X64
+            AC_MSG_RESULT([unknown -- assuming x86_64])
+            ;;
+        esac
+        ;;
+      *)
+        AC_MSG_RESULT([unknown])
+        ;;
+    esac
+    AC_SUBST(HWLOC_MS_LIB_ARCH)
+
+    AC_CHECK_SIZEOF([unsigned long])
+    AC_DEFINE_UNQUOTED([HWLOC_SIZEOF_UNSIGNED_LONG], $ac_cv_sizeof_unsigned_long, [The size of `unsigned long', as computed by sizeof])
+    AC_CHECK_SIZEOF([unsigned int])
+    AC_DEFINE_UNQUOTED([HWLOC_SIZEOF_UNSIGNED_INT], $ac_cv_sizeof_unsigned_int, [The size of `unsigned int', as computed by sizeof])
+
+    #
+    # Check for compiler attributes and visibility
+    #
+    _HWLOC_C_COMPILER_VENDOR([hwloc_c_vendor])
+    _HWLOC_CHECK_ATTRIBUTES
+    _HWLOC_CHECK_VISIBILITY
+    HWLOC_CFLAGS="$HWLOC_FLAGS $HWLOC_VISIBILITY_CFLAGS"
+    AS_IF([test "$HWLOC_VISIBILITY_CFLAGS" != ""],
+          [AC_MSG_WARN(["$HWLOC_VISIBILITY_CFLAGS" has been added to the hwloc CFLAGS])])
+
+    # Make sure the compiler returns an error code when function arg
+    # count is wrong, otherwise sched_setaffinity checks may fail.
+    HWLOC_STRICT_ARGS_CFLAGS=
+    hwloc_args_check=0
+    AC_MSG_CHECKING([whether the C compiler rejects function calls with too many arguments])
+    AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+        extern int one_arg(int x);
+        int foo(void) { return one_arg(1, 2); }
+      ]])],
+      [AC_MSG_RESULT([no])],
+      [hwloc_args_check=1
+       AC_MSG_RESULT([yes])])
+    AC_MSG_CHECKING([whether the C compiler rejects function calls with too few arguments])
+    AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+        extern int two_arg(int x, int y);
+        int foo(void) { return two_arg(3); }
+      ]])],
+      [AC_MSG_RESULT([no])],
+      [hwloc_args_check=`expr $hwloc_args_check + 1`
+       AC_MSG_RESULT([yes])])
+    AS_IF([test "$hwloc_args_check" != "2"],[
+         AC_MSG_WARN([Your C compiler does not consider incorrect argument counts to be a fatal error.])
+        case "$hwloc_c_vendor" in
+        ibm)
+            HWLOC_STRICT_ARGS_CFLAGS="-qhalt=e"
+            ;;
+        intel)
+            HWLOC_STRICT_ARGS_CFLAGS="-we140"
+            ;;
+        *)
+            HWLOC_STRICT_ARGS_CFLAGS=FAIL
+            AC_MSG_WARN([Please report this warning and configure using a different C compiler if possible.])
+            ;;
+        esac
+        AS_IF([test "$HWLOC_STRICT_ARGS_CFLAGS" != "FAIL"],[
+            AC_MSG_WARN([Configure will append '$HWLOC_STRICT_ARGS_CFLAGS' to the value of CFLAGS when needed.])
+             AC_MSG_WARN([Alternatively you may configure with a different compiler.])
+        ])
+    ])
+
+    #
+    # Now detect support
+    #
+
+    AC_CHECK_HEADERS([unistd.h])
+    AC_CHECK_HEADERS([dirent.h])
+    AC_CHECK_HEADERS([strings.h])
+    AC_CHECK_HEADERS([ctype.h])
+
+    AC_CHECK_FUNCS([strncasecmp], [
+      _HWLOC_CHECK_DECL([strncasecmp], [
+	AC_DEFINE([HWLOC_HAVE_DECL_STRNCASECMP], [1], [Define to 1 if function `strncasecmp' is declared by system headers])
+      ])
+    ])
+
+    AC_CHECK_FUNCS([strftime])
+    AC_CHECK_FUNCS([setlocale])
+
+    AC_CHECK_HEADER([stdint.h], [
+      AC_DEFINE([HWLOC_HAVE_STDINT_H], [1], [Define to 1 if you have the <stdint.h> header file.])
+    ])
+    AC_CHECK_HEADERS([sys/mman.h])
+
+    old_CPPFLAGS="$CPPFLAGS"
+    CPPFLAGS="$CPPFLAGS -D_WIN32_WINNT=0x0601"
+    AC_CHECK_TYPES([KAFFINITY,
+                    PROCESSOR_CACHE_TYPE,
+                    CACHE_DESCRIPTOR,
+                    LOGICAL_PROCESSOR_RELATIONSHIP,
+                    RelationProcessorPackage,
+                    SYSTEM_LOGICAL_PROCESSOR_INFORMATION,
+                    GROUP_AFFINITY,
+                    PROCESSOR_RELATIONSHIP,
+                    NUMA_NODE_RELATIONSHIP,
+                    CACHE_RELATIONSHIP,
+                    PROCESSOR_GROUP_INFO,
+                    GROUP_RELATIONSHIP,
+                    SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX,
+		    PSAPI_WORKING_SET_EX_BLOCK,
+		    PSAPI_WORKING_SET_EX_INFORMATION,
+		    PROCESSOR_NUMBER],
+                    [],[],[[#include <windows.h>]])
+    CPPFLAGS="$old_CPPFLAGS"
+    AC_CHECK_LIB([gdi32], [main],
+                 [HWLOC_LIBS="-lgdi32 $HWLOC_LIBS"
+                  AC_DEFINE([HAVE_LIBGDI32], 1, [Define to 1 if we have -lgdi32])])
+    AC_CHECK_LIB([user32], [PostQuitMessage], [hwloc_have_user32="yes"])
+
+    AC_CHECK_HEADER([windows.h], [
+      AC_DEFINE([HWLOC_HAVE_WINDOWS_H], [1], [Define to 1 if you have the `windows.h' header.])
+    ])
+
+    AC_CHECK_HEADERS([sys/lgrp_user.h], [
+      AC_CHECK_LIB([lgrp], [lgrp_init],
+                   [HWLOC_LIBS="-llgrp $HWLOC_LIBS"
+                    AC_DEFINE([HAVE_LIBLGRP], 1, [Define to 1 if we have -llgrp])
+                    AC_CHECK_DECLS([lgrp_latency_cookie],,,[[#include <sys/lgrp_user.h>]])
+      ])
+    ])
+    AC_CHECK_HEADERS([kstat.h], [
+      AC_CHECK_LIB([kstat], [main],
+                   [HWLOC_LIBS="-lkstat $HWLOC_LIBS"
+                    AC_DEFINE([HAVE_LIBKSTAT], 1, [Define to 1 if we have -lkstat])])
+    ])
+
+    AC_CHECK_DECLS([fabsf], [
+      AC_CHECK_LIB([m], [fabsf],
+                   [HWLOC_LIBS="-lm $HWLOC_LIBS"])
+    ], [], [[#include <math.h>]])
+
+    AC_CHECK_HEADERS([picl.h], [
+      AC_CHECK_LIB([picl], [picl_initialize],
+                   [HWLOC_LIBS="-lpicl $HWLOC_LIBS"])])
+
+    AC_CHECK_DECLS([_SC_NPROCESSORS_ONLN,
+    		_SC_NPROCESSORS_CONF,
+    		_SC_NPROC_ONLN,
+    		_SC_NPROC_CONF,
+    		_SC_PAGESIZE,
+    		_SC_PAGE_SIZE,
+    		_SC_LARGE_PAGESIZE],,[:],[[#include <unistd.h>]])
+
+    AC_HAVE_HEADERS([mach/mach_host.h])
+    AC_HAVE_HEADERS([mach/mach_init.h], [
+      AC_CHECK_FUNCS([host_info])
+    ])
+
+    AC_CHECK_HEADERS([sys/param.h])
+    AC_CHECK_HEADERS([sys/sysctl.h], [
+      AC_CHECK_DECLS([CTL_HW, HW_NCPU],,,[[
+      #if HAVE_SYS_PARAM_H
+      #include <sys/param.h>
+      #endif
+      #include <sys/sysctl.h>
+      ]])
+    ],,[
+      AC_INCLUDES_DEFAULT
+      #if HAVE_SYS_PARAM_H
+      #include <sys/param.h>
+      #endif
+    ])
+
+    AC_CHECK_DECLS([strtoull], [], [AC_CHECK_FUNCS([strtoull])], [AC_INCLUDES_DEFAULT])
+
+    # Needed for Windows in private/misc.h
+    AC_CHECK_TYPES([ssize_t])
+    AC_CHECK_DECLS([snprintf], [], [], [AC_INCLUDES_DEFAULT])
+    AC_CHECK_DECLS([strcasecmp], [], [], [AC_INCLUDES_DEFAULT])
+    # strdup and putenv are declared in windows headers but marked deprecated
+    AC_CHECK_DECLS([_strdup], [], [], [AC_INCLUDES_DEFAULT])
+    AC_CHECK_DECLS([_putenv], [], [], [AC_INCLUDES_DEFAULT])
+    # Could add mkdir and access for hwloc-gather-cpuid.c on Windows
+
+    # Do a full link test instead of just using AC_CHECK_FUNCS, which
+    # just checks to see if the symbol exists or not.  For example,
+    # the prototype of sysctl uses u_int, which on some platforms
+    # (such as FreeBSD) is only defined under __BSD_VISIBLE, __USE_BSD
+    # or other similar definitions.  So while the symbols "sysctl" and
+    # "sysctlbyname" might still be available in libc (which autoconf
+    # checks for), they might not be actually usable.
+    AC_TRY_LINK([
+               #include <stdio.h>
+               #include <sys/types.h>
+               #include <sys/sysctl.h>
+               ],
+                [return sysctl(NULL,0,NULL,NULL,NULL,0);],
+                AC_DEFINE([HAVE_SYSCTL],[1],[Define to '1' if sysctl is present and usable]))
+    AC_TRY_LINK([
+               #include <stdio.h>
+               #include <sys/types.h>
+               #include <sys/sysctl.h>
+               ],
+                [return sysctlbyname(NULL,NULL,NULL,NULL,0);],
+                AC_DEFINE([HAVE_SYSCTLBYNAME],[1],[Define to '1' if sysctlbyname is present and usable]))
+
+    AC_CHECK_DECLS([getprogname], [], [], [AC_INCLUDES_DEFAULT])
+    AC_CHECK_DECLS([getexecname], [], [], [AC_INCLUDES_DEFAULT])
+    AC_CHECK_DECLS([GetModuleFileName], [], [], [#include <windows.h>])
+    # program_invocation_name and __progname may be available but not exported in headers
+    AC_MSG_CHECKING([for program_invocation_name])
+    AC_TRY_LINK([
+		#ifndef _GNU_SOURCE
+		# define _GNU_SOURCE
+		#endif
+		#include <errno.h>
+		#include <stdio.h>
+		extern char *program_invocation_name;
+		],[
+		return printf("%s\n", program_invocation_name);
+		],
+		[AC_DEFINE([HAVE_PROGRAM_INVOCATION_NAME], [1], [Define to '1' if program_invocation_name is present and usable])
+		 AC_MSG_RESULT([yes])
+		],[AC_MSG_RESULT([no])])
+    AC_MSG_CHECKING([for __progname])
+    AC_TRY_LINK([
+		#include <stdio.h>
+		extern char *__progname;
+		],[
+		return printf("%s\n", __progname);
+		],
+		[AC_DEFINE([HAVE___PROGNAME], [1], [Define to '1' if __progname is present and usable])
+		 AC_MSG_RESULT([yes])
+		],[AC_MSG_RESULT([no])])
+
+    case ${target} in
+      *-*-mingw*|*-*-cygwin*)
+        hwloc_pid_t=HANDLE
+        hwloc_thread_t=HANDLE
+        ;;
+      *)
+        hwloc_pid_t=pid_t
+        AC_CHECK_TYPES([pthread_t], [hwloc_thread_t=pthread_t], [:], [[#include <pthread.h>]])
+        ;;
+    esac
+    AC_DEFINE_UNQUOTED(hwloc_pid_t, $hwloc_pid_t, [Define this to the process ID type])
+    if test "x$hwloc_thread_t" != "x" ; then
+      AC_DEFINE_UNQUOTED(hwloc_thread_t, $hwloc_thread_t, [Define this to the thread ID type])
+    fi
+
+    _HWLOC_CHECK_DECL([sched_setaffinity], [
+      AC_DEFINE([HWLOC_HAVE_SCHED_SETAFFINITY], [1], [Define to 1 if glibc provides a prototype of sched_setaffinity()])
+      AS_IF([test "$HWLOC_STRICT_ARGS_CFLAGS" = "FAIL"],[
+        AC_MSG_WARN([Support for sched_setaffinity() requires a C compiler which])
+        AC_MSG_WARN([considers incorrect argument counts to be a fatal error.])
+        AC_MSG_ERROR([Cannot continue.])
+      ])
+      AC_MSG_CHECKING([for old prototype of sched_setaffinity])
+      hwloc_save_CFLAGS=$CFLAGS
+      CFLAGS="$CFLAGS $HWLOC_STRICT_ARGS_CFLAGS"
+      AC_COMPILE_IFELSE([
+          AC_LANG_PROGRAM([[
+              #ifndef _GNU_SOURCE
+              # define _GNU_SOURCE
+              #endif
+              #include <sched.h>
+              static unsigned long mask;
+              ]], [[ sched_setaffinity(0, (void*) &mask); ]])],
+          [AC_DEFINE([HWLOC_HAVE_OLD_SCHED_SETAFFINITY], [1], [Define to 1 if glibc provides the old prototype (without length) of sched_setaffinity()])
+           AC_MSG_RESULT([yes])],
+          [AC_MSG_RESULT([no])])
+      CFLAGS=$hwloc_save_CFLAGS
+    ], , [[
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE
+#endif
+#include <sched.h>
+]])
+
+    AC_MSG_CHECKING([for working CPU_SET])
+    AC_LINK_IFELSE([
+      AC_LANG_PROGRAM([[
+        #include <sched.h>
+        cpu_set_t set;
+        ]], [[ CPU_ZERO(&set); CPU_SET(0, &set);]])],
+	[AC_DEFINE([HWLOC_HAVE_CPU_SET], [1], [Define to 1 if the CPU_SET macro works])
+         AC_MSG_RESULT([yes])],
+        [AC_MSG_RESULT([no])])
+
+    AC_MSG_CHECKING([for working CPU_SET_S])
+    AC_LINK_IFELSE([
+      AC_LANG_PROGRAM([[
+          #include <sched.h>
+          cpu_set_t *set;
+        ]], [[
+          set = CPU_ALLOC(1024);
+          CPU_ZERO_S(CPU_ALLOC_SIZE(1024), set);
+          CPU_SET_S(CPU_ALLOC_SIZE(1024), 0, set);
+          CPU_FREE(set);
+        ]])],
+        [AC_DEFINE([HWLOC_HAVE_CPU_SET_S], [1], [Define to 1 if the CPU_SET_S macro works])
+         AC_MSG_RESULT([yes])],
+        [AC_MSG_RESULT([no])])
+
+    AC_MSG_CHECKING([for working syscall with 6 parameters])
+    AC_LINK_IFELSE([
+      AC_LANG_PROGRAM([[
+          #include <unistd.h>
+          #include <sys/syscall.h>
+          ]], [[syscall(0, 1, 2, 3, 4, 5, 6);]])],
+        [AC_DEFINE([HWLOC_HAVE_SYSCALL], [1], [Define to 1 if function `syscall' is available with 6 parameters])
+         AC_MSG_RESULT([yes])],
+        [AC_MSG_RESULT([no])])
+
+    AC_PATH_PROGS([HWLOC_MS_LIB], [lib])
+    AC_ARG_VAR([HWLOC_MS_LIB], [Path to Microsoft's Visual Studio `lib' tool])
+
+    AC_PATH_PROG([BASH], [bash])
+
+    AC_CHECK_FUNCS([ffs], [
+      _HWLOC_CHECK_DECL([ffs],[
+        AC_DEFINE([HWLOC_HAVE_DECL_FFS], [1], [Define to 1 if function `ffs' is declared by system headers])
+      ])
+      AC_DEFINE([HWLOC_HAVE_FFS], [1], [Define to 1 if you have the `ffs' function.])
+      if ( $CC --version | grep gccfss ) >/dev/null 2>&1 ; then
+        dnl May be broken due to
+        dnl    https://forums.oracle.com/forums/thread.jspa?threadID=1997328
+        dnl TODO: a more selective test, since bug may be version dependent.
+        dnl We can't use AC_TRY_LINK because the failure does not appear until
+        dnl run/load time and there is currently no precedent for AC_TRY_RUN
+        dnl use in hwloc.  --PHH
+        dnl For now, we're going with "all gccfss compilers are broken".
+        dnl Better to be safe and correct; it's not like this is
+        dnl performance-critical code, after all.
+        AC_DEFINE([HWLOC_HAVE_BROKEN_FFS], [1],
+                  [Define to 1 if your `ffs' function is known to be broken.])
+      fi
+    ])
+    AC_CHECK_FUNCS([ffsl], [
+      _HWLOC_CHECK_DECL([ffsl],[
+        AC_DEFINE([HWLOC_HAVE_DECL_FFSL], [1], [Define to 1 if function `ffsl' is declared by system headers])
+      ])
+      AC_DEFINE([HWLOC_HAVE_FFSL], [1], [Define to 1 if you have the `ffsl' function.])
+    ])
+
+    AC_CHECK_FUNCS([fls], [
+      _HWLOC_CHECK_DECL([fls],[
+        AC_DEFINE([HWLOC_HAVE_DECL_FLS], [1], [Define to 1 if function `fls' is declared by system headers])
+      ])
+      AC_DEFINE([HWLOC_HAVE_FLS], [1], [Define to 1 if you have the `fls' function.])
+    ])
+    AC_CHECK_FUNCS([flsl], [
+      _HWLOC_CHECK_DECL([flsl],[
+        AC_DEFINE([HWLOC_HAVE_DECL_FLSL], [1], [Define to 1 if function `flsl' is declared by system headers])
+      ])
+      AC_DEFINE([HWLOC_HAVE_FLSL], [1], [Define to 1 if you have the `flsl' function.])
+    ])
+
+    AC_CHECK_FUNCS([clz], [
+      _HWLOC_CHECK_DECL([clz],[
+        AC_DEFINE([HWLOC_HAVE_DECL_CLZ], [1], [Define to 1 if function `clz' is declared by system headers])
+      ])
+      AC_DEFINE([HWLOC_HAVE_CLZ], [1], [Define to 1 if you have the `clz' function.])
+    ])
+    AC_CHECK_FUNCS([clzl], [
+      _HWLOC_CHECK_DECL([clzl],[
+        AC_DEFINE([HWLOC_HAVE_DECL_CLZL], [1], [Define to 1 if function `clzl' is declared by system headers])
+      ])
+      AC_DEFINE([HWLOC_HAVE_CLZL], [1], [Define to 1 if you have the `clzl' function.])
+    ])
+
+    AS_IF([test "$hwloc_c_vendor" != "android"], [AC_CHECK_FUNCS([openat], [hwloc_have_openat=yes])])
+
+
+    AC_CHECK_HEADERS([malloc.h])
+    AC_CHECK_FUNCS([getpagesize memalign posix_memalign])
+
+    AC_CHECK_HEADERS([sys/utsname.h])
+    AC_CHECK_FUNCS([uname])
+
+    dnl Don't check for valgrind in embedded mode because this may conflict
+    dnl with the embedder projects also checking for it.
+    dnl We only use Valgrind to nicely disable the x86 backend with a warning,
+    dnl but we can live without it in embedded mode (it auto-disables itself
+    dnl because of invalid CPUID outputs).
+    dnl Non-embedded checks usually go to hwloc_internal.m4 but this one
+    dnl is really for the core library.
+    AS_IF([test "$hwloc_mode" != "embedded"],
+        [AC_CHECK_HEADERS([valgrind/valgrind.h])
+         AC_CHECK_DECLS([RUNNING_ON_VALGRIND],,[:],[[#include <valgrind/valgrind.h>]])
+	],[
+	 AC_DEFINE([HAVE_DECL_RUNNING_ON_VALGRIND], [0], [Embedded mode; just assume we do not have Valgrind support])
+	])
+
+    AC_CHECK_HEADERS([pthread_np.h])
+    AC_CHECK_DECLS([pthread_setaffinity_np],,[:],[[
+      #include <pthread.h>
+      #ifdef HAVE_PTHREAD_NP_H
+      #  include <pthread_np.h>
+      #endif
+    ]])
+    AC_CHECK_DECLS([pthread_getaffinity_np],,[:],[[
+      #include <pthread.h>
+      #ifdef HAVE_PTHREAD_NP_H
+      #  include <pthread_np.h>
+      #endif
+    ]])
+    AC_CHECK_FUNC([sched_setaffinity], [hwloc_have_sched_setaffinity=yes])
+    AC_CHECK_HEADERS([sys/cpuset.h],,,[[#include <sys/param.h>]])
+    AC_CHECK_FUNCS([cpuset_setaffinity])
+    AC_SEARCH_LIBS([pthread_getthrds_np], [pthread],
+      AC_DEFINE([HWLOC_HAVE_PTHREAD_GETTHRDS_NP], 1, `Define to 1 if you have pthread_getthrds_np')
+    )
+    AC_CHECK_FUNCS([cpuset_setid])
+
+    # Linux libudev support
+    if test "x$enable_libudev" != xno; then
+      AC_CHECK_HEADERS([libudev.h], [
+	AC_CHECK_LIB([udev], [udev_device_new_from_subsystem_sysname], [
+	  HWLOC_LIBS="$HWLOC_LIBS -ludev"
+	  AC_DEFINE([HWLOC_HAVE_LIBUDEV], [1], [Define to 1 if you have libudev.])
+	])
+      ])
+    fi
+
+    # PCI support via libpciaccess.  NOTE: we do not support
+    # libpci/pciutils because that library is GPL and is incompatible
+    # with our BSD license.
+    hwloc_pciaccess_happy=no
+    if test "x$enable_io" != xno && test "x$enable_pci" != xno; then
+      hwloc_pciaccess_happy=yes
+      HWLOC_PKG_CHECK_MODULES([PCIACCESS], [pciaccess], [pci_slot_match_iterator_create], [pciaccess.h], [:], [hwloc_pciaccess_happy=no])
+
+      # Only add the REQUIRES if we got pciaccess through pkg-config.
+      # Otherwise we don't know if pciaccess.pc is installed
+      AS_IF([test "$hwloc_pciaccess_happy" = "yes"], [HWLOC_PCIACCESS_REQUIRES=pciaccess])
+
+      # Just for giggles, if we didn't find a pciaccess pkg-config,
+      # just try looking for its header file and library.
+      AS_IF([test "$hwloc_pciaccess_happy" != "yes"],
+         [AC_CHECK_HEADER([pciaccess.h],
+              [AC_CHECK_LIB([pciaccess], [pci_slot_match_iterator_create],
+                   [hwloc_pciaccess_happy=yes
+                    HWLOC_PCIACCESS_LIBS="-lpciaccess"])
+              ])
+         ])
+
+      AS_IF([test "$hwloc_pciaccess_happy" = "yes"],
+         [hwloc_components="$hwloc_components pci"
+          hwloc_pci_component_maybeplugin=1])
+    fi
+    # If we asked for pci support but couldn't deliver, fail
+    AS_IF([test "$enable_pci" = "yes" -a "$hwloc_pciaccess_happy" = "no"],
+          [AC_MSG_WARN([Specified --enable-pci switch, but could not])
+           AC_MSG_WARN([find appropriate support])
+           AC_MSG_ERROR([Cannot continue])])
+    # don't add LIBS/CFLAGS/REQUIRES yet, depends on plugins
+
+    # OpenCL support
+    hwloc_opencl_happy=no
+    if test "x$enable_io" != xno && test "x$enable_opencl" != "xno"; then
+        hwloc_opencl_happy=yes
+        AC_CHECK_HEADERS([CL/cl_ext.h], [
+	  AC_CHECK_LIB([OpenCL], [clGetDeviceIDs], [HWLOC_OPENCL_LIBS="-lOpenCL"], [hwloc_opencl_happy=no])
+        ], [hwloc_opencl_happy=no])
+    fi
+    AC_SUBST(HWLOC_OPENCL_LIBS)
+    # Check if required extensions are available
+    if test "x$hwloc_opencl_happy" = "xyes"; then
+      tmp_save_CFLAGS="$CFLAGS"
+      CFLAGS="$CFLAGS $HWLOC_OPENCL_CFLAGS"
+      tmp_save_LIBS="$LIBS"
+      LIBS="$LIBS $HWLOC_OPENCL_LIBS"
+      AC_CHECK_DECLS([CL_DEVICE_TOPOLOGY_AMD],[hwloc_opencl_amd_happy=yes],[:],[[#include <CL/cl_ext.h>]])
+      CFLAGS="$tmp_save_CFLAGS"
+      LIBS="$tmp_save_LIBS"
+      # We can't do anything without CL_DEVICE_TOPOLOGY_AMD so far, so disable OpenCL entirely if not found
+      test "x$hwloc_opencl_amd_happy" != "xyes" && hwloc_opencl_happy=no
+    fi
+    # If we asked for opencl support but couldn't deliver, fail
+    AS_IF([test "$enable_opencl" = "yes" -a "$hwloc_opencl_happy" = "no"],
+          [AC_MSG_WARN([Specified --enable-opencl switch, but could not])
+           AC_MSG_WARN([find appropriate support])
+           AC_MSG_ERROR([Cannot continue])])
+    if test "x$hwloc_opencl_happy" = "xyes"; then
+      AC_DEFINE([HWLOC_HAVE_OPENCL], [1], [Define to 1 if you have the `OpenCL' library.])
+      AC_SUBST([HWLOC_HAVE_OPENCL], [1])
+      hwloc_components="$hwloc_components opencl"
+      hwloc_opencl_component_maybeplugin=1
+    else
+      AC_SUBST([HWLOC_HAVE_OPENCL], [0])
+    fi
+    # don't add LIBS/CFLAGS/REQUIRES yet, depends on plugins
+
+    # CUDA support
+    hwloc_have_cuda=no
+    hwloc_have_cudart=no
+    if test "x$enable_io" != xno && test "x$enable_cuda" != "xno"; then
+      AC_CHECK_HEADERS([cuda.h], [
+        AC_MSG_CHECKING(if CUDA_VERSION >= 3020)
+        AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+#include <cuda.h>
+#ifndef CUDA_VERSION
+#error CUDA_VERSION undefined
+#elif CUDA_VERSION < 3020
+#error CUDA_VERSION too old
+#endif]], [[int i = 3;]])],
+         [AC_MSG_RESULT(yes)
+          AC_CHECK_LIB([cuda], [cuInit],
+                       [AC_DEFINE([HAVE_CUDA], 1, [Define to 1 if we have -lcuda])
+                        hwloc_have_cuda=yes])],
+         [AC_MSG_RESULT(no)])])
+
+      AC_CHECK_HEADERS([cuda_runtime_api.h], [
+        AC_MSG_CHECKING(if CUDART_VERSION >= 3020)
+        AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+#include <cuda_runtime_api.h>
+#ifndef CUDART_VERSION
+#error CUDART_VERSION undefined
+#elif CUDART_VERSION < 3020
+#error CUDART_VERSION too old
+#endif]], [[int i = 3;]])],
+         [AC_MSG_RESULT(yes)
+          AC_CHECK_LIB([cudart], [cudaGetDeviceProperties], [
+            HWLOC_CUDA_LIBS="-lcudart"
+            AC_SUBST(HWLOC_CUDA_LIBS)
+            hwloc_have_cudart=yes
+            AC_DEFINE([HWLOC_HAVE_CUDART], [1], [Define to 1 if you have the `cudart' SDK.])
+          ])
+        ])
+      ])
+
+      AS_IF([test "$enable_cuda" = "yes" -a "$hwloc_have_cudart" = "no"],
+            [AC_MSG_WARN([Specified --enable-cuda switch, but could not])
+             AC_MSG_WARN([find appropriate support])
+             AC_MSG_ERROR([Cannot continue])])
+
+      if test "x$hwloc_have_cudart" = "xyes"; then
+	hwloc_components="$hwloc_components cuda"
+        hwloc_cuda_component_maybeplugin=1
+      fi
+    fi
+    # don't add LIBS/CFLAGS yet, depends on plugins
+
+    # NVML support
+    hwloc_nvml_happy=no
+    if test "x$enable_io" != xno && test "x$enable_nvml" != "xno"; then
+	hwloc_nvml_happy=yes
+	AC_CHECK_HEADERS([nvml.h], [
+	  AC_CHECK_LIB([nvidia-ml], [nvmlInit], [HWLOC_NVML_LIBS="-lnvidia-ml"], [hwloc_nvml_happy=no])
+        ], [hwloc_nvml_happy=no])
+    fi
+    if test "x$hwloc_nvml_happy" = "xyes"; then
+      tmp_save_CFLAGS="$CFLAGS"
+      CFLAGS="$CFLAGS $HWLOC_NVML_CFLAGS"
+      tmp_save_LIBS="$LIBS"
+      LIBS="$LIBS $HWLOC_NVML_LIBS"
+      AC_CHECK_DECLS([nvmlDeviceGetMaxPcieLinkGeneration],,[:],[[#include <nvml.h>]])
+      CFLAGS="$tmp_save_CFLAGS"
+      LIBS="$tmp_save_LIBS"
+    fi
+    AC_SUBST(HWLOC_NVML_LIBS)
+    # If we asked for nvml support but couldn't deliver, fail
+    AS_IF([test "$enable_nvml" = "yes" -a "$hwloc_nvml_happy" = "no"],
+	  [AC_MSG_WARN([Specified --enable-nvml switch, but could not])
+	   AC_MSG_WARN([find appropriate support])
+	   AC_MSG_ERROR([Cannot continue])])
+    if test "x$hwloc_nvml_happy" = "xyes"; then
+      AC_DEFINE([HWLOC_HAVE_NVML], [1], [Define to 1 if you have the `NVML' library.])
+      AC_SUBST([HWLOC_HAVE_NVML], [1])
+      hwloc_components="$hwloc_components nvml"
+      hwloc_nvml_component_maybeplugin=1
+    else
+      AC_SUBST([HWLOC_HAVE_NVML], [0])
+    fi
+    # don't add LIBS/CFLAGS/REQUIRES yet, depends on plugins
+
+    # X11 support
+    AC_PATH_XTRA
+
+    CPPFLAGS_save=$CPPFLAGS
+    LIBS_save=$LIBS
+
+    CPPFLAGS="$CPPFLAGS $X_CFLAGS"
+    LIBS="$LIBS $X_PRE_LIBS $X_LIBS $X_EXTRA_LIBS"
+    AC_CHECK_HEADERS([X11/Xlib.h],
+        [AC_CHECK_LIB([X11], [XOpenDisplay],
+            [
+             # the GL backend just needs XOpenDisplay
+             hwloc_enable_X11=yes
+             # lstopo needs more
+             AC_CHECK_HEADERS([X11/Xutil.h],
+                [AC_CHECK_HEADERS([X11/keysym.h],
+                    [AC_DEFINE([HWLOC_HAVE_X11_KEYSYM], [1], [Define to 1 if X11 headers including Xutil.h and keysym.h are available.])
+                     HWLOC_X11_CPPFLAGS="$X_CFLAGS"
+                     AC_SUBST([HWLOC_X11_CPPFLAGS])
+                     HWLOC_X11_LIBS="$X_PRE_LIBS $X_LIBS -lX11 $X_EXTRA_LIBS"
+                     AC_SUBST([HWLOC_X11_LIBS])])
+                ], [], [#include <X11/Xlib.h>])
+            ])
+         ])
+    CPPFLAGS=$CPPFLAGS_save
+    LIBS=$LIBS_save
+
+    # GL Support
+    hwloc_gl_happy=no
+    if test "x$enable_io" != xno && test "x$enable_gl" != "xno"; then
+	hwloc_gl_happy=yes
+
+	AS_IF([test "$hwloc_enable_X11" != "yes"],
+              [AC_MSG_WARN([X11 not found; GL disabled])
+               hwloc_gl_happy=no])
+
+        AC_CHECK_HEADERS([NVCtrl/NVCtrl.h], [
+          AC_CHECK_LIB([XNVCtrl], [XNVCTRLQueryTargetAttribute], [:], [hwloc_gl_happy=no], [-lXext])
+        ], [hwloc_gl_happy=no])
+
+        if test "x$hwloc_gl_happy" = "xyes"; then
+            AC_DEFINE([HWLOC_HAVE_GL], [1], [Define to 1 if you have the GL module components.])
+	    HWLOC_GL_LIBS="-lXNVCtrl -lXext -lX11"
+	    AC_SUBST(HWLOC_GL_LIBS)
+	    # FIXME we actually don't know if xext.pc and x11.pc are installed
+	    # since we didn't look for Xext and X11 using pkg-config
+	    HWLOC_GL_REQUIRES="xext x11"
+            hwloc_have_gl=yes
+	    hwloc_components="$hwloc_components gl"
+	    hwloc_gl_component_maybeplugin=1
+	else
+            AS_IF([test "$enable_gl" = "yes"], [
+                AC_MSG_WARN([Specified --enable-gl switch, but could not])
+                AC_MSG_WARN([find appropriate support])
+                AC_MSG_ERROR([Cannot continue])
+            ])
+        fi
+    fi
+    # don't add LIBS/CFLAGS yet, depends on plugins
+
+    # libxml2 support
+    hwloc_libxml2_happy=
+    if test "x$enable_libxml2" != "xno"; then
+        HWLOC_PKG_CHECK_MODULES([LIBXML2], [libxml-2.0], [xmlNewDoc], [libxml/parser.h],
+                                [hwloc_libxml2_happy=yes],
+                                [hwloc_libxml2_happy=no])
+    fi
+    if test "x$hwloc_libxml2_happy" = "xyes"; then
+        HWLOC_LIBXML2_REQUIRES="libxml-2.0"
+        AC_DEFINE([HWLOC_HAVE_LIBXML2], [1], [Define to 1 if you have the `libxml2' library.])
+        AC_SUBST([HWLOC_HAVE_LIBXML2], [1])
+
+        hwloc_components="$hwloc_components xml_libxml"
+        hwloc_xml_libxml_component_maybeplugin=1
+    else
+        AC_SUBST([HWLOC_HAVE_LIBXML2], [0])
+	AS_IF([test "$enable_libxml2" = "yes"],
+              [AC_MSG_WARN([--enable-libxml2 requested, but libxml2 was not found])
+               AC_MSG_ERROR([Cannot continue])])
+    fi
+    # don't add LIBS/CFLAGS/REQUIRES yet, depends on plugins
+
+    # Try to compile the x86 cpuid inlines
+    if test "x$enable_cpuid" != "xno"; then
+	AC_MSG_CHECKING([for x86 cpuid])
+	old_CPPFLAGS="$CPPFLAGS"
+	CPPFLAGS="$CPPFLAGS -I$HWLOC_top_srcdir/include"
+	# We need hwloc_uint64_t but we can't use autogen/config.h before configure ends.
+	# So pass #include/#define manually here for now.
+	CPUID_CHECK_HEADERS=
+	CPUID_CHECK_DEFINE=
+	if test "x$hwloc_windows" = xyes; then
+	    X86_CPUID_CHECK_HEADERS="#include <windows.h>"
+	    X86_CPUID_CHECK_DEFINE="#define hwloc_uint64_t DWORDLONG"
+	else
+	    X86_CPUID_CHECK_DEFINE="#define hwloc_uint64_t uint64_t"
+	    if test "x$ac_cv_header_stdint_h" = xyes; then
+	        X86_CPUID_CHECK_HEADERS="#include <stdint.h>"
+	    fi
+	fi
+	AC_LINK_IFELSE([AC_LANG_PROGRAM([[
+	    #include <stdio.h>
+	    $X86_CPUID_CHECK_HEADERS
+	    $X86_CPUID_CHECK_DEFINE
+	    #define __hwloc_inline
+	    #include <private/cpuid-x86.h>
+	]], [[
+	    if (hwloc_have_x86_cpuid()) {
+		unsigned eax = 0, ebx, ecx = 0, edx;
+		hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
+		printf("highest x86 cpuid %x\n", eax);
+		return 0;
+	    }
+	]])],
+	[AC_MSG_RESULT([yes])
+	 AC_DEFINE(HWLOC_HAVE_X86_CPUID, 1, [Define to 1 if you have x86 cpuid])
+	 hwloc_have_x86_cpuid=yes],
+	[AC_MSG_RESULT([no])])
+	if test "x$hwloc_have_x86_cpuid" = xyes; then
+	    hwloc_components="$hwloc_components x86"
+	fi
+	CPPFLAGS="$old_CPPFLAGS"
+    fi
+
+    # Components require pthread_mutex, see if it needs -lpthread
+    hwloc_pthread_mutex_happy=no
+    # Try without explicit -lpthread first
+    AC_CHECK_FUNC([pthread_mutex_lock],
+      [hwloc_pthread_mutex_happy=yes
+       HWLOC_LIBS_PRIVATE="$HWLOC_LIBS_PRIVATE -lpthread"
+      ],
+      [AC_MSG_CHECKING([for pthread_mutex_lock with -lpthread])
+       # Try again with explicit -lpthread, but don't use AC_CHECK_FUNC to avoid the cache
+       tmp_save_LIBS=$LIBS
+       LIBS="$LIBS -lpthread"
+       AC_LINK_IFELSE([AC_LANG_CALL([], [pthread_mutex_lock])],
+         [hwloc_pthread_mutex_happy=yes
+          HWLOC_LIBS="$HWLOC_LIBS -lpthread"
+         ])
+       AC_MSG_RESULT([$hwloc_pthread_mutex_happy])
+       LIBS="$tmp_save_LIBS"
+      ])
+    AS_IF([test "x$hwloc_pthread_mutex_happy" = "xyes"],
+      [AC_DEFINE([HWLOC_HAVE_PTHREAD_MUTEX], 1, [Define to 1 if pthread mutexes are available])])
+
+    AS_IF([test "x$hwloc_pthread_mutex_happy" != xyes -a "x$hwloc_windows" != xyes],
+      [AC_MSG_WARN([pthread_mutex_lock not available, required for thread-safe initialization on non-Windows platforms.])
+       AC_MSG_WARN([Please report this to the hwloc-devel mailing list.])
+       AC_MSG_ERROR([Cannot continue])])
+
+    #
+    # Now enable registration of listed components
+    #
+
+    # Plugin support
+    AC_MSG_CHECKING([if plugin support is enabled])
+    # Plugins (even core support) are totally disabled by default
+    AS_IF([test "x$enable_plugins" = "x"], [enable_plugins=no])
+    AS_IF([test "x$enable_plugins" != "xno"], [hwloc_have_plugins=yes], [hwloc_have_plugins=no])
+    AC_MSG_RESULT([$hwloc_have_plugins])
+    AS_IF([test "x$hwloc_have_plugins" = "xyes"],
+          [AC_DEFINE([HWLOC_HAVE_PLUGINS], 1, [Define to 1 if the hwloc library should support dynamically-loaded plugins])])
+
+    # Some sanity checks about plugins
+    # libltdl doesn't work on AIX as of 2.4.2
+    AS_IF([test "x$enable_plugins" = "xyes" -a "x$hwloc_aix" = "xyes"],
+      [AC_MSG_WARN([libltdl does not work on AIX, plugins support cannot be enabled.])
+       AC_MSG_ERROR([Cannot continue])])
+    # posix linkers don't work well with plugins and windows dll constraints
+    AS_IF([test "x$enable_plugins" = "xyes" -a "x$hwloc_windows" = "xyes"],
+      [AC_MSG_WARN([Plugins not supported on non-native Windows build, plugins support cannot be enabled.])
+       AC_MSG_ERROR([Cannot continue])])
+
+    # If we want plugins, look for ltdl.h and libltdl
+    if test "x$hwloc_have_plugins" = xyes; then
+      AC_CHECK_HEADER([ltdl.h], [],
+	[AC_MSG_WARN([Plugin support requested, but could not find ltdl.h])
+	 AC_MSG_ERROR([Cannot continue])])
+      AC_CHECK_LIB([ltdl], [lt_dlopenext],
+	[HWLOC_LIBS="$HWLOC_LIBS -lltdl"],
+	[AC_MSG_WARN([Plugin support requested, but could not find libltdl])
+	 AC_MSG_ERROR([Cannot continue])])
+      # Add libltdl static-build dependencies to hwloc.pc
+      HWLOC_CHECK_LTDL_DEPS
+    fi
+
+    AC_ARG_WITH([hwloc-plugins-path],
+		AC_HELP_STRING([--with-hwloc-plugins-path=dir:...],
+                               [Colon-separated list of plugin directories. Default: "$prefix/lib/hwloc". Plugins will be installed in the first directory. They will be loaded from all of them, in order.]),
+		[HWLOC_PLUGINS_PATH="$with_hwloc_plugins_path"],
+		[HWLOC_PLUGINS_PATH="\$(libdir)/hwloc"])
+    AC_SUBST(HWLOC_PLUGINS_PATH)
+    HWLOC_PLUGINS_DIR=`echo "$HWLOC_PLUGINS_PATH" | cut -d: -f1`
+    AC_SUBST(HWLOC_PLUGINS_DIR)
+
+    # Static components output file
+    hwloc_static_components_dir=${HWLOC_top_builddir}/hwloc
+    mkdir -p ${hwloc_static_components_dir}
+    hwloc_static_components_file=${hwloc_static_components_dir}/static-components.h
+    rm -f ${hwloc_static_components_file}
+
+    # Make $enable_plugins easier to use (it contains either "yes" (all) or a list of <name>)
+    HWLOC_PREPARE_FILTER_COMPONENTS([$enable_plugins])
+    # Now we have some hwloc_<name>_component_wantplugin=1
+
+    # See which core components want plugin and support it
+    HWLOC_FILTER_COMPONENTS
+    # Now we have some hwloc_<name>_component=plugin/static
+    # and hwloc_static/plugin_components
+    AC_MSG_CHECKING([components to build statically])
+    AC_MSG_RESULT($hwloc_static_components)
+    HWLOC_LIST_STATIC_COMPONENTS([$hwloc_static_components_file], [$hwloc_static_components])
+    AC_MSG_CHECKING([components to build as plugins])
+    AC_MSG_RESULT([$hwloc_plugin_components])
+
+    AS_IF([test "$hwloc_pci_component" = "static"],
+          [HWLOC_LIBS="$HWLOC_LIBS $HWLOC_PCIACCESS_LIBS"
+           HWLOC_CFLAGS="$HWLOC_CFLAGS $HWLOC_PCIACCESS_CFLAGS"
+           HWLOC_REQUIRES="$HWLOC_PCIACCESS_REQUIRES $HWLOC_REQUIRES"])
+    AS_IF([test "$hwloc_opencl_component" = "static"],
+          [HWLOC_LIBS="$HWLOC_LIBS $HWLOC_OPENCL_LIBS"
+           HWLOC_CFLAGS="$HWLOC_CFLAGS $HWLOC_OPENCL_CFLAGS"
+           HWLOC_REQUIRES="$HWLOC_OPENCL_REQUIRES $HWLOC_REQUIRES"])
+    AS_IF([test "$hwloc_cuda_component" = "static"],
+          [HWLOC_LIBS="$HWLOC_LIBS $HWLOC_CUDA_LIBS"
+           HWLOC_CFLAGS="$HWLOC_CFLAGS $HWLOC_CUDA_CFLAGS"
+           HWLOC_REQUIRES="$HWLOC_CUDA_REQUIRES $HWLOC_REQUIRES"])
+    AS_IF([test "$hwloc_nvml_component" = "static"],
+          [HWLOC_LIBS="$HWLOC_LIBS $HWLOC_NVML_LIBS"
+           HWLOC_CFLAGS="$HWLOC_CFLAGS $HWLOC_NVML_CFLAGS"
+           HWLOC_REQUIRES="$HWLOC_NVML_REQUIRES $HWLOC_REQUIRES"])
+    AS_IF([test "$hwloc_gl_component" = "static"],
+          [HWLOC_LIBS="$HWLOC_LIBS $HWLOC_GL_LIBS"
+           HWLOC_CFLAGS="$HWLOC_CFLAGS $HWLOC_GL_CFLAGS"
+           HWLOC_REQUIRES="$HWLOC_GL_REQUIRES $HWLOC_REQUIRES"])
+    AS_IF([test "$hwloc_xml_libxml_component" = "static"],
+          [HWLOC_LIBS="$HWLOC_LIBS $HWLOC_LIBXML2_LIBS"
+           HWLOC_CFLAGS="$HWLOC_CFLAGS $HWLOC_LIBXML2_CFLAGS"
+           HWLOC_REQUIRES="$HWLOC_LIBXML2_REQUIRES $HWLOC_REQUIRES"])
+
+    #
+    # Setup HWLOC's C, CPP, and LD flags, and LIBS
+    #
+    AC_SUBST(HWLOC_REQUIRES)
+    AC_SUBST(HWLOC_CFLAGS)
+    HWLOC_CPPFLAGS='-I$(HWLOC_top_builddir)/include -I$(HWLOC_top_srcdir)/include'
+    AC_SUBST(HWLOC_CPPFLAGS)
+    AC_SUBST(HWLOC_LDFLAGS)
+    AC_SUBST(HWLOC_LIBS)
+    AC_SUBST(HWLOC_LIBS_PRIVATE)
+
+    # Set these values explicitly for embedded builds.  Exporting
+    # these values through *_EMBEDDED_* values gives us the freedom to
+    # do something different someday if we ever need to.  There's no
+    # need to fill these values in unless we're in embedded mode.
+    # Indeed, if we're building in embedded mode, we want HWLOC_LIBS
+    # to be empty so that nothing is linked into libhwloc_embedded.la
+    # itself -- only the upper-layer will link in anything required.
+
+    AS_IF([test "$hwloc_mode" = "embedded"],
+          [HWLOC_EMBEDDED_CFLAGS=$HWLOC_CFLAGS
+           HWLOC_EMBEDDED_CPPFLAGS=$HWLOC_CPPFLAGS
+           HWLOC_EMBEDDED_LDADD='$(HWLOC_top_builddir)/hwloc/libhwloc_embedded.la'
+           HWLOC_EMBEDDED_LIBS=$HWLOC_LIBS
+           HWLOC_LIBS=])
+    AC_SUBST(HWLOC_EMBEDDED_CFLAGS)
+    AC_SUBST(HWLOC_EMBEDDED_CPPFLAGS)
+    AC_SUBST(HWLOC_EMBEDDED_LDADD)
+    AC_SUBST(HWLOC_EMBEDDED_LIBS)
+
+    # Always generate these files
+    AC_CONFIG_FILES(
+        hwloc_config_prefix[Makefile]
+        hwloc_config_prefix[include/Makefile]
+        hwloc_config_prefix[hwloc/Makefile ]
+    )
+
+    # Cleanup
+    AC_LANG_POP
+
+    # Success
+    $2
+])dnl
+
+#-----------------------------------------------------------------------
+
+# Specify the symbol prefix
+AC_DEFUN([HWLOC_SET_SYMBOL_PREFIX],[
+    hwloc_symbol_prefix_value=$1
+])dnl
+
+#-----------------------------------------------------------------------
+
+# This must be a standalone routine so that it can be called both by
+# HWLOC_INIT and an external caller (if HWLOC_INIT is not invoked).
+AC_DEFUN([HWLOC_DO_AM_CONDITIONALS],[
+    AS_IF([test "$hwloc_did_am_conditionals" != "yes"],[
+        AM_CONDITIONAL([HWLOC_BUILD_STANDALONE], [test "$hwloc_mode" = "standalone"])
+
+        AM_CONDITIONAL([HWLOC_HAVE_GCC], [test "x$GCC" = "xyes"])
+        AM_CONDITIONAL([HWLOC_HAVE_MS_LIB], [test "x$HWLOC_MS_LIB" != "x"])
+        AM_CONDITIONAL([HWLOC_HAVE_OPENAT], [test "x$hwloc_have_openat" = "xyes"])
+        AM_CONDITIONAL([HWLOC_HAVE_SCHED_SETAFFINITY],
+                       [test "x$hwloc_have_sched_setaffinity" = "xyes"])
+        AM_CONDITIONAL([HWLOC_HAVE_PTHREAD],
+                       [test "x$hwloc_have_pthread" = "xyes"])
+        AM_CONDITIONAL([HWLOC_HAVE_LINUX_LIBNUMA],
+                       [test "x$hwloc_have_linux_libnuma" = "xyes"])
+        AM_CONDITIONAL([HWLOC_HAVE_LIBIBVERBS],
+                       [test "x$hwloc_have_libibverbs" = "xyes"])
+	AM_CONDITIONAL([HWLOC_HAVE_CUDA],
+		       [test "x$hwloc_have_cuda" = "xyes"])
+	AM_CONDITIONAL([HWLOC_HAVE_GL],
+		       [test "x$hwloc_have_gl" = "xyes"])
+	AM_CONDITIONAL([HWLOC_HAVE_MYRIEXPRESS],
+		       [test "x$hwloc_have_myriexpress" = "xyes"])
+	AM_CONDITIONAL([HWLOC_HAVE_CUDART],
+		       [test "x$hwloc_have_cudart" = "xyes"])
+        AM_CONDITIONAL([HWLOC_HAVE_LIBXML2], [test "$hwloc_libxml2_happy" = "yes"])
+        AM_CONDITIONAL([HWLOC_HAVE_CAIRO], [test "$hwloc_cairo_happy" = "yes"])
+        AM_CONDITIONAL([HWLOC_HAVE_PCIACCESS], [test "$hwloc_pciaccess_happy" = "yes"])
+        AM_CONDITIONAL([HWLOC_HAVE_OPENCL], [test "$hwloc_opencl_happy" = "yes"])
+        AM_CONDITIONAL([HWLOC_HAVE_NVML], [test "$hwloc_nvml_happy" = "yes"])
+        AM_CONDITIONAL([HWLOC_HAVE_BUNZIPP], [test "x$BUNZIPP" != "xfalse"])
+        AM_CONDITIONAL([HWLOC_HAVE_USER32], [test "x$hwloc_have_user32" = "xyes"])
+
+        AM_CONDITIONAL([HWLOC_BUILD_DOXYGEN],
+                       [test "x$hwloc_generate_doxs" = "xyes"])
+        AM_CONDITIONAL([HWLOC_BUILD_README],
+                       [test "x$hwloc_generate_readme" = "xyes" -a \( "x$hwloc_install_doxs" = "xyes" -o "x$hwloc_generate_doxs" = "xyes" \) ])
+        AM_CONDITIONAL([HWLOC_INSTALL_DOXYGEN],
+                       [test "x$hwloc_install_doxs" = "xyes"])
+
+        AM_CONDITIONAL([HWLOC_HAVE_LINUX], [test "x$hwloc_linux" = "xyes"])
+        AM_CONDITIONAL([HWLOC_HAVE_BGQ], [test "x$hwloc_bgq" = "xyes"])
+        AM_CONDITIONAL([HWLOC_HAVE_IRIX], [test "x$hwloc_irix" = "xyes"])
+        AM_CONDITIONAL([HWLOC_HAVE_DARWIN], [test "x$hwloc_darwin" = "xyes"])
+        AM_CONDITIONAL([HWLOC_HAVE_FREEBSD], [test "x$hwloc_freebsd" = "xyes"])
+        AM_CONDITIONAL([HWLOC_HAVE_NETBSD], [test "x$hwloc_netbsd" = "xyes"])
+        AM_CONDITIONAL([HWLOC_HAVE_SOLARIS], [test "x$hwloc_solaris" = "xyes"])
+        AM_CONDITIONAL([HWLOC_HAVE_AIX], [test "x$hwloc_aix" = "xyes"])
+        AM_CONDITIONAL([HWLOC_HAVE_HPUX], [test "x$hwloc_hpux" = "xyes"])
+        AM_CONDITIONAL([HWLOC_HAVE_WINDOWS], [test "x$hwloc_windows" = "xyes"])
+        AM_CONDITIONAL([HWLOC_HAVE_MINGW32], [test "x$target_os" = "xmingw32"])
+
+        AM_CONDITIONAL([HWLOC_HAVE_X86], [test "x$hwloc_x86_32" = "xyes" -o "x$hwloc_x86_64" = "xyes"])
+        AM_CONDITIONAL([HWLOC_HAVE_X86_32], [test "x$hwloc_x86_32" = "xyes"])
+        AM_CONDITIONAL([HWLOC_HAVE_X86_64], [test "x$hwloc_x86_64" = "xyes"])
+        AM_CONDITIONAL([HWLOC_HAVE_X86_CPUID], [test "x$hwloc_have_x86_cpuid" = "xyes"])
+
+        AM_CONDITIONAL([HWLOC_HAVE_PLUGINS], [test "x$hwloc_have_plugins" = "xyes"])
+        AM_CONDITIONAL([HWLOC_PCI_BUILD_STATIC], [test "x$hwloc_pci_component" = "xstatic"])
+        AM_CONDITIONAL([HWLOC_OPENCL_BUILD_STATIC], [test "x$hwloc_opencl_component" = "xstatic"])
+        AM_CONDITIONAL([HWLOC_CUDA_BUILD_STATIC], [test "x$hwloc_cuda_component" = "xstatic"])
+        AM_CONDITIONAL([HWLOC_NVML_BUILD_STATIC], [test "x$hwloc_nvml_component" = "xstatic"])
+        AM_CONDITIONAL([HWLOC_GL_BUILD_STATIC], [test "x$hwloc_gl_component" = "xstatic"])
+        AM_CONDITIONAL([HWLOC_XML_LIBXML_BUILD_STATIC], [test "x$hwloc_xml_libxml_component" = "xstatic"])
+
+        AM_CONDITIONAL([HWLOC_HAVE_CXX], [test "x$hwloc_have_cxx" = "xyes"])
+    ])
+    hwloc_did_am_conditionals=yes
+
+    # For backwards compatibility (i.e., packages that only call
+    # HWLOC_DO_AM_CONDITIONALS, not NETLOC_DO_AM_CONDITIONALS), we also have to
+    # do the netloc AM conditionals here
+    NETLOC_DO_AM_CONDITIONALS
+])dnl
+
+#-----------------------------------------------------------------------
+
+AC_DEFUN([_HWLOC_CHECK_DIFF_U], [
+  AC_MSG_CHECKING([whether diff accepts -u])
+  if diff -u /dev/null /dev/null 2> /dev/null
+  then
+    HWLOC_DIFF_U="-u"
+  else
+    HWLOC_DIFF_U=""
+  fi
+  AC_SUBST([HWLOC_DIFF_U])
+  AC_MSG_RESULT([$HWLOC_DIFF_U])
+])
+
+AC_DEFUN([_HWLOC_CHECK_DIFF_W], [
+  AC_MSG_CHECKING([whether diff accepts -w])
+  if diff -w /dev/null /dev/null 2> /dev/null
+  then
+    HWLOC_DIFF_W="-w"
+  else
+    HWLOC_DIFF_W=""
+  fi
+  AC_SUBST([HWLOC_DIFF_W])
+  AC_MSG_RESULT([$HWLOC_DIFF_W])
+])
+
+#-----------------------------------------------------------------------
+
+dnl HWLOC_CHECK_DECL
+dnl
+dnl Check that the declaration of the given function has a complete prototype
+dnl with argument list by trying to call it with an insane number of
+dnl arguments (10). Success means the compiler couldn't really check.
+AC_DEFUN([_HWLOC_CHECK_DECL], [
+  AC_CHECK_DECL([$1], [
+    AC_MSG_CHECKING([whether function $1 has a complete prototype])
+    AC_REQUIRE([AC_PROG_CC])
+    AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
+         [AC_INCLUDES_DEFAULT([$4])],
+         [$1(1,2,3,4,5,6,7,8,9,10);]
+      )],
+      [AC_MSG_RESULT([no])
+       $3],
+      [AC_MSG_RESULT([yes])
+       $2]
+    )], [$3], $4
+  )
+])
+
+#-----------------------------------------------------------------------
+
+dnl HWLOC_CHECK_DECLS
+dnl
+dnl Same as HWLOC_CHECK_DECL, but defines HAVE_DECL_foo to 1 or 0 depending on
+dnl the result.
+AC_DEFUN([_HWLOC_CHECK_DECLS], [
+  HWLOC_CHECK_DECL([$1], [ac_have_decl=1], [ac_have_decl=0], [$4])
+  AC_DEFINE_UNQUOTED(AS_TR_CPP([HAVE_DECL_$1]), [$ac_have_decl],
+    [Define to 1 if you have the declaration of `$1', and to 0 if you don't])
+])
+
+#-----------------------------------------------------------------------
+
+dnl HWLOC_CHECK_LTDL_DEPS
+dnl
+dnl Add ltdl dependencies to HWLOC_LIBS_PRIVATE
+AC_DEFUN([HWLOC_CHECK_LTDL_DEPS], [
+  # save variables that we'll modify below
+  save_lt_cv_dlopen="$lt_cv_dlopen"
+  save_lt_cv_dlopen_libs="$lt_cv_dlopen_libs"
+  save_lt_cv_dlopen_self="$lt_cv_dlopen_self"
+  ###########################################################
+  # code stolen from LT_SYS_DLOPEN_SELF in libtool.m4
+  case $host_os in
+  beos*)
+    lt_cv_dlopen="load_add_on"
+    lt_cv_dlopen_libs=
+    lt_cv_dlopen_self=yes
+    ;;
+
+  mingw* | pw32* | cegcc*)
+    lt_cv_dlopen="LoadLibrary"
+    lt_cv_dlopen_libs=
+    ;;
+
+  cygwin*)
+    lt_cv_dlopen="dlopen"
+    lt_cv_dlopen_libs=
+    ;;
+
+  darwin*)
+  # if libdl is installed we need to link against it
+    AC_CHECK_LIB([dl], [dlopen],
+                [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"],[
+    lt_cv_dlopen="dyld"
+    lt_cv_dlopen_libs=
+    lt_cv_dlopen_self=yes
+    ])
+    ;;
+
+  *)
+    AC_CHECK_FUNC([shl_load],
+          [lt_cv_dlopen="shl_load"],
+      [AC_CHECK_LIB([dld], [shl_load],
+            [lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-ldld"],
+        [AC_CHECK_FUNC([dlopen],
+              [lt_cv_dlopen="dlopen"],
+          [AC_CHECK_LIB([dl], [dlopen],
+                [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"],
+            [AC_CHECK_LIB([svld], [dlopen],
+                  [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld"],
+              [AC_CHECK_LIB([dld], [dld_link],
+                    [lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-ldld"])
+              ])
+            ])
+          ])
+        ])
+      ])
+    ;;
+  esac
+  # end of code stolen from LT_SYS_DLOPEN_SELF in libtool.m4
+  ###########################################################
+
+  HWLOC_LIBS_PRIVATE="$HWLOC_LIBS_PRIVATE $lt_cv_dlopen_libs"
+
+  # restore modified variables in case the actual libtool code uses them
+  lt_cv_dlopen="$save_lt_cv_dlopen"
+  lt_cv_dlopen_libs="$save_lt_cv_dlopen_libs"
+  lt_cv_dlopen_self="$save_lt_cv_dlopen_self"
+])
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_attributes.m4 b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_attributes.m4
new file mode 100644
index 0000000000..96348e819e
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_attributes.m4
@@ -0,0 +1,534 @@
+# This macro set originally copied from Open MPI:
+# Copyright © 2004-2007 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation.  All rights reserved.
+# Copyright © 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation.  All rights
+#                         reserved.
+# Copyright © 2004-2007 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart.  All rights reserved.
+# Copyright © 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# and renamed for hwloc:
+# Copyright © 2009 Inria.  All rights reserved.
+# Copyright © 2009 Université Bordeaux
+# Copyright © 2010 Cisco Systems, Inc.  All rights reserved.
+# See COPYING in top-level directory.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# - Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+#
+# - Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer listed
+#   in this license in the documentation and/or other materials
+#   provided with the distribution.
+#
+# - Neither the name of the copyright holders nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# The copyright holders provide no reassurances that the source code
+# provided does not infringe any patent, copyright, or any other
+# intellectual property rights of third parties.  The copyright holders
+# disclaim any liability to any recipient for claims brought against
+# recipient by any third party for infringement of that parties
+# intellectual property rights.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+#
+# Search the generated warnings for
+# keywords regarding skipping or ignoring certain attributes
+#   Intel: ignore
+#   Sun C++: skip
+#
+AC_DEFUN([_HWLOC_ATTRIBUTE_FAIL_SEARCH],[
+    # Be safe for systems that have ancient Autoconf's (e.g., RHEL5)
+    m4_ifdef([AC_PROG_GREP],
+             [AC_REQUIRE([AC_PROG_GREP])],
+             [GREP=grep])
+
+    if test -s conftest.err ; then
+        for i in ignore skip ; do
+            $GREP -iq $i conftest.err
+            if test "$?" = "0" ; then
+                hwloc_cv___attribute__[$1]=0
+                break;
+            fi
+        done
+    fi
+])
+
+#
+# HWLOC: Remove C++ compiler check.  It can result in a circular
+# dependency in embedded situations.
+#
+# Check for one specific attribute by compiling with C
+# and possibly using a cross-check.
+#
+# If the cross-check is defined, a static function "usage" should be
+# defined, which is to be called from main (to circumvent warnings
+# regarding unused function in main file)
+#       static int usage (int * argument);
+#
+# The last argument is for specific CFLAGS, that need to be set
+# for the compiler to generate a warning on the cross-check.
+# This may need adaption for future compilers / CFLAG-settings.
+#
+AC_DEFUN([_HWLOC_CHECK_SPECIFIC_ATTRIBUTE], [
+    AC_MSG_CHECKING([for __attribute__([$1])])
+    AC_CACHE_VAL(hwloc_cv___attribute__[$1], [
+        #
+        # Try to compile using the C compiler
+        #
+        AC_TRY_COMPILE([$2],[],
+                       [
+                        #
+                        # In case we did succeed: Fine, but was this due to the
+                        # attribute being ignored/skipped? Grep for IgNoRe/skip in conftest.err
+                        # and if found, reset the hwloc_cv__attribute__var=0
+                        #
+                        hwloc_cv___attribute__[$1]=1
+                        _HWLOC_ATTRIBUTE_FAIL_SEARCH([$1])
+                       ],
+                       [hwloc_cv___attribute__[$1]=0])
+
+        #
+        # If the attribute is supported by the C compiler,
+        # try to recompile a *cross-check*, IFF defined.
+        #
+        if test '(' "$hwloc_cv___attribute__[$1]" = "1" -a "[$3]" != "" ')' ; then
+            ac_c_werror_flag_safe=$ac_c_werror_flag
+            ac_c_werror_flag="yes"
+            CFLAGS_safe=$CFLAGS
+            CFLAGS="$CFLAGS [$4]"
+
+            AC_TRY_COMPILE([$3],
+                [
+                 int i=4711;
+                 i=usage(&i);
+                ],
+                [hwloc_cv___attribute__[$1]=0],
+                [
+                 #
+                 # In case we did NOT succeed: Fine, but was this due to the
+                 # attribute being ignored? Grep for IgNoRe in conftest.err
+                 # and if found, reset the hwloc_cv__attribute__var=0
+                 #
+                 hwloc_cv___attribute__[$1]=1
+                 _HWLOC_ATTRIBUTE_FAIL_SEARCH([$1])
+                ])
+
+            ac_c_werror_flag=$ac_c_werror_flag_safe
+            CFLAGS=$CFLAGS_safe
+        fi
+    ])
+
+    if test "$hwloc_cv___attribute__[$1]" = "1" ; then
+        AC_MSG_RESULT([yes])
+    else
+        AC_MSG_RESULT([no])
+    fi
+])
+
+
+#
+# Test the availability of __attribute__ and with the help
+# of _HWLOC_CHECK_SPECIFIC_ATTRIBUTE for the support of
+# particular attributes. Compilers, that do not support an
+# attribute most often fail with a warning (when the warning
+# level is set).
+# The compiler's output is parsed in _HWLOC_ATTRIBUTE_FAIL_SEARCH
+#
+# To add a new attributes __NAME__ add the
+#   hwloc_cv___attribute__NAME
+# add a new check with _HWLOC_CHECK_SPECIFIC_ATTRIBUTE (possibly with a cross-check)
+#   _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([name], [int foo (int arg) __attribute__ ((__name__));], [], [])
+# and define the corresponding
+#   AC_DEFINE_UNQUOTED(_HWLOC_HAVE_ATTRIBUTE_NAME, [$hwloc_cv___attribute__NAME],
+#                      [Whether your compiler has __attribute__ NAME or not])
+# and decide on a correct macro (in opal/include/opal_config_bottom.h):
+#  #  define __opal_attribute_NAME(x)  __attribute__(__NAME__)
+#
+# Please use the "__"-notation of the attribute in order not to
+# clash with predefined names or macros (e.g. const, which some compilers
+# do not like..)
+#
+
+
+AC_DEFUN([_HWLOC_CHECK_ATTRIBUTES], [
+  AC_MSG_CHECKING(for __attribute__)
+
+  AC_CACHE_VAL(hwloc_cv___attribute__, [
+    AC_TRY_COMPILE(
+      [#include <stdlib.h>
+       /* Check for the longest available __attribute__ (since gcc-2.3) */
+       struct foo {
+           char a;
+           int x[2] __attribute__ ((__packed__));
+        };
+      ],
+      [],
+      [hwloc_cv___attribute__=1],
+      [hwloc_cv___attribute__=0],
+    )
+
+    if test "$hwloc_cv___attribute__" = "1" ; then
+        AC_TRY_COMPILE(
+          [#include <stdlib.h>
+           /* Check for the longest available __attribute__ (since gcc-2.3) */
+           struct foo {
+               char a;
+               int x[2] __attribute__ ((__packed__));
+            };
+          ],
+          [],
+          [hwloc_cv___attribute__=1],
+          [hwloc_cv___attribute__=0],
+        )
+    fi
+    ])
+  AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE, [$hwloc_cv___attribute__],
+                     [Whether your compiler has __attribute__ or not])
+
+#
+# Now that we know the compiler support __attribute__ let's check which kind of
+# attributed are supported.
+#
+  if test "$hwloc_cv___attribute__" = "0" ; then
+    AC_MSG_RESULT([no])
+    hwloc_cv___attribute__aligned=0
+    hwloc_cv___attribute__always_inline=0
+    hwloc_cv___attribute__cold=0
+    hwloc_cv___attribute__const=0
+    hwloc_cv___attribute__deprecated=0
+    hwloc_cv___attribute__format=0
+    hwloc_cv___attribute__hot=0
+    hwloc_cv___attribute__malloc=0
+    hwloc_cv___attribute__may_alias=0
+    hwloc_cv___attribute__no_instrument_function=0
+    hwloc_cv___attribute__nonnull=0
+    hwloc_cv___attribute__noreturn=0
+    hwloc_cv___attribute__packed=0
+    hwloc_cv___attribute__pure=0
+    hwloc_cv___attribute__sentinel=0
+    hwloc_cv___attribute__unused=0
+    hwloc_cv___attribute__warn_unused_result=0
+    hwloc_cv___attribute__weak_alias=0
+  else
+    AC_MSG_RESULT([yes])
+
+    _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([aligned],
+        [struct foo { char text[4]; }  __attribute__ ((__aligned__(8)));],
+        [],
+        [])
+
+    #
+    # Ignored by PGI-6.2.5; -- recognized by output-parser
+    #
+    _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([always_inline],
+        [int foo (int arg) __attribute__ ((__always_inline__));],
+        [],
+        [])
+
+    _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([cold],
+        [
+         int foo(int arg1, int arg2) __attribute__ ((__cold__));
+         int foo(int arg1, int arg2) { return arg1 * arg2 + arg1; }
+        ],
+        [],
+        [])
+
+    _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([const],
+        [
+         int foo(int arg1, int arg2) __attribute__ ((__const__));
+         int foo(int arg1, int arg2) { return arg1 * arg2 + arg1; }
+        ],
+        [],
+        [])
+
+
+    _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([deprecated],
+        [
+         int foo(int arg1, int arg2) __attribute__ ((__deprecated__));
+         int foo(int arg1, int arg2) { return arg1 * arg2 + arg1; }
+        ],
+        [],
+        [])
+
+
+    HWLOC_ATTRIBUTE_CFLAGS=
+    case "$hwloc_c_vendor" in
+        gnu)
+            HWLOC_ATTRIBUTE_CFLAGS="-Wall"
+            ;;
+        intel)
+            # we want specifically the warning on format string conversion
+            HWLOC_ATTRIBUTE_CFLAGS="-we181"
+            ;;
+    esac
+    _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([format],
+        [
+         int this_printf (void *my_object, const char *my_format, ...) __attribute__ ((__format__ (__printf__, 2, 3)));
+        ],
+        [
+         static int usage (int * argument);
+         extern int this_printf (int arg1, const char *my_format, ...) __attribute__ ((__format__ (__printf__, 2, 3)));
+
+         static int usage (int * argument) {
+             return this_printf (*argument, "%d", argument); /* This should produce a format warning */
+         }
+         /* The autoconf-generated main-function is int main(), which produces a warning by itself */
+         int main(void);
+        ],
+        [$HWLOC_ATTRIBUTE_CFLAGS])
+
+    _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([hot],
+        [
+         int foo(int arg1, int arg2) __attribute__ ((__hot__));
+         int foo(int arg1, int arg2) { return arg1 * arg2 + arg1; }
+        ],
+        [],
+        [])
+
+    _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([malloc],
+        [
+#ifdef HAVE_STDLIB_H
+#  include <stdlib.h>
+#endif
+         int * foo(int arg1) __attribute__ ((__malloc__));
+         int * foo(int arg1) { return (int*) malloc(arg1); }
+        ],
+        [],
+        [])
+
+
+    #
+    # Attribute may_alias: No suitable cross-check available, that works for non-supporting compilers
+    # Ignored by intel-9.1.045 -- turn off with -wd1292
+    # Ignored by PGI-6.2.5; ignore not detected due to missing cross-check
+    # The test case is chosen to match our only use in topology-xml-*.c, and reproduces an xlc-13.1.0 bug.
+    #
+    _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([may_alias],
+        [struct { int i; } __attribute__ ((__may_alias__)) * p_value;],
+        [],
+        [])
+
+
+    _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([no_instrument_function],
+        [int * foo(int arg1) __attribute__ ((__no_instrument_function__));],
+        [],
+        [])
+
+
+    #
+    # Attribute nonnull:
+    # Ignored by intel-compiler 9.1.045 -- recognized by cross-check
+    # Ignored by PGI-6.2.5 (pgCC) -- recognized by cross-check
+    #
+    HWLOC_ATTRIBUTE_CFLAGS=
+    case "$hwloc_c_vendor" in
+        gnu)
+            HWLOC_ATTRIBUTE_CFLAGS="-Wall"
+            ;;
+        intel)
+            # we do not want to get ignored attributes warnings, but rather real warnings
+            HWLOC_ATTRIBUTE_CFLAGS="-wd1292"
+            ;;
+    esac
+    _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([nonnull],
+        [
+         int square(int *arg) __attribute__ ((__nonnull__));
+         int square(int *arg) { return *arg; }
+        ],
+        [
+         static int usage(int * argument);
+         int square(int * argument) __attribute__ ((__nonnull__));
+         int square(int * argument) { return (*argument) * (*argument); }
+
+         static int usage(int * argument) {
+             return square( ((void*)0) );    /* This should produce an argument must be nonnull warning */
+         }
+         /* The autoconf-generated main-function is int main(), which produces a warning by itself */
+         int main(void);
+        ],
+        [$HWLOC_ATTRIBUTE_CFLAGS])
+
+
+    _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([noreturn],
+        [
+#ifdef HAVE_UNISTD_H
+#  include <unistd.h>
+#endif
+#ifdef HAVE_STDLIB_H
+#  include <stdlib.h>
+#endif
+         void fatal(int arg1) __attribute__ ((__noreturn__));
+         void fatal(int arg1) { exit(arg1); }
+        ],
+        [],
+        [])
+
+    _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([packed],
+        [
+         struct foo {
+             char a;
+             int x[2] __attribute__ ((__packed__));
+         };
+        ],
+        [],
+        [])
+
+    _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([pure],
+        [
+         int square(int arg) __attribute__ ((__pure__));
+         int square(int arg) { return arg * arg; }
+        ],
+        [],
+        [])
+
+    #
+    # Attribute sentinel:
+    # Ignored by the intel-9.1.045 -- recognized by cross-check
+    #                intel-10.0beta works fine
+    # Ignored by PGI-6.2.5 (pgCC) -- recognized by output-parser and cross-check
+    # Ignored by pathcc-2.2.1 -- recognized by cross-check (through grep ignore)
+    #
+    HWLOC_ATTRIBUTE_CFLAGS=
+    case "$hwloc_c_vendor" in
+        gnu)
+            HWLOC_ATTRIBUTE_CFLAGS="-Wall"
+            ;;
+        intel)
+            # we do not want to get ignored attributes warnings
+            HWLOC_ATTRIBUTE_CFLAGS="-wd1292"
+            ;;
+    esac
+    _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([sentinel],
+        [
+         int my_execlp(const char * file, const char *arg, ...) __attribute__ ((__sentinel__));
+        ],
+        [
+         static int usage(int * argument);
+         int my_execlp(const char * file, const char *arg, ...) __attribute__ ((__sentinel__));
+
+         static int usage(int * argument) {
+             void * last_arg_should_be_null = argument;
+             return my_execlp ("lala", "/home/there", last_arg_should_be_null);   /* This should produce a warning */
+         }
+         /* The autoconf-generated main-function is int main(), which produces a warning by itself */
+         int main(void);
+        ],
+        [$HWLOC_ATTRIBUTE_CFLAGS])
+
+    _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([unused],
+        [
+         int square(int arg1 __attribute__ ((__unused__)), int arg2);
+         int square(int arg1, int arg2) { return arg2; }
+        ],
+        [],
+        [])
+
+
+    #
+    # Attribute warn_unused_result:
+    # Ignored by the intel-compiler 9.1.045 -- recognized by cross-check
+    # Ignored by pathcc-2.2.1 -- recognized by cross-check (through grep ignore)
+    #
+    HWLOC_ATTRIBUTE_CFLAGS=
+    case "$hwloc_c_vendor" in
+        gnu)
+            HWLOC_ATTRIBUTE_CFLAGS="-Wall"
+            ;;
+        intel)
+            # we do not want to get ignored attributes warnings
+            HWLOC_ATTRIBUTE_CFLAGS="-wd1292"
+            ;;
+    esac
+    _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([warn_unused_result],
+        [
+         int foo(int arg) __attribute__ ((__warn_unused_result__));
+         int foo(int arg) { return arg + 3; }
+        ],
+        [
+         static int usage(int * argument);
+         int foo(int arg) __attribute__ ((__warn_unused_result__));
+
+         int foo(int arg) { return arg + 3; }
+         static int usage(int * argument) {
+           foo (*argument);        /* Should produce an unused result warning */
+           return 0;
+         }
+
+         /* The autoconf-generated main-function is int main(), which produces a warning by itself */
+         int main(void);
+        ],
+        [$HWLOC_ATTRIBUTE_CFLAGS])
+
+
+    _HWLOC_CHECK_SPECIFIC_ATTRIBUTE([weak_alias],
+        [
+         int foo(int arg);
+         int foo(int arg) { return arg + 3; }
+         int foo2(int arg) __attribute__ ((__weak__, __alias__("foo")));
+        ],
+        [],
+        [])
+
+  fi
+
+  # Now that all the values are set, define them
+
+  AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_ALIGNED, [$hwloc_cv___attribute__aligned],
+                     [Whether your compiler has __attribute__ aligned or not])
+  AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_ALWAYS_INLINE, [$hwloc_cv___attribute__always_inline],
+                     [Whether your compiler has __attribute__ always_inline or not])
+  AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_COLD, [$hwloc_cv___attribute__cold],
+                     [Whether your compiler has __attribute__ cold or not])
+  AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_CONST, [$hwloc_cv___attribute__const],
+                     [Whether your compiler has __attribute__ const or not])
+  AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_DEPRECATED, [$hwloc_cv___attribute__deprecated],
+                     [Whether your compiler has __attribute__ deprecated or not])
+  AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_FORMAT, [$hwloc_cv___attribute__format],
+                     [Whether your compiler has __attribute__ format or not])
+  AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_HOT, [$hwloc_cv___attribute__hot],
+                     [Whether your compiler has __attribute__ hot or not])
+  AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_MALLOC, [$hwloc_cv___attribute__malloc],
+                     [Whether your compiler has __attribute__ malloc or not])
+  AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_MAY_ALIAS, [$hwloc_cv___attribute__may_alias],
+                     [Whether your compiler has __attribute__ may_alias or not])
+  AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_NO_INSTRUMENT_FUNCTION, [$hwloc_cv___attribute__no_instrument_function],
+                     [Whether your compiler has __attribute__ no_instrument_function or not])
+  AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_NONNULL, [$hwloc_cv___attribute__nonnull],
+                     [Whether your compiler has __attribute__ nonnull or not])
+  AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_NORETURN, [$hwloc_cv___attribute__noreturn],
+                     [Whether your compiler has __attribute__ noreturn or not])
+  AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_PACKED, [$hwloc_cv___attribute__packed],
+                     [Whether your compiler has __attribute__ packed or not])
+  AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_PURE, [$hwloc_cv___attribute__pure],
+                     [Whether your compiler has __attribute__ pure or not])
+  AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_SENTINEL, [$hwloc_cv___attribute__sentinel],
+                     [Whether your compiler has __attribute__ sentinel or not])
+  AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_UNUSED, [$hwloc_cv___attribute__unused],
+                     [Whether your compiler has __attribute__ unused or not])
+  AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_WARN_UNUSED_RESULT, [$hwloc_cv___attribute__warn_unused_result],
+                     [Whether your compiler has __attribute__ warn unused result or not])
+  AC_DEFINE_UNQUOTED(HWLOC_HAVE_ATTRIBUTE_WEAK_ALIAS, [$hwloc_cv___attribute__weak_alias],
+                     [Whether your compiler has __attribute__ weak alias or not])
+])
+
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_vendor.m4 b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_vendor.m4
new file mode 100644
index 0000000000..2281113bc6
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_vendor.m4
@@ -0,0 +1,246 @@
+dnl -*- shell-script -*-
+dnl
+dnl Copyright © 2004-2005 The Trustees of Indiana University and Indiana
+dnl                         University Research and Technology
+dnl                         Corporation.  All rights reserved.
+dnl Copyright © 2004-2005 The University of Tennessee and The University
+dnl                         of Tennessee Research Foundation.  All rights
+dnl                         reserved.
+dnl Copyright © 2004-2005 High Performance Computing Center Stuttgart,
+dnl                         University of Stuttgart.  All rights reserved.
+dnl Copyright © 2004-2005 The Regents of the University of California.
+dnl                         All rights reserved.
+dnl Copyright © 2011      Cisco Systems, Inc.  All rights reserved.
+dnl Copyright © 2015 Inria.  All rights reserved.
+dnl $COPYRIGHT$
+dnl
+dnl Additional copyrights may follow
+dnl
+dnl $HEADER$
+dnl
+
+dnl ------------------------------------------------------------------
+dnl This m4 file originally copied from Open MPI
+dnl config/ompi_check_vendor.m4.
+dnl ------------------------------------------------------------------
+
+
+# HWLOC_C_COMPILER_VENDOR(VENDOR_VARIABLE)
+# ---------------------------------------
+# Set shell variable VENDOR_VARIABLE to the name of the compiler
+# vendor for the current C compiler.
+#
+# See comment for _HWLOC_CHECK_COMPILER_VENDOR for a complete
+# list of currently detected compilers.
+AC_DEFUN([_HWLOC_C_COMPILER_VENDOR], [
+    AC_REQUIRE([AC_PROG_CC])
+
+    AC_CACHE_CHECK([for the C compiler vendor],
+        [hwloc_cv_c_compiler_vendor],
+        [AC_LANG_PUSH(C)
+         _HWLOC_CHECK_COMPILER_VENDOR([hwloc_cv_c_compiler_vendor])
+         AC_LANG_POP(C)])
+
+    $1="$hwloc_cv_c_compiler_vendor"
+])
+
+
+# workaround to avoid syntax error with Autoconf < 2.68:
+m4_ifndef([AC_LANG_DEFINES_PROVIDED],
+	  [m4_define([AC_LANG_DEFINES_PROVIDED])])
+
+# HWLOC_IFDEF_IFELSE(symbol, [action-if-defined],
+#                   [action-if-not-defined])
+# ----------------------------------------------
+# Run compiler to determine if preprocessor symbol "symbol" is
+# defined by the compiler.
+AC_DEFUN([HWLOC_IFDEF_IFELSE], [
+    AC_COMPILE_IFELSE([AC_LANG_DEFINES_PROVIDED
+#ifndef $1
+#error "symbol $1 not defined"
+choke me
+#endif], [$2], [$3])])
+
+
+# HWLOC_IF_IFELSE(symbol, [action-if-defined],
+#                [action-if-not-defined])
+# ----------------------------------------------
+# Run compiler to determine if preprocessor symbol "symbol" is
+# defined by the compiler.
+AC_DEFUN([HWLOC_IF_IFELSE], [
+    AC_COMPILE_IFELSE([AC_LANG_DEFINES_PROVIDED
+#if !( $1 )
+#error "condition $1 not met"
+choke me
+#endif], [$2], [$3])])
+
+
+# _HWLOC_CHECK_COMPILER_VENDOR(VENDOR_VARIABLE)
+# --------------------------------------------
+# Set shell variable VENDOR_VARIABLE to the name of the compiler
+# vendor for the compiler for the current language.  Language must be
+# one of C, OBJC, or C++.
+#
+# thanks to http://predef.sourceforge.net/precomp.html for the list
+# of defines to check.
+AC_DEFUN([_HWLOC_CHECK_COMPILER_VENDOR], [
+    hwloc_check_compiler_vendor_result="unknown"
+
+    # GNU is probably the most common, so check that one as soon as
+    # possible. Intel and Android pretend to be GNU, so need to
+    # check Intel and Android before checking for GNU.
+
+    # Android
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__ANDROID__],
+               [hwloc_check_compiler_vendor_result="android"])])
+
+    # Intel
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IF_IFELSE([defined(__INTEL_COMPILER) || defined(__ICC)],
+               [hwloc_check_compiler_vendor_result="intel"])])
+
+    # GNU
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__GNUC__],
+               [hwloc_check_compiler_vendor_result="gnu"])])
+
+    # Borland Turbo C
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__TURBOC__],
+               [hwloc_check_compiler_vendor_result="borland"])])
+
+    # Borland C++
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__BORLANDC__],
+               [hwloc_check_compiler_vendor_result="borland"])])
+
+    # Comeau C++
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__COMO__],
+               [hwloc_check_compiler_vendor_result="comeau"])])
+
+    # Compaq C/C++
+    # OSF part actually not needed anymore but doesn't hurt
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IF_IFELSE([defined(__DECC) || defined(VAXC) || defined(__VAXC)],
+               [hwloc_check_compiler_vendor_result="compaq"],
+               [HWLOC_IF_IFELSE([defined(__osf__) && defined(__LANGUAGE_C__)],
+                    [hwloc_check_compiler_vendor_result="compaq"],
+                    [HWLOC_IFDEF_IFELSE([__DECCXX],
+                         [hwloc_check_compiler_vendor_result="compaq"])])])])
+
+    # Cray C/C++
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([_CRAYC],
+               [hwloc_check_compiler_vendor_result="cray"])])
+
+    # Diab C/C++
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__DCC__],
+               [hwloc_check_compiler_vendor_result="diab"])])
+
+    # Digital Mars
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IF_IFELSE([defined(__DMC__) || defined(__SC__) || defined(__ZTC__)],
+               [hwloc_check_compiler_vendor_result="digital mars"])])
+
+    # HP ANSI C / aC++
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IF_IFELSE([defined(__HP_cc) || defined(__HP_aCC)],
+               [hwloc_check_compiler_vendor_result="hp"])])
+
+    # IBM XL C/C++
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IF_IFELSE([defined(__xlC__) || defined(__IBMC__) || defined(__IBMCPP__)],
+               [hwloc_check_compiler_vendor_result="ibm"],
+               [HWLOC_IF_IFELSE([defined(_AIX) && !defined(__GNUC__)],
+                    [hwloc_check_compiler_vendor_result="ibm"])])])
+
+    # KAI C++ (rest in peace)
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__KCC],
+               [hwloc_check_compiler_vendor_result="kai"])])
+
+    # LCC
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__LCC__],
+               [hwloc_check_compiler_vendor_result="lcc"])])
+
+    # MetaWare High C/C++
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__HIGHC__],
+               [hwloc_check_compiler_vendor_result="metaware high"])])
+
+    # Metrowerks Codewarrior
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__MWERKS__],
+               [hwloc_check_compiler_vendor_result="metrowerks"])])
+
+    # MIPSpro (SGI)
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IF_IFELSE([defined(sgi) || defined(__sgi)],
+               [hwloc_check_compiler_vendor_result="sgi"])])
+
+    # MPW C++
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IF_IFELSE([defined(__MRC__) || defined(MPW_C) || defined(MPW_CPLUS)],
+               [hwloc_check_compiler_vendor_result="mpw"])])
+
+    # Microsoft
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [# Always use C compiler when checking for Microsoft, as
+           # Visual C++ doesn't recognize .cc as a C++ file.
+           AC_LANG_PUSH(C)
+           HWLOC_IF_IFELSE([defined(_MSC_VER) || defined(__MSC_VER)],
+               [hwloc_check_compiler_vendor_result="microsoft"])
+           AC_LANG_POP(C)])
+
+    # Norcroft C
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__CC_NORCROFT],
+               [hwloc_check_compiler_vendor_result="norcroft"])])
+
+    # Pelles C
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__POCC__],
+               [hwloc_check_compiler_vendor_result="pelles"])])
+
+    # Portland Group
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__PGI],
+               [hwloc_check_compiler_vendor_result="portland group"])])
+
+    # SAS/C
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IF_IFELSE([defined(SASC) || defined(__SASC) || defined(__SASC__)],
+               [hwloc_check_compiler_vendor_result="sas"])])
+
+    # Sun Workshop C/C++
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IF_IFELSE([defined(__SUNPRO_C) || defined(__SUNPRO_CC)],
+               [hwloc_check_compiler_vendor_result="sun"])])
+
+    # TenDRA C/C++
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__TenDRA__],
+               [hwloc_check_compiler_vendor_result="tendra"])])
+
+    # Tiny C
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__TINYC__],
+               [hwloc_check_compiler_vendor_result="tiny"])])
+
+    # USL C
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__USLC__],
+               [hwloc_check_compiler_vendor_result="usl"])])
+
+    # Watcom C++
+    AS_IF([test "$hwloc_check_compiler_vendor_result" = "unknown"],
+          [HWLOC_IFDEF_IFELSE([__WATCOMC__],
+               [hwloc_check_compiler_vendor_result="watcom"])])
+
+    $1="$hwloc_check_compiler_vendor_result"
+    unset hwloc_check_compiler_vendor_result
+])
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_visibility.m4 b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_visibility.m4
new file mode 100644
index 0000000000..885fe3d8df
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_check_visibility.m4
@@ -0,0 +1,131 @@
+# This macro set originally copied from Open MPI:
+# Copyright © 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation.  All rights reserved.
+# Copyright © 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation.  All rights
+#                         reserved.
+# Copyright © 2004-2007 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart.  All rights reserved.
+# Copyright © 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# Copyright © 2006-2007 Cisco Systems, Inc.  All rights reserved.
+# and renamed/modified for hwloc:
+# Copyright © 2009 Inria.  All rights reserved.
+# Copyright © 2009-2010 Université Bordeaux
+# Copyright © 2010-2012 Cisco Systems, Inc.  All rights reserved.
+# See COPYING in top-level directory.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# - Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+#
+# - Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer listed
+#   in this license in the documentation and/or other materials
+#   provided with the distribution.
+#
+# - Neither the name of the copyright holders nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# The copyright holders provide no reassurances that the source code
+# provided does not infringe any patent, copyright, or any other
+# intellectual property rights of third parties.  The copyright holders
+# disclaim any liability to any recipient for claims brought against
+# recipient by any third party for infringement of that parties
+# intellectual property rights.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+# _HWLOC_CHECK_VISIBILITY
+# --------------------------------------------------------
+AC_DEFUN([_HWLOC_CHECK_VISIBILITY],[
+    # Be safe for systems that have ancient Autoconf's (e.g., RHEL5)
+    m4_ifdef([AC_PROG_GREP],
+             [AC_REQUIRE([AC_PROG_GREP])],
+             [GREP=grep])
+
+    # Check if the compiler has support for visibility, like some
+    # versions of gcc, icc, Sun Studio cc.
+    AC_ARG_ENABLE(visibility,
+        AC_HELP_STRING([--enable-visibility],
+            [enable visibility feature of certain compilers/linkers (default: enabled on platforms that support it)]))
+
+    case ${target} in
+        *-*-aix*|*-*-mingw*|*-*-cygwin*|*-*-hpux*)
+            enable_visibility=no
+            ;;
+    esac
+
+    hwloc_visibility_define=0
+    hwloc_msg="whether to enable symbol visibility"
+    if test "$enable_visibility" = "no"; then
+        AC_MSG_CHECKING([$hwloc_msg])
+        AC_MSG_RESULT([no (disabled)])
+    else
+        CFLAGS_orig=$CFLAGS
+
+        hwloc_add=
+        case "$hwloc_c_vendor" in
+        sun)
+            # Check using Sun Studio -xldscope=hidden flag
+            hwloc_add=-xldscope=hidden
+            CFLAGS="$CFLAGS_orig $hwloc_add -errwarn=%all"
+            ;;
+
+        *)
+            # Check using -fvisibility=hidden
+            hwloc_add=-fvisibility=hidden
+            CFLAGS="$CFLAGS_orig $hwloc_add -Werror"
+            ;;
+        esac
+
+        AC_MSG_CHECKING([if $CC supports $hwloc_add])
+        AC_LINK_IFELSE([AC_LANG_PROGRAM([[
+            #include <stdio.h>
+            __attribute__((visibility("default"))) int foo;
+            ]],[[fprintf(stderr, "Hello, world\n");]])],
+            [AS_IF([test -s conftest.err],
+                   [$GREP -iq visibility conftest.err
+                    # If we find "visibility" in the stderr, then
+                    # assume it doesn't work
+                    AS_IF([test "$?" = "0"], [hwloc_add=])])
+            ], [hwloc_add=])
+        AS_IF([test "$hwloc_add" = ""],
+              [AC_MSG_RESULT([no])],
+              [AC_MSG_RESULT([yes])])
+
+        CFLAGS=$CFLAGS_orig
+        HWLOC_VISIBILITY_CFLAGS=$hwloc_add
+
+        if test "$hwloc_add" != "" ; then
+            hwloc_visibility_define=1
+            AC_MSG_CHECKING([$hwloc_msg])
+            AC_MSG_RESULT([yes (via $hwloc_add)])
+        elif test "$enable_visibility" = "yes"; then
+            AC_MSG_ERROR([Symbol visibility support requested but compiler does not seem to support it.  Aborting])
+        else
+            AC_MSG_CHECKING([$hwloc_msg])
+            AC_MSG_RESULT([no (unsupported)])
+        fi
+        unset hwloc_add
+    fi
+
+    AC_DEFINE_UNQUOTED([HWLOC_C_HAVE_VISIBILITY], [$hwloc_visibility_define],
+            [Whether C compiler supports symbol visibility or not])
+])
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_components.m4 b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_components.m4
new file mode 100644
index 0000000000..7d5c1fa194
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_components.m4
@@ -0,0 +1,66 @@
+# Copyright © 2012 Inria.  All rights reserved.
+# See COPYING in top-level directory.
+
+
+# HWLOC_PREPARE_FILTER_COMPONENTS
+#
+# Given a comma-separated list of names, define hwloc_<name>_component_maybeplugin=1.
+#
+# $1 = command-line given list of components to build as plugins
+#
+AC_DEFUN([HWLOC_PREPARE_FILTER_COMPONENTS], [
+  for name in `echo [$1] | sed -e 's/,/ /g'` ; do
+    str="hwloc_${name}_component_wantplugin=1"
+    eval $str
+  done
+])
+
+
+# HWLOC_FILTER_COMPONENTS
+#
+# For each component in hwloc_components,
+# check if hwloc_<name>_component_wantplugin=1 or enable_plugin=yes,
+# and check if hwloc_<name>_component_maybeplugin=1.
+# Add <name> to hwloc_[static|plugin]_components accordingly.
+# And set hwloc_<name>_component=[static|plugin] accordingly.
+#
+AC_DEFUN([HWLOC_FILTER_COMPONENTS], [
+for name in $hwloc_components ; do
+  str="maybeplugin=\$hwloc_${name}_component_maybeplugin"
+  eval $str
+  str="wantplugin=\$hwloc_${name}_component_wantplugin"
+  eval $str
+  if test x$hwloc_have_plugins = xyes && test x$maybeplugin = x1 && test x$wantplugin = x1 -o x$enable_plugins = xyes; then
+    hwloc_plugin_components="$hwloc_plugin_components $name"
+    str="hwloc_${name}_component=plugin"
+  else
+    hwloc_static_components="$hwloc_static_components $name"
+    str="hwloc_${name}_component=static"
+  fi
+  eval $str
+done
+])
+
+
+# HWLOC_LIST_STATIC_COMPONENTS
+#
+# Append to file $1 an array of components by listing component names in $2.
+#
+# $1 = filename
+# $2 = list of component names
+#
+AC_DEFUN([HWLOC_LIST_STATIC_COMPONENTS], [
+for comp in [$2]; do
+  echo "HWLOC_DECLSPEC extern const struct hwloc_component hwloc_${comp}_component;" >>[$1]
+done
+cat <<EOF >>[$1]
+static const struct hwloc_component * hwloc_static_components[[]] = {
+EOF
+for comp in [$2]; do
+  echo "  &hwloc_${comp}_component," >>[$1]
+done
+cat <<EOF >>[$1]
+  NULL
+};
+EOF
+])
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_get_version.sh b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_get_version.sh
new file mode 100755
index 0000000000..74bca537ce
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_get_version.sh
@@ -0,0 +1,98 @@
+#!/bin/sh
+#
+# Copyright © 2004-2006 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation.  All rights reserved.
+# Copyright © 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation.  All rights
+#                         reserved.
+# Copyright © 2004-2005 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart.  All rights reserved.
+# Copyright © 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# Copyright © 2008-2014 Cisco Systems, Inc.  All rights reserved.
+# Copyright © 2014 Inria.  All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+srcfile="$1"
+option="$2"
+
+if test -z "$srcfile"; then
+    option="--help"
+else
+    : ${srcdir=.}
+
+    if test -f "$srcfile"; then
+        ompi_vers=`sed -n "
+	t clear
+	: clear
+	s/^major/HWLOC_MAJOR_VERSION/
+	s/^minor/HWLOC_MINOR_VERSION/
+	s/^release/HWLOC_RELEASE_VERSION/
+	s/^greek/HWLOC_GREEK_VERSION/
+	s/\\\${major}/\\\${HWLOC_MAJOR_VERSION}/
+	s/\\\${minor}/\\\${HWLOC_MINOR_VERSION}/
+	s/\\\${release}/\\\${HWLOC_RELEASE_VERSION}/
+	s/\\\${greek}/\\\${HWLOC_GREEK_VERSION}/
+	s/^date/HWLOC_RELEASE_DATE/
+	s/^snapshot_version/HWLOC_SNAPSHOT_VERSION/
+	s/^snapshot/HWLOC_SNAPSHOT/
+	t print
+	b
+	: print
+	p" < "$srcfile"`
+	eval "$ompi_vers"
+
+        HWLOC_VERSION="$HWLOC_MAJOR_VERSION.$HWLOC_MINOR_VERSION.$HWLOC_RELEASE_VERSION${HWLOC_GREEK_VERSION}"
+
+        # If HWLOC_SNAPSHOT=1, then use HWLOC_SNAPSHOT_VERSION
+        if test "$HWLOC_SNAPSHOT" = "1"; then
+            # First, verify that HWLOC_SNAPSHOT_VERSION isn't empty.
+            if test -z "$HWLOC_SNAPSHOT_VERSION"; then
+                echo "*** ERROR: $1 contains snapshot=1, but an empty value for snapshot_version" 1>&2
+                exit 1
+            fi
+            HWLOC_VERSION=$HWLOC_SNAPSHOT_VERSION
+        fi
+    fi
+
+    if test "$option" = ""; then
+	option="--version"
+    fi
+fi
+
+case "$option" in
+    --version)
+	echo $HWLOC_VERSION
+	;;
+    --release-date)
+        echo $HWLOC_RELEASE_DATE
+        ;;
+    --snapshot)
+        echo $HWLOC_SNAPSHOT
+        ;;
+    -h|--help)
+	cat <<EOF
+$0 <srcfile> <option>
+
+<srcfile> - Text version file
+<option>  - One of:
+    --version      - Show version number
+    --release-date - Show the release date
+    --snapshot     - Show whether this is a snapshot release or not
+    --help         - This message
+EOF
+        ;;
+    *)
+        echo "Unrecognized option $option.  Run $0 --help for options"
+        ;;
+esac
+
+# All done
+
+exit 0
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_internal.m4 b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_internal.m4
new file mode 100644
index 0000000000..b210f2afd8
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_internal.m4
@@ -0,0 +1,470 @@
+dnl -*- Autoconf -*-
+dnl
+dnl Copyright © 2010-2017 Inria.  All rights reserved.
+dnl Copyright © 2009, 2011 Université Bordeaux
+dnl Copyright © 2004-2005 The Trustees of Indiana University and Indiana
+dnl                         University Research and Technology
+dnl                         Corporation.  All rights reserved.
+dnl Copyright © 2004-2005 The Regents of the University of California.
+dnl                         All rights reserved.
+dnl Copyright © 2004-2008 High Performance Computing Center Stuttgart,
+dnl                         University of Stuttgart.  All rights reserved.
+dnl Copyright © 2006-2014 Cisco Systems, Inc.  All rights reserved.
+dnl
+dnl See COPYING in top-level directory.
+
+#-----------------------------------------------------------------------
+
+# Probably only ever invoked by hwloc's configure.ac
+AC_DEFUN([HWLOC_BUILD_STANDALONE],[
+    hwloc_mode=standalone
+])dnl
+
+#-----------------------------------------------------------------------
+
+# Probably only ever invoked by hwloc's configure.ac
+AC_DEFUN([HWLOC_DEFINE_ARGS],[
+    # Embedded mode, or standalone?
+    AC_ARG_ENABLE([embedded-mode],
+                    AC_HELP_STRING([--enable-embedded-mode],
+                                   [Using --enable-embedded-mode puts the HWLOC into "embedded" mode.  The default is --disable-embedded-mode, meaning that the HWLOC is in "standalone" mode.]))
+
+    # Change the symbol prefix?
+    AC_ARG_WITH([hwloc-symbol-prefix],
+                AC_HELP_STRING([--with-hwloc-symbol-prefix=STRING],
+                               [STRING can be any valid C symbol name.  It will be prefixed to all public HWLOC symbols.  Default: "hwloc_"]))
+
+    # Debug mode?
+    AC_ARG_ENABLE([debug],
+                  AC_HELP_STRING([--enable-debug],
+                                 [Using --enable-debug enables various hwloc maintainer-level debugging controls.  This option is not recommended for end users.]))
+
+    # Doxygen?
+    AC_ARG_ENABLE([doxygen],
+        [AC_HELP_STRING([--enable-doxygen],
+                        [enable support for building Doxygen documentation (note that this option is ONLY relevant in developer builds; Doxygen documentation is pre-built for tarball builds and this option is therefore ignored)])])
+
+    # Picky?
+    AC_ARG_ENABLE(picky,
+                  AC_HELP_STRING([--disable-picky],
+                                 [When in developer checkouts of hwloc and compiling with gcc, the default is to enable maximum compiler pickyness.  Using --disable-picky or --enable-picky overrides any default setting]))
+
+    # Cairo?
+    AC_ARG_ENABLE([cairo],
+                  AS_HELP_STRING([--disable-cairo],
+                                 [Disable the Cairo back-end of hwloc's lstopo command]))
+
+    # CPUID
+    AC_ARG_ENABLE([cpuid],
+		  AS_HELP_STRING([--disable-cpuid],
+				 [Disable the cpuid-based architecture specific support (x86 component)]))
+
+    # XML using libxml2?
+    AC_ARG_ENABLE([libxml2],
+                  AS_HELP_STRING([--disable-libxml2],
+		                 [Do not use libxml2 for XML support, use a custom minimalistic support]))
+
+    # I/O?
+    AC_ARG_ENABLE([io],
+                  AS_HELP_STRING([--disable-io],
+                                 [Disable I/O discovery entirely (PCI, LinuxIO, CUDA, OpenCL, NVML, GL)]))
+
+    # PCI?
+    AC_ARG_ENABLE([pci],
+                  AS_HELP_STRING([--disable-pci],
+                                 [Disable the PCI device discovery]))
+
+    # OpenCL?
+    AC_ARG_ENABLE([opencl],
+                  AS_HELP_STRING([--disable-opencl],
+                                 [Disable the OpenCL device discovery]))
+
+    # CUDA?
+    AC_ARG_ENABLE([cuda],
+                  AS_HELP_STRING([--disable-cuda],
+                                 [Disable the CUDA device discovery using libcudart]))
+
+    # NVML?
+    AC_ARG_ENABLE([nvml],
+                  AS_HELP_STRING([--disable-nvml],
+                                 [Disable the NVML device discovery]))
+
+    # GL/Display
+    AC_ARG_ENABLE([gl],
+		  AS_HELP_STRING([--disable-gl],
+				 [Disable the GL display device discovery]))
+
+    # LibUdev
+    AC_ARG_ENABLE([libudev],
+                  AS_HELP_STRING([--disable-libudev],
+                                 [Disable the Linux libudev]))
+
+    # Plugins
+    AC_ARG_ENABLE([plugins],
+                  AS_HELP_STRING([--enable-plugins=name,...],
+                                 [Build the given components as dynamically-loaded plugins]))
+
+])dnl
+
+#-----------------------------------------------------------------------
+
+dnl We only build documentation if this is a developer checkout.
+dnl Distribution tarballs just install pre-built documentation that was
+dnl included in the tarball.
+
+# Probably only ever invoked by hwloc's configure.ac
+AC_DEFUN([HWLOC_SETUP_DOCS],[
+    cat <<EOF
+
+###
+### Configuring hwloc documentation
+###
+EOF
+
+    AC_MSG_CHECKING([if this is a developer build])
+    AS_IF([test ! -d "$srcdir/.hg" -a ! -d "$srcdir/.git"],
+          [AC_MSG_RESULT([no (doxygen generation is optional)])
+	   test "x$enable_doxygen" = x && enable_doxygen=no],
+          [AC_MSG_RESULT([yes])
+	   test "x$enable_doxygen" = x && enable_doxygen=yes])
+
+    # Generating the doxygen output requires a few tools.  If we
+    # don't have all of them, refuse to build the docs.
+    AC_ARG_VAR([DOXYGEN], [Location of the doxygen program (required for building the hwloc doxygen documentation)])
+    AC_PATH_TOOL([DOXYGEN], [doxygen])
+    HWLOC_DOXYGEN_VERSION=`doxygen --version 2> /dev/null`
+
+    AC_ARG_VAR([PDFLATEX], [Location of the pdflatex program (required for building the hwloc doxygen documentation)])
+    AC_PATH_TOOL([PDFLATEX], [pdflatex])
+
+    AC_ARG_VAR([MAKEINDEX], [Location of the makeindex program (required for building the hwloc doxygen documentation)])
+    AC_PATH_TOOL([MAKEINDEX], [makeindex])
+
+    AC_ARG_VAR([FIG2DEV], [Location of the fig2dev program (required for building the hwloc doxygen documentation)])
+    AC_PATH_TOOL([FIG2DEV], [fig2dev])
+
+    AC_ARG_VAR([GS], [Location of the gs program (required for building the hwloc doxygen documentation)])
+    AC_PATH_TOOL([GS], [gs])
+
+    AC_ARG_VAR([EPSTOPDF], [Location of the epstopdf program (required for building the hwloc doxygen documentation)])
+    AC_PATH_TOOL([EPSTOPDF], [epstopdf])
+
+    AC_MSG_CHECKING([if can build doxygen docs])
+    AS_IF([test "x$DOXYGEN" != "x" -a "x$PDFLATEX" != "x" -a "x$MAKEINDEX" != "x" -a "x$FIG2DEV" != "x" -a "x$GS" != "x" -a "x$EPSTOPDF" != "x"],
+                 [hwloc_generate_doxs=yes], [hwloc_generate_doxs=no])
+    AC_MSG_RESULT([$hwloc_generate_doxs])
+    AS_IF([test "x$hwloc_generate_doxs" = xyes -a "x$HWLOC_DOXYGEN_VERSION" = x1.6.2],
+                 [hwloc_generate_doxs="no"; AC_MSG_WARN([doxygen 1.6.2 has broken short name support, disabling])])
+
+    AC_REQUIRE([AC_PROG_SED])
+
+    # Making the top-level README requires w3m or lynx.
+    AC_ARG_VAR([W3M], [Location of the w3m program (required for building the top-level hwloc README file)])
+    AC_PATH_TOOL([W3M], [w3m])
+    AC_ARG_VAR([LYNX], [Location of the lynx program (required for building the top-level hwloc README file)])
+    AC_PATH_TOOL([LYNX], [lynx])
+
+    AC_MSG_CHECKING([if can build top-level README])
+    AS_IF([test "x$W3M" != "x"],
+          [hwloc_generate_readme=yes
+           HWLOC_W3_GENERATOR=$W3M],
+          [AS_IF([test "x$LYNX" != "x"],
+                 [hwloc_generate_readme=yes
+                  HWLOC_W3_GENERATOR="$LYNX -dump -nolist"],
+                 [hwloc_generate_readme=no])])
+    AC_SUBST(HWLOC_W3_GENERATOR)
+    AC_MSG_RESULT([$hwloc_generate_readme])
+
+    # If any one of the above tools is missing, we will refuse to make dist.
+    AC_MSG_CHECKING([if will build doxygen docs])
+    AS_IF([test "x$hwloc_generate_doxs" = "xyes" -a "x$enable_doxygen" != "xno"],
+          [], [hwloc_generate_doxs=no])
+    AC_MSG_RESULT([$hwloc_generate_doxs])
+
+    # See if we want to install the doxygen docs
+    AC_MSG_CHECKING([if will install doxygen docs])
+    AS_IF([test "x$hwloc_generate_doxs" = "xyes" -o \
+	    -f "$srcdir/doc/doxygen-doc/man/man3/hwloc_distrib.3" -a \
+    	    -f "$srcdir/doc/doxygen-doc/hwloc-a4.pdf" -a \
+    	    -f "$srcdir/doc/doxygen-doc/hwloc-letter.pdf"],
+          [hwloc_install_doxs=yes],
+          [hwloc_install_doxs=no])
+    AC_MSG_RESULT([$hwloc_install_doxs])
+
+    # For the common developer case, if we're in a developer checkout and
+    # using the GNU compilers, turn on maximum warnings unless
+    # specifically disabled by the user.
+    AC_MSG_CHECKING([whether to enable "picky" compiler mode])
+    hwloc_want_picky=0
+    AS_IF([test "$hwloc_c_vendor" = "gnu"],
+          [AS_IF([test -d "$srcdir/.hg" -o -d "$srcdir/.git"],
+                 [hwloc_want_picky=1])])
+    if test "$enable_picky" = "yes"; then
+        if test "$GCC" = "yes"; then
+            AC_MSG_RESULT([yes])
+            hwloc_want_picky=1
+        else
+            AC_MSG_RESULT([no])
+            AC_MSG_WARN([Warning: --enable-picky used, but is currently only defined for the GCC compiler set -- automatically disabled])
+            hwloc_want_picky=0
+        fi
+    elif test "$enable_picky" = "no"; then
+        AC_MSG_RESULT([no])
+        hwloc_want_picky=0
+    else
+        if test "$hwloc_want_picky" = 1; then
+            AC_MSG_RESULT([yes (default)])
+        else
+            AC_MSG_RESULT([no (default)])
+        fi
+    fi
+    if test "$hwloc_want_picky" = 1; then
+        add="-Wall -Wunused-parameter -Wundef -Wno-long-long -Wsign-compare"
+        add="$add -Wmissing-prototypes -Wstrict-prototypes"
+        add="$add -Wcomment -pedantic -Wshadow"
+
+        HWLOC_CFLAGS="$HWLOC_CFLAGS $add"
+    fi
+
+    # Generate some files for the docs
+    AC_CONFIG_FILES(
+        hwloc_config_prefix[doc/Makefile]
+        hwloc_config_prefix[doc/examples/Makefile]
+        hwloc_config_prefix[doc/doxygen-config.cfg])
+])
+
+#-----------------------------------------------------------------------
+
+# Probably only ever invoked by hwloc's configure.ac
+AC_DEFUN([HWLOC_SETUP_UTILS],[
+    cat <<EOF
+
+###
+### Configuring hwloc command line utilities
+###
+EOF
+
+    AC_REQUIRE([AC_PROG_SED])
+
+    # runstatedir only supported in autoconf >= 2.70 and in some backports
+    if test "x${runstatedir}" != "x"; then
+      HWLOC_runstatedir=${runstatedir}
+    else
+      HWLOC_runstatedir='${localstatedir}/run'
+    fi
+    AC_SUBST([HWLOC_runstatedir])
+
+    # Cairo support
+    hwloc_cairo_happy=no
+    if test "x$enable_cairo" != "xno"; then
+      HWLOC_PKG_CHECK_MODULES([CAIRO], [cairo], [cairo_fill], [cairo.h],
+                              [hwloc_cairo_happy=yes],
+                              [hwloc_cairo_happy=no])
+    fi
+
+    if test "x$hwloc_cairo_happy" = "xyes"; then
+        AC_DEFINE([HWLOC_HAVE_CAIRO], [1], [Define to 1 if you have the `cairo' library.])
+    else
+        AS_IF([test "$enable_cairo" = "yes"],
+              [AC_MSG_WARN([--enable-cairo requested, but Cairo/X11 support was not found])
+               AC_MSG_ERROR([Cannot continue])])
+    fi
+
+    AC_CHECK_TYPES([wchar_t], [
+      AC_CHECK_FUNCS([putwc])
+    ], [], [[#include <wchar.h>]])
+
+    HWLOC_XML_LOCALIZED=1
+    AC_CHECK_HEADERS([locale.h xlocale.h], [
+      AC_CHECK_FUNCS([setlocale])
+      AC_CHECK_FUNCS([uselocale], [HWLOC_XML_LOCALIZED=0])
+    ])
+    AC_SUBST([HWLOC_XML_LOCALIZED])
+    AC_CHECK_HEADERS([langinfo.h], [
+      AC_CHECK_FUNCS([nl_langinfo])
+    ])
+    hwloc_old_LIBS="$LIBS"
+    chosen_curses=""
+    for curses in ncurses curses
+    do
+      for lib in "" -ltermcap -l${curses}w -l$curses
+      do
+        AC_MSG_CHECKING(termcap support using $curses and $lib)
+        LIBS="$hwloc_old_LIBS $lib"
+        AC_LINK_IFELSE([AC_LANG_PROGRAM([[
+#include <$curses.h>
+#include <term.h>
+]], [[tparm(NULL, 0, 0, 0, 0, 0, 0, 0, 0, 0)]])], [
+          AC_MSG_RESULT(yes)
+          AC_SUBST([HWLOC_TERMCAP_LIBS], ["$LIBS"])
+          AC_DEFINE([HWLOC_HAVE_LIBTERMCAP], [1],
+                    [Define to 1 if you have a library providing the termcap interface])
+          chosen_curses=$curses
+        ], [
+          AC_MSG_RESULT(no)
+        ])
+        test "x$chosen_curses" != "x" && break
+      done
+      test "x$chosen_curses" != "x" && break
+    done
+    if test "$chosen_curses" = ncurses
+    then
+      AC_DEFINE([HWLOC_USE_NCURSES], [1], [Define to 1 if ncurses works, preferred over curses])
+    fi
+    LIBS="$hwloc_old_LIBS"
+    unset hwloc_old_LIBS
+
+    _HWLOC_CHECK_DIFF_U
+    _HWLOC_CHECK_DIFF_W
+
+    AC_CHECK_HEADERS([time.h], [
+      AC_CHECK_FUNCS([clock_gettime])
+    ])
+
+    # Only generate this if we're building the utilities
+    AC_CONFIG_FILES(
+        hwloc_config_prefix[utils/Makefile]
+        hwloc_config_prefix[utils/hwloc/Makefile]
+        hwloc_config_prefix[utils/lstopo/Makefile]
+        hwloc_config_prefix[hwloc.pc]
+
+        hwloc_config_prefix[utils/netloc/infiniband/Makefile]
+        hwloc_config_prefix[utils/netloc/draw/Makefile]
+        hwloc_config_prefix[utils/netloc/mpi/Makefile]
+        hwloc_config_prefix[netloc.pc]
+        hwloc_config_prefix[netlocscotch.pc]
+   )
+])dnl
+
+#-----------------------------------------------------------------------
+
+# Probably only ever invoked by hwloc's configure.ac
+AC_DEFUN([HWLOC_SETUP_TESTS],[
+    cat <<EOF
+
+###
+### Configuring tests
+###
+EOF
+
+    AC_CHECK_LIB([pthread], [pthread_self], [hwloc_have_pthread=yes])
+
+    # linux-libnuma.h testing requires libnuma with numa_bitmask_alloc()
+    AC_CHECK_LIB([numa], [numa_available], [
+      AC_CHECK_DECL([numa_bitmask_alloc], [hwloc_have_linux_libnuma=yes], [],
+    	      [#include <numa.h>])
+    ])
+
+    AC_CHECK_HEADERS([infiniband/verbs.h], [
+      AC_CHECK_LIB([ibverbs], [ibv_open_device],
+                   [AC_DEFINE([HAVE_LIBIBVERBS], 1, [Define to 1 if we have -libverbs])
+                    hwloc_have_libibverbs=yes])
+    ])
+
+    AC_CHECK_HEADERS([myriexpress.h], [
+      AC_MSG_CHECKING(if MX_NUMA_NODE exists)
+      AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <myriexpress.h>]],
+                                         [[int a = MX_NUMA_NODE;]])],
+                        [AC_MSG_RESULT(yes)
+                         AC_CHECK_LIB([myriexpress], [mx_get_info],
+                                      [AC_DEFINE([HAVE_MYRIEXPRESS], 1, [Define to 1 if we have -lmyriexpress])
+                                       hwloc_have_myriexpress=yes])],
+                        [AC_MSG_RESULT(no)])])
+
+    AC_CHECK_PROGS(XMLLINT, [xmllint])
+
+    AC_CHECK_PROGS(BUNZIPP, bunzip2, false)
+
+    AC_MSG_CHECKING(if CXX works)
+    AC_LANG_PUSH([C++])
+    AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+#include <iostream>
+using namespace std;
+int foo(void) {
+  cout << "test" << endl;
+  return 0;
+}
+	]])], [hwloc_have_cxx=yes], [hwloc_have_cxx=no])
+    AC_LANG_POP([C++])
+    AC_MSG_RESULT([$hwloc_have_cxx])
+
+    _HWLOC_CHECK_DIFF_U
+
+    # Only generate these files if we're making the tests
+    AC_CONFIG_FILES(
+        hwloc_config_prefix[tests/Makefile]
+        hwloc_config_prefix[tests/hwloc/Makefile]
+        hwloc_config_prefix[tests/hwloc/linux/Makefile]
+        hwloc_config_prefix[tests/hwloc/linux/allowed/Makefile]
+        hwloc_config_prefix[tests/hwloc/linux/gather/Makefile]
+        hwloc_config_prefix[tests/hwloc/x86/Makefile]
+        hwloc_config_prefix[tests/hwloc/xml/Makefile]
+        hwloc_config_prefix[tests/hwloc/ports/Makefile]
+        hwloc_config_prefix[tests/hwloc/rename/Makefile]
+        hwloc_config_prefix[tests/hwloc/linux/allowed/test-topology.sh]
+        hwloc_config_prefix[tests/hwloc/linux/gather/test-gather-topology.sh]
+        hwloc_config_prefix[tests/hwloc/linux/test-topology.sh]
+        hwloc_config_prefix[tests/hwloc/x86/test-topology.sh]
+        hwloc_config_prefix[tests/hwloc/xml/test-topology.sh]
+        hwloc_config_prefix[tests/hwloc/wrapper.sh]
+        hwloc_config_prefix[utils/hwloc/hwloc-compress-dir]
+        hwloc_config_prefix[utils/hwloc/hwloc-gather-topology]
+        hwloc_config_prefix[utils/hwloc/test-hwloc-annotate.sh]
+        hwloc_config_prefix[utils/hwloc/test-hwloc-calc.sh]
+        hwloc_config_prefix[utils/hwloc/test-hwloc-compress-dir.sh]
+        hwloc_config_prefix[utils/hwloc/test-hwloc-diffpatch.sh]
+        hwloc_config_prefix[utils/hwloc/test-hwloc-distrib.sh]
+        hwloc_config_prefix[utils/hwloc/test-hwloc-info.sh]
+        hwloc_config_prefix[utils/hwloc/test-fake-plugin.sh]
+        hwloc_config_prefix[utils/hwloc/test-hwloc-dump-hwdata/Makefile]
+        hwloc_config_prefix[utils/hwloc/test-hwloc-dump-hwdata/test-hwloc-dump-hwdata.sh]
+        hwloc_config_prefix[utils/lstopo/test-lstopo.sh]
+        hwloc_config_prefix[contrib/systemd/Makefile]
+        hwloc_config_prefix[contrib/misc/Makefile]
+        hwloc_config_prefix[tests/netloc/Makefile]
+        hwloc_config_prefix[tests/netloc/tests.sh]
+    )
+
+    AC_CONFIG_COMMANDS([chmoding-scripts], [
+chmod +x ]hwloc_config_prefix[tests/hwloc/linux/test-topology.sh \
+      ]hwloc_config_prefix[tests/hwloc/x86/test-topology.sh \
+      ]hwloc_config_prefix[tests/hwloc/xml/test-topology.sh \
+      ]hwloc_config_prefix[tests/hwloc/linux/allowed/test-topology.sh \
+      ]hwloc_config_prefix[tests/hwloc/linux/gather/test-gather-topology.sh \
+      ]hwloc_config_prefix[tests/hwloc/wrapper.sh \
+      ]hwloc_config_prefix[utils/hwloc/hwloc-compress-dir \
+      ]hwloc_config_prefix[utils/hwloc/hwloc-gather-topology \
+      ]hwloc_config_prefix[utils/hwloc/test-hwloc-annotate.sh \
+      ]hwloc_config_prefix[utils/hwloc/test-hwloc-calc.sh \
+      ]hwloc_config_prefix[utils/hwloc/test-hwloc-compress-dir.sh \
+      ]hwloc_config_prefix[utils/hwloc/test-hwloc-diffpatch.sh \
+      ]hwloc_config_prefix[utils/hwloc/test-hwloc-distrib.sh \
+      ]hwloc_config_prefix[utils/hwloc/test-hwloc-info.sh \
+      ]hwloc_config_prefix[utils/hwloc/test-fake-plugin.sh \
+      ]hwloc_config_prefix[utils/hwloc/test-hwloc-dump-hwdata/test-hwloc-dump-hwdata.sh \
+      ]hwloc_config_prefix[utils/lstopo/test-lstopo.sh \
+      ]hwloc_config_prefix[tests/netloc/tests.sh])
+
+    # These links are only needed in standalone mode.  It would
+    # be nice to m4 foreach this somehow, but whenever I tried
+    # it, I got obscure "invalid tag" errors from
+    # AC_CONFIG_LINKS.  :-\ Since these tests are only run when
+    # built in standalone mode, only generate them in
+    # standalone mode.
+    AC_CONFIG_LINKS(
+	hwloc_config_prefix[tests/hwloc/ports/topology-solaris.c]:hwloc_config_prefix[hwloc/topology-solaris.c]
+	hwloc_config_prefix[tests/hwloc/ports/topology-solaris-chiptype.c]:hwloc_config_prefix[hwloc/topology-solaris-chiptype.c]
+	hwloc_config_prefix[tests/hwloc/ports/topology-aix.c]:hwloc_config_prefix[hwloc/topology-aix.c]
+	hwloc_config_prefix[tests/hwloc/ports/topology-windows.c]:hwloc_config_prefix[hwloc/topology-windows.c]
+	hwloc_config_prefix[tests/hwloc/ports/topology-darwin.c]:hwloc_config_prefix[hwloc/topology-darwin.c]
+	hwloc_config_prefix[tests/hwloc/ports/topology-freebsd.c]:hwloc_config_prefix[hwloc/topology-freebsd.c]
+	hwloc_config_prefix[tests/hwloc/ports/topology-netbsd.c]:hwloc_config_prefix[hwloc/topology-netbsd.c]
+	hwloc_config_prefix[tests/hwloc/ports/topology-hpux.c]:hwloc_config_prefix[hwloc/topology-hpux.c]
+	hwloc_config_prefix[tests/hwloc/ports/topology-bgq.c]:hwloc_config_prefix[hwloc/topology-bgq.c]
+	hwloc_config_prefix[tests/hwloc/ports/topology-opencl.c]:hwloc_config_prefix[hwloc/topology-opencl.c]
+	hwloc_config_prefix[tests/hwloc/ports/topology-cuda.c]:hwloc_config_prefix[hwloc/topology-cuda.c]
+	hwloc_config_prefix[tests/hwloc/ports/topology-nvml.c]:hwloc_config_prefix[hwloc/topology-nvml.c]
+	hwloc_config_prefix[tests/hwloc/ports/topology-gl.c]:hwloc_config_prefix[hwloc/topology-gl.c]
+	hwloc_config_prefix[tests/hwloc/ports/lstopo-windows.c]:hwloc_config_prefix[utils/lstopo/lstopo-windows.c])
+    ])
+])dnl
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_pkg.m4 b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_pkg.m4
new file mode 100644
index 0000000000..0729c9b788
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/config/hwloc_pkg.m4
@@ -0,0 +1,207 @@
+# Copyright © 2010 Cisco Systems, Inc.  All rights reserved.
+# Copyright © 2015 Inria.  All rights reserved.
+# See COPYING in top-level directory.
+#
+# hwloc modification to the following PKG_* macros -- add HWLOC_
+# prefix to make it "safe" to embed these macros in other packages.
+# Originally copied from the pkg-config package; see copyright and
+# license below.
+
+# pkg.m4 - Macros to locate and utilise pkg-config.            -*- Autoconf -*-
+#
+# Copyright © 2004 Scott James Remnant <scott@netsplit.com>.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# HWLOC_PKG_PROG_PKG_CONFIG([MIN-VERSION])
+# ----------------------------------
+# hwloc note: Per https://git.open-mpi.org/trac/hwloc/ticket/55, keep
+# the environment variable $PKG_CONFIG (vs. renaming it
+# $HWLOC_PKG_CONFIG).  Short explanation: $PKG_CONFIG is a well-known
+# environment variable that can be set by users to override what these
+# .m4 macros do.  There's no reason we should have a different env
+# variable name (e.g., $HWLOC_PKG_CONFIG).  So leave it named
+# $PKG_CONFIG both here in this specific macro, and all the other
+# macros that use it.
+AC_DEFUN([HWLOC_PKG_PROG_PKG_CONFIG],
+[m4_pattern_forbid([^_?PKG_[A-Z_]+$])
+m4_pattern_allow([^PKG_CONFIG(_PATH)?$])
+AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility])dnl
+
+if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then
+	AC_PATH_TOOL([PKG_CONFIG], [pkg-config])
+fi
+if test -n "$PKG_CONFIG"; then
+	HWLOC_pkg_min_version=m4_default([$1], [0.9.0])
+	AC_MSG_CHECKING([pkg-config is at least version $HWLOC_pkg_min_version])
+	if $PKG_CONFIG --atleast-pkgconfig-version $HWLOC_pkg_min_version; then
+		AC_MSG_RESULT([yes])
+	else
+		AC_MSG_RESULT([no])
+		PKG_CONFIG=""
+	fi
+
+fi[]dnl
+])# HWLOC_PKG_PROG_PKG_CONFIG
+
+# HWLOC_PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
+#
+# Check to see whether a particular set of modules exists.  Similar
+# to HWLOC_PKG_CHECK_MODULES(), but does not set variables or print errors.
+#
+#
+# Similar to HWLOC_PKG_CHECK_MODULES, make sure that the first instance of
+# this or HWLOC_PKG_CHECK_MODULES is called, or make sure to call
+# HWLOC_PKG_CHECK_EXISTS manually
+# --------------------------------------------------------------
+AC_DEFUN([HWLOC_PKG_CHECK_EXISTS],
+[AC_REQUIRE([HWLOC_PKG_PROG_PKG_CONFIG])dnl
+if test -n "$PKG_CONFIG" && \
+    AC_RUN_LOG([$PKG_CONFIG --exists --silence-errors "$1"]); then
+    m4_ifval([$2], [$2], [:])
+    m4_ifvaln([$3], [else
+                     $3])dnl
+fi])
+
+
+# _HWLOC_PKG_CONFIG([VARIABLE], [COMMAND], [MODULES])
+# ---------------------------------------------
+m4_define([_HWLOC_PKG_CONFIG],
+[if test -n "$PKG_CONFIG"; then
+    if test -n "$$1"; then
+        HWLOC_pkg_cv_[]$1="$$1"
+    else
+        HWLOC_PKG_CHECK_EXISTS([$3],
+                         [HWLOC_pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null`],
+			 [HWLOC_pkg_failed=yes])
+    fi
+else
+	HWLOC_pkg_failed=untried
+fi[]
+])# _HWLOC_PKG_CONFIG
+
+# _HWLOC_PKG_SHORT_ERRORS_SUPPORTED
+# -----------------------------
+AC_DEFUN([_HWLOC_PKG_SHORT_ERRORS_SUPPORTED],
+[AC_REQUIRE([HWLOC_PKG_PROG_PKG_CONFIG])
+if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+        HWLOC_pkg_short_errors_supported=yes
+else
+        HWLOC_pkg_short_errors_supported=no
+fi[]dnl
+])# _HWLOC_PKG_SHORT_ERRORS_SUPPORTED
+
+
+# HWLOC_PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, FUNCTION, HEADER, [ACTION-IF-FOUND],
+# [ACTION-IF-NOT-FOUND])
+#
+#
+# Note that if there is a possibility the first call to
+# HWLOC_PKG_CHECK_MODULES might not happen, you should be sure to include an
+# explicit call to HWLOC_PKG_PROG_PKG_CONFIG in your configure.ac
+#
+#
+# --------------------------------------------------------------
+AC_DEFUN([HWLOC_PKG_CHECK_MODULES],[
+    AC_REQUIRE([HWLOC_PKG_PROG_PKG_CONFIG])dnl
+    AC_ARG_VAR([HWLOC_]$1[_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl
+    AC_ARG_VAR([HWLOC_]$1[_LIBS], [linker flags for $1, overriding pkg-config])dnl
+
+    HWLOC_pkg_failed=no
+    AC_MSG_CHECKING([for $1])
+
+    _HWLOC_PKG_CONFIG([HWLOC_][$1][_CFLAGS], [cflags], [$2])
+    _HWLOC_PKG_CONFIG([HWLOC_][$1][_LIBS], [libs], [$2])
+
+    m4_define([_HWLOC_PKG_TEXT], [Alternatively, you may set the environment variables HWLOC_[]$1[]_CFLAGS
+and HWLOC_[]$1[]_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details.])
+
+    # Check for failure of pkg-config
+    if test $HWLOC_pkg_failed = yes; then
+        _HWLOC_PKG_SHORT_ERRORS_SUPPORTED
+        if test $HWLOC_pkg_short_errors_supported = yes; then
+            HWLOC_[]$1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "$2" 2>&1`
+        else
+            HWLOC_[]$1[]_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "$2" 2>&1`
+        fi
+        # Put the nasty error message in config.log where it belongs
+	echo "$HWLOC_[]$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD
+
+	ifelse([$6], , [AC_MSG_ERROR(dnl
+[Package requirements ($2) were not met:
+
+$HWLOC_$1_PKG_ERRORS
+
+Consider adjusting the PKG_CONFIG_PATH environment variable if you
+installed software in a non-standard prefix.
+
+_HWLOC_PKG_TEXT
+])],
+		[AC_MSG_RESULT([no])
+                $6])
+    elif test $HWLOC_pkg_failed = untried; then
+        ifelse([$6], , [AC_MSG_FAILURE(dnl
+[The pkg-config script could not be found or is too old.  Make sure it
+is in your PATH or set the PKG_CONFIG environment variable to the full
+path to pkg-config.
+
+_HWLOC_PKG_TEXT
+
+To get pkg-config, see <http://pkg-config.freedesktop.org/>.])],
+		[AC_MSG_RESULT([cannot check without pkg-config])
+		$6])
+    else
+        AC_MSG_RESULT([yes])
+
+        # If we got good results from pkg-config, check that they
+        # actually work (i.e., that we can link against the resulting
+        # $LIBS).  The canonical example why we do this is if
+        # pkg-config returns 64 bit libraries but ./configure was run
+        # with CFLAGS=-m32 LDFLAGS=-m32.  pkg-config gave us valid
+        # results, but we'll fail if we try to link.  So detect that
+        # failure now.
+        # There are also cases on Mac where pkg-config returns paths
+        # that do not actually exists until some magic is applied.
+        # http://www.open-mpi.org/community/lists/hwloc-devel/2015/03/4402.php
+        # So check whether we find the header as well.
+        hwloc_cflags_save=$CFLAGS
+        hwloc_cppflags_save=$CPPFLAGS
+        hwloc_libs_save=$LIBS
+        CFLAGS="$CFLAGS $HWLOC_pkg_cv_HWLOC_[]$1[]_CFLAGS"
+        CPPFLAGS="$CPPFLAGS $HWLOC_pkg_cv_HWLOC_[]$1[]_CFLAGS"
+        LIBS="$LIBS $HWLOC_pkg_cv_HWLOC_[]$1[]_LIBS"
+        AC_CHECK_HEADER([$4], [
+            AC_CHECK_FUNC([$3], [hwloc_result=yes], [hwloc_result=no])
+            ], [hwloc_result=no])
+        CFLAGS=$hwloc_cflags_save
+        CPPFLAGS=$hwloc_cppflags_save
+        LIBS=$hwloc_libs_save
+
+        AC_MSG_CHECKING([for final $1 support])
+        AS_IF([test "$hwloc_result" = "yes"],
+              [HWLOC_[]$1[]_CFLAGS=$HWLOC_pkg_cv_HWLOC_[]$1[]_CFLAGS
+               HWLOC_[]$1[]_LIBS=$HWLOC_pkg_cv_HWLOC_[]$1[]_LIBS
+               AC_MSG_RESULT([yes])
+               ifelse([$5], , :, [$5])],
+              [AC_MSG_RESULT([no])
+               ifelse([$6], , :, [$6])])
+    fi[]dnl
+])# HWLOC_PKG_CHECK_MODULES
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/config/netloc.m4 b/opal/mca/hwloc/hwloc2x/hwloc/config/netloc.m4
new file mode 100644
index 0000000000..8565d54026
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/config/netloc.m4
@@ -0,0 +1,116 @@
+dnl -*- Autoconf -*-
+dnl
+dnl Copyright © 2014 Cisco Systems, Inc.  All rights reserved.
+dnl
+dnl Copyright © 2014-2017 Inria.  All rights reserved.
+dnl See COPYING in top-level directory.
+
+# Main hwloc m4 macro, to be invoked by the user
+#
+# Expects:
+# 1. Configuration prefix
+# 2. What to do upon success
+# 3. What to do upon failure
+# 4. If non-empty, print the announcement banner
+#
+AC_DEFUN([NETLOC_SETUP_CORE],[
+    AC_REQUIRE([HWLOC_SETUP_CORE])
+    AC_REQUIRE([AC_PROG_CC])
+
+    AS_IF([test "x$4" != "x"],
+          [cat <<EOF
+
+###
+### Configuring netloc core
+###
+EOF])
+
+    # If no prefix was defined, set a good value
+    m4_ifval([$1],
+             [m4_define([netloc_config_prefix],[$1/])],
+             [m4_define([netloc_config_prefix], [])])
+
+    # These flags are specific to netloc, and should not be redundant
+    # with hwloc.  I.e., if the flag already exists in hwloc, there's
+    # no need to put it here.
+    NETLOC_CFLAGS=$HWLOC_CFLAGS
+    NETLOC_CPPFLAGS=$HWLOC_CPPFLAGS
+    NETLOC_LDFLAGS=$HWLOC_LDFLAGS
+    NETLOC_LIBS=
+    NETLOC_LIBS_PRIVATE=
+
+    # Setup the individual parts of Netloc
+    netloc_happy=yes
+    AS_IF([test "$netloc_happy" = "yes"],
+          [NETLOC_CHECK_PLATFORM([netloc_happy])])
+
+    AC_SUBST(NETLOC_CFLAGS)
+    AC_SUBST(NETLOC_CPPFLAGS)
+    AC_SUBST(NETLOC_LDFLAGS)
+    AC_SUBST(NETLOC_LIBS)
+    AC_SUBST(NETLOC_LIBS_PRIVATE)
+
+    # Set these values explicitly for embedded builds.  Exporting
+    # these values through *_EMBEDDED_* values gives us the freedom to
+    # do something different someday if we ever need to.  There's no
+    # need to fill these values in unless we're in embedded mode.
+    # Indeed, if we're building in embedded mode, we want NETLOC_LIBS
+    # to be empty so that nothing is linked into libnetloc_embedded.la
+    # itself -- only the upper-layer will link in anything required.
+
+    AS_IF([test "$hwloc_mode" = "embedded"],
+          [NETLOC_EMBEDDED_CFLAGS=$NETLOC_CFLAGS
+           NETLOC_EMBEDDED_CPPFLAGS=$NETLOC_CPPFLAGS
+           NETLOC_EMBEDDED_LDADD='$(HWLOC_top_builddir)/netloc/libnetloc_embedded.la'
+           NETLOC_EMBEDDED_LIBS=$NETLOC_LIBS
+           NETLOC_LIBS=],
+          [AC_CONFIG_FILES(netloc_config_prefix[utils/netloc/infiniband/netloc_ib_gather_raw])
+	   AC_CONFIG_COMMANDS([chmoding-netloc-scripts], [
+	       chmod +x ]hwloc_config_prefix[utils/netloc/infiniband/netloc_ib_gather_raw
+	     ])
+	  ])
+    AC_SUBST(NETLOC_EMBEDDED_CFLAGS)
+    AC_SUBST(NETLOC_EMBEDDED_CPPFLAGS)
+    AC_SUBST(NETLOC_EMBEDDED_LDADD)
+    AC_SUBST(NETLOC_EMBEDDED_LIBS)
+
+    AC_CONFIG_FILES(
+        netloc_config_prefix[netloc/Makefile]
+    )
+
+    AS_IF([test "$netloc_happy" = "yes"],
+          [$2],
+          [$3])
+])dnl
+
+AC_DEFUN([NETLOC_CHECK_PLATFORM], [
+    AC_CHECK_FUNC([asprintf])
+    AC_MSG_CHECKING([if netloc supports this platform])
+    AS_IF([test "$ac_cv_func_asprintf" != "yes"],
+          [$1=no netloc_missing_reason=" (asprintf missing)"])
+    AS_IF([test "$hwloc_windows" = "yes"],
+          [$1=no netloc_missing_reason=" (Windows platform)"])
+    AC_MSG_RESULT([$$1$netloc_missing_reason])
+
+    AC_CHECK_LIB(scotch, SCOTCH_archSub,
+        [scotch_found_headers=yes;
+        AC_DEFINE([NETLOC_SCOTCH], [1],
+            [Define to 1 if scotch is found and netlocscotch is enabled])
+    ], [], -lscotcherr)
+    AC_CHECK_HEADERS([mpi.h],
+            [mpi_found_headers=yes;
+            MPI_CPPFLAGS=`mpicc -showme:compile 2>/dev/null`
+            MPI_LDADD=`mpicc -showme:link 2>/dev/null`
+            AC_SUBST(MPI_CPPFLAGS)
+            AC_SUBST(MPI_LDADD)
+            break;])
+
+    AC_CHECK_PROG([xz],[xz],[yes],[no])
+])dnl
+
+AC_DEFUN([NETLOC_DO_AM_CONDITIONALS], [
+    AM_CONDITIONAL([BUILD_NETLOC], [test "$netloc_happy" = "yes"])
+    AM_CONDITIONAL([BUILD_NETLOCSCOTCH], [test "x$scotch_found_headers" = "xyes"])
+    AM_CONDITIONAL([BUILD_MPITOOLS], [test "x$mpi_found_headers" = "xyes"])
+    AM_CONDITIONAL([FOUND_XZ], [test "x$xz" = xyes])
+])dnl
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/configure.ac b/opal/mca/hwloc/hwloc2x/hwloc/configure.ac
new file mode 100644
index 0000000000..27f3c1a73a
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/configure.ac
@@ -0,0 +1,271 @@
+# -*- shell-script -*-
+#
+# Copyright © 2009      CNRS
+# Copyright © 2009-2016 Inria.  All rights reserved.
+# Copyright © 2009, 2011-2012      Université Bordeaux
+# Copyright © 2009-2014 Cisco Systems, Inc.  All rights reserved.
+#
+# See COPYING in top-level directory.
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+####################################################################
+# Autoconf, Automake, and Libtool bootstrapping
+####################################################################
+
+AC_INIT([hwloc],
+        [m4_normalize(esyscmd([config/hwloc_get_version.sh VERSION --version]))],
+        [http://www.open-mpi.org/projects/hwloc/], [hwloc])
+AC_PREREQ(2.63)
+AC_CONFIG_AUX_DIR(./config)
+# Note that this directory must *exactly* match what was specified via
+# -I in ACLOCAL_AMFLAGS in the top-level Makefile.am.
+AC_CONFIG_MACRO_DIR(./config)
+
+cat <<EOF
+
+###
+### Configuring hwloc distribution tarball
+### Startup tests
+###
+EOF
+
+# This must be before AM_INIT_AUTOMAKE
+AC_CANONICAL_TARGET
+
+# Init automake
+AM_INIT_AUTOMAKE([1.11 dist-bzip2 subdir-objects foreign tar-ustar parallel-tests -Wall -Werror])
+m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
+
+AC_LANG([C])
+AC_USE_SYSTEM_EXTENSIONS
+
+####################################################################
+# Setup the configure-results header file
+####################################################################
+
+AH_TOP([/* -*- c -*-
+ *
+ * Copyright © 2009, 2011, 2012 CNRS, inria., Université Bordeaux  All rights reserved.
+ * Copyright © 2009-2014 Cisco Systems, Inc.  All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ *
+ * This file is automatically generated by configure.  Edits will be lost
+ * the next time you run configure!
+ */
+
+#ifndef HWLOC_CONFIGURE_H
+#define HWLOC_CONFIGURE_H
+])
+AH_BOTTOM([
+#endif /* HWLOC_CONFIGURE_H */
+])
+
+####################################################################
+# Setup Libtool
+####################################################################
+
+# We want new Libtool.  None of that old stuff.  Pfft.
+m4_ifdef([LT_PREREQ], [],
+         [m4_fatal([libtool version 2.2.6 or higher is required], [63])])
+LT_PREREQ([2.2.6])
+
+# Setup libtool, but disable F77, Java and Windows Resource
+# Compiler support -- we don't need that stuff.
+AM_ENABLE_SHARED
+AM_DISABLE_STATIC
+
+# This did not exist pre AM 1.11.x (where x is somewhere >0 and <3),
+# but it is necessary in AM 1.12.x.
+m4_ifdef([AM_PROG_AR], [AM_PROG_AR])
+
+LT_INIT([dlopen win32-dll])
+LT_LANG([C])
+LT_LANG([C++])
+
+####################################################################
+# Setup C, C++ compilers
+####################################################################
+
+CFLAGS_save=$CFLAGS
+AC_PROG_CC
+AM_PROG_CC_C_O
+AC_PROG_CC_C99
+CFLAGS=$CFLAGS_save
+
+AC_ARG_VAR(CC_FOR_BUILD,[build system C compiler])
+AS_IF([test -z "$CC_FOR_BUILD"],[
+    AC_SUBST([CC_FOR_BUILD], [$CC])
+])
+
+####################################################################
+# CLI arguments
+####################################################################
+
+# Define hwloc's configure arguments
+HWLOC_DEFINE_ARGS
+
+# If debug mode, add -g
+AS_IF([test "$hwloc_debug" = "1"],
+      [CFLAGS="$CFLAGS -g"])
+
+# If the user didn't specifically ask for embedding mode, default to
+# standalone mode
+AS_IF([test "$enable_embedded_mode" != "yes"],
+      [AS_IF([test ! -d "$srcdir/doc"],
+             [AC_MSG_WARN([The hwloc source tree looks incomplete for a standalone])
+              AC_MSG_WARN([build.  Perhaps this hwloc tree is intended for an embedded])
+              AC_MSG_WARN([build?  Try using the --enable-embedded-mode switch.])
+              AC_MSG_ERROR([Cannot build standalone hwloc])],
+             [HWLOC_BUILD_STANDALONE])])
+
+####################################################################
+# Setup for the hwloc API
+####################################################################
+
+AC_SUBST([libhwloc_so_version])
+
+# Setup the hwloc core
+HWLOC_SETUP_CORE([], [], [AC_MSG_ERROR([Cannot build hwloc core])], [1])
+
+####################################################################
+# Setup the netloc API
+####################################################################
+
+AC_SUBST([libnetloc_so_version])
+
+AC_ARG_ENABLE([netloc],
+              [AC_HELP_STRING([--enable-netloc],
+                              [The Netloc functionality is enabled by default, but will be silently skipped if it cannot be built (e.g., not supported on your platform).  Using --enable-netloc will cause configure to abort if Netloc cannot be built.  Using --disable-netloc will cause configure to skip attempting to build netloc at all.])
+              ])
+
+AS_IF([test "$enable_netloc" != "no"],
+      [NETLOC_SETUP_CORE([], [],
+           [AS_IF([test "$enable_netloc" = "yes"],
+                  [AC_MSG_ERROR([Cannot build netloc core])])
+           ],
+           [1])
+      ])
+
+####################################################################
+# Version information
+####################################################################
+
+# HWLOC_VERSION was setup by HWLOC_SETUP_CORE above.
+
+# Make configure depend on the VERSION file, since it's used in AC_INIT
+AC_SUBST([CONFIGURE_DEPENDENCIES], ['$(top_srcdir)/VERSION'])
+
+# Override/fixup the version numbers set by AC_INIT, since on
+# developer builds, there's no good way to know what the version is
+# before running configure :(.  We only use the base version number
+# for the version set in AC_INIT.  This will always match reality
+# because we add the VERSION file (the only way to change the
+# major.minor.release{greek}) into the configure dependencies.
+PACKAGE_VERSION="$HWLOC_VERSION"
+PACKAGE_STRING="${PACKAGE_NAME} ${PACKAGE_VERSION}"
+VERSION="${PACKAGE_VERSION}"
+
+# For standalone configurations, we also include a .so version number.
+
+. $srcdir/VERSION
+
+####################################################################
+# Setup the rest of the infrastructure
+####################################################################
+
+# Setup hwloc's docs, utils, and tests
+AS_IF([test "$hwloc_mode" = "standalone"],
+      [HWLOC_SETUP_DOCS
+       HWLOC_SETUP_UTILS
+       HWLOC_SETUP_TESTS])
+
+cat <<EOF
+
+###
+### Performing final hwloc configuration
+###
+EOF
+
+# Run the AM_CONDITIONALs
+HWLOC_DO_AM_CONDITIONALS
+NETLOC_DO_AM_CONDITIONALS
+
+####################################################################
+# Final output
+####################################################################
+
+# Set the final flags
+CFLAGS="$HWLOC_EMBEDDED_CFLAGS $CFLAGS"
+CPPFLAGS="$HWLOC_EMBEDDED_CPPFLAGS $CPPFLAGS"
+LIBS="$HWLOC_EMBEDDED_LIBS $LIBS"
+
+# Party on
+AC_OUTPUT
+
+# Warn if we didn't have pkg-config
+if test "x$PKG_CONFIG" = x; then
+  cat << EOF
+
+************************************************************************
+Could not detect/enable some features such as libxml2 and Cairo support
+because pkg-config isn't available.
+************************************************************************
+EOF
+fi
+
+# Show which optional support we'll be building
+hwloc_xml_status=basic
+AS_IF([test "$hwloc_libxml2_happy" = "yes"], [hwloc_xml_status=full])
+netloc_status=no
+AS_IF([test "$netloc_happy" = "yes"], [netloc_status=yes])
+netlocscotch_status=no
+AS_IF([test "$scotch_found_headers" = "yes"], [netlocscotch_status=yes])
+
+# Prepare the I/O summary
+hwloc_probeio_list=
+if test "x$hwloc_pciaccess_happy" = "xyes" -o "x$hwloc_linuxpci_happy" = "xyes"; then
+  test "x$hwloc_pciaccess_happy" = "xyes" && hwloc_probepci_list=pciaccess
+  test "x$hwloc_pciaccess_happy$hwloc_linuxpci_happy" = "xyesyes" && hwloc_probepci_list="${hwloc_probepci_list}+"
+  test "x$hwloc_linuxio_happy" = "xyes" && hwloc_probepci_list="${hwloc_probepci_list}linux"
+  hwloc_probeio_list="$hwloc_probeio_list PCI($hwloc_probepci_list)"
+fi
+test "x$hwloc_linuxio_happy" = "xyes" && hwloc_probeio_list="$hwloc_probeio_list LinuxIO"
+test "x$hwloc_opencl_happy" = "xyes" && hwloc_probeio_list="$hwloc_probeio_list OpenCL"
+test "x$hwloc_have_cudart" = "xyes" && hwloc_probeio_list="$hwloc_probeio_list CUDA"
+test "x$hwloc_nvml_happy" = "xyes" && hwloc_probeio_list="$hwloc_probeio_list NVML"
+test "x$hwloc_gl_happy" = "xyes" && hwloc_probeio_list="$hwloc_probeio_list GL"
+# if nothing, say "no"
+test "x$hwloc_probeio_list" = "x" && hwloc_probeio_list=" no"
+
+# Beginning of generic support
+cat <<EOF
+
+-----------------------------------------------------------------------------
+Hwloc optional build support status (more details can be found above):
+
+Probe / display I/O devices:$hwloc_probeio_list
+Graphical output (Cairo):    $hwloc_cairo_happy
+XML input / output:          $hwloc_xml_status
+Netloc functionality:        $netloc_status (with scotch: $netlocscotch_status)
+EOF
+
+# Plugin support
+hwloc_plugin_summary=$hwloc_have_plugins
+test "x$hwloc_plugin_components" != "x" && hwloc_plugin_summary="yes ("`echo $hwloc_plugin_components`")" # echo removes the starting space
+cat <<EOF
+Plugin support:              $hwloc_plugin_summary
+EOF
+
+# End of generic support
+cat <<EOF
+-----------------------------------------------------------------------------
+
+EOF
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/contrib/hwloc-valgrind.supp b/opal/mca/hwloc/hwloc2x/hwloc/contrib/hwloc-valgrind.supp
new file mode 100644
index 0000000000..8992fa33af
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/contrib/hwloc-valgrind.supp
@@ -0,0 +1,161 @@
+# Copyright © 2012-2015 Inria.  All rights reserved.
+# See COPYING in top-level directory.
+
+# suppressions file to be passed to valgrind with
+#   --suppressions=/path/to/hwloc-valgrind.supp
+
+# zlib (brought by libpci or libxml2) doesn't cleanup its global state
+{
+   inflate_init
+   Memcheck:Cond
+   fun:inflateReset2
+   fun:inflateInit2_
+}
+
+# hwloc cannot free the global parser (with xmlCleanupParser()) because other threads may be using it
+{
+   xml_init_parser
+   Memcheck:Leak
+   ...
+   fun:xmlInitParser
+}
+
+# hwloc cannot free the global xml dict RMutex because it cannot call xmlCleanupParser() as explained above
+{
+   xml_dict_create_new_rmutex
+   Memcheck:Leak
+   fun:malloc
+   fun:xmlNewRMutex
+   ...
+   fun:xmlDictCreate
+}
+
+# ltdl dlopen global state?
+{
+   ltdl_dlopen_doit_leak
+   Memcheck:Leak
+   ...
+   fun:dl_open_worker
+   fun:_dl_catch_error
+   fun:_dl_open
+   fun:dlopen_doit
+}
+
+# ltdl_dlclose_leak
+{
+   ltdl_dlclose_leak
+   Memcheck:Leak
+   ...
+   fun:_dl_close_worker
+   fun:_dl_close
+   ...
+   fun:dlclose
+}
+
+# lt_dlforeachfile abusing paths
+{
+   lt_dlforeachfile_addr8
+   Memcheck:Addr8
+   fun:_wordcopy_fwd_dest_aligned
+   fun:__GI_memmove
+   fun:argz_insert
+   ...
+   fun:lt_dlforeachfile
+}
+
+# cuda
+{
+   cuda_leak
+   Memcheck:Leak
+   ...
+   obj:*libcuda*
+}
+
+# nvml
+{
+   nvmlInit_cond
+   Memcheck:Cond
+   ...
+   obj:*nvidia-ml*
+   ...
+   fun:nvmlInit
+}
+
+# amd opencl
+{
+   atical_leak
+   Memcheck:Leak
+   ...
+   obj:*libatical*
+}
+{
+   atical_cond
+   Memcheck:Cond
+   ...
+   obj:*libatical*
+}
+{
+   amdocl_leak
+   Memcheck:Leak
+   ...
+   obj:*libamdocl*
+}
+{
+   amdocl_param
+   Memcheck:Param
+   write(buf)
+   fun:*write*
+   obj:*libamdocl*
+}
+{
+   opencl_leak
+   Memcheck:Leak
+   ...
+   obj:*libOpenCL*
+   ...
+   fun:clGetPlatformIDs
+}
+{
+   libatiadl_xcb_leak
+   Memcheck:Leak
+   ...
+   obj:*libxcb*
+   ...
+   fun:XOpenDisplay
+   ...
+   obj:*libatiadl*
+}
+
+# libpciaccess leaks during PCI discovery
+{
+   libpciaccess_device_name_leak
+   Memcheck:Leak
+   ...
+   fun:pci_device_get_device_name
+   fun:hwloc_look_libpci
+}
+{
+   libpciaccess_leak
+   Memcheck:Leak
+   ...
+   obj:*libpciaccess*
+   ...
+   fun:hwloc_look_libpci
+}
+
+# libudev global hashes
+{
+   libudev_hashmap_property
+   Memcheck:Leak
+   fun:malloc
+   ...
+   fun:udev_device_get_property_value
+}
+{
+   libudev_hashmap_sysname
+   Memcheck:Leak
+   fun:malloc
+   ...
+   fun:udev_device_new_from_subsystem_sysname
+}
+
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/contrib/misc/Makefile.am b/opal/mca/hwloc/hwloc2x/hwloc/contrib/misc/Makefile.am
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/contrib/misc/Makefile.am
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/contrib/systemd/Makefile.am b/opal/mca/hwloc/hwloc2x/hwloc/contrib/systemd/Makefile.am
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/contrib/systemd/Makefile.am
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/doc/Makefile.am b/opal/mca/hwloc/hwloc2x/hwloc/doc/Makefile.am
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/doc/Makefile.am
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/doc/doxygen-config.cfg.in b/opal/mca/hwloc/hwloc2x/hwloc/doc/doxygen-config.cfg.in
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/doc/doxygen-config.cfg.in
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/doc/examples/Makefile.am b/opal/mca/hwloc/hwloc2x/hwloc/doc/examples/Makefile.am
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/doc/examples/Makefile.am
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc.pc.in b/opal/mca/hwloc/hwloc2x/hwloc/hwloc.pc.in
new file mode 100644
index 0000000000..266319bb38
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc.pc.in
@@ -0,0 +1,12 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: hwloc
+Description: Hardware locality detection and management library
+Version: @HWLOC_VERSION@
+Requires.private: @HWLOC_REQUIRES@
+Cflags: -I${includedir}
+Libs: -L${libdir} -lhwloc
+Libs.private: @HWLOC_LIBS@ @HWLOC_LIBS_PRIVATE@
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/Makefile.am b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/Makefile.am
new file mode 100644
index 0000000000..b7ad7455a7
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/Makefile.am
@@ -0,0 +1,230 @@
+# Copyright © 2009-2016 Inria.  All rights reserved.
+# Copyright © 2009-2012 Université Bordeaux
+# Copyright © 2009-2014 Cisco Systems, Inc.  All rights reserved.
+# Copyright © 2011-2012 Oracle and/or its affiliates.  All rights reserved.
+# See COPYING in top-level directory.
+
+AM_CFLAGS = $(HWLOC_CFLAGS)
+AM_CPPFLAGS = $(HWLOC_CPPFLAGS) -DHWLOC_INSIDE_LIBHWLOC
+AM_LDFLAGS = $(HWLOC_LDFLAGS)
+
+EXTRA_DIST = dolib.c
+
+# If we're in standalone mode, build the installable library.
+# Otherwise, build the embedded library.
+
+if HWLOC_BUILD_STANDALONE
+lib_LTLIBRARIES = libhwloc.la
+else
+noinst_LTLIBRARIES = libhwloc_embedded.la
+endif
+
+pluginsdir = @HWLOC_PLUGINS_DIR@
+plugins_LTLIBRARIES =
+plugins_ldflags = -module -avoid-version -lltdl
+# Beware that files are not rebuilt automatically when reconfiguring with different paths in these flags.
+AM_CPPFLAGS += -DHWLOC_PLUGINS_PATH=\"$(HWLOC_PLUGINS_PATH)\" -DRUNSTATEDIR=\"$(HWLOC_runstatedir)\"
+
+# Sources and ldflags
+
+sources = \
+        topology.c \
+        traversal.c \
+        distances.c \
+        components.c \
+        bind.c \
+        bitmap.c \
+        pci-common.c \
+        diff.c \
+        misc.c \
+        base64.c \
+        topology-noos.c \
+        topology-synthetic.c \
+        topology-xml.c \
+        topology-xml-nolibxml.c
+ldflags =
+
+# Conditionally add to the sources and ldflags
+
+if HWLOC_HAVE_LIBXML2
+if HWLOC_XML_LIBXML_BUILD_STATIC
+sources += topology-xml-libxml.c
+else
+plugins_LTLIBRARIES += hwloc_xml_libxml.la
+hwloc_xml_libxml_la_SOURCES = topology-xml-libxml.c
+hwloc_xml_libxml_la_CFLAGS = $(AM_CFLAGS) $(HWLOC_LIBXML2_CFLAGS) -DHWLOC_INSIDE_PLUGIN
+hwloc_xml_libxml_la_LDFLAGS = $(plugins_ldflags) $(HWLOC_LIBXML2_LIBS)
+endif
+endif HWLOC_HAVE_LIBXML2
+
+if HWLOC_HAVE_PCIACCESS
+if HWLOC_PCI_BUILD_STATIC
+sources += topology-pci.c
+else
+plugins_LTLIBRARIES += hwloc_pci.la
+hwloc_pci_la_SOURCES = topology-pci.c
+hwloc_pci_la_CFLAGS = $(AM_CFLAGS) $(HWLOC_PCIACCESS_CFLAGS) -DHWLOC_INSIDE_PLUGIN
+hwloc_pci_la_LDFLAGS = $(plugins_ldflags) $(HWLOC_PCIACCESS_LIBS)
+endif
+endif HWLOC_HAVE_PCIACCESS
+
+if HWLOC_HAVE_OPENCL
+if HWLOC_OPENCL_BUILD_STATIC
+sources += topology-opencl.c
+else
+plugins_LTLIBRARIES += hwloc_opencl.la
+hwloc_opencl_la_SOURCES = topology-opencl.c
+hwloc_opencl_la_CFLAGS = $(AM_CFLAGS) $(HWLOC_OPENCL_CFLAGS) -DHWLOC_INSIDE_PLUGIN
+hwloc_opencl_la_LDFLAGS = $(plugins_ldflags) $(HWLOC_OPENCL_LIBS)
+endif
+endif HWLOC_HAVE_OPENCL
+
+if HWLOC_HAVE_CUDART
+if HWLOC_CUDA_BUILD_STATIC
+sources += topology-cuda.c
+else
+plugins_LTLIBRARIES += hwloc_cuda.la
+hwloc_cuda_la_SOURCES = topology-cuda.c
+hwloc_cuda_la_CFLAGS = $(AM_CFLAGS) $(HWLOC_CUDA_CFLAGS) -DHWLOC_INSIDE_PLUGIN
+hwloc_cuda_la_LDFLAGS = $(plugins_ldflags) $(HWLOC_CUDA_LIBS)
+endif
+endif HWLOC_HAVE_CUDART
+
+if HWLOC_HAVE_NVML
+if HWLOC_NVML_BUILD_STATIC
+sources += topology-nvml.c
+else
+plugins_LTLIBRARIES += hwloc_nvml.la
+hwloc_nvml_la_SOURCES = topology-nvml.c
+hwloc_nvml_la_CFLAGS = $(AM_CFLAGS) $(HWLOC_NVML_CFLAGS) -DHWLOC_INSIDE_PLUGIN
+hwloc_nvml_la_LDFLAGS = $(plugins_ldflags) $(HWLOC_NVML_LIBS)
+endif
+endif HWLOC_HAVE_NVML
+
+if HWLOC_HAVE_GL
+if HWLOC_GL_BUILD_STATIC
+sources += topology-gl.c
+else
+plugins_LTLIBRARIES += hwloc_gl.la
+hwloc_gl_la_SOURCES = topology-gl.c
+hwloc_gl_la_CFLAGS = $(AM_CFLAGS) $(HWLOC_GL_CFLAGS) -DHWLOC_INSIDE_PLUGIN
+hwloc_gl_la_LDFLAGS = $(plugins_ldflags) $(HWLOC_GL_LIBS)
+endif
+endif HWLOC_HAVE_GL
+
+if HWLOC_HAVE_SOLARIS
+sources += topology-solaris.c
+sources += topology-solaris-chiptype.c
+endif HWLOC_HAVE_SOLARIS
+
+if HWLOC_HAVE_LINUX
+sources += topology-linux.c topology-hardwired.c
+endif HWLOC_HAVE_LINUX
+
+if HWLOC_HAVE_BGQ
+sources += topology-bgq.c
+endif HWLOC_HAVE_BGQ
+
+if HWLOC_HAVE_AIX
+sources += topology-aix.c
+ldflags += -lpthread
+endif HWLOC_HAVE_AIX
+
+if HWLOC_HAVE_HPUX
+sources += topology-hpux.c
+ldflags += -lpthread
+endif HWLOC_HAVE_HPUX
+
+if HWLOC_HAVE_WINDOWS
+sources += topology-windows.c
+endif HWLOC_HAVE_WINDOWS
+
+if HWLOC_HAVE_DARWIN
+sources += topology-darwin.c
+endif HWLOC_HAVE_DARWIN
+
+if HWLOC_HAVE_FREEBSD
+sources += topology-freebsd.c
+endif HWLOC_HAVE_FREEBSD
+
+if HWLOC_HAVE_NETBSD
+sources += topology-netbsd.c
+ldflags += -lpthread
+endif HWLOC_HAVE_NETBSD
+
+if HWLOC_HAVE_X86_CPUID
+sources += topology-x86.c
+endif HWLOC_HAVE_X86_CPUID
+
+if HWLOC_HAVE_GCC
+ldflags += -no-undefined
+endif HWLOC_HAVE_GCC
+
+
+if HWLOC_HAVE_WINDOWS
+# Windows specific rules
+
+LC_MESSAGES=C
+export LC_MESSAGES
+ldflags += -Xlinker --output-def -Xlinker .libs/libhwloc.def
+
+if HWLOC_HAVE_MS_LIB
+dolib$(EXEEXT): dolib.c
+	$(CC_FOR_BUILD) $< -o $@
+.libs/libhwloc.lib: libhwloc.la dolib$(EXEEXT)
+	[ ! -r .libs/libhwloc.def ] || ./dolib$(EXEEXT) "$(HWLOC_MS_LIB)" $(HWLOC_MS_LIB_ARCH) .libs/libhwloc.def $(libhwloc_so_version) .libs/libhwloc.lib
+all-local: .libs/libhwloc.lib
+clean-local:
+	$(RM) dolib$(EXEEXT)
+endif HWLOC_HAVE_MS_LIB
+
+install-exec-hook:
+	[ ! -r .libs/libhwloc.def ] || $(INSTALL) .libs/libhwloc.def $(DESTDIR)$(libdir)
+if HWLOC_HAVE_MS_LIB
+	[ ! -r .libs/libhwloc.def ] || $(INSTALL) .libs/libhwloc.lib $(DESTDIR)$(libdir)
+	[ ! -r .libs/libhwloc.def ] || $(INSTALL) .libs/libhwloc.exp $(DESTDIR)$(libdir)
+endif HWLOC_HAVE_MS_LIB
+
+uninstall-local:
+	rm -f $(DESTDIR)$(libdir)/libhwloc.def
+if HWLOC_HAVE_MS_LIB
+	rm -f $(DESTDIR)$(libdir)/libhwloc.lib $(DESTDIR)$(libdir)/libhwloc.exp
+endif HWLOC_HAVE_MS_LIB
+
+# End of Windows specific rules
+endif HWLOC_HAVE_WINDOWS
+
+
+# Installable library
+
+libhwloc_la_SOURCES = $(sources)
+libhwloc_la_LDFLAGS = $(ldflags) -version-info $(libhwloc_so_version) $(HWLOC_LIBS)
+
+if HWLOC_HAVE_PLUGINS
+AM_CPPFLAGS += $(LTDLINCL)
+libhwloc_la_LDFLAGS += -export-dynamic
+libhwloc_la_LIBADD = $(LIBLTDL)
+endif
+
+# Embedded library (note the lack of a .so version number -- that
+# intentionally only appears in the installable library).  Also note
+# the lack of _LDFLAGS -- all libs are added by the upper layer (via
+# HWLOC_EMBEDDED_LIBS).
+
+libhwloc_embedded_la_SOURCES = $(sources)
+
+# XML data (only install if we're building in standalone mode)
+
+if HWLOC_BUILD_STANDALONE
+xml_DATA = $(srcdir)/hwloc.dtd
+xmldir = $(pkgdatadir)
+EXTRA_DIST += hwloc.dtd
+endif
+
+DISTCLEANFILES = static-components.h
+
+if HWLOC_HAVE_PLUGINS
+check_LTLIBRARIES = hwloc_fake.la
+hwloc_fake_la_SOURCES = topology-fake.c
+hwloc_fake_la_LDFLAGS = $(plugins_ldflags) -rpath /nowhere # force libtool to build a shared library even if it's check-only
+endif
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/base64.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/base64.c
new file mode 100644
index 0000000000..4e1976fde4
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/base64.c
@@ -0,0 +1,306 @@
+/*
+ * Copyright © 2012 Inria.  All rights reserved.
+ * See COPYING in top-level directory.
+ *
+ * Modifications after import:
+ * - removed all #if
+ * - updated prototypes
+ * - updated #include
+ */
+
+/*	$OpenBSD: base64.c,v 1.5 2006/10/21 09:55:03 otto Exp $	*/
+
+/*
+ * Copyright (c) 1996 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
+ * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
+ * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+
+/*
+ * Portions Copyright (c) 1995 by International Business Machines, Inc.
+ *
+ * International Business Machines, Inc. (hereinafter called IBM) grants
+ * permission under its copyrights to use, copy, modify, and distribute this
+ * Software with or without fee, provided that the above copyright notice and
+ * all paragraphs of this notice appear in all copies, and that the name of IBM
+ * not be used in connection with the marketing of any product incorporating
+ * the Software or modifications thereof, without specific, written prior
+ * permission.
+ *
+ * To the extent it has a right to do so, IBM grants an immunity from suit
+ * under its patents, if any, for the use, sale or manufacture of products to
+ * the extent that such products are used for performing Domain Name System
+ * dynamic updates in TCP/IP networks by means of the Software.  No immunity is
+ * granted for any product per se or for any other function of any product.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", AND IBM DISCLAIMS ALL WARRANTIES,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE.  IN NO EVENT SHALL IBM BE LIABLE FOR ANY SPECIAL,
+ * DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE, EVEN
+ * IF IBM IS APPRISED OF THE POSSIBILITY OF SUCH DAMAGES.
+ */
+
+/* OPENBSD ORIGINAL: lib/libc/net/base64.c */
+
+static const char Base64[] =
+	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+static const char Pad64 = '=';
+
+/* (From RFC1521 and draft-ietf-dnssec-secext-03.txt)
+   The following encoding technique is taken from RFC 1521 by Borenstein
+   and Freed.  It is reproduced here in a slightly edited form for
+   convenience.
+
+   A 65-character subset of US-ASCII is used, enabling 6 bits to be
+   represented per printable character. (The extra 65th character, "=",
+   is used to signify a special processing function.)
+
+   The encoding process represents 24-bit groups of input bits as output
+   strings of 4 encoded characters. Proceeding from left to right, a
+   24-bit input group is formed by concatenating 3 8-bit input groups.
+   These 24 bits are then treated as 4 concatenated 6-bit groups, each
+   of which is translated into a single digit in the base64 alphabet.
+
+   Each 6-bit group is used as an index into an array of 64 printable
+   characters. The character referenced by the index is placed in the
+   output string.
+
+                         Table 1: The Base64 Alphabet
+
+      Value Encoding  Value Encoding  Value Encoding  Value Encoding
+          0 A            17 R            34 i            51 z
+          1 B            18 S            35 j            52 0
+          2 C            19 T            36 k            53 1
+          3 D            20 U            37 l            54 2
+          4 E            21 V            38 m            55 3
+          5 F            22 W            39 n            56 4
+          6 G            23 X            40 o            57 5
+          7 H            24 Y            41 p            58 6
+          8 I            25 Z            42 q            59 7
+          9 J            26 a            43 r            60 8
+         10 K            27 b            44 s            61 9
+         11 L            28 c            45 t            62 +
+         12 M            29 d            46 u            63 /
+         13 N            30 e            47 v
+         14 O            31 f            48 w         (pad) =
+         15 P            32 g            49 x
+         16 Q            33 h            50 y
+
+   Special processing is performed if fewer than 24 bits are available
+   at the end of the data being encoded.  A full encoding quantum is
+   always completed at the end of a quantity.  When fewer than 24 input
+   bits are available in an input group, zero bits are added (on the
+   right) to form an integral number of 6-bit groups.  Padding at the
+   end of the data is performed using the '=' character.
+
+   Since all base64 input is an integral number of octets, only the
+         -------------------------------------------------
+   following cases can arise:
+
+       (1) the final quantum of encoding input is an integral
+           multiple of 24 bits; here, the final unit of encoded
+	   output will be an integral multiple of 4 characters
+	   with no "=" padding,
+       (2) the final quantum of encoding input is exactly 8 bits;
+           here, the final unit of encoded output will be two
+	   characters followed by two "=" padding characters, or
+       (3) the final quantum of encoding input is exactly 16 bits;
+           here, the final unit of encoded output will be three
+	   characters followed by one "=" padding character.
+   */
+
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include <private/private.h>
+
+int
+hwloc_encode_to_base64(const char *src, size_t srclength, char *target, size_t targsize)
+{
+	size_t datalength = 0;
+	unsigned char input[3];
+	unsigned char output[4];
+	unsigned int i;
+
+	while (2 < srclength) {
+		input[0] = *src++;
+		input[1] = *src++;
+		input[2] = *src++;
+		srclength -= 3;
+
+		output[0] = input[0] >> 2;
+		output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4);
+		output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6);
+		output[3] = input[2] & 0x3f;
+
+		if (datalength + 4 > targsize)
+			return (-1);
+		target[datalength++] = Base64[output[0]];
+		target[datalength++] = Base64[output[1]];
+		target[datalength++] = Base64[output[2]];
+		target[datalength++] = Base64[output[3]];
+	}
+
+	/* Now we worry about padding. */
+	if (0 != srclength) {
+		/* Get what's left. */
+		input[0] = input[1] = input[2] = '\0';
+		for (i = 0; i < srclength; i++)
+			input[i] = *src++;
+
+		output[0] = input[0] >> 2;
+		output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4);
+		output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6);
+
+		if (datalength + 4 > targsize)
+			return (-1);
+		target[datalength++] = Base64[output[0]];
+		target[datalength++] = Base64[output[1]];
+		if (srclength == 1)
+			target[datalength++] = Pad64;
+		else
+			target[datalength++] = Base64[output[2]];
+		target[datalength++] = Pad64;
+	}
+	if (datalength >= targsize)
+		return (-1);
+	target[datalength] = '\0';	/* Returned value doesn't count \0. */
+	return (int)(datalength);
+}
+
+/* skips all whitespace anywhere.
+   converts characters, four at a time, starting at (or after)
+   src from base - 64 numbers into three 8 bit bytes in the target area.
+   it returns the number of data bytes stored at the target, or -1 on error.
+ */
+
+int
+hwloc_decode_from_base64(char const *src, char *target, size_t targsize)
+{
+	unsigned int tarindex, state;
+	int ch;
+	char *pos;
+
+	state = 0;
+	tarindex = 0;
+
+	while ((ch = *src++) != '\0') {
+		if (isspace(ch))	/* Skip whitespace anywhere. */
+			continue;
+
+		if (ch == Pad64)
+			break;
+
+		pos = strchr(Base64, ch);
+		if (pos == 0) 		/* A non-base64 character. */
+			return (-1);
+
+		switch (state) {
+		case 0:
+			if (target) {
+				if (tarindex >= targsize)
+					return (-1);
+				target[tarindex] = (char)(pos - Base64) << 2;
+			}
+			state = 1;
+			break;
+		case 1:
+			if (target) {
+				if (tarindex + 1 >= targsize)
+					return (-1);
+				target[tarindex]   |=  (pos - Base64) >> 4;
+				target[tarindex+1]  = ((pos - Base64) & 0x0f)
+							<< 4 ;
+			}
+			tarindex++;
+			state = 2;
+			break;
+		case 2:
+			if (target) {
+				if (tarindex + 1 >= targsize)
+					return (-1);
+				target[tarindex]   |=  (pos - Base64) >> 2;
+				target[tarindex+1]  = ((pos - Base64) & 0x03)
+							<< 6;
+			}
+			tarindex++;
+			state = 3;
+			break;
+		case 3:
+			if (target) {
+				if (tarindex >= targsize)
+					return (-1);
+				target[tarindex] |= (pos - Base64);
+			}
+			tarindex++;
+			state = 0;
+			break;
+		}
+	}
+
+	/*
+	 * We are done decoding Base-64 chars.  Let's see if we ended
+	 * on a byte boundary, and/or with erroneous trailing characters.
+	 */
+
+	if (ch == Pad64) {		/* We got a pad char. */
+		ch = *src++;		/* Skip it, get next. */
+		switch (state) {
+		case 0:		/* Invalid = in first position */
+		case 1:		/* Invalid = in second position */
+			return (-1);
+
+		case 2:		/* Valid, means one byte of info */
+			/* Skip any number of spaces. */
+			for (; ch != '\0'; ch = *src++)
+				if (!isspace(ch))
+					break;
+			/* Make sure there is another trailing = sign. */
+			if (ch != Pad64)
+				return (-1);
+			ch = *src++;		/* Skip the = */
+			/* Fall through to "single trailing =" case. */
+			/* FALLTHROUGH */
+
+		case 3:		/* Valid, means two bytes of info */
+			/*
+			 * We know this char is an =.  Is there anything but
+			 * whitespace after it?
+			 */
+			for (; ch != '\0'; ch = *src++)
+				if (!isspace(ch))
+					return (-1);
+
+			/*
+			 * Now make sure for cases 2 and 3 that the "extra"
+			 * bits that slopped past the last full byte were
+			 * zeros.  If we don't check them, they become a
+			 * subliminal channel.
+			 */
+			if (target && target[tarindex] != 0)
+				return (-1);
+		}
+	} else {
+		/*
+		 * We ended by seeing the end of the string.  Make sure we
+		 * have no partial bytes lying around.
+		 */
+		if (state != 0)
+			return (-1);
+	}
+
+	return (tarindex);
+}
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/bind.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/bind.c
new file mode 100644
index 0000000000..3a5e039254
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/bind.c
@@ -0,0 +1,951 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2016 Inria.  All rights reserved.
+ * Copyright © 2009-2010, 2012 Université Bordeaux
+ * Copyright © 2011-2015 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+#include <private/autogen/config.h>
+#include <hwloc.h>
+#include <private/private.h>
+#include <hwloc/helper.h>
+#ifdef HAVE_SYS_MMAN_H
+#  include <sys/mman.h>
+#endif
+/* <malloc.h> is only needed if we don't have posix_memalign() */
+#if defined(hwloc_getpagesize) && !defined(HAVE_POSIX_MEMALIGN) && defined(HAVE_MEMALIGN) && defined(HAVE_MALLOC_H)
+#include <malloc.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <stdlib.h>
+#include <errno.h>
+
+/* TODO: HWLOC_GNU_SYS,
+ *
+ * We could use glibc's sched_setaffinity generically when it is available
+ *
+ * Darwin and OpenBSD don't seem to have binding facilities.
+ */
+
+#define HWLOC_CPUBIND_ALLFLAGS (HWLOC_CPUBIND_PROCESS|HWLOC_CPUBIND_THREAD|HWLOC_CPUBIND_STRICT|HWLOC_CPUBIND_NOMEMBIND)
+
+static hwloc_const_bitmap_t
+hwloc_fix_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t set)
+{
+  hwloc_const_bitmap_t topology_set = hwloc_topology_get_topology_cpuset(topology);
+  hwloc_const_bitmap_t complete_set = hwloc_topology_get_complete_cpuset(topology);
+
+  if (!topology_set) {
+    /* The topology is composed of several systems, the cpuset is ambiguous. */
+    errno = EXDEV;
+    return NULL;
+  }
+
+  if (hwloc_bitmap_iszero(set)) {
+    errno = EINVAL;
+    return NULL;
+  }
+
+  if (!hwloc_bitmap_isincluded(set, complete_set)) {
+    errno = EINVAL;
+    return NULL;
+  }
+
+  if (hwloc_bitmap_isincluded(topology_set, set))
+    set = complete_set;
+
+  return set;
+}
+
+int
+hwloc_set_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t set, int flags)
+{
+  if (flags & ~HWLOC_CPUBIND_ALLFLAGS) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  set = hwloc_fix_cpubind(topology, set);
+  if (!set)
+    return -1;
+
+  if (flags & HWLOC_CPUBIND_PROCESS) {
+    if (topology->binding_hooks.set_thisproc_cpubind)
+      return topology->binding_hooks.set_thisproc_cpubind(topology, set, flags);
+  } else if (flags & HWLOC_CPUBIND_THREAD) {
+    if (topology->binding_hooks.set_thisthread_cpubind)
+      return topology->binding_hooks.set_thisthread_cpubind(topology, set, flags);
+  } else {
+    if (topology->binding_hooks.set_thisproc_cpubind) {
+      int err = topology->binding_hooks.set_thisproc_cpubind(topology, set, flags);
+      if (err >= 0 || errno != ENOSYS)
+        return err;
+      /* ENOSYS, fallback */
+    }
+    if (topology->binding_hooks.set_thisthread_cpubind)
+      return topology->binding_hooks.set_thisthread_cpubind(topology, set, flags);
+  }
+
+  errno = ENOSYS;
+  return -1;
+}
+
+int
+hwloc_get_cpubind(hwloc_topology_t topology, hwloc_bitmap_t set, int flags)
+{
+  if (flags & ~HWLOC_CPUBIND_ALLFLAGS) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (flags & HWLOC_CPUBIND_PROCESS) {
+    if (topology->binding_hooks.get_thisproc_cpubind)
+      return topology->binding_hooks.get_thisproc_cpubind(topology, set, flags);
+  } else if (flags & HWLOC_CPUBIND_THREAD) {
+    if (topology->binding_hooks.get_thisthread_cpubind)
+      return topology->binding_hooks.get_thisthread_cpubind(topology, set, flags);
+  } else {
+    if (topology->binding_hooks.get_thisproc_cpubind) {
+      int err = topology->binding_hooks.get_thisproc_cpubind(topology, set, flags);
+      if (err >= 0 || errno != ENOSYS)
+        return err;
+      /* ENOSYS, fallback */
+    }
+    if (topology->binding_hooks.get_thisthread_cpubind)
+      return topology->binding_hooks.get_thisthread_cpubind(topology, set, flags);
+  }
+
+  errno = ENOSYS;
+  return -1;
+}
+
+int
+hwloc_set_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_bitmap_t set, int flags)
+{
+  if (flags & ~HWLOC_CPUBIND_ALLFLAGS) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  set = hwloc_fix_cpubind(topology, set);
+  if (!set)
+    return -1;
+
+  if (topology->binding_hooks.set_proc_cpubind)
+    return topology->binding_hooks.set_proc_cpubind(topology, pid, set, flags);
+
+  errno = ENOSYS;
+  return -1;
+}
+
+int
+hwloc_get_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_bitmap_t set, int flags)
+{
+  if (flags & ~HWLOC_CPUBIND_ALLFLAGS) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (topology->binding_hooks.get_proc_cpubind)
+    return topology->binding_hooks.get_proc_cpubind(topology, pid, set, flags);
+
+  errno = ENOSYS;
+  return -1;
+}
+
+#ifdef hwloc_thread_t
+int
+hwloc_set_thread_cpubind(hwloc_topology_t topology, hwloc_thread_t tid, hwloc_const_bitmap_t set, int flags)
+{
+  if (flags & ~HWLOC_CPUBIND_ALLFLAGS) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  set = hwloc_fix_cpubind(topology, set);
+  if (!set)
+    return -1;
+
+  if (topology->binding_hooks.set_thread_cpubind)
+    return topology->binding_hooks.set_thread_cpubind(topology, tid, set, flags);
+
+  errno = ENOSYS;
+  return -1;
+}
+
+int
+hwloc_get_thread_cpubind(hwloc_topology_t topology, hwloc_thread_t tid, hwloc_bitmap_t set, int flags)
+{
+  if (flags & ~HWLOC_CPUBIND_ALLFLAGS) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (topology->binding_hooks.get_thread_cpubind)
+    return topology->binding_hooks.get_thread_cpubind(topology, tid, set, flags);
+
+  errno = ENOSYS;
+  return -1;
+}
+#endif
+
+int
+hwloc_get_last_cpu_location(hwloc_topology_t topology, hwloc_bitmap_t set, int flags)
+{
+  if (flags & ~HWLOC_CPUBIND_ALLFLAGS) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (flags & HWLOC_CPUBIND_PROCESS) {
+    if (topology->binding_hooks.get_thisproc_last_cpu_location)
+      return topology->binding_hooks.get_thisproc_last_cpu_location(topology, set, flags);
+  } else if (flags & HWLOC_CPUBIND_THREAD) {
+    if (topology->binding_hooks.get_thisthread_last_cpu_location)
+      return topology->binding_hooks.get_thisthread_last_cpu_location(topology, set, flags);
+  } else {
+    if (topology->binding_hooks.get_thisproc_last_cpu_location) {
+      int err = topology->binding_hooks.get_thisproc_last_cpu_location(topology, set, flags);
+      if (err >= 0 || errno != ENOSYS)
+        return err;
+      /* ENOSYS, fallback */
+    }
+    if (topology->binding_hooks.get_thisthread_last_cpu_location)
+      return topology->binding_hooks.get_thisthread_last_cpu_location(topology, set, flags);
+  }
+
+  errno = ENOSYS;
+  return -1;
+}
+
+int
+hwloc_get_proc_last_cpu_location(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_bitmap_t set, int flags)
+{
+  if (flags & ~HWLOC_CPUBIND_ALLFLAGS) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (topology->binding_hooks.get_proc_last_cpu_location)
+    return topology->binding_hooks.get_proc_last_cpu_location(topology, pid, set, flags);
+
+  errno = ENOSYS;
+  return -1;
+}
+
+#define HWLOC_MEMBIND_ALLFLAGS (HWLOC_MEMBIND_PROCESS|HWLOC_MEMBIND_THREAD|HWLOC_MEMBIND_STRICT|HWLOC_MEMBIND_MIGRATE|HWLOC_MEMBIND_NOCPUBIND|HWLOC_MEMBIND_BYNODESET)
+
+static hwloc_const_nodeset_t
+hwloc_fix_membind(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset)
+{
+  hwloc_const_bitmap_t topology_nodeset = hwloc_topology_get_topology_nodeset(topology);
+  hwloc_const_bitmap_t complete_nodeset = hwloc_topology_get_complete_nodeset(topology);
+
+  if (!hwloc_topology_get_topology_cpuset(topology)) {
+    /* The topology is composed of several systems, the nodeset is thus
+     * ambiguous. */
+    errno = EXDEV;
+    return NULL;
+  }
+
+  if (!complete_nodeset) {
+    /* There is no NUMA node */
+    errno = ENODEV;
+    return NULL;
+  }
+
+  if (hwloc_bitmap_iszero(nodeset)) {
+    errno = EINVAL;
+    return NULL;
+  }
+
+  if (!hwloc_bitmap_isincluded(nodeset, complete_nodeset)) {
+    errno = EINVAL;
+    return NULL;
+  }
+
+  if (hwloc_bitmap_isincluded(topology_nodeset, nodeset))
+    return complete_nodeset;
+
+  return nodeset;
+}
+
+static int
+hwloc_fix_membind_cpuset(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_const_cpuset_t cpuset)
+{
+  hwloc_const_bitmap_t topology_set = hwloc_topology_get_topology_cpuset(topology);
+  hwloc_const_bitmap_t complete_set = hwloc_topology_get_complete_cpuset(topology);
+  hwloc_const_bitmap_t complete_nodeset = hwloc_topology_get_complete_nodeset(topology);
+
+  if (!topology_set) {
+    /* The topology is composed of several systems, the cpuset is thus
+     * ambiguous. */
+    errno = EXDEV;
+    return -1;
+  }
+
+  if (!complete_nodeset) {
+    /* There is no NUMA node */
+    errno = ENODEV;
+    return -1;
+  }
+
+  if (hwloc_bitmap_iszero(cpuset)) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (!hwloc_bitmap_isincluded(cpuset, complete_set)) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (hwloc_bitmap_isincluded(topology_set, cpuset)) {
+    hwloc_bitmap_copy(nodeset, complete_nodeset);
+    return 0;
+  }
+
+  hwloc_cpuset_to_nodeset(topology, cpuset, nodeset);
+  return 0;
+}
+
+/* Bind the current process/thread memory policy to `nodeset`.
+ * Dispatches to the process or thread hook according to `flags`;
+ * with no PROCESS/THREAD flag, tries process first and falls back to
+ * thread on ENOSYS. Returns -1/ENOSYS if no suitable hook exists.
+ */
+static int
+hwloc_set_membind_by_nodeset(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
+{
+  if (flags & ~HWLOC_MEMBIND_ALLFLAGS) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  nodeset = hwloc_fix_membind(topology, nodeset);
+  if (!nodeset)
+    return -1;
+
+  if (flags & HWLOC_MEMBIND_PROCESS) {
+    if (topology->binding_hooks.set_thisproc_membind)
+      return topology->binding_hooks.set_thisproc_membind(topology, nodeset, policy, flags);
+  } else if (flags & HWLOC_MEMBIND_THREAD) {
+    if (topology->binding_hooks.set_thisthread_membind)
+      return topology->binding_hooks.set_thisthread_membind(topology, nodeset, policy, flags);
+  } else {
+    if (topology->binding_hooks.set_thisproc_membind) {
+      int err = topology->binding_hooks.set_thisproc_membind(topology, nodeset, policy, flags);
+      if (err >= 0 || errno != ENOSYS)
+        return err;
+      /* ENOSYS, fallback */
+    }
+    if (topology->binding_hooks.set_thisthread_membind)
+      return topology->binding_hooks.set_thisthread_membind(topology, nodeset, policy, flags);
+  }
+
+  errno = ENOSYS;
+  return -1;
+}
+
+/* Public entry point: `set` is a nodeset if HWLOC_MEMBIND_BYNODESET is
+ * given, otherwise a cpuset that is first converted to a nodeset. */
+int
+hwloc_set_membind(hwloc_topology_t topology, hwloc_const_bitmap_t set, hwloc_membind_policy_t policy, int flags)
+{
+  int ret;
+
+  if (flags & HWLOC_MEMBIND_BYNODESET) {
+    ret = hwloc_set_membind_by_nodeset(topology, set, policy, flags);
+  } else {
+    hwloc_nodeset_t nodeset = hwloc_bitmap_alloc();
+    if (hwloc_fix_membind_cpuset(topology, nodeset, set))
+      ret = -1;
+    else
+      ret = hwloc_set_membind_by_nodeset(topology, nodeset, policy, flags);
+    hwloc_bitmap_free(nodeset);
+  }
+  return ret;
+}
+
+/* Query the current process/thread memory binding into `nodeset`/`policy`.
+ * Same hook-dispatch and ENOSYS-fallback scheme as the setter above.
+ */
+static int
+hwloc_get_membind_by_nodeset(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags)
+{
+  if (flags & ~HWLOC_MEMBIND_ALLFLAGS) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (flags & HWLOC_MEMBIND_PROCESS) {
+    if (topology->binding_hooks.get_thisproc_membind)
+      return topology->binding_hooks.get_thisproc_membind(topology, nodeset, policy, flags);
+  } else if (flags & HWLOC_MEMBIND_THREAD) {
+    if (topology->binding_hooks.get_thisthread_membind)
+      return topology->binding_hooks.get_thisthread_membind(topology, nodeset, policy, flags);
+  } else {
+    if (topology->binding_hooks.get_thisproc_membind) {
+      int err = topology->binding_hooks.get_thisproc_membind(topology, nodeset, policy, flags);
+      if (err >= 0 || errno != ENOSYS)
+        return err;
+      /* ENOSYS, fallback */
+    }
+    if (topology->binding_hooks.get_thisthread_membind)
+      return topology->binding_hooks.get_thisthread_membind(topology, nodeset, policy, flags);
+  }
+
+  errno = ENOSYS;
+  return -1;
+}
+
+/* Public entry point: without BYNODESET, the queried nodeset is converted
+ * back into a cpuset before being returned in `set`. */
+int
+hwloc_get_membind(hwloc_topology_t topology, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags)
+{
+  int ret;
+
+  if (flags & HWLOC_MEMBIND_BYNODESET) {
+    ret = hwloc_get_membind_by_nodeset(topology, set, policy, flags);
+  } else {
+    hwloc_nodeset_t nodeset = hwloc_bitmap_alloc();
+    ret = hwloc_get_membind_by_nodeset(topology, nodeset, policy, flags);
+    if (!ret)
+      hwloc_cpuset_from_nodeset(topology, set, nodeset);
+    hwloc_bitmap_free(nodeset);
+  }
+
+  return ret;
+}
+
+/* Bind the memory policy of another process identified by `pid`.
+ * Returns -1/ENOSYS when the OS backend provides no set_proc_membind hook.
+ */
+static int
+hwloc_set_proc_membind_by_nodeset(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
+{
+  if (flags & ~HWLOC_MEMBIND_ALLFLAGS) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  nodeset = hwloc_fix_membind(topology, nodeset);
+  if (!nodeset)
+    return -1;
+
+  if (topology->binding_hooks.set_proc_membind)
+    return topology->binding_hooks.set_proc_membind(topology, pid, nodeset, policy, flags);
+
+  errno = ENOSYS;
+  return -1;
+}
+
+
+/* Public entry point: `set` is a nodeset with BYNODESET, else a cpuset. */
+int
+hwloc_set_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_bitmap_t set, hwloc_membind_policy_t policy, int flags)
+{
+  int ret;
+
+  if (flags & HWLOC_MEMBIND_BYNODESET) {
+    ret = hwloc_set_proc_membind_by_nodeset(topology, pid, set, policy, flags);
+  } else {
+    hwloc_nodeset_t nodeset = hwloc_bitmap_alloc();
+    if (hwloc_fix_membind_cpuset(topology, nodeset, set))
+      ret = -1;
+    else
+      ret = hwloc_set_proc_membind_by_nodeset(topology, pid, nodeset, policy, flags);
+    hwloc_bitmap_free(nodeset);
+  }
+
+  return ret;
+}
+
+/* Query the memory binding of another process identified by `pid`.
+ * Returns -1/ENOSYS when the OS backend provides no get_proc_membind hook.
+ */
+static int
+hwloc_get_proc_membind_by_nodeset(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags)
+{
+  if (flags & ~HWLOC_MEMBIND_ALLFLAGS) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (topology->binding_hooks.get_proc_membind)
+    return topology->binding_hooks.get_proc_membind(topology, pid, nodeset, policy, flags);
+
+  errno = ENOSYS;
+  return -1;
+}
+
+/* Public entry point: without BYNODESET, converts the result to a cpuset. */
+int
+hwloc_get_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags)
+{
+  int ret;
+
+  if (flags & HWLOC_MEMBIND_BYNODESET) {
+    ret = hwloc_get_proc_membind_by_nodeset(topology, pid, set, policy, flags);
+  } else {
+    hwloc_nodeset_t nodeset = hwloc_bitmap_alloc();
+    ret = hwloc_get_proc_membind_by_nodeset(topology, pid, nodeset, policy, flags);
+    if (!ret)
+      hwloc_cpuset_from_nodeset(topology, set, nodeset);
+    hwloc_bitmap_free(nodeset);
+  }
+
+  return ret;
+}
+
+/* Bind the memory range [addr, addr+len) to `nodeset`.
+ * A zero-length range is accepted as a successful no-op.
+ */
+static int
+hwloc_set_area_membind_by_nodeset(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
+{
+  if (flags & ~HWLOC_MEMBIND_ALLFLAGS) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (!len)
+    /* nothing to do */
+    return 0;
+
+  nodeset = hwloc_fix_membind(topology, nodeset);
+  if (!nodeset)
+    return -1;
+
+  if (topology->binding_hooks.set_area_membind)
+    return topology->binding_hooks.set_area_membind(topology, addr, len, nodeset, policy, flags);
+
+  errno = ENOSYS;
+  return -1;
+}
+
+/* Public entry point: `set` is a nodeset with BYNODESET, else a cpuset. */
+int
+hwloc_set_area_membind(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_bitmap_t set, hwloc_membind_policy_t policy, int flags)
+{
+  int ret;
+
+  if (flags & HWLOC_MEMBIND_BYNODESET) {
+    ret = hwloc_set_area_membind_by_nodeset(topology, addr, len, set, policy, flags);
+  } else {
+    hwloc_nodeset_t nodeset = hwloc_bitmap_alloc();
+    if (hwloc_fix_membind_cpuset(topology, nodeset, set))
+      ret = -1;
+    else
+      ret = hwloc_set_area_membind_by_nodeset(topology, addr, len, nodeset, policy, flags);
+    hwloc_bitmap_free(nodeset);
+  }
+
+  return ret;
+}
+
+/* Query the binding of the memory range [addr, addr+len).
+ * Unlike the setter, a zero-length range is an error (nothing to query).
+ */
+static int
+hwloc_get_area_membind_by_nodeset(hwloc_topology_t topology, const void *addr, size_t len, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags)
+{
+  if (flags & ~HWLOC_MEMBIND_ALLFLAGS) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (!len) {
+    /* nothing to query */
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (topology->binding_hooks.get_area_membind)
+    return topology->binding_hooks.get_area_membind(topology, addr, len, nodeset, policy, flags);
+
+  errno = ENOSYS;
+  return -1;
+}
+
+/* Public entry point: without BYNODESET, converts the result to a cpuset. */
+int
+hwloc_get_area_membind(hwloc_topology_t topology, const void *addr, size_t len, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags)
+{
+  int ret;
+
+  if (flags & HWLOC_MEMBIND_BYNODESET) {
+    ret = hwloc_get_area_membind_by_nodeset(topology, addr, len, set, policy, flags);
+  } else {
+    hwloc_nodeset_t nodeset = hwloc_bitmap_alloc();
+    ret = hwloc_get_area_membind_by_nodeset(topology, addr, len, nodeset, policy, flags);
+    if (!ret)
+      hwloc_cpuset_from_nodeset(topology, set, nodeset);
+    hwloc_bitmap_free(nodeset);
+  }
+
+  return ret;
+}
+
+/* Report where the pages of [addr, addr+len) are physically allocated.
+ * A zero-length range is a successful no-op; -1/ENOSYS without a hook.
+ */
+static int
+hwloc_get_area_memlocation_by_nodeset(hwloc_topology_t topology, const void *addr, size_t len, hwloc_nodeset_t nodeset, int flags)
+{
+  if (flags & ~HWLOC_MEMBIND_ALLFLAGS) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (!len)
+    /* nothing to do */
+    return 0;
+
+  if (topology->binding_hooks.get_area_memlocation)
+    return topology->binding_hooks.get_area_memlocation(topology, addr, len, nodeset, flags);
+
+  errno = ENOSYS;
+  return -1;
+}
+
+/* Public entry point: without BYNODESET, converts the result to a cpuset. */
+int
+hwloc_get_area_memlocation(hwloc_topology_t topology, const void *addr, size_t len, hwloc_cpuset_t set, int flags)
+{
+  int ret;
+
+  if (flags & HWLOC_MEMBIND_BYNODESET) {
+    ret = hwloc_get_area_memlocation_by_nodeset(topology, addr, len, set, flags);
+  } else {
+    hwloc_nodeset_t nodeset = hwloc_bitmap_alloc();
+    ret = hwloc_get_area_memlocation_by_nodeset(topology, addr, len, nodeset, flags);
+    if (!ret)
+      hwloc_cpuset_from_nodeset(topology, set, nodeset);
+    hwloc_bitmap_free(nodeset);
+  }
+
+  return ret;
+}
+
+/* Page-aligned heap allocation when an aligned allocator is available,
+ * plain malloc otherwise. Returns NULL on failure. */
+void *
+hwloc_alloc_heap(hwloc_topology_t topology __hwloc_attribute_unused, size_t len)
+{
+  void *p = NULL;
+#if defined(hwloc_getpagesize) && defined(HAVE_POSIX_MEMALIGN)
+  /* posix_memalign returns the error code instead of setting errno */
+  errno = posix_memalign(&p, hwloc_getpagesize(), len);
+  if (errno)
+    p = NULL;
+#elif defined(hwloc_getpagesize) && defined(HAVE_MEMALIGN)
+  p = memalign(hwloc_getpagesize(), len);
+#else
+  p = malloc(len);
+#endif
+  return p;
+}
+
+#ifdef MAP_ANONYMOUS
+/* Anonymous-mmap allocation; returns NULL (not MAP_FAILED) on failure. */
+void *
+hwloc_alloc_mmap(hwloc_topology_t topology __hwloc_attribute_unused, size_t len)
+{
+  void * buffer = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+  return buffer == MAP_FAILED ? NULL : buffer;
+}
+#endif
+
+/* Counterpart of hwloc_alloc_heap(). */
+int
+hwloc_free_heap(hwloc_topology_t topology __hwloc_attribute_unused, void *addr, size_t len __hwloc_attribute_unused)
+{
+  free(addr);
+  return 0;
+}
+
+#ifdef MAP_ANONYMOUS
+/* Counterpart of hwloc_alloc_mmap(); tolerates a NULL addr. */
+int
+hwloc_free_mmap(hwloc_topology_t topology __hwloc_attribute_unused, void *addr, size_t len)
+{
+  if (!addr)
+    return 0;
+  return munmap(addr, len);
+}
+#endif
+
+/* Allocate via the backend's preferred allocator, falling back to heap. */
+void *
+hwloc_alloc(hwloc_topology_t topology, size_t len)
+{
+  if (topology->binding_hooks.alloc)
+    return topology->binding_hooks.alloc(topology, len);
+  return hwloc_alloc_heap(topology, len);
+}
+
+/* Allocate `len` bytes bound to `nodeset`.
+ * Uses the backend alloc_membind hook if available, otherwise allocates
+ * then binds with set_area_membind. When binding is impossible and
+ * HWLOC_MEMBIND_STRICT is not set, falls back to an unbound allocation.
+ */
+static void *
+hwloc_alloc_membind_by_nodeset(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
+{
+  void *p;
+
+  if (flags & ~HWLOC_MEMBIND_ALLFLAGS) {
+    errno = EINVAL;
+    return NULL;
+  }
+
+  nodeset = hwloc_fix_membind(topology, nodeset);
+  if (!nodeset)
+    goto fallback;
+  if (flags & HWLOC_MEMBIND_MIGRATE) {
+    /* MIGRATE is meaningless for a fresh allocation */
+    errno = EINVAL;
+    goto fallback;
+  }
+
+  if (topology->binding_hooks.alloc_membind)
+    return topology->binding_hooks.alloc_membind(topology, len, nodeset, policy, flags);
+  else if (topology->binding_hooks.set_area_membind) {
+    p = hwloc_alloc(topology, len);
+    if (!p)
+      return NULL;
+    if (topology->binding_hooks.set_area_membind(topology, p, len, nodeset, policy, flags) && flags & HWLOC_MEMBIND_STRICT) {
+      /* preserve the binding failure's errno across free() */
+      int error = errno;
+      free(p);
+      errno = error;
+      return NULL;
+    }
+    return p;
+  } else {
+    errno = ENOSYS;
+  }
+
+fallback:
+  if (flags & HWLOC_MEMBIND_STRICT)
+    /* Report error */
+    return NULL;
+  /* Never mind, allocate anyway */
+  return hwloc_alloc(topology, len);
+}
+
+/* Public entry point: `set` is a nodeset with BYNODESET, else a cpuset. */
+void *
+hwloc_alloc_membind(hwloc_topology_t topology, size_t len, hwloc_const_bitmap_t set, hwloc_membind_policy_t policy, int flags)
+{
+  void *ret;
+
+  if (flags & HWLOC_MEMBIND_BYNODESET) {
+    ret = hwloc_alloc_membind_by_nodeset(topology, len, set, policy, flags);
+  } else {
+    hwloc_nodeset_t nodeset = hwloc_bitmap_alloc();
+    if (hwloc_fix_membind_cpuset(topology, nodeset, set)) {
+      /* cpuset conversion failed: honor STRICT, else allocate unbound */
+      if (flags & HWLOC_MEMBIND_STRICT)
+	ret = NULL;
+      else
+	ret = hwloc_alloc(topology, len);
+    } else
+      ret = hwloc_alloc_membind_by_nodeset(topology, len, nodeset, policy, flags);
+    hwloc_bitmap_free(nodeset);
+  }
+
+  return ret;
+}
+
+/* Free memory obtained from hwloc_alloc()/hwloc_alloc_membind(). */
+int
+hwloc_free(hwloc_topology_t topology, void *addr, size_t len)
+{
+  if (topology->binding_hooks.free_membind)
+    return topology->binding_hooks.free_membind(topology, addr, len);
+  return hwloc_free_heap(topology, addr, len);
+}
+
+/*
+ * Empty binding hooks always returning success
+ */
+
+/* Shared "getter" stub: report the complete cpuset as the binding.
+ * Reuses the already-fetched `cpuset` instead of calling the accessor a
+ * second time (the original redundantly re-queried the topology). */
+static int dontset_return_complete_cpuset(hwloc_topology_t topology, hwloc_cpuset_t set)
+{
+  hwloc_const_cpuset_t cpuset = hwloc_topology_get_complete_cpuset(topology);
+  if (cpuset) {
+    hwloc_bitmap_copy(set, cpuset);
+    return 0;
+  } else
+    return -1;
+}
+
+static int dontset_thisthread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, int flags __hwloc_attribute_unused)
+{
+  return 0;
+}
+static int dontget_thisthread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_bitmap_t set, int flags __hwloc_attribute_unused)
+{
+  return dontset_return_complete_cpuset(topology, set);
+}
+static int dontset_thisproc_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, int flags __hwloc_attribute_unused)
+{
+  return 0;
+}
+static int dontget_thisproc_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_bitmap_t set, int flags __hwloc_attribute_unused)
+{
+  return dontset_return_complete_cpuset(topology, set);
+}
+static int dontset_proc_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_pid_t pid __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, int flags __hwloc_attribute_unused)
+{
+  return 0;
+}
+static int dontget_proc_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_pid_t pid __hwloc_attribute_unused, hwloc_bitmap_t cpuset, int flags __hwloc_attribute_unused)
+{
+  return dontset_return_complete_cpuset(topology, cpuset);
+}
+#ifdef hwloc_thread_t
+static int dontset_thread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_thread_t tid __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, int flags __hwloc_attribute_unused)
+{
+  return 0;
+}
+static int dontget_thread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_thread_t tid __hwloc_attribute_unused, hwloc_bitmap_t cpuset, int flags __hwloc_attribute_unused)
+{
+  return dontset_return_complete_cpuset(topology, cpuset);
+}
+#endif
+
+/* Shared "getter" stub: report the complete nodeset with DEFAULT policy.
+ * Reuses the already-fetched `nodeset` instead of calling the accessor a
+ * second time (the original redundantly re-queried the topology). */
+static int dontset_return_complete_nodeset(hwloc_topology_t topology, hwloc_nodeset_t set, hwloc_membind_policy_t *policy)
+{
+  hwloc_const_nodeset_t nodeset = hwloc_topology_get_complete_nodeset(topology);
+  if (nodeset) {
+    hwloc_bitmap_copy(set, nodeset);
+    *policy = HWLOC_MEMBIND_DEFAULT;
+    return 0;
+  } else
+    return -1;
+}
+
+static int dontset_thisproc_membind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, hwloc_membind_policy_t policy __hwloc_attribute_unused, int flags __hwloc_attribute_unused)
+{
+  return 0;
+}
+static int dontget_thisproc_membind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags __hwloc_attribute_unused)
+{
+  return dontset_return_complete_nodeset(topology, set, policy);
+}
+
+static int dontset_thisthread_membind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, hwloc_membind_policy_t policy __hwloc_attribute_unused, int flags __hwloc_attribute_unused)
+{
+  return 0;
+}
+static int dontget_thisthread_membind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags __hwloc_attribute_unused)
+{
+  return dontset_return_complete_nodeset(topology, set, policy);
+}
+
+static int dontset_proc_membind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_pid_t pid __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, hwloc_membind_policy_t policy __hwloc_attribute_unused, int flags __hwloc_attribute_unused)
+{
+  return 0;
+}
+static int dontget_proc_membind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_pid_t pid __hwloc_attribute_unused, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags __hwloc_attribute_unused)
+{
+  return dontset_return_complete_nodeset(topology, set, policy);
+}
+
+static int dontset_area_membind(hwloc_topology_t topology __hwloc_attribute_unused, const void *addr __hwloc_attribute_unused, size_t size __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, hwloc_membind_policy_t policy __hwloc_attribute_unused, int flags __hwloc_attribute_unused)
+{
+  return 0;
+}
+static int dontget_area_membind(hwloc_topology_t topology __hwloc_attribute_unused, const void *addr __hwloc_attribute_unused, size_t size __hwloc_attribute_unused, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags __hwloc_attribute_unused)
+{
+  return dontset_return_complete_nodeset(topology, set, policy);
+}
+static int dontget_area_memlocation(hwloc_topology_t topology __hwloc_attribute_unused, const void *addr __hwloc_attribute_unused, size_t size __hwloc_attribute_unused, hwloc_bitmap_t set, int flags __hwloc_attribute_unused)
+{
+  hwloc_membind_policy_t policy;
+  return dontset_return_complete_nodeset(topology, set, &policy);
+}
+
+static void * dontalloc_membind(hwloc_topology_t topology __hwloc_attribute_unused, size_t size __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, hwloc_membind_policy_t policy __hwloc_attribute_unused, int flags __hwloc_attribute_unused)
+{
+  return malloc(size);
+}
+static int dontfree_membind(hwloc_topology_t topology __hwloc_attribute_unused, void *addr __hwloc_attribute_unused, size_t size __hwloc_attribute_unused)
+{
+  free(addr);
+  return 0;
+}
+
+/* Install the no-op hooks above, used when the topology does not describe
+ * the running system (binding would be meaningless but must not fail). */
+static void hwloc_set_dummy_hooks(struct hwloc_binding_hooks *hooks,
+				  struct hwloc_topology_support *support __hwloc_attribute_unused)
+{
+  hooks->set_thisproc_cpubind = dontset_thisproc_cpubind;
+  hooks->get_thisproc_cpubind = dontget_thisproc_cpubind;
+  hooks->set_thisthread_cpubind = dontset_thisthread_cpubind;
+  hooks->get_thisthread_cpubind = dontget_thisthread_cpubind;
+  hooks->set_proc_cpubind = dontset_proc_cpubind;
+  hooks->get_proc_cpubind = dontget_proc_cpubind;
+#ifdef hwloc_thread_t
+  hooks->set_thread_cpubind = dontset_thread_cpubind;
+  hooks->get_thread_cpubind = dontget_thread_cpubind;
+#endif
+  hooks->get_thisproc_last_cpu_location = dontget_thisproc_cpubind; /* cpubind instead of last_cpu_location is ok */
+  hooks->get_thisthread_last_cpu_location = dontget_thisthread_cpubind; /* cpubind instead of last_cpu_location is ok */
+  hooks->get_proc_last_cpu_location = dontget_proc_cpubind; /* cpubind instead of last_cpu_location is ok */
+  /* TODO: get_thread_last_cpu_location */
+  hooks->set_thisproc_membind = dontset_thisproc_membind;
+  hooks->get_thisproc_membind = dontget_thisproc_membind;
+  hooks->set_thisthread_membind = dontset_thisthread_membind;
+  hooks->get_thisthread_membind = dontget_thisthread_membind;
+  hooks->set_proc_membind = dontset_proc_membind;
+  hooks->get_proc_membind = dontget_proc_membind;
+  hooks->set_area_membind = dontset_area_membind;
+  hooks->get_area_membind = dontget_area_membind;
+  hooks->get_area_memlocation = dontget_area_memlocation;
+  hooks->alloc_membind = dontalloc_membind;
+  hooks->free_membind = dontfree_membind;
+}
+
+/* Install the real OS-specific binding hooks; each compiled-in backend
+ * fills in whatever subset of hooks its platform supports. */
+void
+hwloc_set_native_binding_hooks(struct hwloc_binding_hooks *hooks, struct hwloc_topology_support *support)
+{
+#    ifdef HWLOC_LINUX_SYS
+    hwloc_set_linuxfs_hooks(hooks, support);
+#    endif /* HWLOC_LINUX_SYS */
+
+#    ifdef HWLOC_BGQ_SYS
+    hwloc_set_bgq_hooks(hooks, support);
+#    endif /* HWLOC_BGQ_SYS */
+
+#    ifdef HWLOC_AIX_SYS
+    hwloc_set_aix_hooks(hooks, support);
+#    endif /* HWLOC_AIX_SYS */
+
+#    ifdef HWLOC_SOLARIS_SYS
+    hwloc_set_solaris_hooks(hooks, support);
+#    endif /* HWLOC_SOLARIS_SYS */
+
+#    ifdef HWLOC_WIN_SYS
+    hwloc_set_windows_hooks(hooks, support);
+#    endif /* HWLOC_WIN_SYS */
+
+#    ifdef HWLOC_DARWIN_SYS
+    hwloc_set_darwin_hooks(hooks, support);
+#    endif /* HWLOC_DARWIN_SYS */
+
+#    ifdef HWLOC_FREEBSD_SYS
+    hwloc_set_freebsd_hooks(hooks, support);
+#    endif /* HWLOC_FREEBSD_SYS */
+
+#    ifdef HWLOC_NETBSD_SYS
+    hwloc_set_netbsd_hooks(hooks, support);
+#    endif /* HWLOC_NETBSD_SYS */
+
+#    ifdef HWLOC_HPUX_SYS
+    hwloc_set_hpux_hooks(hooks, support);
+#    endif /* HWLOC_HPUX_SYS */
+}
+
+/* If the represented system is actually not this system, use dummy binding hooks. */
+void
+hwloc_set_binding_hooks(struct hwloc_topology *topology)
+{
+  if (topology->is_thissystem) {
+    hwloc_set_native_binding_hooks(&topology->binding_hooks, &topology->support);
+    /* every hook not set above will return ENOSYS */
+  } else {
+    /* not this system, use dummy binding hooks that do nothing (but don't return ENOSYS) */
+    hwloc_set_dummy_hooks(&topology->binding_hooks, &topology->support);
+  }
+
+  /* if not is_thissystem, set_cpubind is fake
+   * and get_cpubind returns the whole system cpuset,
+   * so don't report that set/get_cpubind as supported
+   */
+  if (topology->is_thissystem) {
+/* advertise support for each hook the backend actually installed */
+#define DO(which,kind) \
+    if (topology->binding_hooks.kind) \
+      topology->support.which##bind->kind = 1;
+    DO(cpu,set_thisproc_cpubind);
+    DO(cpu,get_thisproc_cpubind);
+    DO(cpu,set_proc_cpubind);
+    DO(cpu,get_proc_cpubind);
+    DO(cpu,set_thisthread_cpubind);
+    DO(cpu,get_thisthread_cpubind);
+#ifdef hwloc_thread_t
+    DO(cpu,set_thread_cpubind);
+    DO(cpu,get_thread_cpubind);
+#endif
+    DO(cpu,get_thisproc_last_cpu_location);
+    DO(cpu,get_proc_last_cpu_location);
+    DO(cpu,get_thisthread_last_cpu_location);
+    DO(mem,set_thisproc_membind);
+    DO(mem,get_thisproc_membind);
+    DO(mem,set_thisthread_membind);
+    DO(mem,get_thisthread_membind);
+    DO(mem,set_proc_membind);
+    DO(mem,get_proc_membind);
+    DO(mem,set_area_membind);
+    DO(mem,get_area_membind);
+    DO(mem,get_area_memlocation);
+    DO(mem,alloc_membind);
+  }
+}
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/bitmap.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/bitmap.c
new file mode 100644
index 0000000000..650888c3b9
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/bitmap.c
@@ -0,0 +1,1522 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2017 Inria.  All rights reserved.
+ * Copyright © 2009-2011 Université Bordeaux
+ * Copyright © 2009-2011 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+#include <private/autogen/config.h>
+#include <hwloc/autogen/config.h>
+#include <hwloc.h>
+#include <private/misc.h>
+#include <private/private.h>
+#include <hwloc/bitmap.h>
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <assert.h>
+#include <errno.h>
+#include <ctype.h>
+
+/*
+ * possible improvements:
+ * - have a way to change the initial allocation size:
+ *   add hwloc_bitmap_set_foo() to changes a global here,
+ *   and make the hwloc core call based on the early number of PUs
+ * - preallocate inside the bitmap structure (so that the whole structure is a cacheline for instance)
+ *   and allocate a dedicated array only later when reallocating larger
+ * - add a bitmap->ulongs_empty_first which guarantees that some first ulongs are empty,
+ *   making tests much faster for big bitmaps since there's no need to look at first ulongs.
+ *   no need for ulongs_empty_first to be exactly the max number of empty ulongs,
+ *   clearing bits that were set earlier isn't very common.
+ */
+
+/* magic number */
+#define HWLOC_BITMAP_MAGIC 0x20091007
+
+/* actual opaque type internals */
+struct hwloc_bitmap_s {
+  unsigned ulongs_count; /* how many ulong bitmasks are valid, >= 1 */
+  unsigned ulongs_allocated; /* how many ulong bitmasks are allocated, >= ulongs_count */
+  unsigned long *ulongs; /* the bitmask array itself, grown on demand */
+  int infinite; /* set to 1 if all bits beyond ulongs are set */
+#ifdef HWLOC_DEBUG
+  int magic;
+#endif
+};
+
+/* overzealous check in debug-mode, not as powerful as valgrind but still useful */
+#ifdef HWLOC_DEBUG
+#define HWLOC__BITMAP_CHECK(set) do {				\
+  assert((set)->magic == HWLOC_BITMAP_MAGIC);			\
+  assert((set)->ulongs_count >= 1);				\
+  assert((set)->ulongs_allocated >= (set)->ulongs_count);	\
+} while (0)
+#else
+#define HWLOC__BITMAP_CHECK(set)
+#endif
+
+/* extract a subset from a set using an index or a cpu */
+#define HWLOC_SUBBITMAP_INDEX(cpu)		((cpu)/(HWLOC_BITS_PER_LONG))
+#define HWLOC_SUBBITMAP_CPU_ULBIT(cpu)		((cpu)%(HWLOC_BITS_PER_LONG))
+/* Read from a bitmap ulong without knowing whether x is valid.
+ * Writers should make sure that x is valid and modify set->ulongs[x] directly.
+ */
+#define HWLOC_SUBBITMAP_READULONG(set,x)	((x) < (set)->ulongs_count ? (set)->ulongs[x] : (set)->infinite ? HWLOC_SUBBITMAP_FULL : HWLOC_SUBBITMAP_ZERO)
+
+/* predefined subset values */
+#define HWLOC_SUBBITMAP_ZERO			0UL
+#define HWLOC_SUBBITMAP_FULL			(~0UL)
+#define HWLOC_SUBBITMAP_ULBIT(bit)		(1UL<<(bit))
+#define HWLOC_SUBBITMAP_CPU(cpu)		HWLOC_SUBBITMAP_ULBIT(HWLOC_SUBBITMAP_CPU_ULBIT(cpu))
+#define HWLOC_SUBBITMAP_ULBIT_TO(bit)		(HWLOC_SUBBITMAP_FULL>>(HWLOC_BITS_PER_LONG-1-(bit)))
+#define HWLOC_SUBBITMAP_ULBIT_FROM(bit)		(HWLOC_SUBBITMAP_FULL<<(bit))
+#define HWLOC_SUBBITMAP_ULBIT_FROMTO(begin,end)	(HWLOC_SUBBITMAP_ULBIT_TO(end) & HWLOC_SUBBITMAP_ULBIT_FROM(begin))
+
+/* Allocate an empty bitmap with one 64-byte preallocated ulong array.
+ * Returns NULL on allocation failure. */
+struct hwloc_bitmap_s * hwloc_bitmap_alloc(void)
+{
+  struct hwloc_bitmap_s * set;
+
+  set = malloc(sizeof(struct hwloc_bitmap_s));
+  if (!set)
+    return NULL;
+
+  set->ulongs_count = 1;
+  set->ulongs_allocated = 64/sizeof(unsigned long);
+  set->ulongs = malloc(64);
+  if (!set->ulongs) {
+    free(set);
+    return NULL;
+  }
+
+  set->ulongs[0] = HWLOC_SUBBITMAP_ZERO;
+  set->infinite = 0;
+#ifdef HWLOC_DEBUG
+  set->magic = HWLOC_BITMAP_MAGIC;
+#endif
+  return set;
+}
+
+/* Allocate a bitmap with every bit set (infinitely). */
+struct hwloc_bitmap_s * hwloc_bitmap_alloc_full(void)
+{
+  struct hwloc_bitmap_s * set = hwloc_bitmap_alloc();
+  if (set) {
+    set->infinite = 1;
+    set->ulongs[0] = HWLOC_SUBBITMAP_FULL;
+  }
+  return set;
+}
+
+/* Release a bitmap; accepts NULL as a no-op. */
+void hwloc_bitmap_free(struct hwloc_bitmap_s * set)
+{
+  if (!set)
+    return;
+
+  HWLOC__BITMAP_CHECK(set);
+#ifdef HWLOC_DEBUG
+  /* clear the magic so a later use-after-free trips the debug check */
+  set->magic = 0;
+#endif
+
+  free(set->ulongs);
+  free(set);
+}
+
+/* enlarge until it contains at least needed_count ulongs.
+ * allocation sizes are rounded up to the next power of two.
+ */
+static void
+hwloc_bitmap_enlarge_by_ulongs(struct hwloc_bitmap_s * set, unsigned needed_count)
+{
+  unsigned tmp = 1 << hwloc_flsl((unsigned long) needed_count - 1);
+  if (tmp > set->ulongs_allocated) {
+    unsigned long *tmpulongs;
+    tmpulongs = realloc(set->ulongs, tmp * sizeof(unsigned long));
+    assert(tmpulongs); /* FIXME: return errors from all bitmap functions? */
+    set->ulongs = tmpulongs;
+    set->ulongs_allocated = tmp;
+  }
+}
+
+/* enlarge until it contains at least needed_count ulongs,
+ * and update new ulongs according to the infinite field.
+ */
+static void
+hwloc_bitmap_realloc_by_ulongs(struct hwloc_bitmap_s * set, unsigned needed_count)
+{
+  unsigned i;
+
+  HWLOC__BITMAP_CHECK(set);
+
+  if (needed_count <= set->ulongs_count)
+    return;
+
+  /* realloc larger if needed */
+  hwloc_bitmap_enlarge_by_ulongs(set, needed_count)
;
+
+  /* fill the newly allocated subset depending on the infinite flag */
+  for(i=set->ulongs_count; i<needed_count; i++)
+    set->ulongs[i] = set->infinite ? HWLOC_SUBBITMAP_FULL : HWLOC_SUBBITMAP_ZERO;
+  set->ulongs_count = needed_count;
+}
+
+/* realloc until it contains at least cpu+1 bits */
+#define hwloc_bitmap_realloc_by_cpu_index(set, cpu) hwloc_bitmap_realloc_by_ulongs(set, ((cpu)/HWLOC_BITS_PER_LONG)+1)
+
+/* reset a bitmap to exactely the needed size.
+ * the caller must reinitialize all ulongs and the infinite flag later.
+ */
+static void
+hwloc_bitmap_reset_by_ulongs(struct hwloc_bitmap_s * set, unsigned needed_count)
+{
+  hwloc_bitmap_enlarge_by_ulongs(set, needed_count);
+  set->ulongs_count = needed_count;
+}
+
+/* reset until it contains exactly cpu+1 bits (roundup to a ulong).
+ * the caller must reinitialize all ulongs and the infinite flag later.
+ */
+#define hwloc_bitmap_reset_by_cpu_index(set, cpu) hwloc_bitmap_reset_by_ulongs(set, ((cpu)/HWLOC_BITS_PER_LONG)+1)
+
+/* Deep-copy a bitmap into a freshly allocated one.
+ * Accepts NULL (returns NULL); returns NULL on allocation failure. */
+struct hwloc_bitmap_s * hwloc_bitmap_dup(const struct hwloc_bitmap_s * old)
+{
+  struct hwloc_bitmap_s * new;
+
+  if (!old)
+    return NULL;
+
+  HWLOC__BITMAP_CHECK(old);
+
+  new = malloc(sizeof(struct hwloc_bitmap_s));
+  if (!new)
+    return NULL;
+
+  new->ulongs = malloc(old->ulongs_allocated * sizeof(unsigned long));
+  if (!new->ulongs) {
+    free(new);
+    return NULL;
+  }
+  new->ulongs_allocated = old->ulongs_allocated;
+  new->ulongs_count = old->ulongs_count;
+  memcpy(new->ulongs, old->ulongs, new->ulongs_count * sizeof(unsigned long));
+  new->infinite = old->infinite;
+#ifdef HWLOC_DEBUG
+  new->magic = HWLOC_BITMAP_MAGIC;
+#endif
+  return new;
+}
+
+/* Copy src into dst, resizing dst's ulong array as needed. */
+void hwloc_bitmap_copy(struct hwloc_bitmap_s * dst, const struct hwloc_bitmap_s * src)
+{
+  HWLOC__BITMAP_CHECK(dst);
+  HWLOC__BITMAP_CHECK(src);
+
+  hwloc_bitmap_reset_by_ulongs(dst, src->ulongs_count);
+
+  memcpy(dst->ulongs, src->ulongs, src->ulongs_count * sizeof(unsigned long));
+  dst->infinite = src->infinite;
+}
+
+/* Strings always use 32bit groups */
+#define HWLOC_PRIxSUBBITMAP		"%08lx"
+#define HWLOC_BITMAP_SUBSTRING_SIZE	32
+#define HWLOC_BITMAP_SUBSTRING_LENGTH	(HWLOC_BITMAP_SUBSTRING_SIZE/4)
+#define HWLOC_BITMAP_STRING_PER_LONG	(HWLOC_BITS_PER_LONG/HWLOC_BITMAP_SUBSTRING_SIZE)
+
+/* Print `set` as comma-separated 32-bit hex groups, highest first,
+ * with a "0xf...f" prefix for infinite bitmaps.
+ * Returns the number of characters that would have been written (snprintf
+ * semantics), or -1 on error; truncates to `buflen` when needed. */
+int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buflen, const struct hwloc_bitmap_s * __hwloc_restrict set)
+{
+  ssize_t size = buflen;
+  char *tmp = buf;
+  int res, ret = 0;
+  int needcomma = 0;
+  int i;
+  unsigned long accum = 0;
+  int accumed = 0;
+#if HWLOC_BITS_PER_LONG == HWLOC_BITMAP_SUBSTRING_SIZE
+  const unsigned long accum_mask = ~0UL;
+#else /* HWLOC_BITS_PER_LONG != HWLOC_BITMAP_SUBSTRING_SIZE */
+  const unsigned long accum_mask = ((1UL << HWLOC_BITMAP_SUBSTRING_SIZE) - 1) << (HWLOC_BITS_PER_LONG - HWLOC_BITMAP_SUBSTRING_SIZE);
+#endif /* HWLOC_BITS_PER_LONG != HWLOC_BITMAP_SUBSTRING_SIZE */
+
+  HWLOC__BITMAP_CHECK(set);
+
+  /* mark the end in case we do nothing later */
+  if (buflen > 0)
+    tmp[0] = '\0';
+
+  if (set->infinite) {
+    res = hwloc_snprintf(tmp, size, "0xf...f");
+    needcomma = 1;
+    if (res < 0)
+      return -1;
+    ret += res;
+    /* clamp the advance when the output was truncated */
+    if (res >= size)
+      res = size>0 ? (int)size - 1 : 0;
+    tmp += res;
+    size -= res;
+  }
+
+  i=set->ulongs_count-1;
+
+  if (set->infinite) {
+    /* ignore starting FULL since we have 0xf...f already */
+    while (i>=0 && set->ulongs[i] == HWLOC_SUBBITMAP_FULL)
+      i--;
+  } else {
+    /* ignore starting ZERO except the last one */
+    while (i>=0 && set->ulongs[i] == HWLOC_SUBBITMAP_ZERO)
+      i--;
+  }
+
+  while (i>=0 || accumed) {
+    /* Refill accumulator */
+    if (!accumed) {
+      accum = set->ulongs[i--];
+      accumed = HWLOC_BITS_PER_LONG;
+    }
+
+    if (accum & accum_mask) {
+      /* print the whole subset if not empty */
+        res = hwloc_snprintf(tmp, size, needcomma ? ",0x" HWLOC_PRIxSUBBITMAP : "0x" HWLOC_PRIxSUBBITMAP,
+		     (accum & accum_mask) >> (HWLOC_BITS_PER_LONG - HWLOC_BITMAP_SUBSTRING_SIZE));
+      needcomma = 1;
+    } else if (i == -1 && accumed == HWLOC_BITMAP_SUBSTRING_SIZE) {
+      /* print a single 0 to mark the last subset */
+      res = hwloc_snprintf(tmp, size, needcomma ? ",0x0" : "0x0");
+    } else if (needcomma) {
+      res = hwloc_snprintf(tmp, size, ",");
+    } else {
+      res = 0;
+    }
+    if (res < 0)
+      return -1;
+    ret += res;
+
+/* consume the printed 32-bit group from the accumulator */
+#if HWLOC_BITS_PER_LONG == HWLOC_BITMAP_SUBSTRING_SIZE
+    accum = 0;
+    accumed = 0;
+#else
+    accum <<= HWLOC_BITMAP_SUBSTRING_SIZE;
+    accumed -= HWLOC_BITMAP_SUBSTRING_SIZE;
+#endif
+
+    if (res >= size)
+      res = size>0 ? (int)size - 1 : 0;
+
+    tmp += res;
+    size -= res;
+  }
+
+  /* if didn't display anything, display 0x0 */
+  if (!ret) {
+    res = hwloc_snprintf(tmp, size, "0x0");
+    if (res < 0)
+      return -1;
+    ret += res;
+  }
+
+  return ret;
+}
+
+/* Allocate a string large enough and print `set` into it.
+ * On success *strp points to the malloc'ed string and the length is
+ * returned; on failure returns -1 and *strp is set to NULL.
+ * Fix: the original never checked the sizing call nor the malloc result,
+ * so an allocation failure dereferenced a NULL buffer in the second
+ * hwloc_bitmap_snprintf call. */
+int hwloc_bitmap_asprintf(char ** strp, const struct hwloc_bitmap_s * __hwloc_restrict set)
+{
+  int len;
+  char *buf;
+
+  HWLOC__BITMAP_CHECK(set);
+
+  len = hwloc_bitmap_snprintf(NULL, 0, set);
+  if (len < 0) {
+    *strp = NULL;
+    return -1;
+  }
+  buf = malloc(len+1);
+  if (!buf) {
+    *strp = NULL;
+    return -1;
+  }
+  *strp = buf;
+  return hwloc_bitmap_snprintf(buf, len+1, set);
+}
+
+/* Parse the comma-separated hex format produced by hwloc_bitmap_snprintf
+ * back into `set`. Returns 0 on success, -1 (with `set` zeroed) on a
+ * malformed string. */
+int hwloc_bitmap_sscanf(struct hwloc_bitmap_s *set, const char * __hwloc_restrict string)
+{
+  const char * current = string;
+  unsigned long accum = 0;
+  int count=0;
+  int infinite = 0;
+
+  /* count how many substrings there are */
+  count++;
+  while ((current = strchr(current+1, ',')) != NULL)
+    count++;
+
+  current = string;
+  if (!strncmp("0xf...f", current, 7)) {
+    current += 7;
+    if (*current != ',') {
+      /* special case for infinite/full bitmap */
+      hwloc_bitmap_fill(set);
+      return 0;
+    }
+    current++;
+    infinite = 1;
+    count--;
+  }
+
+  /* size the ulong array for the remaining 32-bit groups */
+  hwloc_bitmap_reset_by_ulongs(set, (count + HWLOC_BITMAP_STRING_PER_LONG - 1) / HWLOC_BITMAP_STRING_PER_LONG);
+  set->infinite = 0;
+
+  while (*current != '\0') {
+    unsigned long val;
+    char *next;
+    val = strtoul(current, &next, 16);
+
+    assert(count > 0);
+    count--;
+
+    /* groups are highest-first: shift each into its slot in the ulong */
+    accum |= (val << ((count * HWLOC_BITMAP_SUBSTRING_SIZE) % HWLOC_BITS_PER_LONG));
+    if (!(count % HWLOC_BITMAP_STRING_PER_LONG)) {
+      set->ulongs[count / HWLOC_BITMAP_STRING_PER_LONG] = accum;
+      accum = 0;
+    }
+
+    if (*next != ',') {
+      if (*next || count > 0)
+	goto failed;
+      else
+	break;
+    }
+    current = (const char*) next+1;
+  }
+
+  set->infinite = infinite; /* set at the end, to avoid spurious realloc with filled new ulongs */
+
+  return 0;
+
+ failed:
+  /* failure to parse */
+  hwloc_bitmap_zero(set);
+  return -1;
+}
+
+/* Print the set as a comma-separated list of ranges, e.g. "0,3-5,8-"
+ * (a trailing "N-" marks an infinitely-set upper part).
+ * Returns the number of characters that were (or would have been) written,
+ * snprintf-style; output is truncated if buflen is too small.
+ */
+int hwloc_bitmap_list_snprintf(char * __hwloc_restrict buf, size_t buflen, const struct hwloc_bitmap_s * __hwloc_restrict set)
+{
+  int prev = -1;
+  hwloc_bitmap_t reverse;
+  ssize_t size = buflen;
+  char *tmp = buf;
+  int res, ret = 0;
+  int needcomma = 0;
+
+  HWLOC__BITMAP_CHECK(set);
+
+  /* the complement is used to find the end of each range of set bits */
+  reverse = hwloc_bitmap_alloc(); /* FIXME: add hwloc_bitmap_alloc_size() + hwloc_bitmap_init_allocated() to avoid malloc? */
+  hwloc_bitmap_not(reverse, set);
+
+  /* mark the end in case we do nothing later */
+  if (buflen > 0)
+    tmp[0] = '\0';
+
+  while (1) {
+    int begin, end;
+
+    /* begin = next set bit; end = next clear bit after it */
+    begin = hwloc_bitmap_next(set, prev);
+    if (begin == -1)
+      break;
+    end = hwloc_bitmap_next(reverse, begin);
+
+    if (end == begin+1) {
+      res = hwloc_snprintf(tmp, size, needcomma ? ",%d" : "%d", begin);
+    } else if (end == -1) {
+      /* no clear bit after begin: range extends to infinity */
+      res = hwloc_snprintf(tmp, size, needcomma ? ",%d-" : "%d-", begin);
+    } else {
+      res = hwloc_snprintf(tmp, size, needcomma ? ",%d-%d" : "%d-%d", begin, end-1);
+    }
+    if (res < 0) {
+      hwloc_bitmap_free(reverse);
+      return -1;
+    }
+    ret += res;
+
+    /* clamp res so tmp/size stay within the buffer after truncation */
+    if (res >= size)
+      res = size>0 ? (int)size - 1 : 0;
+
+    tmp += res;
+    size -= res;
+    needcomma = 1;
+
+    if (end == -1)
+      break;
+    else
+      prev = end - 1;
+  }
+
+  hwloc_bitmap_free(reverse);
+
+  return ret;
+}
+
+/* Same as hwloc_bitmap_list_snprintf() but into a newly allocated string.
+ * On success, stores the malloc'ed buffer in *strp (caller must free it)
+ * and returns the number of characters written.
+ * Returns -1 and leaves *strp untouched on length-computation or allocation
+ * failure, instead of writing through a NULL buffer as the original did.
+ */
+int hwloc_bitmap_list_asprintf(char ** strp, const struct hwloc_bitmap_s * __hwloc_restrict set)
+{
+  int len;
+  char *buf;
+
+  HWLOC__BITMAP_CHECK(set);
+
+  /* first pass with a NULL buffer only computes the required length */
+  len = hwloc_bitmap_list_snprintf(NULL, 0, set);
+  if (len < 0)
+    return -1;
+  buf = malloc(len+1);
+  if (!buf)
+    return -1;
+  *strp = buf;
+  return hwloc_bitmap_list_snprintf(buf, len+1, set);
+}
+
+/* Parse the comma-separated range-list format produced by
+ * hwloc_bitmap_list_snprintf(), e.g. "0,3-5,8-" ("N-" means set to
+ * infinity). Returns 0 on success, -1 on parse error (set is zeroed).
+ */
+int hwloc_bitmap_list_sscanf(struct hwloc_bitmap_s *set, const char * __hwloc_restrict string)
+{
+  const char * current = string;
+  char *next;
+  long begin = -1, val;
+
+  hwloc_bitmap_zero(set);
+
+  while (*current != '\0') {
+
+    /* ignore empty ranges */
+    while (*current == ',')
+      current++;
+
+    val = strtoul(current, &next, 0);
+    /* make sure we got at least one digit */
+    if (next == current)
+      goto failed;
+
+    if (begin != -1) {
+      /* finishing a range */
+      hwloc_bitmap_set_range(set, begin, val);
+      begin = -1;
+
+    } else if (*next == '-') {
+      /* starting a new range */
+      if (*(next+1) == '\0') {
+	/* infinite range */
+	hwloc_bitmap_set_range(set, val, -1);
+        break;
+      } else {
+	/* normal range */
+	begin = val;
+      }
+
+    } else if (*next == ',' || *next == '\0') {
+      /* single digit */
+      hwloc_bitmap_set(set, val);
+    }
+
+    if (*next == '\0')
+      break;
+    current = next+1;
+  }
+
+  return 0;
+
+ failed:
+  /* failure to parse */
+  hwloc_bitmap_zero(set);
+  return -1;
+}
+
+/* Print the set as a single hexadecimal number in taskset(1) style,
+ * most-significant word first, optionally prefixed by "0xf...f" when the
+ * upper part is infinitely set. Returns the number of characters that
+ * were (or would have been) written, snprintf-style.
+ */
+int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, size_t buflen, const struct hwloc_bitmap_s * __hwloc_restrict set)
+{
+  ssize_t size = buflen;
+  char *tmp = buf;
+  int res, ret = 0;
+  int started = 0;
+  int i;
+
+  HWLOC__BITMAP_CHECK(set);
+
+  /* mark the end in case we do nothing later */
+  if (buflen > 0)
+    tmp[0] = '\0';
+
+  if (set->infinite) {
+    res = hwloc_snprintf(tmp, size, "0xf...f");
+    started = 1;
+    if (res < 0)
+      return -1;
+    ret += res;
+    /* clamp res so tmp/size stay within the buffer after truncation */
+    if (res >= size)
+      res = size>0 ? (int)size - 1 : 0;
+    tmp += res;
+    size -= res;
+  }
+
+  i=set->ulongs_count-1;
+
+  if (set->infinite) {
+    /* ignore starting FULL since we have 0xf...f already */
+    while (i>=0 && set->ulongs[i] == HWLOC_SUBBITMAP_FULL)
+      i--;
+  } else {
+    /* ignore starting ZERO except the last one */
+    while (i>=1 && set->ulongs[i] == HWLOC_SUBBITMAP_ZERO)
+      i--;
+  }
+
+  while (i>=0) {
+    unsigned long val = set->ulongs[i--];
+    if (started) {
+      /* print the whole subset */
+#if HWLOC_BITS_PER_LONG == 64
+      res = hwloc_snprintf(tmp, size, "%016lx", val);
+#else
+      res = hwloc_snprintf(tmp, size, "%08lx", val);
+#endif
+    } else if (val || i == -1) {
+      /* first printed word: no zero-padding, but always print the last one */
+      res = hwloc_snprintf(tmp, size, "0x%lx", val);
+      started = 1;
+    } else {
+      res = 0;
+    }
+    if (res < 0)
+      return -1;
+    ret += res;
+    if (res >= size)
+      res = size>0 ? (int)size - 1 : 0;
+    tmp += res;
+    size -= res;
+  }
+
+  /* if didn't display anything, display 0x0 */
+  if (!ret) {
+    res = hwloc_snprintf(tmp, size, "0x0");
+    if (res < 0)
+      return -1;
+    ret += res;
+  }
+
+  return ret;
+}
+
+/* Same as hwloc_bitmap_taskset_snprintf() but into a newly allocated string.
+ * On success, stores the malloc'ed buffer in *strp (caller must free it)
+ * and returns the number of characters written.
+ * Returns -1 and leaves *strp untouched on length-computation or allocation
+ * failure, instead of writing through a NULL buffer as the original did.
+ */
+int hwloc_bitmap_taskset_asprintf(char ** strp, const struct hwloc_bitmap_s * __hwloc_restrict set)
+{
+  int len;
+  char *buf;
+
+  HWLOC__BITMAP_CHECK(set);
+
+  /* first pass with a NULL buffer only computes the required length */
+  len = hwloc_bitmap_taskset_snprintf(NULL, 0, set);
+  if (len < 0)
+    return -1;
+  buf = malloc(len+1);
+  if (!buf)
+    return -1;
+  *strp = buf;
+  return hwloc_bitmap_taskset_snprintf(buf, len+1, set);
+}
+
+/* Parse the taskset(1)-style single hexadecimal number produced by
+ * hwloc_bitmap_taskset_snprintf(), optionally prefixed by "0xf...f" for
+ * an infinitely-set upper part. Returns 0 on success, -1 on parse error
+ * (set is zeroed then).
+ */
+int hwloc_bitmap_taskset_sscanf(struct hwloc_bitmap_s *set, const char * __hwloc_restrict string)
+{
+  const char * current = string;
+  int chars;
+  int count;
+  int infinite = 0;
+
+  if (!strncmp("0xf...f", current, 7)) {
+    /* infinite bitmap */
+    infinite = 1;
+    current += 7;
+    if (*current == '\0') {
+      /* special case for infinite/full bitmap */
+      hwloc_bitmap_fill(set);
+      return 0;
+    }
+  } else {
+    /* finite bitmap */
+    if (!strncmp("0x", current, 2))
+      current += 2;
+    if (*current == '\0') {
+      /* special case for empty bitmap */
+      hwloc_bitmap_zero(set);
+      return 0;
+    }
+  }
+  /* we know there are other characters now */
+
+  /* 4 bits per hex digit; round up to whole ulongs */
+  chars = (int)strlen(current);
+  count = (chars * 4 + HWLOC_BITS_PER_LONG - 1) / HWLOC_BITS_PER_LONG;
+
+  hwloc_bitmap_reset_by_ulongs(set, count);
+  set->infinite = 0;
+
+  while (*current != '\0') {
+    int tmpchars;
+    char ustr[17];
+    unsigned long val;
+    char *next;
+
+    /* the first (most-significant) chunk may be shorter than a full ulong */
+    tmpchars = chars % (HWLOC_BITS_PER_LONG/4);
+    if (!tmpchars)
+      tmpchars = (HWLOC_BITS_PER_LONG/4);
+
+    memcpy(ustr, current, tmpchars);
+    ustr[tmpchars] = '\0';
+    val = strtoul(ustr, &next, 16);
+    if (*next != '\0')
+      goto failed;
+
+    set->ulongs[count-1] = val;
+
+    current += tmpchars;
+    chars -= tmpchars;
+    count--;
+  }
+
+  set->infinite = infinite; /* set at the end, to avoid spurious realloc with filled new ulongs */
+
+  return 0;
+
+ failed:
+  /* failure to parse */
+  hwloc_bitmap_zero(set);
+  return -1;
+}
+
+/* Internal helper: clear every allocated word and the infinite flag,
+ * without touching the allocation itself. */
+static void hwloc_bitmap__zero(struct hwloc_bitmap_s *set)
+{
+	unsigned idx = set->ulongs_count;
+	while (idx-- > 0)
+		set->ulongs[idx] = HWLOC_SUBBITMAP_ZERO;
+	set->infinite = 0;
+}
+
+/* Empty the bitmap: shrink it back to a single word and clear everything. */
+void hwloc_bitmap_zero(struct hwloc_bitmap_s * set)
+{
+	HWLOC__BITMAP_CHECK(set);
+
+	hwloc_bitmap_reset_by_ulongs(set, 1);
+	hwloc_bitmap__zero(set);
+}
+
+/* Internal helper: set every allocated word and mark the trailing part
+ * as infinitely set, without touching the allocation itself. */
+static void hwloc_bitmap__fill(struct hwloc_bitmap_s * set)
+{
+	unsigned idx = set->ulongs_count;
+	while (idx-- > 0)
+		set->ulongs[idx] = HWLOC_SUBBITMAP_FULL;
+	set->infinite = 1;
+}
+
+/* Fill the bitmap completely: shrink to a single word and set everything,
+ * including the infinite upper part. */
+void hwloc_bitmap_fill(struct hwloc_bitmap_s * set)
+{
+	HWLOC__BITMAP_CHECK(set);
+
+	hwloc_bitmap_reset_by_ulongs(set, 1);
+	hwloc_bitmap__fill(set);
+}
+
+/* Reinitialize the bitmap so that exactly the bits of mask are set
+ * (mask becomes the first and only meaningful word). */
+void hwloc_bitmap_from_ulong(struct hwloc_bitmap_s *set, unsigned long mask)
+{
+	HWLOC__BITMAP_CHECK(set);
+
+	hwloc_bitmap_reset_by_ulongs(set, 1);
+	set->ulongs[0] = mask; /* there's always at least one ulong allocated */
+	set->infinite = 0;
+}
+
+/* Reinitialize the bitmap as: words 0..i-1 all zero, word i equal to mask,
+ * nothing set beyond. */
+void hwloc_bitmap_from_ith_ulong(struct hwloc_bitmap_s *set, unsigned i, unsigned long mask)
+{
+	unsigned k;
+
+	HWLOC__BITMAP_CHECK(set);
+
+	hwloc_bitmap_reset_by_ulongs(set, i+1);
+	for(k=0; k<i; k++)
+		set->ulongs[k] = HWLOC_SUBBITMAP_ZERO;
+	set->ulongs[i] = mask;
+	set->infinite = 0;
+}
+
+/* Return the first (least-significant) word of the bitmap. */
+unsigned long hwloc_bitmap_to_ulong(const struct hwloc_bitmap_s *set)
+{
+	HWLOC__BITMAP_CHECK(set);
+
+	return set->ulongs[0]; /* there's always at least one ulong allocated */
+}
+
+/* Return the i-th word of the bitmap.
+ * NOTE(review): HWLOC_SUBBITMAP_READULONG is defined elsewhere; it
+ * presumably synthesizes words past the allocated array from the
+ * infinite flag — confirm against its definition. */
+unsigned long hwloc_bitmap_to_ith_ulong(const struct hwloc_bitmap_s *set, unsigned i)
+{
+	HWLOC__BITMAP_CHECK(set);
+
+	return HWLOC_SUBBITMAP_READULONG(set, i);
+}
+
+/* Reinitialize the bitmap so that only bit `cpu` is set. */
+void hwloc_bitmap_only(struct hwloc_bitmap_s * set, unsigned cpu)
+{
+	unsigned index_ = HWLOC_SUBBITMAP_INDEX(cpu);
+
+	HWLOC__BITMAP_CHECK(set);
+
+	/* resize to cover cpu, clear everything, then set the single bit */
+	hwloc_bitmap_reset_by_cpu_index(set, cpu);
+	hwloc_bitmap__zero(set);
+	set->ulongs[index_] |= HWLOC_SUBBITMAP_CPU(cpu);
+}
+
+/* Reinitialize the bitmap so that every bit except `cpu` is set
+ * (including the infinite upper part). */
+void hwloc_bitmap_allbut(struct hwloc_bitmap_s * set, unsigned cpu)
+{
+	unsigned index_ = HWLOC_SUBBITMAP_INDEX(cpu);
+
+	HWLOC__BITMAP_CHECK(set);
+
+	/* resize to cover cpu, set everything, then clear the single bit */
+	hwloc_bitmap_reset_by_cpu_index(set, cpu);
+	hwloc_bitmap__fill(set);
+	set->ulongs[index_] &= ~HWLOC_SUBBITMAP_CPU(cpu);
+}
+
+/* Set bit `cpu`, growing the allocated words if needed. */
+void hwloc_bitmap_set(struct hwloc_bitmap_s * set, unsigned cpu)
+{
+	unsigned index_ = HWLOC_SUBBITMAP_INDEX(cpu);
+
+	HWLOC__BITMAP_CHECK(set);
+
+	/* nothing to do if setting inside the infinite part of the bitmap */
+	if (set->infinite && cpu >= set->ulongs_count * HWLOC_BITS_PER_LONG)
+		return;
+
+	hwloc_bitmap_realloc_by_cpu_index(set, cpu);
+	set->ulongs[index_] |= HWLOC_SUBBITMAP_CPU(cpu);
+}
+
+/* Set all bits in [begincpu, _endcpu]; _endcpu == -1 means "to infinity"
+ * (the bitmap's infinite flag is raised). Out-of-order ranges are ignored. */
+void hwloc_bitmap_set_range(struct hwloc_bitmap_s * set, unsigned begincpu, int _endcpu)
+{
+	unsigned i;
+	unsigned beginset,endset;
+	unsigned endcpu = (unsigned) _endcpu;
+
+	HWLOC__BITMAP_CHECK(set);
+
+	/* note: endcpu is the unsigned view of _endcpu, so -1 becomes UINT_MAX
+	 * and passes this check for the infinite case */
+	if (endcpu < begincpu)
+		return;
+	if (set->infinite && begincpu >= set->ulongs_count * HWLOC_BITS_PER_LONG)
+		/* setting only in the already-set infinite part, nothing to do */
+		return;
+
+	if (_endcpu == -1) {
+		/* infinite range */
+
+		/* make sure we can play with the ulong that contains begincpu */
+		hwloc_bitmap_realloc_by_cpu_index(set, begincpu);
+		/* update the ulong that contains begincpu */
+		beginset = HWLOC_SUBBITMAP_INDEX(begincpu);
+		set->ulongs[beginset] |= HWLOC_SUBBITMAP_ULBIT_FROM(HWLOC_SUBBITMAP_CPU_ULBIT(begincpu));
+		/* set ulongs after begincpu if any already allocated */
+		for(i=beginset+1; i<set->ulongs_count; i++)
+			set->ulongs[i] = HWLOC_SUBBITMAP_FULL;
+		/* mark the infinity as set */
+		set->infinite = 1;
+	} else {
+		/* finite range */
+
+		/* ignore the part of the range that overlaps with the already-set infinite part */
+		if (set->infinite && endcpu >= set->ulongs_count * HWLOC_BITS_PER_LONG)
+			endcpu = set->ulongs_count * HWLOC_BITS_PER_LONG - 1;
+		/* make sure we can play with the ulongs that contain begincpu and endcpu */
+		hwloc_bitmap_realloc_by_cpu_index(set, endcpu);
+		/* update first and last ulongs */
+		beginset = HWLOC_SUBBITMAP_INDEX(begincpu);
+		endset = HWLOC_SUBBITMAP_INDEX(endcpu);
+		if (beginset == endset) {
+			set->ulongs[beginset] |= HWLOC_SUBBITMAP_ULBIT_FROMTO(HWLOC_SUBBITMAP_CPU_ULBIT(begincpu), HWLOC_SUBBITMAP_CPU_ULBIT(endcpu));
+		} else {
+			set->ulongs[beginset] |= HWLOC_SUBBITMAP_ULBIT_FROM(HWLOC_SUBBITMAP_CPU_ULBIT(begincpu));
+			set->ulongs[endset] |= HWLOC_SUBBITMAP_ULBIT_TO(HWLOC_SUBBITMAP_CPU_ULBIT(endcpu));
+		}
+		/* set ulongs in the middle of the range */
+		for(i=beginset+1; i<endset; i++)
+			set->ulongs[i] = HWLOC_SUBBITMAP_FULL;
+	}
+}
+
+/* Overwrite the i-th word with mask, growing the allocation if needed.
+ * The infinite flag is left unchanged. */
+void hwloc_bitmap_set_ith_ulong(struct hwloc_bitmap_s *set, unsigned i, unsigned long mask)
+{
+	HWLOC__BITMAP_CHECK(set);
+
+	hwloc_bitmap_realloc_by_ulongs(set, i+1);
+	set->ulongs[i] = mask;
+}
+
+/* Clear bit `cpu`, growing the allocated words if needed (to punch a
+ * hole in the infinite part). */
+void hwloc_bitmap_clr(struct hwloc_bitmap_s * set, unsigned cpu)
+{
+	unsigned index_ = HWLOC_SUBBITMAP_INDEX(cpu);
+
+	HWLOC__BITMAP_CHECK(set);
+
+	/* nothing to do if clearing inside the infinitely-unset part of the bitmap */
+	if (!set->infinite && cpu >= set->ulongs_count * HWLOC_BITS_PER_LONG)
+		return;
+
+	hwloc_bitmap_realloc_by_cpu_index(set, cpu);
+	set->ulongs[index_] &= ~HWLOC_SUBBITMAP_CPU(cpu);
+}
+
+/* Clear all bits in [begincpu, _endcpu]; _endcpu == -1 means "to infinity"
+ * (the bitmap's infinite flag is dropped). Out-of-order ranges are ignored. */
+void hwloc_bitmap_clr_range(struct hwloc_bitmap_s * set, unsigned begincpu, int _endcpu)
+{
+	unsigned i;
+	unsigned beginset,endset;
+	unsigned endcpu = (unsigned) _endcpu;
+
+	HWLOC__BITMAP_CHECK(set);
+
+	/* note: endcpu is the unsigned view of _endcpu, so -1 becomes UINT_MAX
+	 * and passes this check for the infinite case */
+	if (endcpu < begincpu)
+		return;
+
+	if (!set->infinite && begincpu >= set->ulongs_count * HWLOC_BITS_PER_LONG)
+		/* clearing only in the already-unset infinite part, nothing to do */
+		return;
+
+	if (_endcpu == -1) {
+		/* infinite range */
+
+		/* make sure we can play with the ulong that contains begincpu */
+		hwloc_bitmap_realloc_by_cpu_index(set, begincpu);
+		/* update the ulong that contains begincpu */
+		beginset = HWLOC_SUBBITMAP_INDEX(begincpu);
+		set->ulongs[beginset] &= ~HWLOC_SUBBITMAP_ULBIT_FROM(HWLOC_SUBBITMAP_CPU_ULBIT(begincpu));
+		/* clear ulong after begincpu if any already allocated */
+		for(i=beginset+1; i<set->ulongs_count; i++)
+			set->ulongs[i] = HWLOC_SUBBITMAP_ZERO;
+		/* mark the infinity as unset */
+		set->infinite = 0;
+	} else {
+		/* finite range */
+
+		/* ignore the part of the range that overlaps with the already-unset infinite part */
+		if (!set->infinite && endcpu >= set->ulongs_count * HWLOC_BITS_PER_LONG)
+			endcpu = set->ulongs_count * HWLOC_BITS_PER_LONG - 1;
+		/* make sure we can play with the ulongs that contain begincpu and endcpu */
+		hwloc_bitmap_realloc_by_cpu_index(set, endcpu);
+		/* update first and last ulongs */
+		beginset = HWLOC_SUBBITMAP_INDEX(begincpu);
+		endset = HWLOC_SUBBITMAP_INDEX(endcpu);
+		if (beginset == endset) {
+			set->ulongs[beginset] &= ~HWLOC_SUBBITMAP_ULBIT_FROMTO(HWLOC_SUBBITMAP_CPU_ULBIT(begincpu), HWLOC_SUBBITMAP_CPU_ULBIT(endcpu));
+		} else {
+			set->ulongs[beginset] &= ~HWLOC_SUBBITMAP_ULBIT_FROM(HWLOC_SUBBITMAP_CPU_ULBIT(begincpu));
+			set->ulongs[endset] &= ~HWLOC_SUBBITMAP_ULBIT_TO(HWLOC_SUBBITMAP_CPU_ULBIT(endcpu));
+		}
+		/* clear ulongs in the middle of the range */
+		for(i=beginset+1; i<endset; i++)
+			set->ulongs[i] = HWLOC_SUBBITMAP_ZERO;
+	}
+}
+
+/* Return 1 if bit `cpu` is set, 0 otherwise (words past the allocation
+ * are read via HWLOC_SUBBITMAP_READULONG). */
+int hwloc_bitmap_isset(const struct hwloc_bitmap_s * set, unsigned cpu)
+{
+	unsigned index_ = HWLOC_SUBBITMAP_INDEX(cpu);
+
+	HWLOC__BITMAP_CHECK(set);
+
+	return (HWLOC_SUBBITMAP_READULONG(set, index_) & HWLOC_SUBBITMAP_CPU(cpu)) != 0;
+}
+
+/* Return 1 if no bit at all is set, 0 otherwise. */
+int hwloc_bitmap_iszero(const struct hwloc_bitmap_s *set)
+{
+	unsigned idx;
+
+	HWLOC__BITMAP_CHECK(set);
+
+	/* an infinite bitmap has bits set beyond the allocated words */
+	if (set->infinite)
+		return 0;
+	/* empty iff every allocated word is zero */
+	for(idx=set->ulongs_count; idx>0; idx--)
+		if (set->ulongs[idx-1] != HWLOC_SUBBITMAP_ZERO)
+			return 0;
+	return 1;
+}
+
+/* Return 1 if every bit (up to infinity) is set, 0 otherwise. */
+int hwloc_bitmap_isfull(const struct hwloc_bitmap_s *set)
+{
+	unsigned idx;
+
+	HWLOC__BITMAP_CHECK(set);
+
+	/* a finite bitmap cannot be full */
+	if (!set->infinite)
+		return 0;
+	/* full iff every allocated word is all-ones too */
+	for(idx=set->ulongs_count; idx>0; idx--)
+		if (set->ulongs[idx-1] != HWLOC_SUBBITMAP_FULL)
+			return 0;
+	return 1;
+}
+
+/* Return 1 if both bitmaps contain exactly the same bits, independently
+ * of how many words each has allocated. */
+int hwloc_bitmap_isequal (const struct hwloc_bitmap_s *set1, const struct hwloc_bitmap_s *set2)
+{
+	unsigned count1 = set1->ulongs_count;
+	unsigned count2 = set2->ulongs_count;
+	unsigned min_count = count1 < count2 ? count1 : count2;
+	unsigned i;
+
+	HWLOC__BITMAP_CHECK(set1);
+	HWLOC__BITMAP_CHECK(set2);
+
+	/* compare the words both have allocated */
+	for(i=0; i<min_count; i++)
+		if (set1->ulongs[i] != set2->ulongs[i])
+			return 0;
+
+	if (count1 != count2) {
+		/* the longer bitmap's extra words must match the shorter's
+		 * virtual words (FULL if infinite, ZERO otherwise) */
+		unsigned long w1 = set1->infinite ? HWLOC_SUBBITMAP_FULL : HWLOC_SUBBITMAP_ZERO;
+		unsigned long w2 = set2->infinite ? HWLOC_SUBBITMAP_FULL : HWLOC_SUBBITMAP_ZERO;
+		for(i=min_count; i<count1; i++) {
+			if (set1->ulongs[i] != w2)
+				return 0;
+		}
+		for(i=min_count; i<count2; i++) {
+			if (set2->ulongs[i] != w1)
+				return 0;
+		}
+	}
+
+	if (set1->infinite != set2->infinite)
+		return 0;
+
+	return 1;
+}
+
+/* Return 1 if the two bitmaps have at least one set bit in common. */
+int hwloc_bitmap_intersects (const struct hwloc_bitmap_s *set1, const struct hwloc_bitmap_s *set2)
+{
+	unsigned count1 = set1->ulongs_count;
+	unsigned count2 = set2->ulongs_count;
+	unsigned min_count = count1 < count2 ? count1 : count2;
+	unsigned i;
+
+	HWLOC__BITMAP_CHECK(set1);
+	HWLOC__BITMAP_CHECK(set2);
+
+	/* overlap within the words both have allocated */
+	for(i=0; i<min_count; i++)
+		if (set1->ulongs[i] & set2->ulongs[i])
+			return 1;
+
+	if (count1 != count2) {
+		/* extra words of the longer set intersect the other's
+		 * infinite part if any of them is non-zero */
+		if (set2->infinite) {
+			for(i=min_count; i<set1->ulongs_count; i++)
+				if (set1->ulongs[i])
+					return 1;
+		}
+		if (set1->infinite) {
+			for(i=min_count; i<set2->ulongs_count; i++)
+				if (set2->ulongs[i])
+					return 1;
+		}
+	}
+
+	/* two infinite parts always overlap */
+	if (set1->infinite && set2->infinite)
+		return 1;
+
+	return 0;
+}
+
+/* Return 1 if every bit of sub_set is also set in super_set. */
+int hwloc_bitmap_isincluded (const struct hwloc_bitmap_s *sub_set, const struct hwloc_bitmap_s *super_set)
+{
+	unsigned super_count = super_set->ulongs_count;
+	unsigned sub_count = sub_set->ulongs_count;
+	unsigned min_count = super_count < sub_count ? super_count : sub_count;
+	unsigned i;
+
+	HWLOC__BITMAP_CHECK(sub_set);
+	HWLOC__BITMAP_CHECK(super_set);
+
+	/* inclusion word by word: super | sub must not add bits to super */
+	for(i=0; i<min_count; i++)
+		if (super_set->ulongs[i] != (super_set->ulongs[i] | sub_set->ulongs[i]))
+			return 0;
+
+	if (super_count != sub_count) {
+		/* sub's extra words must be empty unless super is infinite */
+		if (!super_set->infinite)
+			for(i=min_count; i<sub_count; i++)
+				if (sub_set->ulongs[i])
+					return 0;
+		/* super's extra words must be full if sub is infinite there */
+		if (sub_set->infinite)
+			for(i=min_count; i<super_count; i++)
+				if (super_set->ulongs[i] != HWLOC_SUBBITMAP_FULL)
+					return 0;
+	}
+
+	if (sub_set->infinite && !super_set->infinite)
+		return 0;
+
+	return 1;
+}
+
+/* res = set1 | set2. res may alias set1 and/or set2. */
+void hwloc_bitmap_or (struct hwloc_bitmap_s *res, const struct hwloc_bitmap_s *set1, const struct hwloc_bitmap_s *set2)
+{
+	/* cache counts so that we can reset res even if it's also set1 or set2 */
+	unsigned count1 = set1->ulongs_count;
+	unsigned count2 = set2->ulongs_count;
+	unsigned max_count = count1 > count2 ? count1 : count2;
+	unsigned min_count = count1 + count2 - max_count;
+	unsigned i;
+
+	HWLOC__BITMAP_CHECK(res);
+	HWLOC__BITMAP_CHECK(set1);
+	HWLOC__BITMAP_CHECK(set2);
+
+	hwloc_bitmap_reset_by_ulongs(res, max_count);
+
+	for(i=0; i<min_count; i++)
+		res->ulongs[i] = set1->ulongs[i] | set2->ulongs[i];
+
+	if (count1 != count2) {
+		if (min_count < count1) {
+			/* set2 is shorter: its virtual words are FULL if
+			 * infinite (result covered by res->infinite below,
+			 * so drop the extra words), else copy set1's words */
+			if (set2->infinite) {
+				res->ulongs_count = min_count;
+			} else {
+				for(i=min_count; i<max_count; i++)
+					res->ulongs[i] = set1->ulongs[i];
+			}
+		} else {
+			/* symmetric case: set1 is shorter */
+			if (set1->infinite) {
+				res->ulongs_count = min_count;
+			} else {
+				for(i=min_count; i<max_count; i++)
+					res->ulongs[i] = set2->ulongs[i];
+			}
+		}
+	}
+
+	res->infinite = set1->infinite || set2->infinite;
+}
+
+/* res = set1 & set2. res may alias set1 and/or set2. */
+void hwloc_bitmap_and (struct hwloc_bitmap_s *res, const struct hwloc_bitmap_s *set1, const struct hwloc_bitmap_s *set2)
+{
+	/* cache counts so that we can reset res even if it's also set1 or set2 */
+	unsigned count1 = set1->ulongs_count;
+	unsigned count2 = set2->ulongs_count;
+	unsigned max_count = count1 > count2 ? count1 : count2;
+	unsigned min_count = count1 + count2 - max_count;
+	unsigned i;
+
+	HWLOC__BITMAP_CHECK(res);
+	HWLOC__BITMAP_CHECK(set1);
+	HWLOC__BITMAP_CHECK(set2);
+
+	hwloc_bitmap_reset_by_ulongs(res, max_count);
+
+	for(i=0; i<min_count; i++)
+		res->ulongs[i] = set1->ulongs[i] & set2->ulongs[i];
+
+	if (count1 != count2) {
+		if (min_count < count1) {
+			/* set2 is shorter: its virtual words are FULL if
+			 * infinite (keep set1's words), else ZERO (result
+			 * words would be ZERO, so drop them) */
+			if (set2->infinite) {
+				for(i=min_count; i<max_count; i++)
+					res->ulongs[i] = set1->ulongs[i];
+			} else {
+				res->ulongs_count = min_count;
+			}
+		} else {
+			/* symmetric case: set1 is shorter */
+			if (set1->infinite) {
+				for(i=min_count; i<max_count; i++)
+					res->ulongs[i] = set2->ulongs[i];
+			} else {
+				res->ulongs_count = min_count;
+			}
+		}
+	}
+
+	res->infinite = set1->infinite && set2->infinite;
+}
+
+/* res = set1 & ~set2. res may alias set1 and/or set2. */
+void hwloc_bitmap_andnot (struct hwloc_bitmap_s *res, const struct hwloc_bitmap_s *set1, const struct hwloc_bitmap_s *set2)
+{
+	/* cache counts so that we can reset res even if it's also set1 or set2 */
+	unsigned count1 = set1->ulongs_count;
+	unsigned count2 = set2->ulongs_count;
+	unsigned max_count = count1 > count2 ? count1 : count2;
+	unsigned min_count = count1 + count2 - max_count;
+	unsigned i;
+
+	HWLOC__BITMAP_CHECK(res);
+	HWLOC__BITMAP_CHECK(set1);
+	HWLOC__BITMAP_CHECK(set2);
+
+	hwloc_bitmap_reset_by_ulongs(res, max_count);
+
+	for(i=0; i<min_count; i++)
+		res->ulongs[i] = set1->ulongs[i] & ~set2->ulongs[i];
+
+	if (count1 != count2) {
+		if (min_count < count1) {
+			/* set2 is shorter: its virtual words are ZERO unless
+			 * infinite; ZERO keeps set1's words, FULL clears them
+			 * (so drop the extra words) */
+			if (!set2->infinite) {
+				for(i=min_count; i<max_count; i++)
+					res->ulongs[i] = set1->ulongs[i];
+			} else {
+				res->ulongs_count = min_count;
+			}
+		} else {
+			/* set1 is shorter: FULL virtual words keep ~set2,
+			 * ZERO ones clear everything (drop the extra words) */
+			if (set1->infinite) {
+				for(i=min_count; i<max_count; i++)
+					res->ulongs[i] = ~set2->ulongs[i];
+			} else {
+				res->ulongs_count = min_count;
+			}
+		}
+	}
+
+	res->infinite = set1->infinite && !set2->infinite;
+}
+
+/* res = set1 ^ set2. res may alias set1 and/or set2. */
+void hwloc_bitmap_xor (struct hwloc_bitmap_s *res, const struct hwloc_bitmap_s *set1, const struct hwloc_bitmap_s *set2)
+{
+	/* cache counts so that we can reset res even if it's also set1 or set2 */
+	unsigned count1 = set1->ulongs_count;
+	unsigned count2 = set2->ulongs_count;
+	unsigned max_count = count1 > count2 ? count1 : count2;
+	unsigned min_count = count1 + count2 - max_count;
+	unsigned i;
+
+	HWLOC__BITMAP_CHECK(res);
+	HWLOC__BITMAP_CHECK(set1);
+	HWLOC__BITMAP_CHECK(set2);
+
+	hwloc_bitmap_reset_by_ulongs(res, max_count);
+
+	for(i=0; i<min_count; i++)
+		res->ulongs[i] = set1->ulongs[i] ^ set2->ulongs[i];
+
+	if (count1 != count2) {
+		/* xor the longer set's extra words with the shorter set's
+		 * virtual words (FULL if infinite, ZERO otherwise) */
+		if (min_count < count1) {
+			unsigned long w2 = set2->infinite ? HWLOC_SUBBITMAP_FULL : HWLOC_SUBBITMAP_ZERO;
+			for(i=min_count; i<max_count; i++)
+				res->ulongs[i] = set1->ulongs[i] ^ w2;
+		} else {
+			unsigned long w1 = set1->infinite ? HWLOC_SUBBITMAP_FULL : HWLOC_SUBBITMAP_ZERO;
+			for(i=min_count; i<max_count; i++)
+				res->ulongs[i] = set2->ulongs[i] ^ w1;
+		}
+	}
+
+	/* infinite iff exactly one operand is infinite */
+	res->infinite = (!set1->infinite) != (!set2->infinite);
+}
+
+/* res = ~set (complement, including the infinite part). res may alias set. */
+void hwloc_bitmap_not (struct hwloc_bitmap_s *res, const struct hwloc_bitmap_s *set)
+{
+	unsigned count = set->ulongs_count;
+	unsigned i;
+
+	HWLOC__BITMAP_CHECK(res);
+	HWLOC__BITMAP_CHECK(set);
+
+	hwloc_bitmap_reset_by_ulongs(res, count);
+
+	for(i=0; i<count; i++)
+		res->ulongs[i] = ~set->ulongs[i];
+
+	res->infinite = !set->infinite;
+}
+
+/* Return the index of the lowest set bit, or -1 if the bitmap is empty.
+ * If only the infinite part is set, returns the first bit past the
+ * allocated words. */
+int hwloc_bitmap_first(const struct hwloc_bitmap_s * set)
+{
+	unsigned i;
+
+	HWLOC__BITMAP_CHECK(set);
+
+	for(i=0; i<set->ulongs_count; i++) {
+		/* subsets are unsigned longs, use ffsl */
+		unsigned long w = set->ulongs[i];
+		if (w)
+			return hwloc_ffsl(w) - 1 + HWLOC_BITS_PER_LONG*i;
+	}
+
+	if (set->infinite)
+		return set->ulongs_count * HWLOC_BITS_PER_LONG;
+
+	return -1;
+}
+
+/* Return the index of the highest set bit, or -1 if the bitmap is empty
+ * or infinitely set (no highest bit exists then). */
+int hwloc_bitmap_last(const struct hwloc_bitmap_s * set)
+{
+	int i;
+
+	HWLOC__BITMAP_CHECK(set);
+
+	if (set->infinite)
+		return -1;
+
+	for(i=set->ulongs_count-1; i>=0; i--) {
+		/* subsets are unsigned longs, use flsl */
+		unsigned long w = set->ulongs[i];
+		if (w)
+			return hwloc_flsl(w) - 1 + HWLOC_BITS_PER_LONG*i;
+	}
+
+	return -1;
+}
+
+/* Return the index of the first set bit strictly after prev_cpu
+ * (pass -1 to start from the beginning), or -1 if there is none. */
+int hwloc_bitmap_next(const struct hwloc_bitmap_s * set, int prev_cpu)
+{
+	unsigned i = HWLOC_SUBBITMAP_INDEX(prev_cpu + 1);
+
+	HWLOC__BITMAP_CHECK(set);
+
+	/* already past the allocated words: only the infinite part remains */
+	if (i >= set->ulongs_count) {
+		if (set->infinite)
+			return prev_cpu + 1;
+		else
+			return -1;
+	}
+
+	for(; i<set->ulongs_count; i++) {
+		/* subsets are unsigned longs, use ffsl */
+		unsigned long w = set->ulongs[i];
+
+		/* if the prev cpu is in the same word as the possible next one,
+		   we need to mask out previous cpus */
+		if (prev_cpu >= 0 && HWLOC_SUBBITMAP_INDEX((unsigned) prev_cpu) == i)
+			w &= ~HWLOC_SUBBITMAP_ULBIT_TO(HWLOC_SUBBITMAP_CPU_ULBIT(prev_cpu));
+
+		if (w)
+			return hwloc_ffsl(w) - 1 + HWLOC_BITS_PER_LONG*i;
+	}
+
+	if (set->infinite)
+		return set->ulongs_count * HWLOC_BITS_PER_LONG;
+
+	return -1;
+}
+
+/* Keep only the lowest set bit and clear all the others
+ * (the bitmap becomes empty if it was empty). */
+void hwloc_bitmap_singlify(struct hwloc_bitmap_s * set)
+{
+	unsigned i;
+	int found = 0;
+
+	HWLOC__BITMAP_CHECK(set);
+
+	for(i=0; i<set->ulongs_count; i++) {
+		if (found) {
+			/* lowest bit already kept: clear every later word */
+			set->ulongs[i] = HWLOC_SUBBITMAP_ZERO;
+			continue;
+		} else {
+			/* subsets are unsigned longs, use ffsl */
+			unsigned long w = set->ulongs[i];
+			if (w) {
+				int _ffs = hwloc_ffsl(w);
+				set->ulongs[i] = HWLOC_SUBBITMAP_CPU(_ffs-1);
+				found = 1;
+			}
+		}
+	}
+
+	if (set->infinite) {
+		if (found) {
+			set->infinite = 0;
+		} else {
+			/* set the first non allocated bit */
+			unsigned first = set->ulongs_count * HWLOC_BITS_PER_LONG;
+			set->infinite = 0; /* do not let realloc fill the newly allocated sets */
+			hwloc_bitmap_set(set, first);
+		}
+	}
+}
+
+/* Compare bitmaps by the index of their lowest set bit: negative if
+ * set1's first bit is lower, positive if higher, 0 if equal.
+ * An empty bitmap is considered higher than any non-empty one. */
+int hwloc_bitmap_compare_first(const struct hwloc_bitmap_s * set1, const struct hwloc_bitmap_s * set2)
+{
+	unsigned count1 = set1->ulongs_count;
+	unsigned count2 = set2->ulongs_count;
+	unsigned max_count = count1 > count2 ? count1 : count2;
+	unsigned min_count = count1 + count2 - max_count;
+	unsigned i;
+
+	HWLOC__BITMAP_CHECK(set1);
+	HWLOC__BITMAP_CHECK(set2);
+
+	for(i=0; i<min_count; i++) {
+		unsigned long w1 = set1->ulongs[i];
+		unsigned long w2 = set2->ulongs[i];
+		if (w1 || w2) {
+			int _ffs1 = hwloc_ffsl(w1);
+			int _ffs2 = hwloc_ffsl(w2);
+			/* if both have a bit set, compare for real */
+			if (_ffs1 && _ffs2)
+				return _ffs1-_ffs2;
+			/* one is empty, and it is considered higher, so reverse-compare them */
+			return _ffs2-_ffs1;
+		}
+	}
+
+	if (count1 != count2) {
+		/* the shorter set contributes virtual words: FULL (bit 0 set)
+		 * if infinite, ZERO otherwise */
+		if (min_count < count2) {
+			for(i=min_count; i<count2; i++) {
+				unsigned long w2 = set2->ulongs[i];
+				if (set1->infinite)
+					return -!(w2 & 1);
+				else if (w2)
+					return 1;
+			}
+		} else {
+			for(i=min_count; i<count1; i++) {
+				unsigned long w1 = set1->ulongs[i];
+				if (set2->infinite)
+					return !(w1 & 1);
+				else if (w1)
+					return -1;
+			}
+		}
+	}
+
+	return !!set1->infinite - !!set2->infinite;
+}
+
+/* Total-order comparison of two bitmaps, most-significant word first:
+ * returns -1, 0 or 1. An infinite bitmap sorts after any finite one. */
+int hwloc_bitmap_compare(const struct hwloc_bitmap_s * set1, const struct hwloc_bitmap_s * set2)
+{
+	unsigned count1 = set1->ulongs_count;
+	unsigned count2 = set2->ulongs_count;
+	unsigned max_count = count1 > count2 ? count1 : count2;
+	unsigned min_count = count1 + count2 - max_count;
+	int i;
+
+	HWLOC__BITMAP_CHECK(set1);
+	HWLOC__BITMAP_CHECK(set2);
+
+	/* different infinite flags decide immediately */
+	if ((!set1->infinite) != (!set2->infinite))
+		return !!set1->infinite - !!set2->infinite;
+
+	if (count1 != count2) {
+		/* compare the longer set's extra words against the shorter
+		 * set's virtual words (FULL if infinite, ZERO otherwise) */
+		if (min_count < count2) {
+			unsigned long val1 = set1->infinite ? HWLOC_SUBBITMAP_FULL :  HWLOC_SUBBITMAP_ZERO;
+			for(i=max_count-1; i>=(signed) min_count; i--) {
+				unsigned long val2 = set2->ulongs[i];
+				if (val1 == val2)
+					continue;
+				return val1 < val2 ? -1 : 1;
+			}
+		} else {
+			unsigned long val2 = set2->infinite ? HWLOC_SUBBITMAP_FULL :  HWLOC_SUBBITMAP_ZERO;
+			for(i=max_count-1; i>=(signed) min_count; i--) {
+				unsigned long val1 = set1->ulongs[i];
+				if (val1 == val2)
+					continue;
+				return val1 < val2 ? -1 : 1;
+			}
+		}
+	}
+
+	/* compare the words both have allocated, high to low */
+	for(i=min_count-1; i>=0; i--) {
+		unsigned long val1 = set1->ulongs[i];
+		unsigned long val2 = set2->ulongs[i];
+		if (val1 == val2)
+			continue;
+		return val1 < val2 ? -1 : 1;
+	}
+
+	return 0;
+}
+
+/* Return the number of set bits, or -1 if the bitmap is infinitely set. */
+int hwloc_bitmap_weight(const struct hwloc_bitmap_s * set)
+{
+	int weight = 0;
+	unsigned i;
+
+	HWLOC__BITMAP_CHECK(set);
+
+	if (set->infinite)
+		return -1;
+
+	for(i=0; i<set->ulongs_count; i++)
+		weight += hwloc_weight_long(set->ulongs[i]);
+	return weight;
+}
+
+/* Compute the set relation between set1 and set2 in a single pass:
+ * returns HWLOC_BITMAP_EQUAL, HWLOC_BITMAP_INCLUDED (set1 in set2),
+ * HWLOC_BITMAP_CONTAINS (set2 in set1), HWLOC_BITMAP_INTERSECTS
+ * (overlap but neither includes the other) or HWLOC_BITMAP_DIFFERENT
+ * (disjoint). Empty sets compare as EQUAL.
+ * The running `result` is refined word by word; once a contradiction
+ * between non-empty prefixes appears, INTERSECTS is returned early.
+ */
+int hwloc_bitmap_compare_inclusion(const struct hwloc_bitmap_s * set1, const struct hwloc_bitmap_s * set2)
+{
+	unsigned max_count = set1->ulongs_count > set2->ulongs_count ? set1->ulongs_count : set2->ulongs_count;
+	int result = HWLOC_BITMAP_EQUAL; /* means empty sets return equal */
+	int empty1 = 1;
+	int empty2 = 1;
+	unsigned i;
+
+	HWLOC__BITMAP_CHECK(set1);
+	HWLOC__BITMAP_CHECK(set2);
+
+	for(i=0; i<max_count; i++) {
+	  unsigned long val1 = HWLOC_SUBBITMAP_READULONG(set1, (unsigned) i);
+	  unsigned long val2 = HWLOC_SUBBITMAP_READULONG(set2, (unsigned) i);
+
+	  if (!val1) {
+	    if (!val2)
+	      /* both empty, no change */
+	      continue;
+
+	    /* val1 empty, val2 not */
+	    if (result == HWLOC_BITMAP_CONTAINS) {
+	      if (!empty2)
+		return HWLOC_BITMAP_INTERSECTS;
+	      result = HWLOC_BITMAP_DIFFERENT;
+	    } else if (result == HWLOC_BITMAP_EQUAL) {
+	      result = HWLOC_BITMAP_INCLUDED;
+	    }
+	    /* no change otherwise */
+
+	  } else if (!val2) {
+	    /* val2 empty, val1 not */
+	    if (result == HWLOC_BITMAP_INCLUDED) {
+	      if (!empty1)
+		return HWLOC_BITMAP_INTERSECTS;
+	      result = HWLOC_BITMAP_DIFFERENT;
+	    } else if (result == HWLOC_BITMAP_EQUAL) {
+	      result = HWLOC_BITMAP_CONTAINS;
+	    }
+	    /* no change otherwise */
+
+	  } else if (val1 == val2) {
+	    /* equal and not empty */
+	    if (result == HWLOC_BITMAP_DIFFERENT)
+	      return HWLOC_BITMAP_INTERSECTS;
+	    /* equal/contains/included unchanged */
+
+	  } else if ((val1 & val2) == val1) {
+	    /* included and not empty */
+	    if (result == HWLOC_BITMAP_CONTAINS || result == HWLOC_BITMAP_DIFFERENT)
+	      return HWLOC_BITMAP_INTERSECTS;
+	    /* equal/included unchanged */
+	    result = HWLOC_BITMAP_INCLUDED;
+
+	  } else if ((val1 & val2) == val2) {
+	    /* contains and not empty */
+	    if (result == HWLOC_BITMAP_INCLUDED || result == HWLOC_BITMAP_DIFFERENT)
+	      return HWLOC_BITMAP_INTERSECTS;
+	    /* equal/contains unchanged */
+	    result = HWLOC_BITMAP_CONTAINS;
+
+	  } else if ((val1 & val2) != 0) {
+	    /* intersects and not empty */
+	    return HWLOC_BITMAP_INTERSECTS;
+
+	  } else {
+	    /* different and not empty */
+
+	    /* equal/included/contains with non-empty sets means intersects */
+	    if (result == HWLOC_BITMAP_EQUAL && !empty1 /* implies !empty2 */)
+	      return HWLOC_BITMAP_INTERSECTS;
+	    if (result == HWLOC_BITMAP_INCLUDED && !empty1)
+	      return HWLOC_BITMAP_INTERSECTS;
+	    if (result == HWLOC_BITMAP_CONTAINS && !empty2)
+	      return HWLOC_BITMAP_INTERSECTS;
+	    /* otherwise means different */
+	    result = HWLOC_BITMAP_DIFFERENT;
+	  }
+
+	  /* track whether each prefix seen so far is still all-empty */
+	  empty1 &= !val1;
+	  empty2 &= !val2;
+	}
+
+	/* finally fold in the infinite parts, which behave like one more
+	 * (virtual) word pair */
+	if (!set1->infinite) {
+	  if (set2->infinite) {
+	    /* set2 infinite only */
+	    if (result == HWLOC_BITMAP_CONTAINS) {
+	      if (!empty2)
+		return HWLOC_BITMAP_INTERSECTS;
+	      result = HWLOC_BITMAP_DIFFERENT;
+	    } else if (result == HWLOC_BITMAP_EQUAL) {
+	      result = HWLOC_BITMAP_INCLUDED;
+	    }
+	    /* no change otherwise */
+	  }
+	} else if (!set2->infinite) {
+	  /* set1 infinite only */
+	  if (result == HWLOC_BITMAP_INCLUDED) {
+	    if (!empty1)
+	      return HWLOC_BITMAP_INTERSECTS;
+	    result = HWLOC_BITMAP_DIFFERENT;
+	  } else if (result == HWLOC_BITMAP_EQUAL) {
+	    result = HWLOC_BITMAP_CONTAINS;
+	  }
+	  /* no change otherwise */
+	} else {
+	  /* both infinite */
+	  if (result == HWLOC_BITMAP_DIFFERENT)
+	    return HWLOC_BITMAP_INTERSECTS;
+	  /* equal/contains/included unchanged */
+	}
+
+	return result;
+}
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/components.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/components.c
new file mode 100644
index 0000000000..4af6dde7a5
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/components.c
@@ -0,0 +1,784 @@
+/*
+ * Copyright © 2009-2017 Inria.  All rights reserved.
+ * Copyright © 2012 Université Bordeaux
+ * See COPYING in top-level directory.
+ */
+
+#include <private/autogen/config.h>
+#include <hwloc.h>
+#include <private/private.h>
+#include <private/xml.h>
+#include <private/misc.h>
+
+#define HWLOC_COMPONENT_STOP_NAME "stop"
+#define HWLOC_COMPONENT_EXCLUDE_CHAR '-'
+#define HWLOC_COMPONENT_SEPS ","
+
+/* list of all registered discovery components, sorted by priority, higher priority first.
+ * noos is last because its priority is 0.
+ * others' priority is 10.
+ */
+static struct hwloc_disc_component * hwloc_disc_components = NULL;
+
+static unsigned hwloc_components_users = 0; /* first one initializes, last one destroys */
+
+static int hwloc_components_verbose = 0;
+#ifdef HWLOC_HAVE_PLUGINS
+static int hwloc_plugins_verbose = 0;
+static const char * hwloc_plugins_blacklist = NULL;
+#endif
+
+/* hwloc_components_mutex serializes:
+ * - loading/unloading plugins, and modifications of the hwloc_plugins list
+ * - calls to ltdl, including in hwloc_check_plugin_namespace()
+ * - registration of components with hwloc_disc_component_register()
+ *   and hwloc_xml_callbacks_register()
+ */
+#ifdef HWLOC_WIN_SYS
+/* Basic mutex on top of InterlockedCompareExchange() on windows,
+ * Far from perfect, but easy to maintain, and way enough given that this code will never be needed for real. */
+#include <windows.h>
+static LONG hwloc_components_mutex = 0;
+#define HWLOC_COMPONENTS_LOCK() do {						\
+  while (InterlockedCompareExchange(&hwloc_components_mutex, 1, 0) != 0)	\
+    SwitchToThread();								\
+} while (0)
+#define HWLOC_COMPONENTS_UNLOCK() do {						\
+  assert(hwloc_components_mutex == 1);						\
+  hwloc_components_mutex = 0;							\
+} while (0)
+
+#elif defined HWLOC_HAVE_PTHREAD_MUTEX
+/* pthread mutex if available (except on windows) */
+#include <pthread.h>
+static pthread_mutex_t hwloc_components_mutex = PTHREAD_MUTEX_INITIALIZER;
+#define HWLOC_COMPONENTS_LOCK() pthread_mutex_lock(&hwloc_components_mutex)
+#define HWLOC_COMPONENTS_UNLOCK() pthread_mutex_unlock(&hwloc_components_mutex)
+
+#else /* HWLOC_WIN_SYS || HWLOC_HAVE_PTHREAD_MUTEX */
+#error No mutex implementation available
+#endif
+
+
+#ifdef HWLOC_HAVE_PLUGINS
+
+#include <ltdl.h>
+
+/* array of pointers to dynamically loaded plugins */
+static struct hwloc__plugin_desc {
+  char *name;
+  struct hwloc_component *component;
+  char *filename;
+  lt_dlhandle handle;
+  struct hwloc__plugin_desc *next;
+} *hwloc_plugins = NULL;
+
+static int
+hwloc__dlforeach_cb(const char *filename, void *_data __hwloc_attribute_unused)
+{
+  /* Callback invoked by lt_dlforeachfile() for each candidate plugin file.
+   * Loads the plugin, looks up its "<basename>_component" symbol, validates
+   * ABI/type/naming conventions, and queues a descriptor on hwloc_plugins.
+   * Always returns 0 so enumeration continues even if this file fails.
+   */
+  const char *basename;
+  lt_dlhandle handle;
+  char *componentsymbolname = NULL;
+  struct hwloc_component *component;
+  struct hwloc__plugin_desc *desc, **prevdesc;
+
+  if (hwloc_plugins_verbose)
+    fprintf(stderr, "Plugin dlforeach found `%s'\n", filename);
+
+  basename = strrchr(filename, '/');
+  if (!basename)
+    basename = filename;
+  else
+    basename++;
+
+  if (hwloc_plugins_blacklist && strstr(hwloc_plugins_blacklist, basename)) {
+    if (hwloc_plugins_verbose)
+      fprintf(stderr, "Plugin `%s' is blacklisted in the environment\n", basename);
+    goto out;
+  }
+
+  /* dlopen and get the component structure */
+  handle = lt_dlopenext(filename);
+  if (!handle) {
+    if (hwloc_plugins_verbose)
+      fprintf(stderr, "Failed to load plugin: %s\n", lt_dlerror());
+    goto out;
+  }
+  /* "_component" is 10 chars, +1 for the terminating NUL */
+  componentsymbolname = malloc(strlen(basename)+10+1);
+  if (!componentsymbolname)
+    /* out of memory: skip this plugin instead of passing NULL to sprintf */
+    goto out_with_handle;
+  sprintf(componentsymbolname, "%s_component", basename);
+  component = lt_dlsym(handle, componentsymbolname);
+  if (!component) {
+    if (hwloc_plugins_verbose)
+      fprintf(stderr, "Failed to find component symbol `%s'\n",
+	      componentsymbolname);
+    goto out_with_handle;
+  }
+  if (component->abi != HWLOC_COMPONENT_ABI) {
+    if (hwloc_plugins_verbose)
+      fprintf(stderr, "Plugin symbol ABI %u instead of %d\n",
+	      component->abi, HWLOC_COMPONENT_ABI);
+    goto out_with_handle;
+  }
+  if (hwloc_plugins_verbose)
+    fprintf(stderr, "Plugin contains expected symbol `%s'\n",
+	    componentsymbolname);
+  free(componentsymbolname);
+  componentsymbolname = NULL;
+
+  /* the filename prefix must match the component type */
+  if (HWLOC_COMPONENT_TYPE_DISC == component->type) {
+    if (strncmp(basename, "hwloc_", 6)) {
+      if (hwloc_plugins_verbose)
+	fprintf(stderr, "Plugin name `%s' doesn't match its type DISCOVERY\n", basename);
+      goto out_with_handle;
+    }
+  } else if (HWLOC_COMPONENT_TYPE_XML == component->type) {
+    if (strncmp(basename, "hwloc_xml_", 10)) {
+      if (hwloc_plugins_verbose)
+	fprintf(stderr, "Plugin name `%s' doesn't match its type XML\n", basename);
+      goto out_with_handle;
+    }
+  } else {
+    if (hwloc_plugins_verbose)
+      fprintf(stderr, "Plugin name `%s' has invalid type %u\n",
+	      basename, (unsigned) component->type);
+    goto out_with_handle;
+  }
+
+  /* allocate a plugin_desc and queue it */
+  desc = malloc(sizeof(*desc));
+  if (!desc)
+    goto out_with_handle;
+  desc->name = strdup(basename);
+  desc->filename = strdup(filename);
+  if (!desc->name || !desc->filename) {
+    /* out of memory: drop the partially-built descriptor,
+     * otherwise a NULL name would later reach fprintf("%s") */
+    free(desc->name);
+    free(desc->filename);
+    free(desc);
+    goto out_with_handle;
+  }
+  desc->component = component;
+  desc->handle = handle;
+  desc->next = NULL;
+  if (hwloc_plugins_verbose)
+    fprintf(stderr, "Plugin descriptor `%s' ready\n", basename);
+
+  /* append to the list */
+  prevdesc = &hwloc_plugins;
+  while (*prevdesc)
+    prevdesc = &((*prevdesc)->next);
+  *prevdesc = desc;
+  if (hwloc_plugins_verbose)
+    fprintf(stderr, "Plugin descriptor `%s' queued\n", basename);
+  return 0;
+
+ out_with_handle:
+  lt_dlclose(handle);
+  free(componentsymbolname); /* NULL if already freed */
+ out:
+  return 0;
+}
+
+static void
+hwloc_plugins_exit(void)
+{
+  /* Release every queued plugin descriptor, dlclose each handle,
+   * then shut the ltdl library down. */
+  struct hwloc__plugin_desc *cur, *following;
+
+  if (hwloc_plugins_verbose)
+    fprintf(stderr, "Closing all plugins\n");
+
+  for (cur = hwloc_plugins; cur != NULL; cur = following) {
+    following = cur->next;
+    lt_dlclose(cur->handle);
+    free(cur->name);
+    free(cur->filename);
+    free(cur);
+  }
+  hwloc_plugins = NULL;
+
+  lt_dlexit();
+}
+
+static int
+hwloc_plugins_init(void)
+{
+  /* Bring up ltdl, then scan the plugin search path (overridable through
+   * HWLOC_PLUGINS_PATH) and queue a descriptor per loadable plugin.
+   * Returns 0 on success, -1 on failure (plugins torn down again). */
+  const char *verboseenv = getenv("HWLOC_PLUGINS_VERBOSE");
+  const char *path;
+  int err;
+
+  hwloc_plugins_verbose = verboseenv ? atoi(verboseenv) : 0;
+
+  hwloc_plugins_blacklist = getenv("HWLOC_PLUGINS_BLACKLIST");
+
+  if (lt_dlinit())
+    return -1;
+
+  path = getenv("HWLOC_PLUGINS_PATH");
+  if (!path)
+    path = HWLOC_PLUGINS_PATH;
+
+  hwloc_plugins = NULL;
+
+  if (hwloc_plugins_verbose)
+    fprintf(stderr, "Starting plugin dlforeach in %s\n", path);
+  err = lt_dlforeachfile(path, hwloc__dlforeach_cb, NULL);
+  if (err) {
+    hwloc_plugins_exit();
+    return -1;
+  }
+
+  return 0;
+}
+
+#endif /* HWLOC_HAVE_PLUGINS */
+
+static const char *
+hwloc_disc_component_type_string(hwloc_disc_component_type_t type)
+{
+  /* Human-readable name of a discovery component type, for messages. */
+  if (type == HWLOC_DISC_COMPONENT_TYPE_CPU)
+    return "cpu";
+  if (type == HWLOC_DISC_COMPONENT_TYPE_GLOBAL)
+    return "global";
+  if (type == HWLOC_DISC_COMPONENT_TYPE_MISC)
+    return "misc";
+  return "**unknown**";
+}
+
+static int
+hwloc_disc_component_register(struct hwloc_disc_component *component,
+			      const char *filename)
+{
+  /* Insert one discovery component in the global hwloc_disc_components list,
+   * kept sorted by decreasing priority.  `filename' is the plugin path
+   * (NULL for statically-linked components) and is only used in messages.
+   * Serialized by hwloc_components_mutex (see the header comment above).
+   * Returns 0 on success, -1 on invalid name/type, or when a same-name
+   * component with higher-or-equal priority is already registered. */
+  struct hwloc_disc_component **prev;
+
+  /* check that the component name is valid */
+  if (!strcmp(component->name, HWLOC_COMPONENT_STOP_NAME)) {
+    if (hwloc_components_verbose)
+      fprintf(stderr, "Cannot register discovery component with reserved name `" HWLOC_COMPONENT_STOP_NAME "'\n");
+    return -1;
+  }
+  if (strchr(component->name, HWLOC_COMPONENT_EXCLUDE_CHAR)
+      || strcspn(component->name, HWLOC_COMPONENT_SEPS) != strlen(component->name)) {
+    if (hwloc_components_verbose)
+      fprintf(stderr, "Cannot register discovery component with name `%s' containing reserved characters `%c" HWLOC_COMPONENT_SEPS "'\n",
+	      component->name, HWLOC_COMPONENT_EXCLUDE_CHAR);
+    return -1;
+  }
+  /* check that the component type is valid */
+  switch ((unsigned) component->type) {
+  case HWLOC_DISC_COMPONENT_TYPE_CPU:
+  case HWLOC_DISC_COMPONENT_TYPE_GLOBAL:
+  case HWLOC_DISC_COMPONENT_TYPE_MISC:
+    break;
+  default:
+    fprintf(stderr, "Cannot register discovery component `%s' with unknown type %u\n",
+	    component->name, (unsigned) component->type);
+    return -1;
+  }
+
+  /* first pass: resolve name conflicts, keeping only the highest-priority homonym */
+  prev = &hwloc_disc_components;
+  while (NULL != *prev) {
+    if (!strcmp((*prev)->name, component->name)) {
+      /* if two components have the same name, only keep the highest priority one */
+      if ((*prev)->priority < component->priority) {
+	/* drop the existing component */
+	if (hwloc_components_verbose)
+	  fprintf(stderr, "Dropping previously registered discovery component `%s', priority %u lower than new one %u\n",
+		  (*prev)->name, (*prev)->priority, component->priority);
+	*prev = (*prev)->next;
+      } else {
+	/* drop the new one */
+	if (hwloc_components_verbose)
+	  fprintf(stderr, "Ignoring new discovery component `%s', priority %u lower than previously registered one %u\n",
+		  component->name, component->priority, (*prev)->priority);
+	return -1;
+      }
+    }
+    prev = &((*prev)->next);
+  }
+  if (hwloc_components_verbose)
+    fprintf(stderr, "Registered %s discovery component `%s' with priority %u (%s%s)\n",
+	    hwloc_disc_component_type_string(component->type), component->name, component->priority,
+	    filename ? "from plugin " : "statically build", filename ? filename : "");
+
+  /* second pass: find the insertion point so the list stays sorted by decreasing priority */
+  prev = &hwloc_disc_components;
+  while (NULL != *prev) {
+    if ((*prev)->priority < component->priority)
+      break;
+    prev = &((*prev)->next);
+  }
+  component->next = *prev;
+  *prev = component;
+  return 0;
+}
+
+#include <static-components.h>
+
+static void (**hwloc_component_finalize_cbs)(unsigned long);
+static unsigned hwloc_component_finalize_cb_count;
+
+void
+hwloc_components_init(void)
+{
+  /* Global, refcounted initialization of the component infrastructure.
+   * The first caller loads plugins (when enabled), collects the components'
+   * finalize callbacks, and registers every static and plugin component;
+   * later callers only bump the refcount (see hwloc_components_fini). */
+#ifdef HWLOC_HAVE_PLUGINS
+  struct hwloc__plugin_desc *desc;
+#endif
+  const char *verboseenv;
+  unsigned i;
+
+  HWLOC_COMPONENTS_LOCK();
+  /* guard against refcount wrap-around */
+  assert((unsigned) -1 != hwloc_components_users);
+  if (0 != hwloc_components_users++) {
+    /* somebody else already initialized everything */
+    HWLOC_COMPONENTS_UNLOCK();
+    return;
+  }
+
+  verboseenv = getenv("HWLOC_COMPONENTS_VERBOSE");
+  hwloc_components_verbose = verboseenv ? atoi(verboseenv) : 0;
+
+#ifdef HWLOC_HAVE_PLUGINS
+  hwloc_plugins_init();
+#endif
+
+  hwloc_component_finalize_cbs = NULL;
+  hwloc_component_finalize_cb_count = 0;
+  /* count the max number of finalize callbacks */
+  for(i=0; NULL != hwloc_static_components[i]; i++)
+    hwloc_component_finalize_cb_count++;
+#ifdef HWLOC_HAVE_PLUGINS
+  for(desc = hwloc_plugins; NULL != desc; desc = desc->next)
+    hwloc_component_finalize_cb_count++;
+#endif
+  if (hwloc_component_finalize_cb_count) {
+    hwloc_component_finalize_cbs = calloc(hwloc_component_finalize_cb_count,
+					  sizeof(*hwloc_component_finalize_cbs));
+    assert(hwloc_component_finalize_cbs);
+    /* forget that max number and recompute the real one below */
+    hwloc_component_finalize_cb_count = 0;
+  }
+
+  /* hwloc_static_components is created by configure in static-components.h */
+  for(i=0; NULL != hwloc_static_components[i]; i++) {
+    if (hwloc_static_components[i]->flags) {
+      fprintf(stderr, "Ignoring static component with invalid flags %lx\n",
+	      hwloc_static_components[i]->flags);
+      continue;
+    }
+
+    /* initialize the component */
+    if (hwloc_static_components[i]->init && hwloc_static_components[i]->init(0) < 0) {
+      if (hwloc_components_verbose)
+	fprintf(stderr, "Ignoring static component, failed to initialize\n");
+      continue;
+    }
+    /* queue ->finalize() callback if any */
+    if (hwloc_static_components[i]->finalize)
+      hwloc_component_finalize_cbs[hwloc_component_finalize_cb_count++] = hwloc_static_components[i]->finalize;
+
+    /* register for real now */
+    if (HWLOC_COMPONENT_TYPE_DISC == hwloc_static_components[i]->type)
+      hwloc_disc_component_register(hwloc_static_components[i]->data, NULL);
+    else if (HWLOC_COMPONENT_TYPE_XML == hwloc_static_components[i]->type)
+      hwloc_xml_callbacks_register(hwloc_static_components[i]->data);
+    else
+      assert(0);
+  }
+
+  /* dynamic plugins */
+#ifdef HWLOC_HAVE_PLUGINS
+  for(desc = hwloc_plugins; NULL != desc; desc = desc->next) {
+    if (desc->component->flags) {
+      fprintf(stderr, "Ignoring plugin `%s' component with invalid flags %lx\n",
+	      desc->name, desc->component->flags);
+      continue;
+    }
+
+    /* initialize the component */
+    if (desc->component->init && desc->component->init(0) < 0) {
+      if (hwloc_components_verbose)
+	fprintf(stderr, "Ignoring plugin `%s', failed to initialize\n", desc->name);
+      continue;
+    }
+    /* queue ->finalize() callback if any */
+    if (desc->component->finalize)
+      hwloc_component_finalize_cbs[hwloc_component_finalize_cb_count++] = desc->component->finalize;
+
+    /* register for real now */
+    if (HWLOC_COMPONENT_TYPE_DISC == desc->component->type)
+      hwloc_disc_component_register(desc->component->data, desc->filename);
+    else if (HWLOC_COMPONENT_TYPE_XML == desc->component->type)
+      hwloc_xml_callbacks_register(desc->component->data);
+    else
+      assert(0);
+  }
+#endif
+
+  HWLOC_COMPONENTS_UNLOCK();
+}
+
+void
+hwloc_backends_init(struct hwloc_topology *topology)
+{
+  /* Reset the topology's backend list to its pristine, empty state. */
+  topology->backend_excludes = 0;
+  topology->backends = NULL;
+}
+
+static struct hwloc_disc_component *
+hwloc_disc_component_find(int type /* hwloc_disc_component_type_t or -1 if any */,
+			       const char *name /* name or NULL if any */)
+{
+  /* Walk the registered discovery components and return the first one
+   * matching both filters; NULL when nothing matches. */
+  struct hwloc_disc_component *cur;
+
+  for (cur = hwloc_disc_components; cur != NULL; cur = cur->next) {
+    int type_matches = (-1 == type) || (type == (int) cur->type);
+    int name_matches = (NULL == name) || !strcmp(name, cur->name);
+    if (type_matches && name_matches)
+      return cur;
+  }
+  return NULL;
+}
+
+/* used by set_xml(), set_synthetic(), ... environment variables, ... to force the first backend */
+int
+hwloc_disc_component_force_enable(struct hwloc_topology *topology,
+				  int envvar_forced,
+				  int type, const char *name,
+				  const void *data1, const void *data2, const void *data3)
+{
+  /* Locate the requested discovery component, instantiate it, and make it
+   * the only enabled backend of the topology.
+   * Returns hwloc_backend_enable()'s result, or -1 with errno set:
+   * EBUSY if the topology is already loaded, ENOSYS if the component
+   * is unknown. */
+  struct hwloc_disc_component *comp;
+  struct hwloc_backend *backend;
+
+  if (topology->is_loaded) {
+    errno = EBUSY;
+    return -1;
+  }
+
+  comp = hwloc_disc_component_find(type, name);
+  if (!comp) {
+    errno = ENOSYS;
+    return -1;
+  }
+
+  backend = comp->instantiate(comp, data1, data2, data3);
+  if (!backend)
+    return -1;
+
+  backend->envvar_forced = envvar_forced;
+  if (topology->backends)
+    hwloc_backends_disable_all(topology);
+  return hwloc_backend_enable(topology, backend);
+}
+
+static int
+hwloc_disc_component_try_enable(struct hwloc_topology *topology,
+				struct hwloc_disc_component *comp,
+				const char *comparg,
+				int envvar_forced)
+{
+  /* Instantiate one discovery component and enable it on the topology,
+   * unless an already-enabled backend excludes its type.
+   * Returns hwloc_backend_enable()'s result, or -1 on exclusion or
+   * instantiation failure. */
+  struct hwloc_backend *backend;
+
+  if (comp->type & topology->backend_excludes) {
+    /* do not warn if envvar_forced since system-wide HWLOC_COMPONENTS must be silently ignored after set_xml() etc. */
+    if (hwloc_components_verbose)
+      fprintf(stderr, "Excluding %s discovery component `%s', conflicts with excludes 0x%x\n",
+	      hwloc_disc_component_type_string(comp->type), comp->name, topology->backend_excludes);
+    return -1;
+  }
+
+  backend = comp->instantiate(comp, comparg, NULL, NULL);
+  if (backend) {
+    backend->envvar_forced = envvar_forced;
+    return hwloc_backend_enable(topology, backend);
+  }
+
+  if (hwloc_components_verbose || envvar_forced)
+    fprintf(stderr, "Failed to instantiate discovery component `%s'\n", comp->name);
+  return -1;
+}
+
+void
+hwloc_disc_components_enable_others(struct hwloc_topology *topology)
+{
+  /* Enable backends according to the HWLOC_COMPONENTS environment variable:
+   * first the explicitly-listed components (in order), then — unless the
+   * list ended with the "stop" keyword — every remaining registered
+   * component except those prefixed with '-' in the variable. */
+  struct hwloc_disc_component *comp;
+  struct hwloc_backend *backend;
+  int tryall = 1;
+  const char *_env;
+  char *env; /* we'll need to modify the env value, so duplicate it */
+
+  _env = getenv("HWLOC_COMPONENTS");
+  env = _env ? strdup(_env) : NULL;
+
+  /* enable explicitly listed components */
+  if (env) {
+    char *curenv = env;
+    size_t s;
+
+    while (*curenv) {
+      s = strcspn(curenv, HWLOC_COMPONENT_SEPS);
+      if (s) {
+	char c;
+
+	/* replace linuxpci with linuxio for backward compatibility with pre-v2.0 */
+	if (!strncmp(curenv, "linuxpci", 8) && s == 8) {
+	  curenv[5] = 'i';
+	  curenv[6] = 'o';
+	  curenv[7] = *HWLOC_COMPONENT_SEPS;
+	} else if (curenv[0] == HWLOC_COMPONENT_EXCLUDE_CHAR && !strncmp(curenv+1, "linuxpci", 8) && s == 9) {
+	  curenv[6] = 'i';
+	  curenv[7] = 'o';
+	  curenv[8] = *HWLOC_COMPONENT_SEPS;
+	  /* skip this name, it's a negated one */
+	  goto nextname;
+	}
+
+	if (curenv[0] == HWLOC_COMPONENT_EXCLUDE_CHAR)
+	  goto nextname;
+
+	if (!strncmp(curenv, HWLOC_COMPONENT_STOP_NAME, s)) {
+	  tryall = 0;
+	  break;
+	}
+
+	/* save the last char and replace with \0 */
+	c = curenv[s];
+	curenv[s] = '\0';
+
+	comp = hwloc_disc_component_find(-1, curenv);
+	if (comp) {
+	  hwloc_disc_component_try_enable(topology, comp, NULL, 1 /* envvar forced */);
+	} else {
+	  fprintf(stderr, "Cannot find discovery component `%s'\n", curenv);
+	}
+
+	/* restore chars (the second loop below needs env to be unmodified) */
+	curenv[s] = c;
+      }
+
+nextname:
+      curenv += s;
+      if (*curenv)
+	/* Skip comma */
+	curenv++;
+    }
+  }
+
+  /* env is still the same, the above loop didn't modify it */
+
+  /* now enable remaining components (except the explicitly '-'-listed ones) */
+  if (tryall) {
+    comp = hwloc_disc_components;
+    while (NULL != comp) {
+      /* check if this component was explicitly excluded in env */
+      if (env) {
+	char *curenv = env;
+	while (*curenv) {
+	  size_t s = strcspn(curenv, HWLOC_COMPONENT_SEPS);
+	  if (curenv[0] == HWLOC_COMPONENT_EXCLUDE_CHAR && !strncmp(curenv+1, comp->name, s-1) && strlen(comp->name) == s-1) {
+	    if (hwloc_components_verbose)
+	      fprintf(stderr, "Excluding %s discovery component `%s' because of HWLOC_COMPONENTS environment variable\n",
+	    hwloc_disc_component_type_string(comp->type), comp->name);
+	    goto nextcomp;
+	  }
+	  curenv += s;
+	  if (*curenv)
+	    /* Skip comma */
+	    curenv++;
+	}
+      }
+      hwloc_disc_component_try_enable(topology, comp, NULL, 0 /* defaults, not envvar forced */);
+nextcomp:
+      comp = comp->next;
+    }
+  }
+
+  if (hwloc_components_verbose) {
+    /* print a summary */
+    int first = 1;
+    backend = topology->backends;
+    fprintf(stderr, "Final list of enabled discovery components: ");
+    while (backend != NULL) {
+      fprintf(stderr, "%s%s", first ? "" : ",", backend->component->name);
+      backend = backend->next;
+      first = 0;
+    }
+    fprintf(stderr, "\n");
+  }
+
+  free(env);
+}
+
+void
+hwloc_components_fini(void)
+{
+  /* Refcounted teardown, mirror of hwloc_components_init(): only the last
+   * user runs the finalize callbacks (in reverse registration order),
+   * clears the component lists, and unloads plugins. */
+  unsigned i;
+
+  HWLOC_COMPONENTS_LOCK();
+  assert(0 != hwloc_components_users);
+  if (0 != --hwloc_components_users) {
+    HWLOC_COMPONENTS_UNLOCK();
+    return;
+  }
+
+  /* invoke finalize callbacks in reverse order of registration */
+  for(i=0; i<hwloc_component_finalize_cb_count; i++)
+    hwloc_component_finalize_cbs[hwloc_component_finalize_cb_count-i-1](0);
+  free(hwloc_component_finalize_cbs);
+  hwloc_component_finalize_cbs = NULL;
+  hwloc_component_finalize_cb_count = 0;
+
+  /* no need to unlink/free the list of components, they'll be unloaded below */
+
+  hwloc_disc_components = NULL;
+  hwloc_xml_callbacks_reset();
+
+#ifdef HWLOC_HAVE_PLUGINS
+  hwloc_plugins_exit();
+#endif
+
+  HWLOC_COMPONENTS_UNLOCK();
+}
+
+struct hwloc_backend *
+hwloc_backend_alloc(struct hwloc_disc_component *component)
+{
+  /* Allocate a backend bound to the given component, with all callbacks
+   * cleared and is_thissystem left undecided (-1).
+   * Returns NULL with errno set to ENOMEM on allocation failure. */
+  struct hwloc_backend *b;
+
+  b = malloc(sizeof(*b));
+  if (!b) {
+    errno = ENOMEM;
+    return NULL;
+  }
+  b->component = component;
+  b->next = NULL;
+  b->flags = 0;
+  b->envvar_forced = 0;
+  b->is_thissystem = -1;
+  b->discover = NULL;
+  b->get_pci_busid_cpuset = NULL;
+  b->disable = NULL;
+  return b;
+}
+
+static void
+hwloc_backend_disable(struct hwloc_backend *backend)
+{
+  /* Run the backend's optional teardown hook, then release it. */
+  if (backend->disable != NULL)
+    backend->disable(backend);
+  free(backend);
+}
+
+int
+hwloc_backend_enable(struct hwloc_topology *topology, struct hwloc_backend *backend)
+{
+  /* Append `backend' to the topology's backend list and merge its
+   * component's exclusion mask into topology->backend_excludes.
+   * Rejects backends with unknown flags, and duplicates of an
+   * already-enabled component (the backend is destroyed, errno=EBUSY).
+   * Returns 0 on success, -1 on failure. */
+  struct hwloc_backend **pprev;
+
+  /* check backend flags */
+  if (backend->flags) {
+    fprintf(stderr, "Cannot enable %s discovery component `%s' with unknown flags %lx\n",
+	    hwloc_disc_component_type_string(backend->component->type), backend->component->name, backend->flags);
+    return -1;
+  }
+
+  /* make sure we didn't already enable this backend, we don't want duplicates */
+  pprev = &topology->backends;
+  while (NULL != *pprev) {
+    if ((*pprev)->component == backend->component) {
+      if (hwloc_components_verbose)
+	fprintf(stderr, "Cannot enable %s discovery component `%s' twice\n",
+		hwloc_disc_component_type_string(backend->component->type), backend->component->name);
+      hwloc_backend_disable(backend);
+      errno = EBUSY;
+      return -1;
+    }
+    pprev = &((*pprev)->next);
+  }
+
+  if (hwloc_components_verbose)
+    fprintf(stderr, "Enabling %s discovery component `%s'\n",
+	    hwloc_disc_component_type_string(backend->component->type), backend->component->name);
+
+  /* enqueue at the end */
+  pprev = &topology->backends;
+  while (NULL != *pprev)
+    pprev = &((*pprev)->next);
+  backend->next = *pprev;
+  *pprev = backend;
+
+  backend->topology = topology;
+  topology->backend_excludes |= backend->component->excludes;
+  return 0;
+}
+
+void
+hwloc_backends_is_thissystem(struct hwloc_topology *topology)
+{
+  /* Decide whether the topology describes the machine we are running on.
+   * Precedence, lowest to highest: default/set_foo() backends' opinion,
+   * the HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM topology flag, envvar-forced
+   * backends' opinion, and finally the HWLOC_THISSYSTEM envvar. */
+  struct hwloc_backend *backend;
+  const char *local_env;
+
+  /* Apply is_thissystem topology flag before we enforce envvar backends.
+   * If the application changed the backend with set_foo(),
+   * it may use set_flags() to update the is_thissystem flag here.
+   * If it changes the backend with environment variables below,
+   * it may use HWLOC_THISSYSTEM envvar below as well.
+   */
+
+  topology->is_thissystem = 1;
+
+  /* apply thissystem from normally-given backends (envvar_forced=0, either set_foo() or defaults) */
+  backend = topology->backends;
+  while (backend != NULL) {
+    if (backend->envvar_forced == 0 && backend->is_thissystem != -1) {
+      /* backends may only clear the flag (is_thissystem is 0 or -1 here), never set it */
+      assert(backend->is_thissystem == 0);
+      topology->is_thissystem = 0;
+    }
+    backend = backend->next;
+  }
+
+  /* override set_foo() with flags */
+  if (topology->flags & HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM)
+    topology->is_thissystem = 1;
+
+  /* now apply envvar-forced backend (envvar_forced=1) */
+  backend = topology->backends;
+  while (backend != NULL) {
+    if (backend->envvar_forced == 1 && backend->is_thissystem != -1) {
+      assert(backend->is_thissystem == 0);
+      topology->is_thissystem = 0;
+    }
+    backend = backend->next;
+  }
+
+  /* override with envvar-given flag */
+  local_env = getenv("HWLOC_THISSYSTEM");
+  if (local_env)
+    topology->is_thissystem = atoi(local_env);
+}
+
+void
+hwloc_backends_find_callbacks(struct hwloc_topology *topology)
+{
+  /* Remember the first enabled backend that provides a
+   * get_pci_busid_cpuset callback (NULL when none does). */
+  struct hwloc_backend *b;
+
+  topology->get_pci_busid_cpuset_backend = NULL;
+  for (b = topology->backends; b != NULL; b = b->next) {
+    if (b->get_pci_busid_cpuset) {
+      topology->get_pci_busid_cpuset_backend = b;
+      break;
+    }
+  }
+}
+
+void
+hwloc_backends_disable_all(struct hwloc_topology *topology)
+{
+  /* Tear down every enabled backend and reset the exclusion mask. */
+  struct hwloc_backend *head;
+
+  while ((head = topology->backends) != NULL) {
+    struct hwloc_backend *rest = head->next;
+    if (hwloc_components_verbose)
+      fprintf(stderr, "Disabling %s discovery component `%s'\n",
+	      hwloc_disc_component_type_string(head->component->type), head->component->name);
+    hwloc_backend_disable(head);
+    topology->backends = rest;
+  }
+  topology->backends = NULL;
+  topology->backend_excludes = 0;
+}
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/diff.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/diff.c
new file mode 100644
index 0000000000..ec7b67685f
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/diff.c
@@ -0,0 +1,468 @@
+/*
+ * Copyright © 2013-2017 Inria.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+#include <private/autogen/config.h>
+#include <private/private.h>
+#include <private/misc.h>
+
+int hwloc_topology_diff_destroy(hwloc_topology_diff_t diff)
+{
+	/* Free an entire diff list, including the duplicated strings owned
+	 * by NAME/INFO object-attribute diffs.  Always returns 0. */
+	hwloc_topology_diff_t next;
+	while (diff) {
+		next = diff->generic.next;
+		switch (diff->generic.type) {
+		default:
+			break;
+		case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR:
+			switch (diff->obj_attr.diff.generic.type) {
+			default:
+				break;
+			case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_NAME:
+			case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO:
+				/* only string-valued attribute diffs own heap memory */
+				free(diff->obj_attr.diff.string.name);
+				free(diff->obj_attr.diff.string.oldvalue);
+				free(diff->obj_attr.diff.string.newvalue);
+				break;
+			}
+			break;
+		}
+		free(diff);
+		diff = next;
+	}
+	return 0;
+}
+
+/************************
+ * Computing diffs
+ */
+
+static void hwloc_append_diff(hwloc_topology_diff_t newdiff,
+			      hwloc_topology_diff_t *firstdiffp,
+			      hwloc_topology_diff_t *lastdiffp)
+{
+	/* Append newdiff at the tail of the singly-linked diff list
+	 * tracked by (*firstdiffp, *lastdiffp). */
+	newdiff->generic.next = NULL;
+	if (*firstdiffp == NULL)
+		*firstdiffp = newdiff;
+	else
+		(*lastdiffp)->generic.next = newdiff;
+	*lastdiffp = newdiff;
+}
+
+static int hwloc_append_diff_too_complex(hwloc_obj_t obj1,
+					 hwloc_topology_diff_t *firstdiffp,
+					 hwloc_topology_diff_t *lastdiffp)
+{
+	/* Queue a TOO_COMPLEX marker diff pointing at obj1.
+	 * Returns 0 on success, -1 on allocation failure. */
+	hwloc_topology_diff_t d = malloc(sizeof(*d));
+	if (d == NULL)
+		return -1;
+
+	d->too_complex.type = HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX;
+	d->too_complex.obj_depth = obj1->depth;
+	d->too_complex.obj_index = obj1->logical_index;
+	hwloc_append_diff(d, firstdiffp, lastdiffp);
+	return 0;
+}
+
+static int hwloc_append_diff_obj_attr_string(hwloc_obj_t obj,
+					     hwloc_topology_diff_obj_attr_type_t type,
+					     const char *name,
+					     const char *oldvalue,
+					     const char *newvalue,
+					     hwloc_topology_diff_t *firstdiffp,
+					     hwloc_topology_diff_t *lastdiffp)
+{
+	/* Queue a string-valued object-attribute diff (NAME or INFO) for obj.
+	 * The three strings are duplicated (NULL is allowed and preserved).
+	 * Returns 0 on success, -1 on allocation failure. */
+	hwloc_topology_diff_t d = malloc(sizeof(*d));
+	if (d == NULL)
+		return -1;
+
+	d->obj_attr.type = HWLOC_TOPOLOGY_DIFF_OBJ_ATTR;
+	d->obj_attr.obj_depth = obj->depth;
+	d->obj_attr.obj_index = obj->logical_index;
+	d->obj_attr.diff.string.type = type;
+	d->obj_attr.diff.string.name = name ? strdup(name) : NULL;
+	d->obj_attr.diff.string.oldvalue = oldvalue ? strdup(oldvalue) : NULL;
+	d->obj_attr.diff.string.newvalue = newvalue ? strdup(newvalue) : NULL;
+	hwloc_append_diff(d, firstdiffp, lastdiffp);
+	return 0;
+}
+
+static int hwloc_append_diff_obj_attr_uint64(hwloc_obj_t obj,
+					     hwloc_topology_diff_obj_attr_type_t type,
+					     hwloc_uint64_t idx,
+					     hwloc_uint64_t oldvalue,
+					     hwloc_uint64_t newvalue,
+					     hwloc_topology_diff_t *firstdiffp,
+					     hwloc_topology_diff_t *lastdiffp)
+{
+	/* Queue an integer-valued object-attribute diff for obj.
+	 * Returns 0 on success, -1 on allocation failure. */
+	hwloc_topology_diff_t d = malloc(sizeof(*d));
+	if (d == NULL)
+		return -1;
+
+	d->obj_attr.type = HWLOC_TOPOLOGY_DIFF_OBJ_ATTR;
+	d->obj_attr.obj_depth = obj->depth;
+	d->obj_attr.obj_index = obj->logical_index;
+	d->obj_attr.diff.uint64.type = type;
+	d->obj_attr.diff.uint64.index = idx;
+	d->obj_attr.diff.uint64.oldvalue = oldvalue;
+	d->obj_attr.diff.uint64.newvalue = newvalue;
+	hwloc_append_diff(d, firstdiffp, lastdiffp);
+	return 0;
+}
+
+static int
+hwloc_diff_trees(hwloc_topology_t topo1, hwloc_obj_t obj1,
+		 hwloc_topology_t topo2, hwloc_obj_t obj2,
+		 unsigned flags,
+		 hwloc_topology_diff_t *firstdiffp, hwloc_topology_diff_t *lastdiffp)
+{
+	/* Recursively compare two object subtrees and append any differences
+	 * to the (*firstdiffp, *lastdiffp) list.  Only name/info-string and
+	 * local-memory differences are expressible as diffs; any structural
+	 * mismatch appends a TOO_COMPLEX marker for this object and stops
+	 * descending.  Returns 0, or -1 on allocation failure. */
+	unsigned i;
+	int err;
+	hwloc_obj_t child1, child2;
+
+	if (obj1->depth != obj2->depth)
+		goto out_too_complex;
+
+	if (obj1->type != obj2->type)
+		goto out_too_complex;
+	if ((!obj1->subtype) != (!obj2->subtype)
+	    || (obj1->subtype && strcmp(obj1->subtype, obj2->subtype)))
+		goto out_too_complex;
+
+	if (obj1->os_index != obj2->os_index)
+		/* we could allow different os_index for non-PU non-NUMAnode objects
+		 * but it's likely useless anyway */
+		goto out_too_complex;
+
+#define _SETS_DIFFERENT(_set1, _set2) \
+ (   ( !(_set1) != !(_set2) ) \
+  || ( (_set1) && !hwloc_bitmap_isequal(_set1, _set2) ) )
+#define SETS_DIFFERENT(_set, _obj1, _obj2) _SETS_DIFFERENT((_obj1)->_set, (_obj2)->_set)
+	if (SETS_DIFFERENT(cpuset, obj1, obj2)
+	    || SETS_DIFFERENT(complete_cpuset, obj1, obj2)
+	    || SETS_DIFFERENT(allowed_cpuset, obj1, obj2)
+	    || SETS_DIFFERENT(nodeset, obj1, obj2)
+	    || SETS_DIFFERENT(complete_nodeset, obj1, obj2)
+	    || SETS_DIFFERENT(allowed_nodeset, obj1, obj2))
+		goto out_too_complex;
+
+	/* no need to check logical_index, sibling_rank, symmetric_subtree,
+	 * the parents did it */
+
+	/* gp_index don't have to be strictly identical */
+
+	if ((!obj1->name) != (!obj2->name)
+	    || (obj1->name && strcmp(obj1->name, obj2->name))) {
+		err = hwloc_append_diff_obj_attr_string(obj1,
+						       HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_NAME,
+						       NULL,
+						       obj1->name,
+						       obj2->name,
+						       firstdiffp, lastdiffp);
+		if (err < 0)
+			return err;
+	}
+
+	/* memory */
+	if (obj1->memory.local_memory != obj2->memory.local_memory) {
+		err = hwloc_append_diff_obj_attr_uint64(obj1,
+						       HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_SIZE,
+						       0,
+						       obj1->memory.local_memory,
+						       obj2->memory.local_memory,
+						       firstdiffp, lastdiffp);
+		if (err < 0)
+			return err;
+	}
+	/* ignore memory page_types */
+
+	/* type-specific attrs: any difference is not expressible as a diff */
+	switch (obj1->type) {
+	default:
+		break;
+	case HWLOC_OBJ_L1CACHE:
+	case HWLOC_OBJ_L2CACHE:
+	case HWLOC_OBJ_L3CACHE:
+	case HWLOC_OBJ_L4CACHE:
+	case HWLOC_OBJ_L5CACHE:
+	case HWLOC_OBJ_L1ICACHE:
+	case HWLOC_OBJ_L2ICACHE:
+	case HWLOC_OBJ_L3ICACHE:
+		if (memcmp(obj1->attr, obj2->attr, sizeof(obj1->attr->cache)))
+			goto out_too_complex;
+		break;
+	case HWLOC_OBJ_GROUP:
+		if (memcmp(obj1->attr, obj2->attr, sizeof(obj1->attr->group)))
+			goto out_too_complex;
+		break;
+	case HWLOC_OBJ_PCI_DEVICE:
+		if (memcmp(obj1->attr, obj2->attr, sizeof(obj1->attr->pcidev)))
+			goto out_too_complex;
+		break;
+	case HWLOC_OBJ_BRIDGE:
+		if (memcmp(obj1->attr, obj2->attr, sizeof(obj1->attr->bridge)))
+			goto out_too_complex;
+		break;
+	case HWLOC_OBJ_OS_DEVICE:
+		if (memcmp(obj1->attr, obj2->attr, sizeof(obj1->attr->osdev)))
+			goto out_too_complex;
+		break;
+	}
+
+	/* infos: same count and same names (in the same order) required;
+	 * only value changes are expressible as diffs */
+	if (obj1->infos_count != obj2->infos_count)
+		goto out_too_complex;
+	for(i=0; i<obj1->infos_count; i++) {
+		if (strcmp(obj1->infos[i].name, obj2->infos[i].name))
+			goto out_too_complex;
+		if (strcmp(obj1->infos[i].value, obj2->infos[i].value)) {
+			err = hwloc_append_diff_obj_attr_string(obj1,
+							       HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO,
+							       obj1->infos[i].name,
+							       obj1->infos[i].value,
+							       obj2->infos[i].value,
+							       firstdiffp, lastdiffp);
+			if (err < 0)
+				return err;
+		}
+	}
+
+	/* ignore userdata */
+
+	/* children */
+	for(child1 = obj1->first_child, child2 = obj2->first_child;
+	    child1 != NULL && child2 != NULL;
+	    child1 = child1->next_sibling, child2 = child2->next_sibling) {
+		err = hwloc_diff_trees(topo1, child1,
+				       topo2, child2,
+				       flags,
+				       firstdiffp, lastdiffp);
+		if (err < 0)
+			return err;
+	}
+	if (child1 || child2)
+		goto out_too_complex;
+
+	/* I/O children */
+	for(child1 = obj1->io_first_child, child2 = obj2->io_first_child;
+	    child1 != NULL && child2 != NULL;
+	    child1 = child1->next_sibling, child2 = child2->next_sibling) {
+		err = hwloc_diff_trees(topo1, child1,
+				       topo2, child2,
+				       flags,
+				       firstdiffp, lastdiffp);
+		if (err < 0)
+			return err;
+	}
+	if (child1 || child2)
+		goto out_too_complex;
+
+	/* misc children */
+	for(child1 = obj1->misc_first_child, child2 = obj2->misc_first_child;
+	    child1 != NULL && child2 != NULL;
+	    child1 = child1->next_sibling, child2 = child2->next_sibling) {
+		err = hwloc_diff_trees(topo1, child1,
+				       topo2, child2,
+				       flags,
+				       firstdiffp, lastdiffp);
+		if (err < 0)
+			return err;
+	}
+	if (child1 || child2)
+		goto out_too_complex;
+
+	return 0;
+
+out_too_complex:
+	/* NOTE(review): an allocation failure in the marker append is ignored
+	 * here (return 0 regardless); the resulting list would miss the marker */
+	hwloc_append_diff_too_complex(obj1, firstdiffp, lastdiffp);
+	return 0;
+}
+
+int hwloc_topology_diff_build(hwloc_topology_t topo1,
+			      hwloc_topology_t topo2,
+			      unsigned long flags,
+			      hwloc_topology_diff_t *diffp)
+{
+	hwloc_topology_diff_t lastdiff, tmpdiff;
+	struct hwloc_internal_distances_s *dist1, *dist2;
+	unsigned i;
+	int err;
+
+	if (!topo1->is_loaded || !topo2->is_loaded) {
+	  errno = EINVAL;
+	  return -1;
+	}
+
+	if (flags != 0) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	*diffp = NULL;
+	err = hwloc_diff_trees(topo1, hwloc_get_root_obj(topo1),
+			       topo2, hwloc_get_root_obj(topo2),
+			       flags,
+			       diffp, &lastdiff);
+	if (!err) {
+		tmpdiff = *diffp;
+		while (tmpdiff) {
+			if (tmpdiff->generic.type == HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX) {
+				err = 1;
+				break;
+			}
+			tmpdiff = tmpdiff->generic.next;
+		}
+	}
+
+	if (!err) {
+		/* distances */
+		hwloc_internal_distances_refresh(topo1);
+		hwloc_internal_distances_refresh(topo2);
+		dist1 = topo1->first_dist;
+		dist2 = topo2->first_dist;
+		while (dist1 || dist2) {
+			if (!!dist1 != !!dist2) {
+				hwloc_append_diff_too_complex(hwloc_get_root_obj(topo1), diffp, &lastdiff);
+				err = 1;
+				break;
+			}
+			if (dist1->type != dist2->type
+			    || dist1->nbobjs != dist2->nbobjs
+			    || dist1->kind != dist2->kind
+			    || memcmp(dist1->values, dist2->values, dist1->nbobjs * dist1->nbobjs * sizeof(*dist1->values))) {
+				hwloc_append_diff_too_complex(hwloc_get_root_obj(topo1), diffp, &lastdiff);
+				err = 1;
+				break;
+			}
+			for(i=0; i<dist1->nbobjs; i++)
+				/* gp_index isn't enforced above. so compare logical_index instead, which is enforced. requires distances refresh() above */
+				if (dist1->objs[i]->logical_index != dist2->objs[i]->logical_index) {
+					hwloc_append_diff_too_complex(hwloc_get_root_obj(topo1), diffp, &lastdiff);
+					err = 1;
+					break;
+				}
+			dist1 = dist1->next;
+			dist2 = dist2->next;
+		}
+	}
+
+	return err;
+}
+
+/********************
+ * Applying diffs
+ */
+
+static int
+hwloc_apply_diff_one(hwloc_topology_t topology,
+		     hwloc_topology_diff_t diff,
+		     unsigned long flags)
+{
+	int reverse = !!(flags & HWLOC_TOPOLOGY_DIFF_APPLY_REVERSE);
+
+	switch (diff->generic.type) {
+	case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR: {
+		struct hwloc_topology_diff_obj_attr_s *obj_attr = &diff->obj_attr;
+		hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, obj_attr->obj_depth, obj_attr->obj_index);
+		if (!obj)
+			return -1;
+
+		switch (obj_attr->diff.generic.type) {
+		case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_SIZE: {
+			hwloc_obj_t tmpobj;
+			hwloc_uint64_t oldvalue = reverse ? obj_attr->diff.uint64.newvalue : obj_attr->diff.uint64.oldvalue;
+			hwloc_uint64_t newvalue = reverse ? obj_attr->diff.uint64.oldvalue : obj_attr->diff.uint64.newvalue;
+			hwloc_uint64_t valuediff = newvalue - oldvalue;
+			if (obj->memory.local_memory != oldvalue)
+				return -1;
+			obj->memory.local_memory = newvalue;
+			tmpobj = obj;
+			while (tmpobj) {
+				tmpobj->memory.total_memory += valuediff;
+				tmpobj = tmpobj->parent;
+			}
+			break;
+		}
+		case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_NAME: {
+			const char *oldvalue = reverse ? obj_attr->diff.string.newvalue : obj_attr->diff.string.oldvalue;
+			const char *newvalue = reverse ? obj_attr->diff.string.oldvalue : obj_attr->diff.string.newvalue;
+			if (!obj->name || strcmp(obj->name, oldvalue))
+				return -1;
+			free(obj->name);
+			obj->name = strdup(newvalue);
+			break;
+		}
+		case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO: {
+			const char *name = obj_attr->diff.string.name;
+			const char *oldvalue = reverse ? obj_attr->diff.string.newvalue : obj_attr->diff.string.oldvalue;
+			const char *newvalue = reverse ? obj_attr->diff.string.oldvalue : obj_attr->diff.string.newvalue;
+			unsigned i;
+			int found = 0;
+			for(i=0; i<obj->infos_count; i++) {
+				if (!strcmp(obj->infos[i].name, name)
+				    && !strcmp(obj->infos[i].value, oldvalue)) {
+					free(obj->infos[i].value);
+					obj->infos[i].value = strdup(newvalue);
+					found = 1;
+					break;
+				}
+			}
+			if (!found)
+				return -1;
+			break;
+		}
+		default:
+			return -1;
+		}
+
+		break;
+	}
+	default:
+		return -1;
+	}
+
+	return 0;
+}
+
+int hwloc_topology_diff_apply(hwloc_topology_t topology,
+			      hwloc_topology_diff_t diff,
+			      unsigned long flags)
+{
+	hwloc_topology_diff_t tmpdiff, tmpdiff2;
+	int err, nr;
+
+	if (!topology->is_loaded) {
+	  errno = EINVAL;
+	  return -1;
+	}
+
+	if (flags & ~HWLOC_TOPOLOGY_DIFF_APPLY_REVERSE) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	tmpdiff = diff;
+	nr = 0;
+	while (tmpdiff) {
+		nr++;
+		err = hwloc_apply_diff_one(topology, tmpdiff, flags);
+		if (err < 0)
+			goto cancel;
+		tmpdiff = tmpdiff->generic.next;
+	}
+	return 0;
+
+cancel:
+	tmpdiff2 = tmpdiff;
+	tmpdiff = diff;
+	while (tmpdiff != tmpdiff2) {
+		hwloc_apply_diff_one(topology, tmpdiff, flags ^ HWLOC_TOPOLOGY_DIFF_APPLY_REVERSE);
+		tmpdiff = tmpdiff->generic.next;
+	}
+	errno = EINVAL;
+	return -nr; /* return the index (starting at 1) of the first element that couldn't be applied */
+}
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/distances.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/distances.c
new file mode 100644
index 0000000000..02f2352d64
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/distances.c
@@ -0,0 +1,927 @@
+/*
+ * Copyright © 2010-2017 Inria.  All rights reserved.
+ * Copyright © 2011-2012 Université Bordeaux
+ * Copyright © 2011 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+#include <private/autogen/config.h>
+#include <hwloc.h>
+#include <private/private.h>
+#include <private/debug.h>
+#include <private/misc.h>
+
+#include <float.h>
+#include <math.h>
+
+/******************************************************
+ * Global init, prepare, destroy, dup
+ */
+
+/* called during topology init() */
+void hwloc_internal_distances_init(struct hwloc_topology *topology)
+{
+  topology->first_dist = topology->last_dist = NULL;
+}
+
+/* called at the beginning of load() */
+void hwloc_internal_distances_prepare(struct hwloc_topology *topology)
+{
+  char *env;
+  hwloc_localeswitch_declare;
+
+  topology->grouping = 1;
+  if (topology->type_filter[HWLOC_OBJ_GROUP] == HWLOC_TYPE_FILTER_KEEP_NONE)
+    topology->grouping = 0;
+  env = getenv("HWLOC_GROUPING");
+  if (env && !atoi(env))
+    topology->grouping = 0;
+
+  if (topology->grouping) {
+    topology->grouping_next_subkind = 0;
+
+    HWLOC_BUILD_ASSERT(sizeof(topology->grouping_accuracies)/sizeof(*topology->grouping_accuracies) == 5);
+    topology->grouping_accuracies[0] = 0.0f;
+    topology->grouping_accuracies[1] = 0.01f;
+    topology->grouping_accuracies[2] = 0.02f;
+    topology->grouping_accuracies[3] = 0.05f;
+    topology->grouping_accuracies[4] = 0.1f;
+    topology->grouping_nbaccuracies = 5;
+
+    hwloc_localeswitch_init();
+    env = getenv("HWLOC_GROUPING_ACCURACY");
+    if (!env) {
+      /* only use 0.0 */
+      topology->grouping_nbaccuracies = 1;
+    } else if (strcmp(env, "try")) {
+      /* use the given value */
+      topology->grouping_nbaccuracies = 1;
+      topology->grouping_accuracies[0] = (float) atof(env);
+    } /* otherwise try all values */
+    hwloc_localeswitch_fini();
+
+    topology->grouping_verbose = 0;
+    env = getenv("HWLOC_GROUPING_VERBOSE");
+    if (env)
+      topology->grouping_verbose = atoi(env);
+  }
+}
+
+static void hwloc_internal_distances_free(struct hwloc_internal_distances_s *dist)
+{
+  free(dist->indexes);
+  free(dist->objs);
+  free(dist->values);
+  free(dist);
+}
+
+/* called during topology destroy */
+void hwloc_internal_distances_destroy(struct hwloc_topology * topology)
+{
+  struct hwloc_internal_distances_s *dist, *next = topology->first_dist;
+  while ((dist = next) != NULL) {
+    next = dist->next;
+    hwloc_internal_distances_free(dist);
+  }
+  topology->first_dist = topology->last_dist = NULL;
+}
+
+static int hwloc_internal_distances_dup_one(struct hwloc_topology *new, struct hwloc_internal_distances_s *olddist)
+{
+  struct hwloc_internal_distances_s *newdist;
+  unsigned nbobjs = olddist->nbobjs;
+
+  newdist = malloc(sizeof(*newdist));
+  if (!newdist)
+    return -1;
+
+  newdist->type = olddist->type;
+  newdist->nbobjs = nbobjs;
+  newdist->kind = olddist->kind;
+
+  newdist->indexes = malloc(nbobjs * sizeof(*newdist->indexes));
+  newdist->objs = calloc(nbobjs, sizeof(*newdist->objs));
+  newdist->objs_are_valid = 0;
+  newdist->values = malloc(nbobjs*nbobjs * sizeof(*newdist->values));
+  if (!newdist->indexes || !newdist->objs || !newdist->values) {
+    hwloc_internal_distances_free(newdist);
+    return -1;
+  }
+
+  memcpy(newdist->indexes, olddist->indexes, nbobjs * sizeof(*newdist->indexes));
+  memcpy(newdist->values, olddist->values, nbobjs*nbobjs * sizeof(*newdist->values));
+
+  newdist->next = NULL;
+  newdist->prev = new->last_dist;
+  if (new->last_dist)
+    new->last_dist->next = newdist;
+  else
+    new->first_dist = newdist;
+  new->last_dist = newdist;
+
+  return 0;
+}
+
+/* called by topology_dup() */
+int hwloc_internal_distances_dup(struct hwloc_topology *new, struct hwloc_topology *old)
+{
+  struct hwloc_internal_distances_s *olddist;
+  int err;
+  for(olddist = old->first_dist; olddist; olddist = olddist->next) {
+    err = hwloc_internal_distances_dup_one(new, olddist);
+    if (err < 0)
+      return err;
+  }
+  return 0;
+}
+
+/******************************************************
+ * Remove distances from the topology
+ */
+
+int hwloc_distances_remove(hwloc_topology_t topology)
+{
+  if (!topology->is_loaded) {
+    errno = EINVAL;
+    return -1;
+  }
+  hwloc_internal_distances_destroy(topology);
+  return 0;
+}
+
+int hwloc_distances_remove_by_depth(hwloc_topology_t topology, unsigned depth)
+{
+  struct hwloc_internal_distances_s *dist, *next;
+  hwloc_obj_type_t type;
+
+  if (!topology->is_loaded) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  /* switch back to types since we don't support groups for now */
+  type = hwloc_get_depth_type(topology, depth);
+  if (type == (hwloc_obj_type_t)-1) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  next = topology->first_dist;
+  while ((dist = next) != NULL) {
+    next = dist->next;
+    if (dist->type == type) {
+      if (next)
+	next->prev = dist->prev;
+      else
+	topology->last_dist = dist->prev;
+      if (dist->prev)
+	dist->prev->next = dist->next;
+      else
+	topology->first_dist = dist->next;
+      hwloc_internal_distances_free(dist);
+    }
+  }
+
+  return 0;
+}
+
+/******************************************************
+ * Add distances to the topology
+ */
+
+static void
+hwloc__groups_by_distances(struct hwloc_topology *topology, unsigned nbobjs, struct hwloc_obj **objs, uint64_t *values, unsigned long kind, unsigned nbaccuracies, float *accuracies, int needcheck);
+
+/* insert a distance matrix in the topology.
+ * the caller gives us the distances and objs pointers, we'll free them later.
+ */
+static int
+hwloc_internal_distances__add(hwloc_topology_t topology,
+			      hwloc_obj_type_t type, unsigned nbobjs, hwloc_obj_t *objs, uint64_t *indexes, uint64_t *values,
+			      unsigned long kind)
+{
+  struct hwloc_internal_distances_s *dist = calloc(1, sizeof(*dist));
+  if (!dist)
+    goto err;
+
+  dist->type = type;
+  dist->nbobjs = nbobjs;
+  dist->kind = kind;
+
+  if (!objs) {
+    assert(indexes);
+    /* we only have indexes, we'll refresh objs from there */
+    dist->indexes = indexes;
+    dist->objs = calloc(nbobjs, sizeof(hwloc_obj_t));
+    if (!dist->objs)
+      goto err_with_dist;
+    dist->objs_are_valid = 0;
+
+  } else {
+    unsigned i;
+    assert(!indexes);
+    /* we only have objs, generate the indexes arrays so that we can refresh objs later */
+    dist->objs = objs;
+    dist->objs_are_valid = 1;
+    dist->indexes = malloc(nbobjs * sizeof(*dist->indexes));
+    if (!dist->indexes)
+      goto err_with_dist;
+    if (dist->type == HWLOC_OBJ_PU || dist->type == HWLOC_OBJ_NUMANODE) {
+      for(i=0; i<nbobjs; i++)
+	dist->indexes[i] = objs[i]->os_index;
+    } else {
+      for(i=0; i<nbobjs; i++)
+	dist->indexes[i] = objs[i]->gp_index;
+    }
+  }
+
+  dist->values = values;
+
+  if (topology->last_dist)
+    topology->last_dist->next = dist;
+  else
+    topology->first_dist = dist;
+  dist->prev = topology->last_dist;
+  dist->next = NULL;
+  topology->last_dist = dist;
+  return 0;
+
+ err_with_dist:
+  free(dist);
+ err:
+  free(objs);
+  free(indexes);
+  free(values);
+  return -1;
+}
+
+int hwloc_internal_distances_add_by_index(hwloc_topology_t topology,
+					  hwloc_obj_type_t type, unsigned nbobjs, uint64_t *indexes, uint64_t *values,
+					  unsigned long kind, unsigned long flags)
+{
+  if (nbobjs < 2) {
+    errno = EINVAL;
+    goto err;
+  }
+
+  /* cannot group without objects,
+   * and we don't group from XML anyway since the hwloc that generated the XML should have grouped already.
+   */
+  if (flags & HWLOC_DISTANCES_FLAG_GROUP) {
+    errno = EINVAL;
+    goto err;
+  }
+
+  return hwloc_internal_distances__add(topology, type, nbobjs, NULL, indexes, values, kind);
+
+ err:
+  free(indexes);
+  free(values);
+  return -1;
+}
+
+int hwloc_internal_distances_add(hwloc_topology_t topology,
+				 unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values,
+				 unsigned long kind, unsigned long flags)
+{
+  if (nbobjs < 2) {
+    errno = EINVAL;
+    goto err;
+  }
+
+  if (topology->grouping && (flags & HWLOC_DISTANCES_FLAG_GROUP)) {
+    float full_accuracy = 0.f;
+    float *accuracies;
+    unsigned nbaccuracies;
+
+    if (flags & HWLOC_DISTANCES_FLAG_GROUP_INACCURATE) {
+      accuracies = topology->grouping_accuracies;
+      nbaccuracies = topology->grouping_nbaccuracies;
+    } else {
+      accuracies = &full_accuracy;
+      nbaccuracies = 1;
+    }
+
+    if (topology->grouping_verbose) {
+      unsigned i, j;
+      int gp = (objs[0]->type != HWLOC_OBJ_NUMANODE && objs[0]->type != HWLOC_OBJ_PU);
+      fprintf(stderr, "Trying to group objects using distance matrix:\n");
+      fprintf(stderr, "%s", gp ? "gp_index" : "os_index");
+      for(j=0; j<nbobjs; j++)
+	fprintf(stderr, " % 5d", (int)(gp ? objs[j]->gp_index : objs[j]->os_index));
+      fprintf(stderr, "\n");
+      for(i=0; i<nbobjs; i++) {
+	fprintf(stderr, "  % 5d", (int)(gp ? objs[i]->gp_index : objs[i]->os_index));
+	for(j=0; j<nbobjs; j++)
+	  fprintf(stderr, " % 5lld", (long long) values[i*nbobjs + j]);
+	fprintf(stderr, "\n");
+      }
+    }
+
+    hwloc__groups_by_distances(topology, nbobjs, objs, values,
+			       kind, nbaccuracies, accuracies, 1 /* check the first matrix */);
+  }
+
+  return hwloc_internal_distances__add(topology, objs[0]->type, nbobjs, objs, NULL, values, kind);
+
+ err:
+  free(objs);
+  free(values);
+  return -1;
+}
+
+#define HWLOC_DISTANCES_KIND_FROM_ALL (HWLOC_DISTANCES_KIND_FROM_OS|HWLOC_DISTANCES_KIND_FROM_USER)
+#define HWLOC_DISTANCES_KIND_MEANS_ALL (HWLOC_DISTANCES_KIND_MEANS_LATENCY|HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH)
+#define HWLOC_DISTANCES_KIND_ALL (HWLOC_DISTANCES_KIND_FROM_ALL|HWLOC_DISTANCES_KIND_MEANS_ALL)
+#define HWLOC_DISTANCES_FLAG_ALL (HWLOC_DISTANCES_FLAG_GROUP|HWLOC_DISTANCES_FLAG_GROUP_INACCURATE)
+
+/* The actual function exported to the user
+ */
+int hwloc_distances_add(hwloc_topology_t topology,
+			unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values,
+			unsigned long kind, unsigned long flags)
+{
+  hwloc_obj_type_t type;
+  unsigned i;
+  uint64_t *_values;
+  hwloc_obj_t *_objs;
+  int err;
+
+  if (nbobjs < 2 || !objs || !values || !topology->is_loaded) {
+    errno = EINVAL;
+    return -1;
+  }
+  if ((kind & ~HWLOC_DISTANCES_KIND_ALL)
+      || hwloc_weight_long(kind & HWLOC_DISTANCES_KIND_FROM_ALL) != 1
+      || hwloc_weight_long(kind & HWLOC_DISTANCES_KIND_MEANS_ALL) != 1
+      || (flags & ~HWLOC_DISTANCES_FLAG_ALL)) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  /* no strict need to check for duplicates, things shouldn't break */
+
+  type = objs[0]->type;
+  if (type == HWLOC_OBJ_GROUP) {
+    /* not supported yet, would require we save the subkind together with the type. */
+    errno = EINVAL;
+    return -1;
+  }
+
+  for(i=1; i<nbobjs; i++)
+    if (!objs[i] || objs[i]->type != type) {
+      errno = EINVAL;
+      return -1;
+    }
+
+  /* copy the input arrays and give them to the topology */
+  _objs = malloc(nbobjs*sizeof(hwloc_obj_t));
+  _values = malloc(nbobjs*nbobjs*sizeof(*_values));
+  if (!_objs || !_values)
+    goto out_with_arrays;
+
+  memcpy(_objs, objs, nbobjs*sizeof(hwloc_obj_t));
+  memcpy(_values, values, nbobjs*nbobjs*sizeof(*_values));
+  err = hwloc_internal_distances_add(topology, nbobjs, _objs, _values, kind, flags);
+  if (err < 0)
+    goto out; /* _objs and _values freed in hwloc_internal_distances_add() */
+
+  /* in case we added some groups, see if we need to reconnect */
+  hwloc_topology_reconnect(topology, 0);
+
+  return 0;
+
+ out_with_arrays:
+  free(_values);
+  free(_objs);
+ out:
+  return -1;
+}
+
+/******************************************************
+ * Refresh objects in distances
+ */
+
+static hwloc_obj_t hwloc_find_obj_by_type_and_gp_index(hwloc_topology_t topology, hwloc_obj_type_t type, uint64_t gp_index)
+{
+  hwloc_obj_t obj = hwloc_get_obj_by_type(topology, type, 0);
+  while (obj) {
+    if (obj->gp_index == gp_index)
+      return obj;
+    obj = obj->next_cousin;
+  }
+  return NULL;
+}
+
+static void
+hwloc_internal_distances_restrict(struct hwloc_internal_distances_s *dist,
+				  hwloc_obj_t *objs,
+				  unsigned disappeared)
+{
+  unsigned nbobjs = dist->nbobjs;
+  unsigned i, newi;
+  unsigned j, newj;
+
+  for(i=0, newi=0; i<nbobjs; i++)
+    if (objs[i]) {
+      for(j=0, newj=0; j<nbobjs; j++)
+	if (objs[j]) {
+	  dist->values[newi*(nbobjs-disappeared)+newj] = dist->values[i*nbobjs+j];
+	  newj++;
+	}
+      newi++;
+    }
+
+  for(i=0, newi=0; i<nbobjs; i++)
+    if (objs[i]) {
+      objs[newi] = objs[i];
+      dist->indexes[newi] = dist->indexes[i];
+      newi++;
+    }
+
+  dist->nbobjs -= disappeared;
+}
+
+static int
+hwloc_internal_distances_refresh_one(hwloc_topology_t topology,
+				     struct hwloc_internal_distances_s *dist)
+{
+  hwloc_obj_type_t type = dist->type;
+  unsigned nbobjs = dist->nbobjs;
+  hwloc_obj_t *objs = dist->objs;
+  uint64_t *indexes = dist->indexes;
+  unsigned disappeared = 0;
+  unsigned i;
+
+  if (dist->objs_are_valid)
+    return 0;
+
+  for(i=0; i<nbobjs; i++) {
+    hwloc_obj_t obj;
+    /* TODO use cpuset/nodeset to find pus/numas from the root?
+     * faster than traversing the entire level?
+     */
+    if (type == HWLOC_OBJ_PU)
+      obj = hwloc_get_pu_obj_by_os_index(topology, (unsigned) indexes[i]);
+    else if (type == HWLOC_OBJ_NUMANODE)
+      obj = hwloc_get_numanode_obj_by_os_index(topology, (unsigned) indexes[i]);
+    else
+      obj = hwloc_find_obj_by_type_and_gp_index(topology, type, indexes[i]);
+    objs[i] = obj;
+    if (!obj)
+      disappeared++;
+  }
+
+  if (nbobjs-disappeared < 2)
+    /* became useless, drop */
+    return -1;
+
+  if (disappeared)
+    hwloc_internal_distances_restrict(dist, objs, disappeared);
+
+  dist->objs_are_valid = 1;
+  return 0;
+}
+
+void
+hwloc_internal_distances_refresh(hwloc_topology_t topology)
+{
+  struct hwloc_internal_distances_s *dist, *next;
+
+  for(dist = topology->first_dist; dist; dist = next) {
+    next = dist->next;
+
+    if (hwloc_internal_distances_refresh_one(topology, dist) < 0) {
+      if (dist->prev)
+	dist->prev->next = next;
+      else
+	topology->first_dist = next;
+      if (next)
+	next->prev = dist->prev;
+      else
+	topology->last_dist = dist->prev;
+      hwloc_internal_distances_free(dist);
+      continue;
+    }
+  }
+}
+
+void
+hwloc_internal_distances_invalidate_cached_objs(hwloc_topology_t topology)
+{
+  struct hwloc_internal_distances_s *dist;
+  for(dist = topology->first_dist; dist; dist = dist->next)
+    dist->objs_are_valid = 0;
+}
+
+/******************************************************
+ * User API for getting distances
+ */
+
+void
+hwloc_distances_release(hwloc_topology_t topology __hwloc_attribute_unused,
+			struct hwloc_distances_s *distances)
+{
+  free(distances->values);
+  free(distances->objs);
+  free(distances);
+}
+
+static struct hwloc_distances_s *
+hwloc_distances_get_one(hwloc_topology_t topology __hwloc_attribute_unused,
+			struct hwloc_internal_distances_s *dist)
+{
+  struct hwloc_distances_s *distances;
+  unsigned nbobjs;
+
+  distances = malloc(sizeof(*distances));
+  if (!distances)
+    return NULL;
+
+  nbobjs = distances->nbobjs = dist->nbobjs;
+
+  distances->objs = malloc(nbobjs * sizeof(hwloc_obj_t));
+  if (!distances->objs)
+    goto out;
+  memcpy(distances->objs, dist->objs, nbobjs * sizeof(hwloc_obj_t));
+
+  distances->values = malloc(nbobjs * nbobjs * sizeof(*distances->values));
+  if (!distances->values)
+    goto out_with_objs;
+  memcpy(distances->values, dist->values, nbobjs*nbobjs*sizeof(*distances->values));
+
+  distances->kind = dist->kind;
+  return distances;
+
+ out_with_objs:
+  free(distances->objs);
+ out:
+  free(distances);
+  return NULL;
+}
+
+static int
+hwloc__distances_get(hwloc_topology_t topology,
+		     hwloc_obj_type_t type,
+		     unsigned *nrp, struct hwloc_distances_s **distancesp,
+		     unsigned long kind, unsigned long flags __hwloc_attribute_unused)
+{
+  struct hwloc_internal_distances_s *dist;
+  unsigned nr = 0, i;
+
+  /* We could return the internal arrays (as const),
+   * but it would require to prevent removing distances between get() and free().
+   * Not performance critical anyway.
+   */
+
+  if (flags) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  /* we could refresh only the distances that match, but we won't have many distances anyway,
+   * so performance is totally negligible.
+   */
+  hwloc_internal_distances_refresh(topology);
+
+  for(dist = topology->first_dist; dist; dist = dist->next) {
+    unsigned long kind_from = kind & HWLOC_DISTANCES_KIND_FROM_ALL;
+    unsigned long kind_means = kind & HWLOC_DISTANCES_KIND_MEANS_ALL;
+
+    if (type != HWLOC_OBJ_TYPE_NONE && type != dist->type)
+      continue;
+
+    if (kind_from && !(kind_from & dist->kind))
+      continue;
+    if (kind_means && !(kind_means & dist->kind))
+      continue;
+
+    if (nr < *nrp) {
+      struct hwloc_distances_s *distances = hwloc_distances_get_one(topology, dist);
+      if (!distances)
+	goto error;
+      distancesp[nr] = distances;
+    }
+    nr++;
+  }
+
+  for(i=nr; i<*nrp; i++)
+    distancesp[i] = NULL;
+  *nrp = nr;
+  return 0;
+
+ error:
+  for(i=0; i<nr; i++)
+    hwloc_distances_release(topology, distancesp[i]);
+  return -1;
+}
+
+int
+hwloc_distances_get(hwloc_topology_t topology,
+		    unsigned *nrp, struct hwloc_distances_s **distancesp,
+		    unsigned long kind, unsigned long flags)
+{
+  if (flags || !topology->is_loaded) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  return hwloc__distances_get(topology, HWLOC_OBJ_TYPE_NONE, nrp, distancesp, kind, flags);
+}
+
+int
+hwloc_distances_get_by_depth(hwloc_topology_t topology, unsigned depth,
+			     unsigned *nrp, struct hwloc_distances_s **distancesp,
+			     unsigned long kind, unsigned long flags)
+{
+  hwloc_obj_type_t type;
+
+  if (flags || !topology->is_loaded) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  /* switch back to types since we don't support groups for now */
+  type = hwloc_get_depth_type(topology, depth);
+  if (type == (hwloc_obj_type_t)-1) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  return hwloc__distances_get(topology, type, nrp, distancesp, kind, flags);
+}
+
+/******************************************************
+ * Grouping objects according to distances
+ */
+
+static void hwloc_report_user_distance_error(const char *msg, int line)
+{
+    static int reported = 0;
+
+    if (!reported && !hwloc_hide_errors()) {
+        fprintf(stderr, "****************************************************************************\n");
+        fprintf(stderr, "* hwloc %s has encountered what looks like an error from user-given distances.\n", HWLOC_VERSION);
+        fprintf(stderr, "*\n");
+        fprintf(stderr, "* %s\n", msg);
+        fprintf(stderr, "* Error occurred in topology.c line %d\n", line);
+        fprintf(stderr, "*\n");
+        fprintf(stderr, "* Please make sure that distances given through the interface or environment\n");
+        fprintf(stderr, "* variables do not contradict any other topology information.\n");
+        fprintf(stderr, "****************************************************************************\n");
+        reported = 1;
+    }
+}
+
+static int hwloc_compare_values(uint64_t a, uint64_t b, float accuracy)
+{
+  if (accuracy != 0.0f && fabsf((float)a-(float)b) < (float)a * accuracy)
+    return 0;
+  return a < b ? -1 : a == b ? 0 : 1;
+}
+
+/*
+ * Place objects in groups if they are in a transitive graph of minimal values.
+ * Return how many groups were created, or 0 if some incomplete distance graphs were found.
+ */
+static unsigned
+hwloc__find_groups_by_min_distance(unsigned nbobjs,
+				   uint64_t *_values,
+				   float accuracy,
+				   unsigned *groupids,
+				   int verbose)
+{
+  uint64_t min_distance = UINT64_MAX;
+  unsigned groupid = 1;
+  unsigned i,j,k;
+  unsigned skipped = 0;
+
+#define VALUE(i, j) _values[(i) * nbobjs + (j)]
+
+  memset(groupids, 0, nbobjs*sizeof(*groupids));
+
+  /* find the minimal distance */
+  for(i=0; i<nbobjs; i++)
+    for(j=0; j<nbobjs; j++) /* check the entire matrix, it may not be perfectly symmetric depending on the accuracy */
+      if (i != j && VALUE(i, j) < min_distance) /* no accuracy here, we want the real minimal */
+        min_distance = VALUE(i, j);
+  hwloc_debug("  found minimal distance %llu between objects\n", (unsigned long long) min_distance);
+
+  if (min_distance == UINT64_MAX)
+    return 0;
+
+  /* build groups of objects connected with this distance */
+  for(i=0; i<nbobjs; i++) {
+    unsigned size;
+    int firstfound;
+
+    /* if already grouped, skip */
+    if (groupids[i])
+      continue;
+
+    /* start a new group */
+    groupids[i] = groupid;
+    size = 1;
+    firstfound = i;
+
+    while (firstfound != -1) {
+      /* we added new objects to the group, the first one was firstfound.
+       * rescan all connections from these new objects (starting at first found) to any other objects,
+       * so as to find new objects minimally-connected by transitivity.
+       */
+      int newfirstfound = -1;
+      for(j=firstfound; j<nbobjs; j++)
+	if (groupids[j] == groupid)
+	  for(k=0; k<nbobjs; k++)
+              if (!groupids[k] && !hwloc_compare_values(VALUE(j, k), min_distance, accuracy)) {
+	      groupids[k] = groupid;
+	      size++;
+	      if (newfirstfound == -1)
+		newfirstfound = k;
+	      if (i == j)
+		hwloc_debug("  object %u is minimally connected to %u\n", k, i);
+	      else
+	        hwloc_debug("  object %u is minimally connected to %u through %u\n", k, i, j);
+	    }
+      firstfound = newfirstfound;
+    }
+
+    if (size == 1) {
+      /* cancel this useless group, ignore this object and try from the next one */
+      groupids[i] = 0;
+      skipped++;
+      continue;
+    }
+
+    /* valid this group */
+    groupid++;
+    if (verbose)
+      fprintf(stderr, " Found transitive graph with %u objects with minimal distance %llu accuracy %f\n",
+	      size, (unsigned long long) min_distance, accuracy);
+  }
+
+  if (groupid == 2 && !skipped)
+    /* we created a single group containing all objects, ignore it */
+    return 0;
+
+  /* return the last id, since it's also the number of used group ids */
+  return groupid-1;
+}
+
+/* check that the matrix is ok */
+static int
+hwloc__check_grouping_matrix(unsigned nbobjs, uint64_t *_values, float accuracy, int verbose)
+{
+  unsigned i,j;
+  for(i=0; i<nbobjs; i++) {
+    for(j=i+1; j<nbobjs; j++) {
+      /* should be symmetric */
+      if (hwloc_compare_values(VALUE(i, j), VALUE(j, i), accuracy)) {
+	if (verbose)
+	  fprintf(stderr, " Distance matrix asymmetric ([%u,%u]=%llu != [%u,%u]=%llu), aborting\n",
+		  i, j, (unsigned long long) VALUE(i, j), j, i, (unsigned long long) VALUE(j, i));
+	return -1;
+      }
+      /* diagonal is smaller than everything else */
+      if (hwloc_compare_values(VALUE(i, j), VALUE(i, i), accuracy) <= 0) {
+	if (verbose)
+	  fprintf(stderr, " Distance to self not strictly minimal ([%u,%u]=%llu <= [%u,%u]=%llu), aborting\n",
+		  i, j, (unsigned long long) VALUE(i, j), i, i, (unsigned long long) VALUE(i, i));
+	return -1;
+      }
+    }
+  }
+  return 0;
+}
+
+/*
+ * Look at object physical distances to group them.
+ */
+static void
+hwloc__groups_by_distances(struct hwloc_topology *topology,
+			   unsigned nbobjs,
+			   struct hwloc_obj **objs,
+			   uint64_t *_values,
+			   unsigned long kind,
+			   unsigned nbaccuracies,
+			   float *accuracies,
+			   int needcheck)
+{
+  unsigned *groupids = NULL;
+  unsigned nbgroups = 0;
+  unsigned i,j;
+  int verbose = topology->grouping_verbose;
+
+  if (nbobjs <= 2)
+      return;
+
+  if (!(kind & HWLOC_DISTANCES_KIND_MEANS_LATENCY))
+    /* don't know how to use those for grouping */
+    /* TODO hwloc__find_groups_by_max_distance() for bandwidth */
+    return;
+
+  groupids = malloc(sizeof(unsigned) * nbobjs);
+  if (NULL == groupids) {
+      return;
+  }
+
+  for(i=0; i<nbaccuracies; i++) {
+    if (verbose)
+      fprintf(stderr, "Trying to group %u %s objects according to physical distances with accuracy %f\n",
+	      nbobjs, hwloc_type_name(objs[0]->type), accuracies[i]);
+    if (needcheck && hwloc__check_grouping_matrix(nbobjs, _values, accuracies[i], verbose) < 0)
+      continue;
+    nbgroups = hwloc__find_groups_by_min_distance(nbobjs, _values, accuracies[i], groupids, verbose);
+    if (nbgroups)
+      break;
+  }
+  if (!nbgroups)
+    goto outter_free;
+
+  /* For convenience, put these declarations inside a block.  It's a
+     crying shame we can't use C99 syntax here, and have to do a bunch
+     of mallocs. :-( */
+  {
+      hwloc_obj_t *groupobjs = NULL;
+      unsigned *groupsizes = NULL;
+      uint64_t *groupvalues = NULL;
+      unsigned failed = 0;
+
+      groupobjs = malloc(sizeof(hwloc_obj_t) * nbgroups);
+      groupsizes = malloc(sizeof(unsigned) * nbgroups);
+      groupvalues = malloc(sizeof(uint64_t) * nbgroups * nbgroups);
+      if (NULL == groupobjs || NULL == groupsizes || NULL == groupvalues) {
+          goto inner_free;
+      }
+      /* create new Group objects and record their size */
+      memset(&(groupsizes[0]), 0, sizeof(groupsizes[0]) * nbgroups);
+      for(i=0; i<nbgroups; i++) {
+          /* create the Group object */
+          hwloc_obj_t group_obj, res_obj;
+          group_obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, -1);
+          group_obj->cpuset = hwloc_bitmap_alloc();
+          group_obj->attr->group.kind = HWLOC_GROUP_KIND_DISTANCE;
+          group_obj->attr->group.subkind = topology->grouping_next_subkind;
+          for (j=0; j<nbobjs; j++)
+	    if (groupids[j] == i+1) {
+	      /* assemble the group sets */
+	      hwloc_obj_add_other_obj_sets(group_obj, objs[j]);
+              groupsizes[i]++;
+            }
+          hwloc_debug_1arg_bitmap("adding Group object with %u objects and cpuset %s\n",
+                                  groupsizes[i], group_obj->cpuset);
+          res_obj = hwloc__insert_object_by_cpuset(topology, group_obj,
+						   (kind & HWLOC_DISTANCES_KIND_FROM_USER) ? hwloc_report_user_distance_error : hwloc_report_os_error);
+	  /* res_obj may be NULL on failure to insert. */
+	  if (!res_obj)
+	    failed++;
+	  /* or it may be different from groupobjs if we got groups from XML import before grouping */
+          groupobjs[i] = res_obj;
+      }
+      topology->grouping_next_subkind++;
+
+      if (failed)
+	/* don't try to group above if we got a NULL group here, just keep this incomplete level */
+	goto inner_free;
+
+      /* factorize values */
+      memset(&(groupvalues[0]), 0, sizeof(groupvalues[0]) * nbgroups * nbgroups);
+#undef VALUE
+#define VALUE(i, j) _values[(i) * nbobjs + (j)]
+#define GROUP_VALUE(i, j) groupvalues[(i) * nbgroups + (j)]
+      for(i=0; i<nbobjs; i++)
+	if (groupids[i])
+	  for(j=0; j<nbobjs; j++)
+	    if (groupids[j])
+                GROUP_VALUE(groupids[i]-1, groupids[j]-1) += VALUE(i, j);
+      for(i=0; i<nbgroups; i++)
+          for(j=0; j<nbgroups; j++) {
+              unsigned groupsize = groupsizes[i]*groupsizes[j];
+              GROUP_VALUE(i, j) /= groupsize;
+          }
+#ifdef HWLOC_DEBUG
+      hwloc_debug("%s", "generated new distance matrix between groups:\n");
+      hwloc_debug("%s", "  index");
+      for(j=0; j<nbgroups; j++)
+	hwloc_debug(" % 5d", (int) j); /* print index because os_index is -1 for Groups */
+      hwloc_debug("%s", "\n");
+      for(i=0; i<nbgroups; i++) {
+	hwloc_debug("  % 5d", (int) i);
+	for(j=0; j<nbgroups; j++)
+	  hwloc_debug(" %llu", (unsigned long long) GROUP_VALUE(i, j));
+	hwloc_debug("%s", "\n");
+      }
+#endif
+
+      hwloc__groups_by_distances(topology, nbgroups, groupobjs, groupvalues, kind, nbaccuracies, accuracies, 0 /* no need to check generated matrix */);
+
+  inner_free:
+      /* Safely free everything */
+      free(groupobjs);
+      free(groupsizes);
+      free(groupvalues);
+  }
+
+ outter_free:
+  free(groupids);
+}
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/dolib.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/dolib.c
new file mode 100644
index 0000000000..0b2835a5da
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/dolib.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009 inria.  All rights reserved.
+ * Copyright © 2009, 2012 Université Bordeaux
+ * See COPYING in top-level directory.
+ */
+
+/* Wrapper to avoid msys' tendency to turn / into \ and : into ;  */
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(int argc, char *argv[]) {
+  char *prog, *arch, *def, *version, *lib;
+  char s[1024];
+  char name[16];
+  int current, age, revision;
+
+  if (argc != 6) {
+    fprintf(stderr,"bad number of arguments");
+    exit(EXIT_FAILURE);
+  }
+
+  prog = argv[1];
+  arch = argv[2];
+  def = argv[3];
+  version = argv[4];
+  lib = argv[5];
+
+  if (sscanf(version, "%d:%d:%d", &current, &revision, &age) != 3)
+    exit(EXIT_FAILURE);
+
+  _snprintf(name, sizeof(name), "libhwloc-%d", current - age);
+  printf("using soname %s\n", name);
+
+  _snprintf(s, sizeof(s), "\"%s\" /machine:%s /def:%s /name:%s /out:%s",
+      prog, arch, def, name, lib);
+  if (system(s)) {
+    fprintf(stderr, "%s failed\n", s);
+    exit(EXIT_FAILURE);
+  }
+
+  exit(EXIT_SUCCESS);
+}
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/misc.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/misc.c
new file mode 100644
index 0000000000..18892946a0
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/misc.c
@@ -0,0 +1,166 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2015 Inria.  All rights reserved.
+ * Copyright © 2009-2010 Université Bordeaux
+ * Copyright © 2009-2011 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+#include <private/autogen/config.h>
+#include <private/private.h>
+#include <private/misc.h>
+
+#include <stdarg.h>
+#ifdef HAVE_SYS_UTSNAME_H
+#include <sys/utsname.h>
+#endif
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <errno.h>
+#include <ctype.h>
+
+#ifdef HAVE_PROGRAM_INVOCATION_NAME
+#include <errno.h>
+extern char *program_invocation_name;
+#endif
+#ifdef HAVE___PROGNAME
+extern char *__progname;
+#endif
+
+int hwloc_snprintf(char *str, size_t size, const char *format, ...)
+{
+  int ret;
+  va_list ap;
+  static char bin;
+  size_t fakesize;
+  char *fakestr;
+
+  /* Some systems crash on str == NULL */
+  if (!size) {
+    str = &bin;
+    size = 1;
+  }
+
+  va_start(ap, format);
+  ret = vsnprintf(str, size, format, ap);
+  va_end(ap);
+
+  if (ret >= 0 && (size_t) ret != size-1)
+    return ret;
+
+  /* vsnprintf returned size-1 or -1. That could be a system which reports the
+   * written data and not the actually required room. Try increasing buffer
+   * size to get the latter. */
+
+  fakesize = size;
+  fakestr = NULL;
+  do {
+    fakesize *= 2;
+    free(fakestr);
+    fakestr = malloc(fakesize);
+    if (NULL == fakestr)
+      return -1;
+    va_start(ap, format);
+    errno = 0;
+    ret = vsnprintf(fakestr, fakesize, format, ap);
+    va_end(ap);
+  } while ((size_t) ret == fakesize-1 || (ret < 0 && (!errno || errno == ERANGE)));
+
+  if (ret >= 0 && size) {
+    if (size > (size_t) ret+1)
+      size = ret+1;
+    memcpy(str, fakestr, size-1);
+    str[size-1] = 0;
+  }
+  free(fakestr);
+
+  return ret;
+}
+
+int hwloc_namecoloncmp(const char *haystack, const char *needle, size_t n)
+{
+  size_t i = 0;
+  while (*haystack && *haystack != ':') {
+    int ha = *haystack++;
+    int low_h = tolower(ha);
+    int ne = *needle++;
+    int low_n = tolower(ne);
+    if (low_h != low_n)
+      return 1;
+    i++;
+  }
+  return i < n;
+}
+
+void hwloc_add_uname_info(struct hwloc_topology *topology __hwloc_attribute_unused,
+			  void *cached_uname __hwloc_attribute_unused)
+{
+#ifdef HAVE_UNAME
+  struct utsname _utsname, *utsname;
+
+  if (hwloc_obj_get_info_by_name(topology->levels[0][0], "OSName"))
+    /* don't annotate twice */
+    return;
+
+  if (cached_uname)
+    utsname = (struct utsname *) cached_uname;
+  else {
+    utsname = &_utsname;
+    if (uname(utsname) < 0)
+      return;
+  }
+
+  if (*utsname->sysname)
+    hwloc_obj_add_info(topology->levels[0][0], "OSName", utsname->sysname);
+  if (*utsname->release)
+    hwloc_obj_add_info(topology->levels[0][0], "OSRelease", utsname->release);
+  if (*utsname->version)
+    hwloc_obj_add_info(topology->levels[0][0], "OSVersion", utsname->version);
+  if (*utsname->nodename)
+    hwloc_obj_add_info(topology->levels[0][0], "HostName", utsname->nodename);
+  if (*utsname->machine)
+    hwloc_obj_add_info(topology->levels[0][0], "Architecture", utsname->machine);
+#endif /* HAVE_UNAME */
+}
+
+char *
+hwloc_progname(struct hwloc_topology *topology __hwloc_attribute_unused)
+{
+#if HAVE_DECL_GETMODULEFILENAME
+  char name[256], *basename;
+  unsigned res = GetModuleFileName(NULL, name, sizeof(name));
+  if (res == sizeof(name) || !res)
+    return NULL;
+  basename = strrchr(name, '\\');
+  if (!basename)
+    basename = name;
+  else
+    basename++;
+  return strdup(basename);
+#else /* !HAVE_GETMODULEFILENAME */
+  const char *name, *basename;
+#if HAVE_DECL_GETPROGNAME
+  name = getprogname(); /* FreeBSD, NetBSD, some Solaris */
+#elif HAVE_DECL_GETEXECNAME
+  name = getexecname(); /* Solaris */
+#elif defined HAVE_PROGRAM_INVOCATION_NAME
+  name = program_invocation_name; /* Glibc. BGQ CNK. */
+  /* could use program_invocation_short_name directly, but we have the code to remove the path below anyway */
+#elif defined HAVE___PROGNAME
+  name = __progname; /* fallback for most unix, used for OpenBSD */
+#else
+  /* TODO: _NSGetExecutablePath(path, &size) on Darwin */
+  /* TODO: AIX, HPUX */
+  name = NULL;
+#endif
+  if (!name)
+    return NULL;
+  basename = strrchr(name, '/');
+  if (!basename)
+    basename = name;
+  else
+    basename++;
+  return strdup(basename);
+#endif /* !HAVE_GETMODULEFILENAME */
+}
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/pci-common.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/pci-common.c
new file mode 100644
index 0000000000..7e456496b6
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/pci-common.c
@@ -0,0 +1,954 @@
+/*
+ * Copyright © 2009-2017 Inria.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+#include <private/autogen/config.h>
+#include <hwloc.h>
+#include <hwloc/plugins.h>
+#include <private/private.h>
+#include <private/debug.h>
+#include <private/misc.h>
+
+#include <fcntl.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <sys/stat.h>
+
+#ifdef HWLOC_WIN_SYS
+#include <io.h>
+#define open _open
+#define read _read
+#define close _close
+#endif
+
+static void
+hwloc_pci_forced_locality_parse_one(struct hwloc_topology *topology,
+				    const char *string /* must contain a ' ' */,
+				    unsigned *allocated)
+{
+  unsigned nr = topology->pci_forced_locality_nr;
+  unsigned domain, bus_first, bus_last, dummy;
+  hwloc_bitmap_t set;
+  char *tmp;
+
+  if (sscanf(string, "%x:%x-%x %x", &domain, &bus_first, &bus_last, &dummy) == 4) {
+    /* fine */
+  } else if (sscanf(string, "%x:%x %x", &domain, &bus_first, &dummy) == 3) {
+    bus_last = bus_first;
+  } else if (sscanf(string, "%x %x", &domain, &dummy) == 2) {
+    bus_first = 0;
+    bus_last = 255;
+  } else
+    return;
+
+  tmp = strchr(string, ' ');
+  if (!tmp)
+    return;
+  tmp++;
+
+  set = hwloc_bitmap_alloc();
+  hwloc_bitmap_sscanf(set, tmp);
+
+  if (!*allocated) {
+    topology->pci_forced_locality = malloc(sizeof(*topology->pci_forced_locality));
+    if (!topology->pci_forced_locality)
+      goto out_with_set; /* failed to allocate, ignore this forced locality */
+    *allocated = 1;
+  } else if (nr >= *allocated) {
+    struct hwloc_pci_forced_locality_s *tmplocs;
+    tmplocs = realloc(topology->pci_forced_locality,
+		      2 * *allocated * sizeof(*topology->pci_forced_locality));
+    if (!tmplocs)
+      goto out_with_set; /* failed to allocate, ignore this forced locality */
+    topology->pci_forced_locality = tmplocs;
+    *allocated *= 2;
+  }
+
+  topology->pci_forced_locality[nr].domain = domain;
+  topology->pci_forced_locality[nr].bus_first = bus_first;
+  topology->pci_forced_locality[nr].bus_last = bus_last;
+  topology->pci_forced_locality[nr].cpuset = set;
+  topology->pci_forced_locality_nr++;
+  return;
+
+ out_with_set:
+  hwloc_bitmap_free(set);
+  return;
+}
+
+static void
+hwloc_pci_forced_locality_parse(struct hwloc_topology *topology, const char *_env)
+{
+  char *env = strdup(_env);
+  unsigned allocated = 0;
+  char *tmp = env;
+
+  while (1) {
+    size_t len = strcspn(tmp, ";\r\n");
+    char *next = NULL;
+
+    if (tmp[len] != '\0') {
+      tmp[len] = '\0';
+      if (tmp[len+1] != '\0')
+	next = &tmp[len]+1;
+    }
+
+    hwloc_pci_forced_locality_parse_one(topology, tmp, &allocated);
+
+    if (next)
+      tmp = next;
+    else
+      break;
+  }
+
+  free(env);
+}
+
+void
+hwloc_pci_discovery_init(struct hwloc_topology *topology)
+{
+  topology->pci_nonzero_domains = 0;
+  topology->need_pci_belowroot_apply_locality = 0;
+
+  topology->pci_has_forced_locality = 0;
+  topology->pci_forced_locality_nr = 0;
+  topology->pci_forced_locality = NULL;
+}
+
+void
+hwloc_pci_discovery_prepare(struct hwloc_topology *topology)
+{
+  char *env;
+
+  env = getenv("HWLOC_PCI_LOCALITY");
+  if (env) {
+    int fd;
+
+    topology->pci_has_forced_locality = 1;
+
+    fd = open(env, O_RDONLY);
+    if (fd >= 0) {
+      struct stat st;
+      char *buffer;
+      int err = fstat(fd, &st);
+      if (!err) {
+	if (st.st_size <= 64*1024) { /* random limit large enough to store multiple cpusets for thousands of PUs */
+	  buffer = malloc(st.st_size+1);
+	  if (read(fd, buffer, st.st_size) == st.st_size) {
+	    buffer[st.st_size] = '\0';
+	    hwloc_pci_forced_locality_parse(topology, buffer);
+	  }
+	  free(buffer);
+	} else {
+	  fprintf(stderr, "Ignoring HWLOC_PCI_LOCALITY file `%s' too large (%lu bytes)\n",
+		  env, (unsigned long) st.st_size);
+	}
+      }
+      close(fd);
+    } else
+      hwloc_pci_forced_locality_parse(topology, env);
+  }
+}
+
+void
+hwloc_pci_discovery_exit(struct hwloc_topology *topology __hwloc_attribute_unused)
+{
+  unsigned i;
+  for(i=0; i<topology->pci_forced_locality_nr; i++)
+    hwloc_bitmap_free(topology->pci_forced_locality[i].cpuset);
+  free(topology->pci_forced_locality);
+
+  hwloc_pci_discovery_init(topology);
+}
+
+#ifdef HWLOC_DEBUG
+static void
+hwloc_pci_traverse_print_cb(void * cbdata __hwloc_attribute_unused,
+			    struct hwloc_obj *pcidev)
+{
+  char busid[14];
+  hwloc_obj_t parent;
+
+  /* indent */
+  parent = pcidev->parent;
+  while (parent) {
+    hwloc_debug("%s", "  ");
+    parent = parent->parent;
+  }
+
+  snprintf(busid, sizeof(busid), "%04x:%02x:%02x.%01x",
+           pcidev->attr->pcidev.domain, pcidev->attr->pcidev.bus, pcidev->attr->pcidev.dev, pcidev->attr->pcidev.func);
+
+  if (pcidev->type == HWLOC_OBJ_BRIDGE) {
+    if (pcidev->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST)
+      hwloc_debug("HostBridge");
+    else
+      hwloc_debug("%s Bridge [%04x:%04x]", busid,
+		  pcidev->attr->pcidev.vendor_id, pcidev->attr->pcidev.device_id);
+    hwloc_debug(" to %04x:[%02x:%02x]\n",
+		pcidev->attr->bridge.downstream.pci.domain, pcidev->attr->bridge.downstream.pci.secondary_bus, pcidev->attr->bridge.downstream.pci.subordinate_bus);
+  } else
+    hwloc_debug("%s Device [%04x:%04x (%04x:%04x) rev=%02x class=%04x]\n", busid,
+		pcidev->attr->pcidev.vendor_id, pcidev->attr->pcidev.device_id,
+		pcidev->attr->pcidev.subvendor_id, pcidev->attr->pcidev.subdevice_id,
+		pcidev->attr->pcidev.revision, pcidev->attr->pcidev.class_id);
+}
+
+static void
+hwloc_pci__traverse(void * cbdata, struct hwloc_obj *tree,
+		    void (*cb)(void * cbdata, struct hwloc_obj *))
+{
+  struct hwloc_obj *child = tree;
+  while (child) {
+    cb(cbdata, child);
+    if (child->type == HWLOC_OBJ_BRIDGE && child->io_first_child)
+      hwloc_pci__traverse(cbdata, child->io_first_child, cb);
+    child = child->next_sibling;
+  }
+}
+
+static void
+hwloc_pci_traverse(void * cbdata, struct hwloc_obj *tree,
+		   void (*cb)(void * cbdata, struct hwloc_obj *))
+{
+  hwloc_pci__traverse(cbdata, tree, cb);
+}
+#endif /* HWLOC_DEBUG */
+
+enum hwloc_pci_busid_comparison_e {
+  HWLOC_PCI_BUSID_LOWER,
+  HWLOC_PCI_BUSID_HIGHER,
+  HWLOC_PCI_BUSID_INCLUDED,
+  HWLOC_PCI_BUSID_SUPERSET
+};
+
+static enum hwloc_pci_busid_comparison_e
+hwloc_pci_compare_busids(struct hwloc_obj *a, struct hwloc_obj *b)
+{
+#ifdef HWLOC_DEBUG
+  if (a->type == HWLOC_OBJ_BRIDGE)
+    assert(a->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI);
+  if (b->type == HWLOC_OBJ_BRIDGE)
+    assert(b->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI);
+#endif
+
+  if (a->attr->pcidev.domain < b->attr->pcidev.domain)
+    return HWLOC_PCI_BUSID_LOWER;
+  if (a->attr->pcidev.domain > b->attr->pcidev.domain)
+    return HWLOC_PCI_BUSID_HIGHER;
+
+  if (a->type == HWLOC_OBJ_BRIDGE
+      && b->attr->pcidev.bus >= a->attr->bridge.downstream.pci.secondary_bus
+      && b->attr->pcidev.bus <= a->attr->bridge.downstream.pci.subordinate_bus)
+    return HWLOC_PCI_BUSID_SUPERSET;
+  if (b->type == HWLOC_OBJ_BRIDGE
+      && a->attr->pcidev.bus >= b->attr->bridge.downstream.pci.secondary_bus
+      && a->attr->pcidev.bus <= b->attr->bridge.downstream.pci.subordinate_bus)
+    return HWLOC_PCI_BUSID_INCLUDED;
+
+  if (a->attr->pcidev.bus < b->attr->pcidev.bus)
+    return HWLOC_PCI_BUSID_LOWER;
+  if (a->attr->pcidev.bus > b->attr->pcidev.bus)
+    return HWLOC_PCI_BUSID_HIGHER;
+
+  if (a->attr->pcidev.dev < b->attr->pcidev.dev)
+    return HWLOC_PCI_BUSID_LOWER;
+  if (a->attr->pcidev.dev > b->attr->pcidev.dev)
+    return HWLOC_PCI_BUSID_HIGHER;
+
+  if (a->attr->pcidev.func < b->attr->pcidev.func)
+    return HWLOC_PCI_BUSID_LOWER;
+  if (a->attr->pcidev.func > b->attr->pcidev.func)
+    return HWLOC_PCI_BUSID_HIGHER;
+
+  /* Should never reach here.  Abort on both debug builds and
+     non-debug builds */
+  assert(0);
+  fprintf(stderr, "Bad assertion in hwloc %s:%d (aborting)\n", __FILE__, __LINE__);
+  exit(1);
+}
+
+static void
+hwloc_pci_add_object(struct hwloc_obj *parent, struct hwloc_obj **parent_io_first_child_p, struct hwloc_obj *new)
+{
+  struct hwloc_obj **curp, **childp;
+
+  curp = parent_io_first_child_p;
+  while (*curp) {
+    enum hwloc_pci_busid_comparison_e comp = hwloc_pci_compare_busids(new, *curp);
+    switch (comp) {
+    case HWLOC_PCI_BUSID_HIGHER:
+      /* go further */
+      curp = &(*curp)->next_sibling;
+      continue;
+    case HWLOC_PCI_BUSID_INCLUDED:
+      /* insert new below current bridge */
+      hwloc_pci_add_object(*curp, &(*curp)->io_first_child, new);
+      return;
+    case HWLOC_PCI_BUSID_LOWER:
+    case HWLOC_PCI_BUSID_SUPERSET: {
+      /* insert new before current */
+      new->next_sibling = *curp;
+      *curp = new;
+      new->parent = parent;
+      if (new->type == HWLOC_OBJ_BRIDGE) {
+	/* look at remaining siblings and move some below new */
+	childp = &new->io_first_child;
+	curp = &new->next_sibling;
+	while (*curp) {
+	  hwloc_obj_t cur = *curp;
+	  if (hwloc_pci_compare_busids(new, cur) == HWLOC_PCI_BUSID_LOWER) {
+	    /* this sibling remains under root, after new. */
+	    if (cur->attr->pcidev.domain > new->attr->pcidev.domain
+		|| cur->attr->pcidev.bus > new->attr->bridge.downstream.pci.subordinate_bus)
+	      /* this sibling is even above new's subordinate bus, no other sibling could go below new */
+	      return;
+	    curp = &cur->next_sibling;
+	  } else {
+	    /* this sibling goes under new */
+	    *childp = cur;
+	    *curp = cur->next_sibling;
+	    (*childp)->parent = new;
+	    (*childp)->next_sibling = NULL;
+	    childp = &(*childp)->next_sibling;
+	  }
+	}
+      }
+      return;
+    }
+    }
+  }
+  /* add to the end of the list if higher than everybody */
+  new->parent = parent;
+  new->next_sibling = NULL;
+  *curp = new;
+}
+
+void
+hwloc_pci_tree_insert_by_busid(struct hwloc_obj **treep,
+			       struct hwloc_obj *obj)
+{
+  hwloc_pci_add_object(NULL /* no parent on top of tree */, treep, obj);
+}
+
+int
+hwloc_pci_tree_attach_belowroot(struct hwloc_topology *topology, struct hwloc_obj *old_tree)
+{
+  struct hwloc_obj **next_hb_p;
+  enum hwloc_type_filter_e bfilter;
+
+  if (!old_tree)
+    /* found nothing, exit */
+    return 0;
+
+#ifdef HWLOC_DEBUG
+  hwloc_debug("%s", "\nPCI hierarchy:\n");
+  hwloc_pci_traverse(NULL, old_tree, hwloc_pci_traverse_print_cb);
+  hwloc_debug("%s", "\n");
+#endif
+
+  next_hb_p = &hwloc_get_root_obj(topology)->io_first_child;
+  while (*next_hb_p)
+    next_hb_p = &((*next_hb_p)->next_sibling);
+
+  bfilter = topology->type_filter[HWLOC_OBJ_BRIDGE];
+  if (bfilter == HWLOC_TYPE_FILTER_KEEP_NONE) {
+    *next_hb_p = old_tree;
+    topology->modified = 1;
+    goto done;
+  }
+
+  /*
+   * tree points to all objects connected to any upstream bus in the machine.
+   * We now create one real hostbridge object per upstream bus.
+   * It's not actually a PCI device so we have to create it.
+   */
+  while (old_tree) {
+    /* start a new host bridge */
+    struct hwloc_obj *hostbridge = hwloc_alloc_setup_object(topology, HWLOC_OBJ_BRIDGE, -1);
+    struct hwloc_obj **dstnextp = &hostbridge->io_first_child;
+    struct hwloc_obj **srcnextp = &old_tree;
+    struct hwloc_obj *child = *srcnextp;
+    unsigned short current_domain = child->attr->pcidev.domain;
+    unsigned char current_bus = child->attr->pcidev.bus;
+    unsigned char current_subordinate = current_bus;
+
+    hwloc_debug("Starting new PCI hostbridge %04x:%02x\n", current_domain, current_bus);
+
+  next_child:
+    /* remove next child from tree */
+    *srcnextp = child->next_sibling;
+    /* append it to hostbridge */
+    *dstnextp = child;
+    child->parent = hostbridge;
+    child->next_sibling = NULL;
+    dstnextp = &child->next_sibling;
+
+    /* compute hostbridge secondary/subordinate buses */
+    if (child->type == HWLOC_OBJ_BRIDGE
+	&& child->attr->bridge.downstream.pci.subordinate_bus > current_subordinate)
+      current_subordinate = child->attr->bridge.downstream.pci.subordinate_bus;
+
+    /* use next child if it has the same domain/bus */
+    child = *srcnextp;
+    if (child
+	&& child->attr->pcidev.domain == current_domain
+	&& child->attr->pcidev.bus == current_bus)
+      goto next_child;
+
+    /* finish setting up this hostbridge */
+    hostbridge->attr->bridge.upstream_type = HWLOC_OBJ_BRIDGE_HOST;
+    hostbridge->attr->bridge.downstream_type = HWLOC_OBJ_BRIDGE_PCI;
+    hostbridge->attr->bridge.downstream.pci.domain = current_domain;
+    hostbridge->attr->bridge.downstream.pci.secondary_bus = current_bus;
+    hostbridge->attr->bridge.downstream.pci.subordinate_bus = current_subordinate;
+    hwloc_debug("New PCI hostbridge %04x:[%02x-%02x]\n",
+		current_domain, current_bus, current_subordinate);
+
+    if (current_domain)
+      topology->pci_nonzero_domains = 1;
+
+    *next_hb_p = hostbridge;
+    next_hb_p = &hostbridge->next_sibling;
+    topology->modified = 1; /* needed in case somebody reconnects levels before the core calls hwloc_pci_belowroot_apply_locality()
+			     * or if hwloc_pci_belowroot_apply_locality() keeps hostbridges below root.
+			     */
+  }
+
+ done:
+  topology->need_pci_belowroot_apply_locality = 1;
+  return 0;
+}
+
+static struct hwloc_obj *
+hwloc_pci_fixup_busid_parent(struct hwloc_topology *topology __hwloc_attribute_unused,
+			     struct hwloc_pcidev_attr_s *busid,
+			     struct hwloc_obj *parent)
+{
+  /* Xeon E5v3 in cluster-on-die mode only have PCI on the first NUMA node of each package.
+   * but many dual-processor host report the second PCI hierarchy on 2nd NUMA of first package.
+   */
+  if (parent->depth >= 2
+      && parent->type == HWLOC_OBJ_NUMANODE
+      && parent->sibling_rank == 1 && parent->parent->arity == 2
+      && parent->parent->type == HWLOC_OBJ_PACKAGE
+      && parent->parent->sibling_rank == 0 && parent->parent->parent->arity == 2) {
+    const char *cpumodel = hwloc_obj_get_info_by_name(parent->parent, "CPUModel");
+    if (cpumodel && strstr(cpumodel, "Xeon")) {
+      if (!hwloc_hide_errors()) {
+	fprintf(stderr, "****************************************************************************\n");
+	fprintf(stderr, "* hwloc %s has encountered an incorrect PCI locality information.\n", HWLOC_VERSION);
+	fprintf(stderr, "* PCI bus %04x:%02x is supposedly close to 2nd NUMA node of 1st package,\n",
+		busid->domain, busid->bus);
+	fprintf(stderr, "* however hwloc believes this is impossible on this architecture.\n");
+	fprintf(stderr, "* Therefore the PCI bus will be moved to 1st NUMA node of 2nd package.\n");
+	fprintf(stderr, "*\n");
+	fprintf(stderr, "* If you feel this fixup is wrong, disable it by setting in your environment\n");
+	fprintf(stderr, "* HWLOC_PCI_%04x_%02x_LOCALCPUS= (empty value), and report the problem\n",
+		busid->domain, busid->bus);
+	fprintf(stderr, "* to the hwloc's user mailing list together with the XML output of lstopo.\n");
+	fprintf(stderr, "*\n");
+	fprintf(stderr, "* You may silence this message by setting HWLOC_HIDE_ERRORS=1 in your environment.\n");
+	fprintf(stderr, "****************************************************************************\n");
+      }
+      return parent->parent->next_sibling->first_child;
+    }
+  }
+
+  return parent;
+}
+
+static struct hwloc_obj *
+hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcidev_attr_s *busid)
+{
+  hwloc_bitmap_t cpuset = hwloc_bitmap_alloc();
+  hwloc_obj_t parent;
+  int forced = 0;
+  int noquirks = 0;
+  unsigned i;
+  int err;
+
+  /* try to match a forced locality */
+  if (topology->pci_has_forced_locality) {
+    for(i=0; i<topology->pci_forced_locality_nr; i++) {
+      if (busid->domain == topology->pci_forced_locality[i].domain
+	  && busid->bus >= topology->pci_forced_locality[i].bus_first
+	  && busid->bus <= topology->pci_forced_locality[i].bus_last) {
+	hwloc_bitmap_copy(cpuset, topology->pci_forced_locality[i].cpuset);
+	forced = 1;
+	break;
+      }
+    }
+    /* if pci locality was forced, even empty, don't let quirks change what the OS reports */
+    noquirks = 1;
+  }
+
+  /* deprecated force locality variables */
+  if (!forced) {
+    const char *env;
+    char envname[256];
+    /* override the cpuset with the environment if given */
+    snprintf(envname, sizeof(envname), "HWLOC_PCI_%04x_%02x_LOCALCPUS",
+	     busid->domain, busid->bus);
+    env = getenv(envname);
+    if (env) {
+      static int reported = 0;
+      if (!topology->pci_has_forced_locality && !reported) {
+	fprintf(stderr, "Environment variable %s is deprecated, please use HWLOC_PCI_LOCALITY instead.\n", env);
+	reported = 1;
+      }
+      if (*env) {
+	/* force the cpuset */
+	hwloc_debug("Overriding localcpus using %s in the environment\n", envname);
+	hwloc_bitmap_sscanf(cpuset, env);
+	forced = 1;
+      }
+      /* if env exists, even empty, don't let quirks change what the OS reports */
+      noquirks = 1;
+    }
+  }
+
+  if (!forced) {
+    /* get the cpuset by asking the OS backend. */
+    struct hwloc_backend *backend = topology->get_pci_busid_cpuset_backend;
+    if (backend)
+      err = backend->get_pci_busid_cpuset(backend, busid, cpuset);
+    else
+      err = -1;
+    if (err < 0)
+      /* if we got nothing, assume this PCI bus is attached to the top of hierarchy */
+      hwloc_bitmap_copy(cpuset, hwloc_topology_get_topology_cpuset(topology));
+  }
+
+  hwloc_debug_bitmap("Attaching PCI tree to cpuset %s\n", cpuset);
+
+  parent = hwloc_find_insert_io_parent_by_complete_cpuset(topology, cpuset);
+  if (parent) {
+    if (!noquirks)
+      /* We found a valid parent. Check that the OS didn't report invalid locality */
+      parent = hwloc_pci_fixup_busid_parent(topology, busid, parent);
+  } else {
+    /* Fallback to root */
+    parent = hwloc_get_root_obj(topology);
+  }
+
+  hwloc_bitmap_free(cpuset);
+  return parent;
+}
+
+struct hwloc_obj *
+hwloc_pci_find_busid_parent(struct hwloc_topology *topology,
+			    unsigned domain, unsigned bus, unsigned dev, unsigned func)
+{
+  struct hwloc_pcidev_attr_s busid;
+  busid.domain = domain;
+  busid.bus = bus;
+  busid.dev = dev;
+  busid.func = func;
+  return hwloc__pci_find_busid_parent(topology, &busid);
+}
+
+int
+hwloc_pci_belowroot_apply_locality(struct hwloc_topology *topology)
+{
+  struct hwloc_obj *root = hwloc_get_root_obj(topology);
+  struct hwloc_obj **listp, *obj;
+
+  if (!topology->need_pci_belowroot_apply_locality)
+    return 0;
+  topology->need_pci_belowroot_apply_locality = 0;
+
+  /* root->io_first_child contains some PCI hierarchies, and maybe some non-PCI things.
+   * insert the PCI trees according to their PCI-locality.
+   */
+  listp = &root->io_first_child;
+  while ((obj = *listp) != NULL) {
+    struct hwloc_pcidev_attr_s *busid;
+    struct hwloc_obj *parent;
+
+    /* skip non-PCI objects */
+    if (obj->type != HWLOC_OBJ_PCI_DEVICE
+	&& !(obj->type == HWLOC_OBJ_BRIDGE && obj->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI)
+	&& !(obj->type == HWLOC_OBJ_BRIDGE && obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI)) {
+      listp = &obj->next_sibling;
+      continue;
+    }
+
+    if (obj->type == HWLOC_OBJ_PCI_DEVICE
+	|| (obj->type == HWLOC_OBJ_BRIDGE
+	    && obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI))
+      busid = &obj->attr->pcidev;
+    else {
+      /* hostbridges don't have a PCI busid for looking up locality, use their first child if PCI */
+      hwloc_obj_t child = obj->io_first_child;
+      if (child && (child->type == HWLOC_OBJ_PCI_DEVICE
+		    || (child->type == HWLOC_OBJ_BRIDGE
+			&& child->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI)))
+	busid = &obj->io_first_child->attr->pcidev;
+      else
+	continue;
+    }
+
+    /* attach the object (and children) where it belongs */
+    parent = hwloc__pci_find_busid_parent(topology, busid);
+    if (parent == root) {
+      /* keep this object here */
+      listp = &obj->next_sibling;
+    } else {
+      /* dequeue this object */
+      *listp = obj->next_sibling;
+      obj->next_sibling = NULL;
+      hwloc_insert_object_by_parent(topology, parent, obj);
+    }
+  }
+
+  return 0;
+}
+
+static struct hwloc_obj *
+hwloc__pci_belowroot_find_by_busid(hwloc_obj_t parent,
+				   unsigned domain, unsigned bus, unsigned dev, unsigned func)
+{
+  hwloc_obj_t child = parent->io_first_child;
+
+  for ( ; child; child = child->next_sibling) {
+    if (child->type == HWLOC_OBJ_PCI_DEVICE
+	|| (child->type == HWLOC_OBJ_BRIDGE
+	    && child->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI)) {
+      if (child->attr->pcidev.domain == domain
+	  && child->attr->pcidev.bus == bus
+	  && child->attr->pcidev.dev == dev
+	  && child->attr->pcidev.func == func)
+	/* that's the right bus id */
+	return child;
+      if (child->attr->pcidev.domain > domain
+	  || (child->attr->pcidev.domain == domain
+	      && child->attr->pcidev.bus > bus))
+	/* bus id too high, won't find anything later, return parent */
+	return parent;
+      if (child->type == HWLOC_OBJ_BRIDGE
+	  && child->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI
+	  && child->attr->bridge.downstream.pci.domain == domain
+	  && child->attr->bridge.downstream.pci.secondary_bus <= bus
+	  && child->attr->bridge.downstream.pci.subordinate_bus >= bus)
+	/* not the right bus id, but it's included in the bus below that bridge */
+	return hwloc__pci_belowroot_find_by_busid(child, domain, bus, dev, func);
+
+    } else if (child->type == HWLOC_OBJ_BRIDGE
+	       && child->attr->bridge.upstream_type != HWLOC_OBJ_BRIDGE_PCI
+	       && child->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI
+	       /* non-PCI to PCI bridge, just look at the subordinate bus */
+	       && child->attr->bridge.downstream.pci.domain == domain
+	       && child->attr->bridge.downstream.pci.secondary_bus <= bus
+	       && child->attr->bridge.downstream.pci.subordinate_bus >= bus) {
+      /* contains our bus, recurse */
+      return hwloc__pci_belowroot_find_by_busid(child, domain, bus, dev, func);
+    }
+  }
+  /* didn't find anything, return parent */
+  return parent;
+}
+
+struct hwloc_obj *
+hwloc_pci_belowroot_find_by_busid(struct hwloc_topology *topology,
+				  unsigned domain, unsigned bus, unsigned dev, unsigned func)
+{
+  hwloc_obj_t root = hwloc_get_root_obj(topology);
+  hwloc_obj_t parent = hwloc__pci_belowroot_find_by_busid(root, domain, bus, dev, func);
+  if (parent == root)
+    return NULL;
+  else
+    return parent;
+}
+
+#define HWLOC_PCI_STATUS 0x06
+#define HWLOC_PCI_STATUS_CAP_LIST 0x10
+#define HWLOC_PCI_CAPABILITY_LIST 0x34
+#define HWLOC_PCI_CAP_LIST_ID 0
+#define HWLOC_PCI_CAP_LIST_NEXT 1
+
+unsigned
+hwloc_pci_find_cap(const unsigned char *config, unsigned cap)
+{
+  unsigned char seen[256] = { 0 };
+  unsigned char ptr; /* unsigned char to make sure we stay within the 256-byte config space */
+
+  if (!(config[HWLOC_PCI_STATUS] & HWLOC_PCI_STATUS_CAP_LIST))
+    return 0;
+
+  for (ptr = config[HWLOC_PCI_CAPABILITY_LIST] & ~3;
+       ptr; /* exit if next is 0 */
+       ptr = config[ptr + HWLOC_PCI_CAP_LIST_NEXT] & ~3) {
+    unsigned char id;
+
+    /* Looped around! */
+    if (seen[ptr])
+      break;
+    seen[ptr] = 1;
+
+    id = config[ptr + HWLOC_PCI_CAP_LIST_ID];
+    if (id == cap)
+      return ptr;
+    if (id == 0xff) /* exit if id is 0 or 0xff */
+      break;
+  }
+  return 0;
+}
+
+#define HWLOC_PCI_EXP_LNKSTA 0x12
+#define HWLOC_PCI_EXP_LNKSTA_SPEED 0x000f
+#define HWLOC_PCI_EXP_LNKSTA_WIDTH 0x03f0
+
+int
+hwloc_pci_find_linkspeed(const unsigned char *config,
+			 unsigned offset, float *linkspeed)
+{
+  unsigned linksta, speed, width;
+  float lanespeed;
+
+  memcpy(&linksta, &config[offset + HWLOC_PCI_EXP_LNKSTA], 4);
+  speed = linksta & HWLOC_PCI_EXP_LNKSTA_SPEED; /* PCIe generation */
+  width = (linksta & HWLOC_PCI_EXP_LNKSTA_WIDTH) >> 4; /* how many lanes */
+  /* PCIe Gen1 = 2.5GT/s signal-rate per lane with 8/10 encoding    = 0.25GB/s data-rate per lane
+   * PCIe Gen2 = 5  GT/s signal-rate per lane with 8/10 encoding    = 0.5 GB/s data-rate per lane
+   * PCIe Gen3 = 8  GT/s signal-rate per lane with 128/130 encoding = 1   GB/s data-rate per lane
+   * PCIe Gen4 = 16 GT/s signal-rate per lane with 128/130 encoding = 2   GB/s data-rate per lane
+   */
+
+  /* lanespeed in Gbit/s */
+  if (speed <= 2)
+    lanespeed = 2.5f * speed * 0.8f;
+  else
+    lanespeed = 8.0f * (1<<(speed-3)) * 128/130; /* assume Gen5 will be 32 GT/s and so on */
+
+  /* linkspeed in GB/s */
+  *linkspeed = lanespeed * width / 8;
+  return 0;
+}
+
+#define HWLOC_PCI_HEADER_TYPE 0x0e
+#define HWLOC_PCI_HEADER_TYPE_BRIDGE 1
+#define HWLOC_PCI_CLASS_BRIDGE_PCI 0x0604
+
+hwloc_obj_type_t
+hwloc_pci_check_bridge_type(unsigned device_class, const unsigned char *config)
+{
+  unsigned char headertype;
+
+  if (device_class != HWLOC_PCI_CLASS_BRIDGE_PCI)
+    return HWLOC_OBJ_PCI_DEVICE;
+
+  headertype = config[HWLOC_PCI_HEADER_TYPE] & 0x7f;
+  return (headertype == HWLOC_PCI_HEADER_TYPE_BRIDGE)
+    ? HWLOC_OBJ_BRIDGE : HWLOC_OBJ_PCI_DEVICE;
+}
+
+#define HWLOC_PCI_PRIMARY_BUS 0x18
+#define HWLOC_PCI_SECONDARY_BUS 0x19
+#define HWLOC_PCI_SUBORDINATE_BUS 0x1a
+
+int
+hwloc_pci_setup_bridge_attr(hwloc_obj_t obj,
+			    const unsigned char *config)
+{
+  struct hwloc_bridge_attr_s *battr = &obj->attr->bridge;
+  struct hwloc_pcidev_attr_s *pattr = &battr->upstream.pci;
+
+  if (config[HWLOC_PCI_PRIMARY_BUS] != pattr->bus) {
+    /* Sometimes the config space contains 00 instead of the actual primary bus number.
+     * Always trust the bus ID because it was built by the system which has more information
+     * to workaround such problems (e.g. ACPI information about PCI parent/children).
+     */
+    hwloc_debug("  %04x:%02x:%02x.%01x bridge with (ignored) invalid PCI_PRIMARY_BUS %02x\n",
+		pattr->domain, pattr->bus, pattr->dev, pattr->func, config[HWLOC_PCI_PRIMARY_BUS]);
+  }
+
+  obj->type = HWLOC_OBJ_BRIDGE;
+  battr->upstream_type = HWLOC_OBJ_BRIDGE_PCI;
+  battr->downstream_type = HWLOC_OBJ_BRIDGE_PCI;
+  battr->downstream.pci.domain = pattr->domain;
+  battr->downstream.pci.secondary_bus = config[HWLOC_PCI_SECONDARY_BUS];
+  battr->downstream.pci.subordinate_bus = config[HWLOC_PCI_SUBORDINATE_BUS];
+
+  if (battr->downstream.pci.secondary_bus <= pattr->bus
+      || battr->downstream.pci.subordinate_bus <= pattr->bus
+      || battr->downstream.pci.secondary_bus > battr->downstream.pci.subordinate_bus) {
+    /* This should catch most cases of invalid bridge information
+     * (e.g. 00 for secondary and subordinate).
+     * Ideally we would also check that [secondary-subordinate] is included
+     * in the parent bridge [secondary+1:subordinate]. But that's hard to do
+     * because objects may be discovered out of order (especially in the fsroot case).
+     */
+    hwloc_debug("  %04x:%02x:%02x.%01x bridge has invalid secondary-subordinate buses [%02x-%02x]\n",
+		pattr->domain, pattr->bus, pattr->dev, pattr->func,
+		battr->downstream.pci.secondary_bus, battr->downstream.pci.subordinate_bus);
+    hwloc_free_unlinked_object(obj);
+    return -1;
+  }
+
+  return 0;
+}
+
/* Return a short human-readable name for a PCI class ID
 * (class byte in the high 8 bits, subclass in the low 8 bits).
 * Unknown subclasses fall back to the class-wide name; unknown
 * classes return "Other".
 * Fixes vs previous revision: 0x0903 "Scanern" -> "Scanner" (typo),
 * 0x0703 "Model" -> "Modem" (PCI-SIG class code 07/03 is Modem).
 */
const char *
hwloc_pci_class_string(unsigned short class_id)
{
  /* See https://pci-ids.ucw.cz/read/PD/ */
  switch ((class_id & 0xff00) >> 8) {
    case 0x00:
      switch (class_id) {
	case 0x0001: return "VGA";
      }
      break;
    case 0x01:
      switch (class_id) {
	case 0x0100: return "SCSI";
	case 0x0101: return "IDE";
	case 0x0102: return "Floppy";
	case 0x0103: return "IPI";
	case 0x0104: return "RAID";
	case 0x0105: return "ATA";
	case 0x0106: return "SATA";
	case 0x0107: return "SAS";
	case 0x0108: return "NVMExp";
      }
      return "Storage";
    case 0x02:
      switch (class_id) {
	case 0x0200: return "Ethernet";
	case 0x0201: return "TokenRing";
	case 0x0202: return "FDDI";
	case 0x0203: return "ATM";
	case 0x0204: return "ISDN";
	case 0x0205: return "WorldFip";
	case 0x0206: return "PICMG";
	case 0x0207: return "InfiniBand";
	case 0x0208: return "Fabric";
      }
      return "Network";
    case 0x03:
      switch (class_id) {
	case 0x0300: return "VGA";
	case 0x0301: return "XGA";
	case 0x0302: return "3D";
      }
      return "Display";
    case 0x04:
      switch (class_id) {
	case 0x0400: return "MultimediaVideo";
	case 0x0401: return "MultimediaAudio";
	case 0x0402: return "Telephony";
	case 0x0403: return "AudioDevice";
      }
      return "Multimedia";
    case 0x05:
      switch (class_id) {
	case 0x0500: return "RAM";
	case 0x0501: return "Flash";
      }
      return "Memory";
    case 0x06:
      switch (class_id) {
	case 0x0600: return "HostBridge";
	case 0x0601: return "ISABridge";
	case 0x0602: return "EISABridge";
	case 0x0603: return "MicroChannelBridge";
	case 0x0604: return "PCIBridge";
	case 0x0605: return "PCMCIABridge";
	case 0x0606: return "NubusBridge";
	case 0x0607: return "CardBusBridge";
	case 0x0608: return "RACEwayBridge";
	case 0x0609: return "SemiTransparentPCIBridge";
	case 0x060a: return "InfiniBandPCIHostBridge";
      }
      return "Bridge";
    case 0x07:
      switch (class_id) {
	case 0x0700: return "Serial";
	case 0x0701: return "Parallel";
	case 0x0702: return "MultiportSerial";
	case 0x0703: return "Modem";
	case 0x0704: return "GPIB";
	case 0x0705: return "SmartCard";
      }
      return "Communication";
    case 0x08:
      switch (class_id) {
	case 0x0800: return "PIC";
	case 0x0801: return "DMA";
	case 0x0802: return "Timer";
	case 0x0803: return "RTC";
	case 0x0804: return "PCIHotPlug";
	case 0x0805: return "SDHost";
	case 0x0806: return "IOMMU";
      }
      return "SystemPeripheral";
    case 0x09:
      switch (class_id) {
	case 0x0900: return "Keyboard";
	case 0x0901: return "DigitizerPen";
	case 0x0902: return "Mouse";
	case 0x0903: return "Scanner";
	case 0x0904: return "Gameport";
      }
      return "Input";
    case 0x0a:
      return "DockingStation";
    case 0x0b:
      switch (class_id) {
	case 0x0b00: return "386";
	case 0x0b01: return "486";
	case 0x0b02: return "Pentium";
/* 0x0b03 and 0x0b04 might be Pentium and P6 ? */
	case 0x0b10: return "Alpha";
	case 0x0b20: return "PowerPC";
	case 0x0b30: return "MIPS";
	case 0x0b40: return "Co-Processor";
      }
      return "Processor";
    case 0x0c:
      switch (class_id) {
	case 0x0c00: return "FireWire";
	case 0x0c01: return "ACCESS";
	case 0x0c02: return "SSA";
	case 0x0c03: return "USB";
	case 0x0c04: return "FibreChannel";
	case 0x0c05: return "SMBus";
	case 0x0c06: return "InfiniBand";
	case 0x0c07: return "IPMI-SMIC";
	case 0x0c08: return "SERCOS";
	case 0x0c09: return "CANBUS";
      }
      return "SerialBus";
    case 0x0d:
      switch (class_id) {
	case 0x0d00: return "IRDA";
	case 0x0d01: return "ConsumerIR";
	case 0x0d10: return "RF";
	case 0x0d11: return "Bluetooth";
	case 0x0d12: return "Broadband";
	case 0x0d20: return "802.1a";
	case 0x0d21: return "802.1b";
      }
      return "Wireless";
    case 0x0e:
      switch (class_id) {
	case 0x0e00: return "I2O";
      }
      return "Intelligent";
    case 0x0f:
      return "Satellite";
    case 0x10:
      return "Encryption";
    case 0x11:
      return "SignalProcessing";
    case 0x12:
      return "ProcessingAccelerator";
    case 0x13:
      return "Instrumentation";
    case 0x40:
      return "Co-Processor";
  }
  return "Other";
}
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-aix.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-aix.c
new file mode 100644
index 0000000000..0c8bbe3357
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-aix.c
@@ -0,0 +1,875 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2016 Inria.  All rights reserved.
+ * Copyright © 2009-2011, 2013 Université Bordeaux
+ * Copyright © 2011 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/* TODO: use SIGRECONFIG & dr_reconfig for state change */
+
+#include <private/autogen/config.h>
+
+#include <sys/types.h>
+#ifdef HAVE_DIRENT_H
+#include <dirent.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include <hwloc.h>
+#include <private/private.h>
+#include <private/misc.h>
+#include <private/debug.h>
+
+#include <procinfo.h>
+#include <sys/types.h>
+#include <sys/rset.h>
+#include <sys/processor.h>
+#include <sys/thread.h>
+#include <sys/mman.h>
+#include <sys/systemcfg.h>
+
+#ifndef __power_pc
+#define __power_pc() 0
+#endif
+#ifndef __power_4
+#define __power_4() 0
+#endif
+#ifndef __power_5
+#define __power_5() 0
+#endif
+#ifndef __power_6
+#define __power_6() 0
+#endif
+#ifndef __power_7
+#define __power_7() 0
+#endif
+
/* Bind "who" (a process or kernel thread, selected by "what") to the CPUs
 * in hwloc_set using AIX resource sets.
 * pid identifies the owning process so that a conflicting bindprocessor()
 * binding can be undone on EPERM.
 * Returns 0 on success, -1 on error with errno set by the failing AIX call.
 */
static int
hwloc_aix_set_sth_cpubind(hwloc_topology_t topology, rstype_t what, rsid_t who, pid_t pid, hwloc_const_bitmap_t hwloc_set, int flags __hwloc_attribute_unused)
{
  rsethandle_t rad;
  int res;
  unsigned cpu;

  if (flags & HWLOC_CPUBIND_NOMEMBIND) {
    /* attaching an rset also constrains memory, so NOMEMBIND cannot be honored */
    errno = ENOSYS;
    return -1;
  }

  /* The resulting binding is always strict */

  if (hwloc_bitmap_isequal(hwloc_set, hwloc_topology_get_complete_cpuset(topology))) {
    /* binding to all CPUs: just detach any existing rset instead of attaching a full one */
    if (ra_detachrset(what, who, 0))
      return -1;
    return 0;
  }

  /* build a resource set containing exactly the requested CPUs */
  rad = rs_alloc(RS_EMPTY);
  hwloc_bitmap_foreach_begin(cpu, hwloc_set)
    rs_op(RS_ADDRESOURCE, rad, NULL, R_PROCS, cpu);
  hwloc_bitmap_foreach_end();

  res = ra_attachrset(what, who, rad, 0);
  if (res < 0 && errno == EPERM) {
    /* EPERM may mean that one thread has ben bound with bindprocessor().
     * Unbind the entire process (we can't unbind individual threads)
     * and try again.
     */
    bindprocessor(BINDPROCESS, pid, PROCESSOR_CLASS_ANY);
    res = ra_attachrset(what, who, rad, 0);
  }

  rs_free(rad);
  return res;
}
+
/* Fetch the rset binding of "who" into hwloc_set.
 * *boundp is set to 1 when at least one CPU of the rset universe is missing,
 * i.e. a real rset binding exists; 0 when the rset covers all CPUs.
 * Returns 0 on success, -1 on error (ra_getrset failure).
 */
static int
hwloc_aix_get_sth_rset_cpubind(hwloc_topology_t topology, rstype_t what, rsid_t who, hwloc_bitmap_t hwloc_set, int flags __hwloc_attribute_unused, int *boundp)
{
  rsethandle_t rset;
  unsigned cpu, maxcpus;
  int res = -1;
  int bound = 0;

  rset = rs_alloc(RS_EMPTY);

  if (ra_getrset(what, who, 0, rset) == -1)
    goto out;

  hwloc_bitmap_zero(hwloc_set);
  maxcpus = rs_getinfo(rset, R_MAXPROCS, 0);
  for (cpu = 0; cpu < maxcpus; cpu++)
    if (rs_op(RS_TESTRESOURCE, rset, NULL, R_PROCS, cpu) == 1)
      hwloc_bitmap_set(hwloc_set, cpu);
    else
      bound = 1; /* at least one CPU excluded: an actual binding is in place */
  hwloc_bitmap_and(hwloc_set, hwloc_set, hwloc_topology_get_complete_cpuset(topology));
  res = 0;
  *boundp = bound;

out:
  rs_free(rset);
  return res;
}
+
/* Gather the bindprocessor() bindings of all threads of process "pid"
 * into hwloc_set by walking the kernel thread table with getthrds(64)().
 * Any unbound thread (PROCESSOR_CLASS_ANY) makes the result the full set.
 * Always returns 0.
 */
static int
hwloc_aix_get_pid_getthrds_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, pid_t pid, hwloc_bitmap_t hwloc_set, int flags __hwloc_attribute_unused)
{
#if HWLOC_BITS_PER_LONG == 64
  struct thrdentry64 thread_info;
  tid64_t next_thread;
#else
  struct thrdsinfo thread_info;
  tid_t next_thread;
#endif

  next_thread = 0;
  /* TODO: get multiple at once */
#if HWLOC_BITS_PER_LONG == 64
  while (getthrds64 (pid, &thread_info, sizeof (thread_info),
                     &next_thread, 1) == 1) {
#else
  while (getthrds   (pid, &thread_info, sizeof (thread_info),
                     &next_thread, 1) == 1) {
#endif
    if (PROCESSOR_CLASS_ANY != thread_info.ti_cpuid)
      hwloc_bitmap_set(hwloc_set, thread_info.ti_cpuid)/* thread bound to one CPU */;
    else
      hwloc_bitmap_fill(hwloc_set); /* one unbound thread: process is effectively unbound */
  }
  /* TODO: what if the thread list changes and we get nothing? */

  return 0;
}
+
/* Find the bindprocessor() binding of kernel thread "tid" (within the
 * current process) by scanning the thread table with getthrds(64)().
 * Sets hwloc_set to the bound CPU, or fills it if the thread is unbound.
 * Always returns 0 (hwloc_set is left untouched if the tid is not found).
 */
static int
hwloc_aix_get_tid_getthrds_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, tid_t tid, hwloc_bitmap_t hwloc_set, int flags __hwloc_attribute_unused)
{
#if HWLOC_BITS_PER_LONG == 64
  struct thrdentry64 thread_info;
  tid64_t next_thread;
#else
  struct thrdsinfo thread_info;
  tid_t next_thread;
#endif
  pid_t pid = getpid();

  next_thread = 0;
  /* TODO: get multiple at once */
#if HWLOC_BITS_PER_LONG == 64
  while (getthrds64 (pid, &thread_info, sizeof (thread_info),
                     &next_thread, 1) == 1) {
#else
  while (getthrds   (pid, &thread_info, sizeof (thread_info),
                     &next_thread, 1) == 1) {
#endif
    if (thread_info.ti_tid == tid) {
      if (PROCESSOR_CLASS_ANY != thread_info.ti_cpuid)
	hwloc_bitmap_set(hwloc_set, thread_info.ti_cpuid);
      else
	hwloc_bitmap_fill(hwloc_set);
      break;
    }
  }
  /* TODO: what if the thread goes away in the meantime? */

  return 0;
}
+
+static int
+hwloc_aix_set_thisproc_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, int flags)
+{
+  rsid_t who;
+  who.at_pid = getpid();
+  return hwloc_aix_set_sth_cpubind(topology, R_PROCESS, who, who.at_pid, hwloc_set, flags);
+}
+
+static int
+hwloc_aix_get_thisproc_cpubind(hwloc_topology_t topology, hwloc_bitmap_t hwloc_set, int flags)
+{
+  int ret, bound;
+  rsid_t who;
+  who.at_pid = getpid();
+  ret = hwloc_aix_get_sth_rset_cpubind(topology, R_PROCESS, who, hwloc_set, flags, &bound);
+  if (!ret && !bound) {
+    hwloc_bitmap_zero(hwloc_set);
+    ret = hwloc_aix_get_pid_getthrds_cpubind(topology, who.at_pid, hwloc_set, flags);
+  }
+  return ret;
+}
+
+#ifdef R_THREAD
+static int
+hwloc_aix_set_thisthread_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, int flags)
+{
+  rsid_t who;
+  who.at_tid = thread_self();
+  return hwloc_aix_set_sth_cpubind(topology, R_THREAD, who, getpid(), hwloc_set, flags);
+}
+
+static int
+hwloc_aix_get_thisthread_cpubind(hwloc_topology_t topology, hwloc_bitmap_t hwloc_set, int flags)
+{
+  int ret, bound;
+  rsid_t who;
+  who.at_tid = thread_self();
+  ret = hwloc_aix_get_sth_rset_cpubind(topology, R_THREAD, who, hwloc_set, flags, &bound);
+  if (!ret && !bound) {
+    hwloc_bitmap_zero(hwloc_set);
+    ret = hwloc_aix_get_tid_getthrds_cpubind(topology, who.at_tid, hwloc_set, flags);
+  }
+  return ret;
+}
+#endif /* R_THREAD */
+
+static int
+hwloc_aix_set_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_bitmap_t hwloc_set, int flags)
+{
+  rsid_t who;
+  who.at_pid = pid;
+  return hwloc_aix_set_sth_cpubind(topology, R_PROCESS, who, pid, hwloc_set, flags);
+}
+
+static int
+hwloc_aix_get_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_bitmap_t hwloc_set, int flags)
+{
+  int ret, bound;
+  rsid_t who;
+  who.at_pid = pid;
+  ret = hwloc_aix_get_sth_rset_cpubind(topology, R_PROCESS, who, hwloc_set, flags, &bound);
+  if (!ret && !bound) {
+    hwloc_bitmap_zero(hwloc_set);
+    ret = hwloc_aix_get_pid_getthrds_cpubind(topology, who.at_pid, hwloc_set, flags);
+  }
+  return ret;
+}
+
+#ifdef R_THREAD
+#ifdef HWLOC_HAVE_PTHREAD_GETTHRDS_NP
+static int
+hwloc_aix_set_thread_cpubind(hwloc_topology_t topology, hwloc_thread_t pthread, hwloc_const_bitmap_t hwloc_set, int flags)
+{
+  struct __pthrdsinfo info;
+  int size;
+  if ((errno = pthread_getthrds_np(&pthread, PTHRDSINFO_QUERY_TID, &info, sizeof(info), NULL, &size)))
+    return -1;
+  {
+    rsid_t who;
+    who.at_tid = info.__pi_tid;
+    return hwloc_aix_set_sth_cpubind(topology, R_THREAD, who, getpid(), hwloc_set, flags);
+  }
+}
+
+static int
+hwloc_aix_get_thread_cpubind(hwloc_topology_t topology, hwloc_thread_t pthread, hwloc_bitmap_t hwloc_set, int flags)
+{
+  struct __pthrdsinfo info;
+  int size;
+  if (pthread_getthrds_np(&pthread, PTHRDSINFO_QUERY_TID, &info, sizeof(info), NULL, &size))
+    return -1;
+  {
+    int ret, bound;
+    rsid_t who;
+    who.at_tid = info.__pi_tid;
+    ret = hwloc_aix_get_sth_rset_cpubind(topology, R_THREAD, who, hwloc_set, flags, &bound);
+    if (!ret && !bound) {
+      hwloc_bitmap_zero(hwloc_set);
+      ret = hwloc_aix_get_tid_getthrds_cpubind(topology, who.at_tid, hwloc_set, flags);
+    }
+    return ret;
+  }
+}
+#endif /* HWLOC_HAVE_PTHREAD_GETTHRDS_NP */
+#endif /* R_THREAD */
+
+static int
+hwloc_aix_get_thisthread_last_cpu_location(hwloc_topology_t topology, hwloc_bitmap_t hwloc_set, int flags __hwloc_attribute_unused)
+{
+  cpu_t cpu;
+
+  if (topology->pid) {
+    errno = ENOSYS;
+    return -1;
+  }
+
+  cpu = mycpu();
+  if (cpu < 0)
+    return -1;
+
+  hwloc_bitmap_only(hwloc_set, cpu);
+  return 0;
+}
+
+#ifdef P_DEFAULT
+
+static int
+hwloc_aix_membind_policy_from_hwloc(uint_t *aix_policy, int policy)
+{
+  switch (policy) {
+    case HWLOC_MEMBIND_DEFAULT:
+    case HWLOC_MEMBIND_BIND:
+      *aix_policy = P_DEFAULT;
+      break;
+    case HWLOC_MEMBIND_FIRSTTOUCH:
+      *aix_policy = P_FIRST_TOUCH;
+      break;
+    case HWLOC_MEMBIND_INTERLEAVE:
+      *aix_policy = P_BALANCED;
+      break;
+    default:
+      errno = ENOSYS;
+      return -1;
+  }
+  return 0;
+}
+
/* Build an AIX resource set (*rad) covering the NUMA nodes listed in nodeset.
 * Node indices are the MCM-sdl rad numbers, matching the os_index values
 * assigned in look_rset(). Always returns 0; the caller must rs_free(*rad).
 */
static int
hwloc_aix_prepare_membind(hwloc_topology_t topology, rsethandle_t *rad, hwloc_const_nodeset_t nodeset, int flags __hwloc_attribute_unused)
{
  rsethandle_t rset, noderad;
  int MCMlevel;
  int node;

  MCMlevel = rs_getinfo(NULL, R_MCMSDL, 0);
  if ((topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM))
    rset = rs_alloc(RS_ALL); /* whole machine, even outside our partition */
  else
    rset = rs_alloc(RS_PARTITION);
  *rad = rs_alloc(RS_EMPTY);
  noderad = rs_alloc(RS_EMPTY);

  /* accumulate each requested node's rad into *rad */
  hwloc_bitmap_foreach_begin(node, nodeset)
    /* we used MCMlevel rad number for node->os_index during lookup */
    rs_getrad(rset, noderad, MCMlevel, node, 0);
    rs_op(RS_UNION, noderad, *rad, 0, 0);
  hwloc_bitmap_foreach_end();

  rs_free(rset);
  rs_free(noderad);

  return 0;
}
+
/* Bind the memory of "who" (process or thread, per "what") to the NUMA nodes
 * in nodeset by attaching an rset. Only DEFAULT/BIND policies are supported.
 * pid identifies the owning process for the EPERM/bindprocessor() workaround.
 * Returns the ra_attachrset() result (0 on success, -1 on error).
 */
static int
hwloc_aix_set_sth_membind(hwloc_topology_t topology, rstype_t what, rsid_t who, pid_t pid, hwloc_const_bitmap_t nodeset, hwloc_membind_policy_t policy, int flags)
{
  rsethandle_t rad;
  int res;

  if (flags & HWLOC_MEMBIND_NOCPUBIND) {
    /* attaching an rset also constrains CPUs, so NOCPUBIND cannot be honored */
    errno = ENOSYS;
    return -1;
  }

  switch (policy) {
    case HWLOC_MEMBIND_DEFAULT:
    case HWLOC_MEMBIND_BIND:
      break;
    default:
      errno = ENOSYS;
      return -1;
  }

  if (hwloc_aix_prepare_membind(topology, &rad, nodeset, flags))
    return -1;

  res = ra_attachrset(what, who, rad, 0);
  if (res < 0 && errno == EPERM) {
    /* EPERM may mean that one thread has ben bound with bindprocessor().
     * Unbind the entire process (we can't unbind individual threads)
     * and try again.
     */
    bindprocessor(BINDPROCESS, pid, PROCESSOR_CLASS_ANY);
    res = ra_attachrset(what, who, rad, 0);
  }

  rs_free(rad);
  return res;
}
+
/* Retrieve the memory binding of "who": read its rset, convert the rset's
 * CPUs to a cpuset, then report every NUMA node whose cpuset is fully
 * included. *policy is always reported as BIND.
 * Returns 0 on success, -1 on error.
 */
static int
hwloc_aix_get_sth_membind(hwloc_topology_t topology, rstype_t what, rsid_t who, hwloc_bitmap_t nodeset, hwloc_membind_policy_t *policy, int flags __hwloc_attribute_unused)
{
  hwloc_bitmap_t hwloc_set;
  rsethandle_t rset;
  unsigned cpu, maxcpus;
  int res = -1;
  int depth, n, i;

  depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
  assert(depth >= 0);
  n = hwloc_get_nbobjs_by_depth(topology, depth);

  rset = rs_alloc(RS_EMPTY);

  if (ra_getrset(what, who, 0, rset) == -1)
    goto out;

  /* convert the rset's CPUs into an hwloc cpuset */
  hwloc_set = hwloc_bitmap_alloc();

  maxcpus = rs_getinfo(rset, R_MAXPROCS, 0);
  for (cpu = 0; cpu < maxcpus; cpu++)
    if (rs_op(RS_TESTRESOURCE, rset, NULL, R_PROCS, cpu) == 1)
      hwloc_bitmap_set(hwloc_set, cpu);
  hwloc_bitmap_and(hwloc_set, hwloc_set, hwloc_topology_get_complete_cpuset(topology));

  /* a NUMA node is bound iff all of its CPUs are in the rset */
  hwloc_bitmap_zero(nodeset);
  for (i = 0; i < n; i++) {
    hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);
    if (hwloc_bitmap_isincluded(obj->cpuset, hwloc_set))
      hwloc_bitmap_set(nodeset, obj->os_index);
  }

  hwloc_bitmap_free(hwloc_set);

  *policy = HWLOC_MEMBIND_BIND;
  res = 0;

out:
  rs_free(rset);
  return res;
}
+
+static int
+hwloc_aix_set_thisproc_membind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, hwloc_membind_policy_t policy, int flags)
+{
+  rsid_t who;
+  who.at_pid = getpid();
+  return hwloc_aix_set_sth_membind(topology, R_PROCESS, who, who.at_pid, hwloc_set, policy, flags);
+}
+
+static int
+hwloc_aix_get_thisproc_membind(hwloc_topology_t topology, hwloc_bitmap_t hwloc_set, hwloc_membind_policy_t *policy, int flags)
+{
+  rsid_t who;
+  who.at_pid = getpid();
+  return hwloc_aix_get_sth_membind(topology, R_PROCESS, who, hwloc_set, policy, flags);
+}
+
+#ifdef R_THREAD
+static int
+hwloc_aix_set_thisthread_membind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, hwloc_membind_policy_t policy, int flags)
+{
+  rsid_t who;
+  who.at_tid = thread_self();
+  return hwloc_aix_set_sth_membind(topology, R_THREAD, who, getpid(), hwloc_set, policy, flags);
+}
+
+static int
+hwloc_aix_get_thisthread_membind(hwloc_topology_t topology, hwloc_bitmap_t hwloc_set, hwloc_membind_policy_t *policy, int flags)
+{
+  rsid_t who;
+  who.at_tid = thread_self();
+  return hwloc_aix_get_sth_membind(topology, R_THREAD, who, hwloc_set, policy, flags);
+}
+#endif /* R_THREAD */
+
+static int
+hwloc_aix_set_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_bitmap_t hwloc_set, hwloc_membind_policy_t policy, int flags)
+{
+  rsid_t who;
+  who.at_pid = pid;
+  return hwloc_aix_set_sth_membind(topology, R_PROCESS, who, pid, hwloc_set, policy, flags);
+}
+
+static int
+hwloc_aix_get_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_bitmap_t hwloc_set, hwloc_membind_policy_t *policy, int flags)
+{
+  rsid_t who;
+  who.at_pid = pid;
+  return hwloc_aix_get_sth_membind(topology, R_PROCESS, who, hwloc_set, policy, flags);
+}
+
+#ifdef R_THREAD
+#if 0 /* def HWLOC_HAVE_PTHREAD_GETTHRDS_NP */
/* (Disabled code, kept under #if 0.) Bind the memory of an arbitrary
 * pthread by resolving its kernel tid via pthread_getthrds_np(). */
static int
hwloc_aix_set_thread_membind(hwloc_topology_t topology, hwloc_thread_t pthread, hwloc_const_bitmap_t hwloc_set, hwloc_membind_policy_t policy, int flags)
{
  struct __pthrdsinfo info;
  int size;
  if ((errno = pthread_getthrds_np(&pthread, PTHRDSINFO_QUERY_TID, &info, sizeof(info), NULL, &size)))
    return -1;
  {
    rsid_t who;
    who.at_tid = info.__pi_tid;
    return hwloc_aix_set_sth_membind(topology, R_THREAD, who, getpid(), hwloc_set, policy, flags);
  }
}
+
/* (Disabled code, kept under #if 0.) Get the memory binding of an
 * arbitrary pthread by resolving its kernel tid via pthread_getthrds_np(). */
static int
hwloc_aix_get_thread_membind(hwloc_topology_t topology, hwloc_thread_t pthread, hwloc_bitmap_t hwloc_set, hwloc_membind_policy_t *policy, int flags)
{
  struct __pthrdsinfo info;
  int size;
  if (pthread_getthrds_np(&pthread, PTHRDSINFO_QUERY_TID, &info, sizeof(info), NULL, &size))
    return -1;
  {
    rsid_t who;
    who.at_tid = info.__pi_tid;
    return hwloc_aix_get_sth_membind(topology, R_THREAD, who, hwloc_set, policy, flags);
  }
}
+#endif /* HWLOC_HAVE_PTHREAD_GETTHRDS_NP */
+#endif /* R_THREAD */
+
+#if 0
+/* TODO: seems to be right, but doesn't seem to be working (EINVAL), even after
+ * aligning the range on 64K... */
+static int
+hwloc_aix_set_area_membind(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
+{
+  subrange_t subrange;
+  rsid_t rsid = { .at_subrange = &subrange };
+  uint_t aix_policy;
+  int ret;
+  fprintf(stderr,"yop\n");
+
+  if ((flags & (HWLOC_MEMBIND_MIGRATE|HWLOC_MEMBIND_STRICT))
+            == (HWLOC_MEMBIND_MIGRATE|HWLOC_MEMBIND_STRICT)) {
+    errno = ENOSYS;
+    return -1;
+  }
+
+  subrange.su_offset = (uintptr_t) addr;
+  subrange.su_length = len;
+  subrange.su_rstype = R_RSET;
+
+  if (hwloc_aix_membind_policy_from_hwloc(&aix_policy, policy))
+    return -1;
+
+  if (hwloc_aix_prepare_membind(topology, &subrange.su_rsid.at_rset, nodeset, flags))
+    return -1;
+
+  subrange.su_policy = aix_policy;
+
+  res = ra_attachrset(R_SUBRANGE, rsid, subrange.su_rsid.at_rset, 0);
+  if (res < 0 && errno == EPERM) {
+    /* EPERM may mean that one thread has ben bound with bindprocessor().
+     * Unbind the entire process (we can't unbind individual threads)
+     * and try again.
+     * FIXME: actually check that this EPERM can happen
+     */
+    bindprocessor(BINDPROCESS, getpid(), PROCESSOR_CLASS_ANY);
+    res = ra_attachrset(R_SUBRANGE, rsid, subrange.su_rsid.at_rset, 0);
+  }
+
+  rs_free(subrange.su_rsid.at_rset);
+  return ret;
+}
+#endif
+
+static void *
+hwloc_aix_alloc_membind(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
+{
+  void *ret;
+  rsid_t rsid;
+  uint_t aix_policy;
+
+  if (hwloc_aix_membind_policy_from_hwloc(&aix_policy, policy))
+    return hwloc_alloc_or_fail(topology, len, flags);
+
+  if (hwloc_aix_prepare_membind(topology, &rsid.at_rset, nodeset, flags))
+    return hwloc_alloc_or_fail(topology, len, flags);
+
+  ret = ra_mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0, R_RSET, rsid, aix_policy);
+
+  rs_free(rsid.at_rset);
+  return ret == (void*)-1 ? NULL : ret;
+}
+#endif /* P_DEFAULT */
+
+static void
+look_rset(int sdl, hwloc_obj_type_t type, struct hwloc_topology *topology, int level)
+{
+  rsethandle_t rset, rad;
+  int i,maxcpus,j;
+  int nbnodes;
+  struct hwloc_obj *obj;
+
+  if ((topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM))
+    rset = rs_alloc(RS_ALL);
+  else
+    rset = rs_alloc(RS_PARTITION);
+  rad = rs_alloc(RS_EMPTY);
+  nbnodes = rs_numrads(rset, sdl, 0);
+  if (nbnodes == -1) {
+    perror("rs_numrads");
+    return;
+  }
+
+  for (i = 0; i < nbnodes; i++) {
+    hwloc_bitmap_t cpuset;
+    unsigned os_index = (unsigned) -1; /* no os_index except for PU and NUMANODE below */
+
+    if (rs_getrad(rset, rad, sdl, i, 0)) {
+      fprintf(stderr,"rs_getrad(%d) failed: %s\n", i, strerror(errno));
+      continue;
+    }
+    if (!rs_getinfo(rad, R_NUMPROCS, 0))
+      continue;
+
+    maxcpus = rs_getinfo(rad, R_MAXPROCS, 0);
+    cpuset = hwloc_bitmap_alloc();
+    for (j = 0; j < maxcpus; j++) {
+      if (rs_op(RS_TESTRESOURCE, rad, NULL, R_PROCS, j))
+	hwloc_bitmap_set(cpuset, j);
+    }
+
+    if (type == HWLOC_OBJ_PU) {
+      os_index = hwloc_bitmap_first(cpuset);
+      hwloc_debug("Found PU #%u inside node %d for sdl %d\n", os_index, i, sdl);
+      assert(hwloc_bitmap_weight(cpuset) == 1);
+    } else if (type == HWLOC_OBJ_NUMANODE) {
+      /* NUMA node os_index isn't used for binding, just use the rad number to get unique values.
+       * Note that we'll use that fact in hwloc_aix_prepare_membind(). */
+      os_index = i;
+      hwloc_debug("Using os_index #%u for NUMA node inside node %d for sdl %d\n", os_index, i, sdl);
+    }
+
+    obj = hwloc_alloc_setup_object(topology, type, os_index);
+    obj->cpuset = cpuset;
+
+    switch(type) {
+      case HWLOC_OBJ_NUMANODE:
+	obj->nodeset = hwloc_bitmap_alloc();
+	hwloc_bitmap_set(obj->nodeset, i);
+	obj->memory.local_memory = 0; /* TODO: odd, rs_getinfo(rad, R_MEMSIZE, 0) << 10 returns the total memory ... */
+	obj->memory.page_types_len = 2;
+	obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types));
+	memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types));
+	obj->memory.page_types[0].size = hwloc_getpagesize();
+#if HAVE_DECL__SC_LARGE_PAGESIZE
+	obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
+#endif
+	/* TODO: obj->memory.page_types[1].count = rs_getinfo(rset, R_LGPGFREE, 0) / hugepagesize */
+	break;
+      case HWLOC_OBJ_L2CACHE:
+	obj->attr->cache.size = _system_configuration.L2_cache_size;
+	obj->attr->cache.associativity = _system_configuration.L2_cache_asc;
+
+	obj->attr->cache.linesize = 0; /* unknown by default */
+	if (__power_pc())
+	  if (__power_4() || __power_5() || __power_6() || __power_7())
+	    obj->attr->cache.linesize = 128;
+
+	obj->attr->cache.depth = 2;
+	obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; /* OK for power[4567], unknown for others */
+	break;
+      case HWLOC_OBJ_GROUP:
+	obj->attr->group.kind = HWLOC_GROUP_KIND_AIX_SDL_UNKNOWN;
+	obj->attr->group.subkind = level;
+	break;
+      case HWLOC_OBJ_CORE:
+      {
+	hwloc_obj_t obj2, obj3;
+	obj2 = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L1CACHE, -1);
+	obj2->cpuset = hwloc_bitmap_dup(obj->cpuset);
+	obj2->attr->cache.size = _system_configuration.dcache_size;
+	obj2->attr->cache.associativity = _system_configuration.dcache_asc;
+	obj2->attr->cache.linesize = _system_configuration.dcache_line;
+	obj2->attr->cache.depth = 1;
+	if (_system_configuration.cache_attrib & (1<<30)) {
+	  /* Unified cache */
+	  obj2->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
+	  hwloc_debug("Adding an L1u cache for core %d\n", i);
+	} else {
+	  /* Separate Instruction and Data caches */
+	  obj2->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
+	  hwloc_debug("Adding an L1d cache for core %d\n", i);
+
+	  if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1ICACHE)) {
+	    obj3 = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L1ICACHE, -1);
+	    obj3->cpuset = hwloc_bitmap_dup(obj->cpuset);
+	    obj3->attr->cache.size = _system_configuration.icache_size;
+	    obj3->attr->cache.associativity = _system_configuration.icache_asc;
+	    obj3->attr->cache.linesize = _system_configuration.icache_line;
+	    obj3->attr->cache.depth = 1;
+	    obj3->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
+	    hwloc_debug("Adding an L1i cache for core %d\n", i);
+	    hwloc_insert_object_by_cpuset(topology, obj3);
+	  }
+	}
+	if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1CACHE))
+	  hwloc_insert_object_by_cpuset(topology, obj2);
+	else
+	  hwloc_free_unlinked_object(obj2); /* FIXME: don't built at all, just build the cpuset in case l1/l1i needs it */
+	break;
+      }
+      default:
+	break;
+    }
+    hwloc_debug_2args_bitmap("%s %d has cpuset %s\n",
+	       hwloc_type_name(type),
+	       i, obj->cpuset);
+    if (hwloc_filter_check_keep_object_type(topology, obj->type))
+      hwloc_insert_object_by_cpuset(topology, obj);
+    else
+      hwloc_free_unlinked_object(obj);
+  }
+
+  rs_free(rset);
+  rs_free(rad);
+}
+
/* AIX discovery entry point: walk every scheduler allocation level (sdl)
 * and build topology objects for the levels we recognize (NUMA nodes,
 * L2 caches, cores, PUs); unknown levels become Groups.
 * Returns 0 on success, -1 if another backend already discovered objects.
 */
static int
hwloc_look_aix(struct hwloc_backend *backend)
{
  struct hwloc_topology *topology = backend->topology;
  int i;

  if (topology->levels[0][0]->cpuset)
    /* somebody discovered things */
    return -1;

  hwloc_alloc_obj_cpusets(topology->levels[0][0]);

  /* TODO: R_LGPGDEF/R_LGPGFREE for large pages */

  hwloc_debug("Note: SMPSDL is at %d\n", rs_getinfo(NULL, R_SMPSDL, 0));
#ifdef R_REF1SDL
  hwloc_debug("Note: REF1SDL is at %d\n", rs_getinfo(NULL, R_REF1SDL, 0));
#endif

  /* one pass per sdl, from the outermost (0) to the innermost (MAXSDL = PUs) */
  for (i=0; i<=rs_getinfo(NULL, R_MAXSDL, 0); i++)
    {
      int known = 0;
#if 0
      if (i == rs_getinfo(NULL, R_SMPSDL, 0))
	/* Not enabled for now because I'm not sure what it corresponds to. On
	 * decrypthon it contains all the cpus. Is it a "machine" or a "system"
	 * level ?
	 */
	{
	  hwloc_debug("looking AIX \"SMP\" sdl %d\n", i);
	  look_rset(i, HWLOC_OBJ_MACHINE, topology, i);
	  known = 1;
	}
#endif
      if (i == rs_getinfo(NULL, R_MCMSDL, 0))
	{
	  hwloc_debug("looking AIX node sdl %d\n", i);
	  look_rset(i, HWLOC_OBJ_NUMANODE, topology, i);
	  known = 1;
	}
#      ifdef R_L2CSDL
      if (i == rs_getinfo(NULL, R_L2CSDL, 0))
	{
	  hwloc_debug("looking AIX L2 sdl %d\n", i);
	  look_rset(i, HWLOC_OBJ_L2CACHE, topology, i);
	  known = 1;
	}
#      endif
#      ifdef R_PCORESDL
      if (i == rs_getinfo(NULL, R_PCORESDL, 0))
	{
	  hwloc_debug("looking AIX core sdl %d\n", i);
	  look_rset(i, HWLOC_OBJ_CORE, topology, i);
	  known = 1;
	}
#      endif
      if (i == rs_getinfo(NULL, R_MAXSDL, 0))
	{
	  hwloc_debug("looking AIX max sdl %d\n", i);
	  look_rset(i, HWLOC_OBJ_PU, topology, i);
	  known = 1;
          topology->support.discovery->pu = 1;
	}

      /* Don't know how it should be rendered, make a misc object for it.  */
      if (!known)
	{
	  hwloc_debug("looking AIX unknown sdl %d\n", i);
	  look_rset(i, HWLOC_OBJ_GROUP, topology, i);
	}
    }

  hwloc_obj_add_info(topology->levels[0][0], "Backend", "AIX");
  hwloc_add_uname_info(topology, NULL);
  return 0;
}
+
/* Install the AIX CPU- and memory-binding hooks and advertise the
 * corresponding support bits. Thread hooks depend on R_THREAD and
 * pthread_getthrds_np(); membind hooks depend on P_DEFAULT.
 */
void
hwloc_set_aix_hooks(struct hwloc_binding_hooks *hooks,
		    struct hwloc_topology_support *support __hwloc_attribute_unused)
{
  hooks->set_proc_cpubind = hwloc_aix_set_proc_cpubind;
  hooks->get_proc_cpubind = hwloc_aix_get_proc_cpubind;
#ifdef R_THREAD
#ifdef HWLOC_HAVE_PTHREAD_GETTHRDS_NP
  hooks->set_thread_cpubind = hwloc_aix_set_thread_cpubind;
  hooks->get_thread_cpubind = hwloc_aix_get_thread_cpubind;
#endif /* HWLOC_HAVE_PTHREAD_GETTHRDS_NP */
#endif /* R_THREAD */
  hooks->set_thisproc_cpubind = hwloc_aix_set_thisproc_cpubind;
  hooks->get_thisproc_cpubind = hwloc_aix_get_thisproc_cpubind;
#ifdef R_THREAD
  hooks->set_thisthread_cpubind = hwloc_aix_set_thisthread_cpubind;
  hooks->get_thisthread_cpubind = hwloc_aix_get_thisthread_cpubind;
#endif /* R_THREAD */
  hooks->get_thisthread_last_cpu_location = hwloc_aix_get_thisthread_last_cpu_location;
  /* TODO: get_last_cpu_location: mycpu() only works for the current thread? */
#ifdef P_DEFAULT
  hooks->set_proc_membind = hwloc_aix_set_proc_membind;
  hooks->get_proc_membind = hwloc_aix_get_proc_membind;
#ifdef R_THREAD
#if 0 /* def HWLOC_HAVE_PTHREAD_GETTHRDS_NP */
  /* Does it really make sense to set the memory binding of another thread? */
  hooks->set_thread_membind = hwloc_aix_set_thread_membind;
  hooks->get_thread_membind = hwloc_aix_get_thread_membind;
#endif /* HWLOC_HAVE_PTHREAD_GETTHRDS_NP */
#endif /* R_THREAD */
  hooks->set_thisproc_membind = hwloc_aix_set_thisproc_membind;
  hooks->get_thisproc_membind = hwloc_aix_get_thisproc_membind;
#ifdef R_THREAD
  hooks->set_thisthread_membind = hwloc_aix_set_thisthread_membind;
  hooks->get_thisthread_membind = hwloc_aix_get_thisthread_membind;
#endif /* R_THREAD */
  /* hooks->set_area_membind = hwloc_aix_set_area_membind; */
  /* get_area_membind is not available */
  hooks->alloc_membind = hwloc_aix_alloc_membind;
  hooks->alloc = hwloc_alloc_mmap;
  hooks->free_membind = hwloc_free_mmap;
  support->membind->firsttouch_membind = 1;
  support->membind->bind_membind = 1;
  support->membind->interleave_membind = 1;
#endif /* P_DEFAULT */
}
+
+static struct hwloc_backend *
+hwloc_aix_component_instantiate(struct hwloc_disc_component *component,
+				const void *_data1 __hwloc_attribute_unused,
+				const void *_data2 __hwloc_attribute_unused,
+				const void *_data3 __hwloc_attribute_unused)
+{
+  struct hwloc_backend *backend;
+  backend = hwloc_backend_alloc(component);
+  if (!backend)
+    return NULL;
+  backend->discover = hwloc_look_aix;
+  return backend;
+}
+
/* Discovery component descriptor for the AIX backend. */
static struct hwloc_disc_component hwloc_aix_disc_component = {
  HWLOC_DISC_COMPONENT_TYPE_CPU, /* component type */
  "aix", /* name */
  HWLOC_DISC_COMPONENT_TYPE_GLOBAL, /* excluded component types */
  hwloc_aix_component_instantiate, /* backend constructor */
  50, /* priority */
  NULL /* no private data */
};
+
/* Top-level hwloc component descriptor for the AIX backend. */
const struct hwloc_component hwloc_aix_component = {
  HWLOC_COMPONENT_ABI, /* ABI version */
  NULL, NULL, /* no init/finalize callbacks */
  HWLOC_COMPONENT_TYPE_DISC, /* discovery component */
  0, /* flags */
  &hwloc_aix_disc_component
};
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-bgq.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-bgq.c
new file mode 100644
index 0000000000..4dbc70ccb9
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-bgq.c
@@ -0,0 +1,301 @@
+/*
+ * Copyright © 2013-2017 Inria.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+#include <private/autogen/config.h>
+
+#include <hwloc.h>
+#include <private/private.h>
+#include <private/debug.h>
+
+#include <stdlib.h>
+#include <pthread.h>
+#include <sys/utsname.h>
+#include <spi/include/kernel/location.h>
+#include <spi/include/kernel/process.h>
+
+#ifndef HWLOC_DISABLE_BGQ_PORT_TEST
+
+#define HWLOC_BGQ_CORES 17 /* spare core ignored for now */
+
+static int /* restrict allowed_cpuset to what CNK lets this process use; always returns 0 */
+hwloc_bgq__get_allowed_resources(struct hwloc_topology *topology)
+{
+  const char *env;
+  unsigned i;
+
+  /* mark the 17th core (OS-reserved) as disallowed */
+  hwloc_bitmap_clr_range(topology->levels[0][0]->allowed_cpuset, (HWLOC_BGQ_CORES-1)*4, HWLOC_BGQ_CORES*4-1); /* 4 HW threads per core */
+
+  if (topology->is_thissystem) { /* don't call CNK unless thissystem */
+    env = getenv("BG_THREADMODEL");
+    if (!env || atoi(env) != 2) {
+      /* process cannot use cores/threads outside of its Kernel_ThreadMask() unless BG_THREADMODEL=2 */
+      uint64_t bgmask = Kernel_ThreadMask(Kernel_MyTcoord());
+      /* the mask is reversed, manually reverse it */
+	for(i=0; i<64; i++)
+	if (((bgmask >> i) & 1) == 0)
+	  hwloc_bitmap_clr(topology->levels[0][0]->allowed_cpuset, 63-i); /* bit i of bgmask describes PU 63-i */
+    }
+  }
+  return 0;
+}
+
+static int /* build the entire (hardwired) BGQ compute-node topology; returns 0, or -1 if already discovered */
+hwloc_look_bgq(struct hwloc_backend *backend)
+{
+  struct hwloc_topology *topology = backend->topology;
+  hwloc_bitmap_t set;
+  hwloc_obj_t obj;
+  unsigned i;
+
+  if (topology->levels[0][0]->cpuset)
+    /* somebody discovered things */
+    return -1;
+
+  hwloc_alloc_obj_cpusets(topology->levels[0][0]);
+
+  hwloc_bgq__get_allowed_resources(topology); /* apply CNK thread-mask restrictions */
+
+  /* a single memory bank */
+  obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_NUMANODE, 0);
+  set = hwloc_bitmap_alloc();
+  hwloc_bitmap_set_range(set, 0, HWLOC_BGQ_CORES*4-1); /* all 17*4 PUs share the one node */
+  obj->cpuset = set;
+  set = hwloc_bitmap_alloc();
+  hwloc_bitmap_set(set, 0);
+  obj->nodeset = set;
+  obj->memory.local_memory = 16ULL*1024*1024*1024ULL; /* hardwired 16GB per compute node */
+  hwloc_insert_object_by_cpuset(topology, obj);
+
+  set = hwloc_bitmap_alloc();
+  hwloc_bitmap_set_range(set, 0, HWLOC_BGQ_CORES*4-1); /* cpuset reused for L2 then package */
+
+  /* shared L2 */
+  if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L2CACHE)) {
+    obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L2CACHE, -1);
+    obj->cpuset = hwloc_bitmap_dup(set); /* dup: `set` is still needed for the package below */
+    obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
+    obj->attr->cache.depth = 2;
+    obj->attr->cache.size = 32*1024*1024;
+    obj->attr->cache.linesize = 128;
+    obj->attr->cache.associativity = 16;
+    hwloc_insert_object_by_cpuset(topology, obj);
+  }
+
+  /* package */
+  if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_PACKAGE)) {
+    obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PACKAGE, 0);
+    obj->cpuset = set; /* ownership of `set` transfers to the package object */
+    hwloc_obj_add_info(obj, "CPUModel", "IBM PowerPC A2");
+    hwloc_insert_object_by_cpuset(topology, obj);
+  } else
+    hwloc_bitmap_free(set); /* package filtered out: nobody took ownership */
+
+  /* Cores */
+  for(i=0; i<HWLOC_BGQ_CORES; i++) {
+    set = hwloc_bitmap_alloc();
+    hwloc_bitmap_set_range(set, i*4, i*4+3); /* 4 SMT threads per core */
+
+    /* L1d */
+    if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1CACHE)) {
+      obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L1CACHE, -1);
+      obj->cpuset = hwloc_bitmap_dup(set);
+      obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
+      obj->attr->cache.depth = 1;
+      obj->attr->cache.size = 16*1024;
+      obj->attr->cache.linesize = 64;
+      obj->attr->cache.associativity = 8;
+      hwloc_insert_object_by_cpuset(topology, obj);
+    }
+    /* L1i */
+    if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1ICACHE)) {
+      obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L1ICACHE, -1);
+      obj->cpuset = hwloc_bitmap_dup(set);
+      obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
+      obj->attr->cache.depth = 1;
+      obj->attr->cache.size = 16*1024;
+      obj->attr->cache.linesize = 64;
+      obj->attr->cache.associativity = 4;
+      hwloc_insert_object_by_cpuset(topology, obj);
+    }
+    /* there's also a L1p "prefetch cache" of 4kB with 128B lines */
+
+    /* Core */
+    if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) {
+      obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_CORE, i);
+      obj->cpuset = set; /* ownership of `set` transfers to the core object */
+      hwloc_insert_object_by_cpuset(topology, obj);
+    } else
+      hwloc_bitmap_free(set);
+  }
+
+  /* PUs */
+  hwloc_setup_pu_level(topology, HWLOC_BGQ_CORES*4);
+
+  /* Add BGQ specific information */
+
+  hwloc_obj_add_info(topology->levels[0][0], "Backend", "BGQ");
+  hwloc_add_uname_info(topology, NULL);
+  return 0;
+}
+
+static int /* report the single PU a pthread is bound to; -1/errno on failure */
+hwloc_bgq_get_thread_cpubind(hwloc_topology_t topology, pthread_t thread, hwloc_bitmap_t hwloc_set, int flags __hwloc_attribute_unused)
+{
+  unsigned pu;
+  cpu_set_t bg_set;
+  int err;
+
+  if (topology->pid) { /* cannot query another process */
+    errno = ENOSYS;
+    return -1;
+  }
+  err = pthread_getaffinity_np(thread, sizeof(bg_set), &bg_set);
+  if (err) {
+    errno = err; /* pthread_* returns the error code rather than setting errno */
+    return -1;
+  }
+  for(pu=0; pu<64; pu++)
+    if (CPU_ISSET(pu, &bg_set)) {
+      /* the binding cannot contain multiple PUs */
+      hwloc_bitmap_only(hwloc_set, pu); /* report just the first set PU */
+      break;
+    }
+  return 0;
+}
+
+static int /* report the calling thread's PU via the CNK kernel; -1/ENOSYS for other processes */
+hwloc_bgq_get_thisthread_cpubind(hwloc_topology_t topology, hwloc_bitmap_t hwloc_set, int flags __hwloc_attribute_unused)
+{
+  if (topology->pid) { /* cannot query another process */
+    errno = ENOSYS;
+    return -1;
+  }
+  hwloc_bitmap_only(hwloc_set, Kernel_ProcessorID()); /* CNK gives the current HW thread directly */
+  return 0;
+}
+
+static int /* bind a pthread to exactly one PU (CNK only supports single-PU bindings) */
+hwloc_bgq_set_thread_cpubind(hwloc_topology_t topology, pthread_t thread, hwloc_const_bitmap_t hwloc_set, int flags)
+{
+  unsigned pu;
+  cpu_set_t bg_set;
+  int err;
+
+  if (topology->pid) { /* cannot bind another process */
+    errno = ENOSYS;
+    return -1;
+  }
+  /* the binding cannot contain multiple PUs.
+   * keep the first PU only, and error out if STRICT.
+   */
+  if (hwloc_bitmap_weight(hwloc_set) != 1) {
+    if ((flags & HWLOC_CPUBIND_STRICT)) {
+      errno = ENOSYS;
+      return -1;
+    }
+  }
+  pu = hwloc_bitmap_first(hwloc_set); /* NOTE(review): returns -1 on an empty set, making the CPU_SET index invalid — confirm callers never pass an empty cpuset */
+  CPU_ZERO(&bg_set);
+  CPU_SET(pu, &bg_set);
+  err = pthread_setaffinity_np(thread, sizeof(bg_set), &bg_set);
+  if (err) {
+    errno = err; /* pthread_* returns the error code rather than setting errno */
+    return -1;
+  }
+  return 0;
+}
+
+static int /* bind the calling thread: delegate to the generic per-thread binder */
+hwloc_bgq_set_thisthread_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, int flags)
+{
+  return hwloc_bgq_set_thread_cpubind(topology, pthread_self(), hwloc_set, flags);
+}
+
+static int /* get_allowed_resources hook: re-apply CNK restrictions, e.g. on an XML-loaded topology */
+hwloc_bgq_get_allowed_resources(struct hwloc_topology *topology)
+{
+  /* Loading BGQ from XML isn't much useful since everything is hardwired anyway.
+   * But still implement XML + this callback in case portable applications want to always use XMLs.
+   */
+
+  /* In theory, when applying local restrictions to a XML-loaded topology,
+   * we should check that the current topology contains 1 NUMA nodes and 17*4 PUs.
+   *
+   * Just trust the user when he sets THISSYSTEM=1.
+   */
+  return hwloc_bgq__get_allowed_resources(topology); /* shared worker also used during discovery */
+}
+
+void /* install CNK thread-binding hooks; membind hooks are intentionally left unset */
+hwloc_set_bgq_hooks(struct hwloc_binding_hooks *hooks __hwloc_attribute_unused,
+		    struct hwloc_topology_support *support __hwloc_attribute_unused)
+{
+  hooks->set_thisthread_cpubind = hwloc_bgq_set_thisthread_cpubind;
+  hooks->set_thread_cpubind = hwloc_bgq_set_thread_cpubind;
+  hooks->get_thisthread_cpubind = hwloc_bgq_get_thisthread_cpubind;
+  hooks->get_thread_cpubind = hwloc_bgq_get_thread_cpubind;
+  /* threads cannot be bound to more than one PU, so get_last_cpu_location == get_cpubind */
+  hooks->get_thisthread_last_cpu_location = hwloc_bgq_get_thisthread_cpubind;
+  /* hooks->get_thread_last_cpu_location = hwloc_bgq_get_thread_cpubind; */
+
+  hooks->get_allowed_resources = hwloc_bgq_get_allowed_resources;
+}
+
+static struct hwloc_backend * /* factory: refuse to run off a BGQ compute node unless HWLOC_FORCE_BGQ=1 */
+hwloc_bgq_component_instantiate(struct hwloc_disc_component *component,
+				const void *_data1 __hwloc_attribute_unused,
+				const void *_data2 __hwloc_attribute_unused,
+				const void *_data3 __hwloc_attribute_unused)
+{
+  struct utsname utsname;
+  struct hwloc_backend *backend;
+  int forced_nonbgq = 0; /* set when the user forces the backend on a non-compute node */
+  int err;
+
+  err = uname(&utsname);
+  if (err || strcmp(utsname.sysname, "CNK") || strcmp(utsname.machine, "BGQ")) { /* compute nodes report CNK/BGQ */
+    const char *env = getenv("HWLOC_FORCE_BGQ");
+    if (!env || !atoi(env)) {
+      fprintf(stderr, "*** Found unexpected uname sysname `%s' machine `%s'.\n", utsname.sysname, utsname.machine);
+      fprintf(stderr, "*** The BlueGene/Q backend (bgq) is only enabled by default on compute nodes\n"
+		      "*** (where uname returns sysname=CNK and machine=BGQ).\n"
+		      "*** If you know you *really* want to run the bgq backend on this non-compute node,\n"
+		      "*** set HWLOC_FORCE_BGQ=1 in the environment.\n"
+		      "*** If you just want to discover the native topology of this non-compute node,\n"
+		      "*** do not pass any BlueGene/Q-specific options on the configure command-line.\n");
+      return NULL; /* backend disabled */
+    } else {
+      forced_nonbgq = 1;
+    }
+  }
+
+  backend = hwloc_backend_alloc(component);
+  if (!backend)
+    return NULL;
+  backend->discover = hwloc_look_bgq;
+  if (forced_nonbgq)
+    backend->is_thissystem = 0; /* forced topology does not describe the running system */
+  return backend;
+}
+
+static struct hwloc_disc_component hwloc_bgq_disc_component = {
+  HWLOC_DISC_COMPONENT_TYPE_GLOBAL, /* global backend: discovers everything itself */
+  "bgq",
+  ~0, /* excludes all other discovery components */
+  hwloc_bgq_component_instantiate,
+  50, /* priority — NOTE(review): field order assumed per struct hwloc_disc_component in hwloc/plugins.h; confirm */
+  NULL
+};
+
+const struct hwloc_component hwloc_bgq_component = { /* generic component wrapper registered with the core */
+  HWLOC_COMPONENT_ABI, /* ABI version checked by the component loader */
+  NULL, NULL, /* no init/finalize callbacks */
+  HWLOC_COMPONENT_TYPE_DISC,
+  0, /* flags */
+  &hwloc_bgq_disc_component
+};
+
+#endif /* !HWLOC_DISABLE_BGQ_PORT_TEST */
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-cuda.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-cuda.c
new file mode 100644
index 0000000000..f96ac60f5f
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-cuda.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright © 2011 Université Bordeaux
+ * Copyright © 2012-2017 Inria.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+#include <private/autogen/config.h>
+#include <hwloc.h>
+#include <hwloc/plugins.h>
+#include <hwloc/cudart.h>
+
+/* private headers allowed for convenience because this plugin is built within hwloc */
+#include <private/misc.h>
+#include <private/debug.h>
+
+#include <cuda_runtime_api.h>
+
+static unsigned hwloc_cuda_cores_per_MP(int major, int minor) /* map compute capability to FP32 cores per multiprocessor; 0 if unknown */
+{
+  /* FP32 cores per MP, based on CUDA C Programming Guide, Annex G */
+  switch (major) {
+    case 1:
+      switch (minor) {
+        case 0:
+        case 1:
+        case 2:
+        case 3: return 8; /* Tesla */
+      }
+      break;
+    case 2:
+      switch (minor) {
+        case 0: return 32; /* Fermi */
+        case 1: return 48;
+      }
+      break;
+    case 3:
+      return 192; /* Kepler */
+    case 5:
+      return 128; /* Maxwell */
+    case 6:
+      switch (minor) {
+        case 0: return 64; /* Pascal */
+        case 1:
+        case 2: return 128;
+      }
+      break;
+    case 7:
+      return 64; /* Volta */
+  }
+  hwloc_debug("unknown compute capability %d.%d, disabling core display.\n", major, minor);
+  return 0;
+}
+
+static int /* enumerate CUDA devices and attach one OS device ("cudaN") per GPU; 0 on success, -1 if CUDA fails */
+hwloc_cuda_discover(struct hwloc_backend *backend)
+{
+  struct hwloc_topology *topology = backend->topology;
+  enum hwloc_type_filter_e filter;
+  cudaError_t cures;
+  int nb, i;
+
+  hwloc_topology_get_type_filter(topology, HWLOC_OBJ_OS_DEVICE, &filter);
+  if (filter == HWLOC_TYPE_FILTER_KEEP_NONE)
+    return 0; /* OS devices filtered out: nothing to do */
+
+  cures = cudaGetDeviceCount(&nb);
+  if (cures)
+    return -1;
+
+  for (i = 0; i < nb; i++) {
+    int domain, bus, dev;
+    char cuda_name[32];
+    char number[32]; /* scratch buffer for stringified info values */
+    struct cudaDeviceProp prop;
+    hwloc_obj_t cuda_device, parent;
+    unsigned cores;
+
+    cuda_device = hwloc_alloc_setup_object(topology, HWLOC_OBJ_OS_DEVICE, -1);
+    snprintf(cuda_name, sizeof(cuda_name), "cuda%d", i);
+    cuda_device->name = strdup(cuda_name);
+    cuda_device->depth = (unsigned) HWLOC_TYPE_DEPTH_UNKNOWN;
+    cuda_device->attr->osdev.type = HWLOC_OBJ_OSDEV_COPROC;
+
+    cuda_device->subtype = strdup("CUDA");
+    hwloc_obj_add_info(cuda_device, "Backend", "CUDA");
+    hwloc_obj_add_info(cuda_device, "GPUVendor", "NVIDIA Corporation");
+
+    cures = cudaGetDeviceProperties(&prop, i);
+    if (!cures)
+      hwloc_obj_add_info(cuda_device, "GPUModel", prop.name); /* best-effort: skipped on query failure */
+
+    snprintf(number, sizeof(number), "%llu", ((unsigned long long) prop.totalGlobalMem) >> 10); /* bytes -> kB */
+    hwloc_obj_add_info(cuda_device, "CUDAGlobalMemorySize", number);
+
+    snprintf(number, sizeof(number), "%llu", ((unsigned long long) prop.l2CacheSize) >> 10); /* bytes -> kB */
+    hwloc_obj_add_info(cuda_device, "CUDAL2CacheSize", number);
+
+    snprintf(number, sizeof(number), "%d", prop.multiProcessorCount);
+    hwloc_obj_add_info(cuda_device, "CUDAMultiProcessors", number);
+
+    cores = hwloc_cuda_cores_per_MP(prop.major, prop.minor);
+    if (cores) {
+      snprintf(number, sizeof(number), "%u", cores);
+      hwloc_obj_add_info(cuda_device, "CUDACoresPerMP", number);
+    }
+
+    snprintf(number, sizeof(number), "%llu", ((unsigned long long) prop.sharedMemPerBlock) >> 10); /* bytes -> kB */
+    hwloc_obj_add_info(cuda_device, "CUDASharedMemorySizePerMP", number);
+
+    parent = NULL;
+    if (hwloc_cudart_get_device_pci_ids(NULL /* topology unused */, i, &domain, &bus, &dev) == 0) {
+      parent = hwloc_pci_belowroot_find_by_busid(topology, domain, bus, dev, 0); /* prefer the exact PCI device */
+      if (!parent)
+	parent = hwloc_pci_find_busid_parent(topology, domain, bus, dev, 0); /* else the closest PCI ancestor */
+    }
+    if (!parent)
+      parent = hwloc_get_root_obj(topology); /* last resort: attach under the root */
+
+    hwloc_insert_object_by_parent(topology, parent, cuda_device);
+  }
+
+  return 0;
+}
+
+static struct hwloc_backend * /* factory: build the CUDA discovery backend */
+hwloc_cuda_component_instantiate(struct hwloc_disc_component *component,
+                                 const void *_data1 __hwloc_attribute_unused,
+                                 const void *_data2 __hwloc_attribute_unused,
+                                 const void *_data3 __hwloc_attribute_unused)
+{
+  struct hwloc_backend *backend;
+
+  backend = hwloc_backend_alloc(component);
+  if (!backend)
+    return NULL; /* allocation failed: component stays disabled */
+  /* the first callback will initialize those */
+  backend->discover = hwloc_cuda_discover;
+  return backend;
+}
+
+static struct hwloc_disc_component hwloc_cuda_disc_component = {
+  HWLOC_DISC_COMPONENT_TYPE_MISC, /* annotates an existing topology rather than building one */
+  "cuda",
+  HWLOC_DISC_COMPONENT_TYPE_GLOBAL, /* excluded when a global (xml/synthetic) backend runs */
+  hwloc_cuda_component_instantiate,
+  10, /* after pci */
+  NULL
+};
+
+static int /* plugin init: reject unknown flags and verify core symbols are visible to this plugin */
+hwloc_cuda_component_init(unsigned long flags)
+{
+  if (flags)
+    return -1; /* no flags are defined yet */
+  if (hwloc_plugin_check_namespace("cuda", "hwloc_backend_alloc") < 0)
+    return -1; /* core symbols not visible: refuse to load */
+  return 0;
+}
+
+#ifdef HWLOC_INSIDE_PLUGIN
+HWLOC_DECLSPEC extern const struct hwloc_component hwloc_cuda_component; /* exported symbol looked up by the plugin loader */
+#endif
+
+const struct hwloc_component hwloc_cuda_component = { /* generic component wrapper registered with the core */
+  HWLOC_COMPONENT_ABI, /* ABI version checked by the component loader */
+  hwloc_cuda_component_init, NULL, /* init callback, no finalize */
+  HWLOC_COMPONENT_TYPE_DISC,
+  0, /* flags */
+  &hwloc_cuda_disc_component
+};
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-darwin.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-darwin.c
new file mode 100644
index 0000000000..c3b97fdb42
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-darwin.c
@@ -0,0 +1,307 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2016 Inria.  All rights reserved.
+ * Copyright © 2009-2013 Université Bordeaux
+ * Copyright © 2009-2011 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/* Detect topology change: registering for power management changes and check
+ * if for example hw.activecpu changed */
+
+/* Apparently, Darwin people do not _want_ to provide binding functions.  */
+
+#include <private/autogen/config.h>
+
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include <hwloc.h>
+#include <private/private.h>
+#include <private/debug.h>
+
+static int /* discover the Darwin topology via sysctl: packages, cores, caches, NUMA node, PUs */
+hwloc_look_darwin(struct hwloc_backend *backend)
+{
+  struct hwloc_topology *topology = backend->topology;
+  int64_t _nprocs;
+  unsigned nprocs;
+  int64_t _npackages;
+  unsigned i, j, cpu;
+  struct hwloc_obj *obj;
+  size_t size;
+  int64_t l1dcachesize, l1icachesize;
+  int64_t cacheways[2]; /* L1 and L2 associativity */
+  int64_t l2cachesize;
+  int64_t cachelinesize;
+  int64_t memsize;
+  char cpumodel[64];
+
+  if (topology->levels[0][0]->cpuset)
+    /* somebody discovered things */
+    return -1;
+
+  hwloc_alloc_obj_cpusets(topology->levels[0][0]);
+
+  if (hwloc_get_sysctlbyname("hw.ncpu", &_nprocs) || _nprocs <= 0)
+    return -1; /* cannot even count PUs: give up */
+  nprocs = _nprocs;
+  topology->support.discovery->pu = 1;
+
+  hwloc_debug("%u procs\n", nprocs);
+
+  size = sizeof(cpumodel);
+  if (sysctlbyname("machdep.cpu.brand_string", cpumodel, &size, NULL, 0))
+    cpumodel[0] = '\0'; /* model string is optional */
+
+  if (!hwloc_get_sysctlbyname("hw.packages", &_npackages) && _npackages > 0) {
+    unsigned npackages = _npackages;
+    int64_t _cores_per_package;
+    int64_t _logical_per_package;
+    unsigned logical_per_package;
+
+    hwloc_debug("%u packages\n", npackages);
+
+    if (!hwloc_get_sysctlbyname("machdep.cpu.logical_per_package", &_logical_per_package) && _logical_per_package > 0)
+      logical_per_package = _logical_per_package;
+    else
+      /* Assume the trivia.  */
+      logical_per_package = nprocs / npackages;
+
+    hwloc_debug("%u threads per package\n", logical_per_package);
+
+    if (nprocs == npackages * logical_per_package
+	&& hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_PACKAGE))
+      for (i = 0; i < npackages; i++) {
+        obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PACKAGE, i);
+        obj->cpuset = hwloc_bitmap_alloc();
+        for (cpu = i*logical_per_package; cpu < (i+1)*logical_per_package; cpu++)
+          hwloc_bitmap_set(obj->cpuset, cpu); /* PUs are assumed contiguous per package */
+
+        hwloc_debug_1arg_bitmap("package %u has cpuset %s\n",
+                   i, obj->cpuset);
+
+        if (cpumodel[0] != '\0')
+          hwloc_obj_add_info(obj, "CPUModel", cpumodel);
+        hwloc_insert_object_by_cpuset(topology, obj);
+      }
+    else
+      if (cpumodel[0] != '\0')
+        hwloc_obj_add_info(topology->levels[0][0], "CPUModel", cpumodel); /* counts inconsistent: annotate the root instead */
+
+    if (!hwloc_get_sysctlbyname("machdep.cpu.cores_per_package", &_cores_per_package) && _cores_per_package > 0
+	&& hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) {
+      unsigned cores_per_package = _cores_per_package;
+      hwloc_debug("%u cores per package\n", cores_per_package);
+
+      if (!(logical_per_package % cores_per_package))
+        for (i = 0; i < npackages * cores_per_package; i++) {
+          obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_CORE, i);
+          obj->cpuset = hwloc_bitmap_alloc();
+          for (cpu = i*(logical_per_package/cores_per_package);
+               cpu < (i+1)*(logical_per_package/cores_per_package);
+               cpu++)
+            hwloc_bitmap_set(obj->cpuset, cpu);
+
+          hwloc_debug_1arg_bitmap("core %u has cpuset %s\n",
+                     i, obj->cpuset);
+          hwloc_insert_object_by_cpuset(topology, obj);
+        }
+    }
+  } else
+    if (cpumodel[0] != '\0')
+      hwloc_obj_add_info(topology->levels[0][0], "CPUModel", cpumodel);
+
+  if (hwloc_get_sysctlbyname("hw.l1dcachesize", &l1dcachesize))
+    l1dcachesize = 0; /* 0 means unknown for all the fallbacks below */
+
+  if (hwloc_get_sysctlbyname("hw.l1icachesize", &l1icachesize))
+    l1icachesize = 0;
+
+  if (hwloc_get_sysctlbyname("hw.l2cachesize", &l2cachesize))
+    l2cachesize = 0;
+
+  if (hwloc_get_sysctlbyname("machdep.cpu.cache.L1_associativity", &cacheways[0]))
+    cacheways[0] = 0;
+  else if (cacheways[0] == 0xff)
+    cacheways[0] = -1; /* 0xff encodes fully-associative */
+
+  if (hwloc_get_sysctlbyname("machdep.cpu.cache.L2_associativity", &cacheways[1]))
+    cacheways[1] = 0;
+  else if (cacheways[1] == 0xff)
+    cacheways[1] = -1;
+
+  if (hwloc_get_sysctlbyname("hw.cachelinesize", &cachelinesize))
+    cachelinesize = 0;
+
+  if (hwloc_get_sysctlbyname("hw.memsize", &memsize))
+    memsize = 0;
+
+  if (!sysctlbyname("hw.cacheconfig", NULL, &size, NULL, 0)) { /* probe the required buffer size first */
+    unsigned n = size / sizeof(uint32_t);
+    uint64_t *cacheconfig = NULL;
+    uint64_t *cachesize = NULL;
+    uint32_t *cacheconfig32 = NULL;
+
+    cacheconfig = malloc(sizeof(uint64_t) * n);
+    if (NULL == cacheconfig) {
+        goto out;
+    }
+    cachesize = malloc(sizeof(uint64_t) * n);
+    if (NULL == cachesize) {
+        goto out;
+    }
+    cacheconfig32 = malloc(sizeof(uint32_t) * n);
+    if (NULL == cacheconfig32) {
+        goto out;
+    }
+
+    if ((!sysctlbyname("hw.cacheconfig", cacheconfig, &size, NULL, 0))) {
+      /* Yeech. Darwin seemingly has changed from 32bit to 64bit integers for
+       * cacheconfig, with apparently no way for detection. Assume the machine
+       * won't have more than 4 billion cpus */
+      if (cacheconfig[0] > 0xFFFFFFFFUL) {
+        memcpy(cacheconfig32, cacheconfig, size); /* data was really 32-bit: reread and widen */
+        for (i = 0 ; i < size / sizeof(uint32_t); i++)
+          cacheconfig[i] = cacheconfig32[i];
+      }
+
+      memset(cachesize, 0, sizeof(uint64_t) * n);
+      size = sizeof(uint64_t) * n;
+      if (sysctlbyname("hw.cachesize", cachesize, &size, NULL, 0)) { /* fall back to the per-level sysctls read above */
+        if (n > 0)
+          cachesize[0] = memsize;
+        if (n > 1)
+          cachesize[1] = l1dcachesize;
+        if (n > 2)
+          cachesize[2] = l2cachesize;
+      }
+
+      hwloc_debug("%s", "caches");
+      for (i = 0; i < n && cacheconfig[i]; i++)
+        hwloc_debug(" %"PRIu64"(%"PRIu64"kB)", cacheconfig[i], cachesize[i] / 1024);
+
+      /* Now we know how many caches there are */
+      n = i;
+      hwloc_debug("\n%u cache levels\n", n - 1);
+
+      /* For each cache level (0 is memory) */
+      for (i = 0; i < n; i++) {
+        /* cacheconfig tells us how many cpus share it, let's iterate on each cache */
+        for (j = 0; j < (nprocs / cacheconfig[i]); j++) {
+	  if (!i) {
+	    obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_NUMANODE, j); /* level 0 entry describes memory */
+            obj->nodeset = hwloc_bitmap_alloc();
+            hwloc_bitmap_set(obj->nodeset, j);
+          } else {
+	    obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L1CACHE+i-1, -1); /* level i maps to the Li cache type */
+	  }
+          obj->cpuset = hwloc_bitmap_alloc();
+          for (cpu = j*cacheconfig[i];
+               cpu < ((j+1)*cacheconfig[i]);
+               cpu++)
+            hwloc_bitmap_set(obj->cpuset, cpu);
+
+          if (i == 1 && l1icachesize
+	      && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1ICACHE)) {
+            /* FIXME assuming that L1i and L1d are shared the same way. Darwin
+             * does not yet provide a way to know.  */
+            hwloc_obj_t l1i = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L1ICACHE, -1);
+            l1i->cpuset = hwloc_bitmap_dup(obj->cpuset);
+            hwloc_debug_1arg_bitmap("L1icache %u has cpuset %s\n",
+                j, l1i->cpuset);
+            l1i->attr->cache.depth = i;
+            l1i->attr->cache.size = l1icachesize;
+            l1i->attr->cache.linesize = cachelinesize;
+            l1i->attr->cache.associativity = 0;
+            l1i->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
+
+            hwloc_insert_object_by_cpuset(topology, l1i);
+          }
+          if (i) {
+            hwloc_debug_2args_bitmap("L%ucache %u has cpuset %s\n",
+                i, j, obj->cpuset);
+            obj->attr->cache.depth = i;
+            obj->attr->cache.size = cachesize[i];
+            obj->attr->cache.linesize = cachelinesize;
+            if (i <= sizeof(cacheways) / sizeof(cacheways[0]))
+              obj->attr->cache.associativity = cacheways[i-1]; /* only L1/L2 associativity is known */
+            else
+              obj->attr->cache.associativity = 0;
+            if (i == 1 && l1icachesize)
+              obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA; /* a separate L1i exists, so this one is data-only */
+            else
+              obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
+          } else {
+            hwloc_debug_1arg_bitmap("node %u has cpuset %s\n",
+                j, obj->cpuset);
+	    obj->memory.local_memory = cachesize[i];
+	    obj->memory.page_types_len = 2;
+	    obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types));
+	    memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types));
+	    obj->memory.page_types[0].size = hwloc_getpagesize();
+#if HAVE_DECL__SC_LARGE_PAGESIZE
+	    obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
+#endif
+          }
+
+	  if (hwloc_filter_check_keep_object_type(topology, obj->type))
+	    hwloc_insert_object_by_cpuset(topology, obj);
+	  else
+	    hwloc_free_unlinked_object(obj); /* FIXME: don't built at all, just build the cpuset in case l1i needs it */
+        }
+      }
+    }
+  out:
+    free(cacheconfig); /* free(NULL) is a no-op, so partial allocation is fine */
+    free(cachesize);
+    free(cacheconfig32);
+  }
+
+
+  /* add PU objects */
+  hwloc_setup_pu_level(topology, nprocs);
+
+  hwloc_obj_add_info(topology->levels[0][0], "Backend", "Darwin");
+  hwloc_add_uname_info(topology, NULL);
+  return 0;
+}
+
+void /* Darwin exposes no binding APIs, so no hooks are installed */
+hwloc_set_darwin_hooks(struct hwloc_binding_hooks *hooks __hwloc_attribute_unused,
+		       struct hwloc_topology_support *support __hwloc_attribute_unused)
+{
+}
+
+static struct hwloc_backend * /* factory: build the Darwin discovery backend */
+hwloc_darwin_component_instantiate(struct hwloc_disc_component *component,
+				   const void *_data1 __hwloc_attribute_unused,
+				   const void *_data2 __hwloc_attribute_unused,
+				   const void *_data3 __hwloc_attribute_unused)
+{
+  struct hwloc_backend *backend;
+  backend = hwloc_backend_alloc(component);
+  if (!backend)
+    return NULL; /* allocation failed: component stays disabled */
+  backend->discover = hwloc_look_darwin; /* topology discovery entry point */
+  return backend;
+}
+
+static struct hwloc_disc_component hwloc_darwin_disc_component = {
+  HWLOC_DISC_COMPONENT_TYPE_CPU, /* CPU-level discovery component */
+  "darwin",
+  HWLOC_DISC_COMPONENT_TYPE_GLOBAL, /* excluded when a global (xml/synthetic) backend runs */
+  hwloc_darwin_component_instantiate,
+  50, /* priority — NOTE(review): field order assumed per struct hwloc_disc_component in hwloc/plugins.h; confirm */
+  NULL
+};
+
+const struct hwloc_component hwloc_darwin_component = { /* generic component wrapper registered with the core */
+  HWLOC_COMPONENT_ABI, /* ABI version checked by the component loader */
+  NULL, NULL, /* no init/finalize callbacks */
+  HWLOC_COMPONENT_TYPE_DISC,
+  0, /* flags */
+  &hwloc_darwin_disc_component
+};
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-fake.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-fake.c
new file mode 100644
index 0000000000..e3e22a0a13
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-fake.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright © 2012-2014 Inria.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+#include <private/autogen/config.h>
+#include <hwloc.h>
+#include <private/private.h>
+
+#include <stdlib.h>
+
+static struct hwloc_backend * /* test component: never creates a backend, optionally traces instantiation */
+hwloc_fake_component_instantiate(struct hwloc_disc_component *component __hwloc_attribute_unused,
+				 const void *_data1 __hwloc_attribute_unused,
+				 const void *_data2 __hwloc_attribute_unused,
+				 const void *_data3 __hwloc_attribute_unused)
+{
+  if (getenv("HWLOC_DEBUG_FAKE_COMPONENT"))
+    printf("fake component instantiated\n");
+  return NULL; /* intentionally no backend */
+}
+
+static struct hwloc_disc_component hwloc_fake_disc_component = {
+  HWLOC_DISC_COMPONENT_TYPE_MISC, /* so that it's always enabled when using the OS discovery */
+  "fake",
+  0, /* nothing to exclude */
+  hwloc_fake_component_instantiate,
+  100, /* make sure it's loaded before anything conflicting excludes it */
+  NULL
+};
+
+static int /* plugin init: reject unknown flags, verify symbol visibility, optionally trace */
+hwloc_fake_component_init(unsigned long flags)
+{
+  if (flags)
+    return -1; /* no flags are defined yet */
+  if (hwloc_plugin_check_namespace("fake", "hwloc_backend_alloc") < 0)
+    return -1; /* core symbols not visible: refuse to load */
+  if (getenv("HWLOC_DEBUG_FAKE_COMPONENT"))
+    printf("fake component initialized\n");
+  return 0;
+}
+
+static void /* plugin teardown: only traces when the debug env var is set */
+hwloc_fake_component_finalize(unsigned long flags)
+{
+  if (flags)
+    return; /* no flags are defined yet */
+  if (getenv("HWLOC_DEBUG_FAKE_COMPONENT"))
+    printf("fake component finalized\n");
+}
+
+HWLOC_DECLSPEC extern const struct hwloc_component hwloc_fake_component; /* never linked statically in the core */
+
+const struct hwloc_component hwloc_fake_component = { /* generic component wrapper registered with the core */
+  HWLOC_COMPONENT_ABI, /* ABI version checked by the component loader */
+  hwloc_fake_component_init, hwloc_fake_component_finalize, /* init and finalize callbacks */
+  HWLOC_COMPONENT_TYPE_DISC,
+  0, /* flags */
+  &hwloc_fake_disc_component
+};
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-freebsd.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-freebsd.c
new file mode 100644
index 0000000000..3578306a9a
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-freebsd.c
@@ -0,0 +1,254 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2015 Inria.  All rights reserved.
+ * Copyright © 2009-2010, 2012 Université Bordeaux
+ * Copyright © 2011 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+#include <private/autogen/config.h>
+
+#include <sys/types.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <sys/param.h>
+#include <pthread.h>
+#ifdef HAVE_PTHREAD_NP_H
+#include <pthread_np.h>
+#endif
+#ifdef HAVE_SYS_CPUSET_H
+#include <sys/cpuset.h>
+#endif
+#ifdef HAVE_SYS_SYSCTL_H
+#include <sys/sysctl.h>
+#endif
+
+#include <hwloc.h>
+#include <private/private.h>
+#include <private/debug.h>
+
+#if defined(HAVE_SYS_CPUSET_H) && defined(HAVE_CPUSET_SETAFFINITY)
+static void
+hwloc_freebsd_bsd2hwloc(hwloc_bitmap_t hwloc_cpuset, const cpuset_t *cset)
+{
+  unsigned cpu;
+  hwloc_bitmap_zero(hwloc_cpuset);
+  for (cpu = 0; cpu < CPU_SETSIZE; cpu++)
+    if (CPU_ISSET(cpu, cset))
+      hwloc_bitmap_set(hwloc_cpuset, cpu);
+}
+
+static void
+hwloc_freebsd_hwloc2bsd(hwloc_const_bitmap_t hwloc_cpuset, cpuset_t *cset)
+{
+  unsigned cpu;
+  CPU_ZERO(cset);
+  for (cpu = 0; cpu < CPU_SETSIZE; cpu++)
+    if (hwloc_bitmap_isset(hwloc_cpuset, cpu))
+      CPU_SET(cpu, cset);
+}
+
+static int
+hwloc_freebsd_set_sth_affinity(hwloc_topology_t topology __hwloc_attribute_unused, cpulevel_t level, cpuwhich_t which, id_t id, hwloc_const_bitmap_t hwloc_cpuset, int flags __hwloc_attribute_unused)
+{
+  cpuset_t cset;
+
+  hwloc_freebsd_hwloc2bsd(hwloc_cpuset, &cset);
+
+  if (cpuset_setaffinity(level, which, id, sizeof(cset), &cset))
+    return -1;
+
+  return 0;
+}
+
+static int
+hwloc_freebsd_get_sth_affinity(hwloc_topology_t topology __hwloc_attribute_unused, cpulevel_t level, cpuwhich_t which, id_t id, hwloc_bitmap_t hwloc_cpuset, int flags __hwloc_attribute_unused)
+{
+  cpuset_t cset;
+
+  if (cpuset_getaffinity(level, which, id, sizeof(cset), &cset))
+    return -1;
+
+  hwloc_freebsd_bsd2hwloc(hwloc_cpuset, &cset);
+  return 0;
+}
+
+static int
+hwloc_freebsd_set_thisproc_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_cpuset, int flags)
+{
+  return hwloc_freebsd_set_sth_affinity(topology, CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, hwloc_cpuset, flags);
+}
+
+static int
+hwloc_freebsd_get_thisproc_cpubind(hwloc_topology_t topology, hwloc_bitmap_t hwloc_cpuset, int flags)
+{
+  return hwloc_freebsd_get_sth_affinity(topology, CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, hwloc_cpuset, flags);
+}
+
+static int
+hwloc_freebsd_set_thisthread_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_cpuset, int flags)
+{
+  return hwloc_freebsd_set_sth_affinity(topology, CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, hwloc_cpuset, flags);
+}
+
+static int
+hwloc_freebsd_get_thisthread_cpubind(hwloc_topology_t topology, hwloc_bitmap_t hwloc_cpuset, int flags)
+{
+  return hwloc_freebsd_get_sth_affinity(topology, CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, hwloc_cpuset, flags);
+}
+
+static int
+hwloc_freebsd_set_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_bitmap_t hwloc_cpuset, int flags)
+{
+  return hwloc_freebsd_set_sth_affinity(topology, CPU_LEVEL_WHICH, CPU_WHICH_PID, pid, hwloc_cpuset, flags);
+}
+
+static int
+hwloc_freebsd_get_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_bitmap_t hwloc_cpuset, int flags)
+{
+  return hwloc_freebsd_get_sth_affinity(topology, CPU_LEVEL_WHICH, CPU_WHICH_PID, pid, hwloc_cpuset, flags);
+}
+
+#ifdef hwloc_thread_t
+
+#if HAVE_DECL_PTHREAD_SETAFFINITY_NP
+#pragma weak pthread_setaffinity_np
+static int
+hwloc_freebsd_set_thread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_thread_t tid, hwloc_const_bitmap_t hwloc_cpuset, int flags __hwloc_attribute_unused)
+{
+  int err;
+  cpuset_t cset;
+
+  if (!pthread_setaffinity_np) {
+    errno = ENOSYS;
+    return -1;
+  }
+
+  hwloc_freebsd_hwloc2bsd(hwloc_cpuset, &cset);
+
+  err = pthread_setaffinity_np(tid, sizeof(cset), &cset);
+
+  if (err) {
+    errno = err;
+    return -1;
+  }
+
+  return 0;
+}
+#endif
+
+#if HAVE_DECL_PTHREAD_GETAFFINITY_NP
+#pragma weak pthread_getaffinity_np
+static int
+hwloc_freebsd_get_thread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_thread_t tid, hwloc_bitmap_t hwloc_cpuset, int flags __hwloc_attribute_unused)
+{
+  int err;
+  cpuset_t cset;
+
+  if (!pthread_getaffinity_np) {
+    errno = ENOSYS;
+    return -1;
+  }
+
+  err = pthread_getaffinity_np(tid, sizeof(cset), &cset);
+
+  if (err) {
+    errno = err;
+    return -1;
+  }
+
+  hwloc_freebsd_bsd2hwloc(hwloc_cpuset, &cset);
+  return 0;
+}
+#endif
+#endif
+#endif
+
+#if (defined HAVE_SYSCTL) && (defined HAVE_SYS_SYSCTL_H)
+static void
+hwloc_freebsd_node_meminfo_info(struct hwloc_topology *topology)
+{
+       int mib[2] = { CTL_HW, HW_PHYSMEM };
+       unsigned long physmem;
+       size_t len = sizeof(physmem);
+       sysctl(mib, 2, &physmem, &len, NULL, 0);
+       topology->levels[0][0]->memory.local_memory = physmem;
+       /* we don't know anything about NUMA nodes in this backend.
+        * let another backend or the core move that memory to the right NUMA node */
+}
+#endif
+
+static int
+hwloc_look_freebsd(struct hwloc_backend *backend)
+{
+  struct hwloc_topology *topology = backend->topology;
+  unsigned nbprocs = hwloc_fallback_nbprocessors(topology);
+
+  if (!topology->levels[0][0]->cpuset) {
+    /* Nobody (even the x86 backend) created objects yet, setup basic objects */
+    hwloc_alloc_obj_cpusets(topology->levels[0][0]);
+    hwloc_setup_pu_level(topology, nbprocs);
+  }
+
+  /* Add FreeBSD specific information */
+#if (defined HAVE_SYSCTL) && (defined HAVE_SYS_SYSCTL_H)
+  hwloc_freebsd_node_meminfo_info(topology);
+#endif
+  hwloc_obj_add_info(topology->levels[0][0], "Backend", "FreeBSD");
+  hwloc_add_uname_info(topology, NULL);
+  return 0;
+}
+
+void
+hwloc_set_freebsd_hooks(struct hwloc_binding_hooks *hooks __hwloc_attribute_unused,
+			struct hwloc_topology_support *support __hwloc_attribute_unused)
+{
+#if defined(HAVE_SYS_CPUSET_H) && defined(HAVE_CPUSET_SETAFFINITY)
+  hooks->set_thisproc_cpubind = hwloc_freebsd_set_thisproc_cpubind;
+  hooks->get_thisproc_cpubind = hwloc_freebsd_get_thisproc_cpubind;
+  hooks->set_thisthread_cpubind = hwloc_freebsd_set_thisthread_cpubind;
+  hooks->get_thisthread_cpubind = hwloc_freebsd_get_thisthread_cpubind;
+  hooks->set_proc_cpubind = hwloc_freebsd_set_proc_cpubind;
+  hooks->get_proc_cpubind = hwloc_freebsd_get_proc_cpubind;
+#ifdef hwloc_thread_t
+#if HAVE_DECL_PTHREAD_SETAFFINITY_NP
+  hooks->set_thread_cpubind = hwloc_freebsd_set_thread_cpubind;
+#endif
+#if HAVE_DECL_PTHREAD_GETAFFINITY_NP
+  hooks->get_thread_cpubind = hwloc_freebsd_get_thread_cpubind;
+#endif
+#endif
+#endif
+  /* TODO: get_last_cpu_location: find out ki_lastcpu */
+}
+
+static struct hwloc_backend *
+hwloc_freebsd_component_instantiate(struct hwloc_disc_component *component,
+				    const void *_data1 __hwloc_attribute_unused,
+				    const void *_data2 __hwloc_attribute_unused,
+				    const void *_data3 __hwloc_attribute_unused)
+{
+  struct hwloc_backend *backend;
+  backend = hwloc_backend_alloc(component);
+  if (!backend)
+    return NULL;
+  backend->discover = hwloc_look_freebsd;
+  return backend;
+}
+
+static struct hwloc_disc_component hwloc_freebsd_disc_component = {
+  HWLOC_DISC_COMPONENT_TYPE_CPU,
+  "freebsd",
+  HWLOC_DISC_COMPONENT_TYPE_GLOBAL,
+  hwloc_freebsd_component_instantiate,
+  50,
+  NULL
+};
+
+const struct hwloc_component hwloc_freebsd_component = {
+  HWLOC_COMPONENT_ABI,
+  NULL, NULL,
+  HWLOC_COMPONENT_TYPE_DISC,
+  0,
+  &hwloc_freebsd_disc_component
+};
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-gl.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-gl.c
new file mode 100644
index 0000000000..1f264bd571
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-gl.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright © 2012-2013 Blue Brain Project, BBP/EPFL. All rights reserved.
+ * Copyright © 2012-2017 Inria.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+#include <private/autogen/config.h>
+#include <hwloc.h>
+#include <hwloc/plugins.h>
+
+/* private headers allowed for convenience because this plugin is built within hwloc */
+#include <private/misc.h>
+#include <private/debug.h>
+
+#include <stdarg.h>
+#include <errno.h>
+#include <X11/Xlib.h>
+#include <NVCtrl/NVCtrl.h>
+#include <NVCtrl/NVCtrlLib.h>
+
+#define HWLOC_GL_SERVER_MAX 10
+#define HWLOC_GL_SCREEN_MAX 10
+
+static int
+hwloc_gl_discover(struct hwloc_backend *backend)
+{
+  struct hwloc_topology *topology = backend->topology;
+  enum hwloc_type_filter_e filter;
+  unsigned i;
+  int err;
+
+  hwloc_topology_get_type_filter(topology, HWLOC_OBJ_OS_DEVICE, &filter);
+  if (filter == HWLOC_TYPE_FILTER_KEEP_NONE)
+    return 0;
+
+  for (i = 0; i < HWLOC_GL_SERVER_MAX; ++i) {
+    Display* display;
+    char displayName[10];
+    int opcode, event, error;
+    unsigned j;
+
+    /* open X server */
+    snprintf(displayName, sizeof(displayName), ":%u", i);
+    display = XOpenDisplay(displayName);
+    if (!display)
+      continue;
+
+    /* Check for NV-CONTROL extension (it's per server) */
+    if(!XQueryExtension(display, "NV-CONTROL", &opcode, &event, &error)) {
+      XCloseDisplay(display);
+      continue;
+    }
+
+    for (j = 0; j < (unsigned) ScreenCount(display) && j < HWLOC_GL_SCREEN_MAX; j++) {
+      hwloc_obj_t osdev, parent;
+      const int screen = j;
+      unsigned int *ptr_binary_data;
+      int data_length;
+      int gpu_number;
+      int nv_ctrl_pci_bus;
+      int nv_ctrl_pci_device;
+      int nv_ctrl_pci_domain;
+      int nv_ctrl_pci_func;
+      char *productname;
+      char name[64];
+
+      /* the server supports NV-CONTROL but it may contain non-NVIDIA screens that don't support it */
+      if (!XNVCTRLIsNvScreen(display, screen))
+        continue;
+
+      /* Gets the GPU number attached to the default screen. */
+      /* For further details, see the <NVCtrl/NVCtrlLib.h> */
+      err = XNVCTRLQueryTargetBinaryData (display, NV_CTRL_TARGET_TYPE_X_SCREEN, screen, 0,
+                                          NV_CTRL_BINARY_DATA_GPUS_USED_BY_XSCREEN,
+                                          (unsigned char **) &ptr_binary_data, &data_length);
+      if (!err)
+        continue;
+
+      gpu_number = ptr_binary_data[1];
+      free(ptr_binary_data);
+
+#ifdef NV_CTRL_PCI_DOMAIN
+      /* Gets the IDs of the GPU defined by gpu_number
+       * For further details, see the <NVCtrl/NVCtrlLib.h> */
+      err = XNVCTRLQueryTargetAttribute(display, NV_CTRL_TARGET_TYPE_GPU, gpu_number, 0,
+                                        NV_CTRL_PCI_DOMAIN, &nv_ctrl_pci_domain);
+      if (!err)
+        continue;
+#else
+      nv_ctrl_pci_domain = 0;
+#endif
+
+      err = XNVCTRLQueryTargetAttribute(display, NV_CTRL_TARGET_TYPE_GPU, gpu_number, 0,
+                                        NV_CTRL_PCI_BUS, &nv_ctrl_pci_bus);
+      if (!err)
+        continue;
+
+      err = XNVCTRLQueryTargetAttribute(display, NV_CTRL_TARGET_TYPE_GPU, gpu_number, 0,
+                                        NV_CTRL_PCI_DEVICE, &nv_ctrl_pci_device);
+      if (!err)
+        continue;
+
+      err = XNVCTRLQueryTargetAttribute(display, NV_CTRL_TARGET_TYPE_GPU, gpu_number, 0,
+                                        NV_CTRL_PCI_FUNCTION, &nv_ctrl_pci_func);
+      if (!err)
+        continue;
+
+      productname = NULL;
+      err = XNVCTRLQueryTargetStringAttribute(display, NV_CTRL_TARGET_TYPE_GPU, gpu_number, 0,
+                                              NV_CTRL_STRING_PRODUCT_NAME, &productname);
+
+      snprintf(name, sizeof(name), ":%u.%u", i, j);
+
+      osdev = hwloc_alloc_setup_object(topology, HWLOC_OBJ_OS_DEVICE, -1);
+      osdev->name = strdup(name);
+      osdev->logical_index = -1;
+      osdev->attr->osdev.type = HWLOC_OBJ_OSDEV_GPU;
+      hwloc_obj_add_info(osdev, "Backend", "GL");
+      hwloc_obj_add_info(osdev, "GPUVendor", "NVIDIA Corporation");
+      if (productname)
+	hwloc_obj_add_info(osdev, "GPUModel", productname);
+
+      parent = hwloc_pci_belowroot_find_by_busid(topology, nv_ctrl_pci_domain, nv_ctrl_pci_bus, nv_ctrl_pci_device, nv_ctrl_pci_func);
+      if (!parent)
+	parent = hwloc_pci_find_busid_parent(topology, nv_ctrl_pci_domain, nv_ctrl_pci_bus, nv_ctrl_pci_device, nv_ctrl_pci_func);
+      if (!parent)
+	parent = hwloc_get_root_obj(topology);
+
+      hwloc_insert_object_by_parent(topology, parent, osdev);
+
+      hwloc_debug("GL device %s (product %s) on PCI %04x:%02x:%02x.%01x\n",
+		  name, productname,
+		  (unsigned) nv_ctrl_pci_domain, (unsigned) nv_ctrl_pci_bus, (unsigned) nv_ctrl_pci_device, (unsigned) nv_ctrl_pci_func);
+    }
+    XCloseDisplay(display);
+  }
+
+  return 0;
+}
+
+static struct hwloc_backend *
+hwloc_gl_component_instantiate(struct hwloc_disc_component *component,
+			       const void *_data1 __hwloc_attribute_unused,
+			       const void *_data2 __hwloc_attribute_unused,
+			       const void *_data3 __hwloc_attribute_unused)
+{
+  struct hwloc_backend *backend;
+
+  backend = hwloc_backend_alloc(component);
+  if (!backend)
+    return NULL;
+  backend->discover = hwloc_gl_discover;
+  return backend;
+}
+
+static struct hwloc_disc_component hwloc_gl_disc_component = {
+  HWLOC_DISC_COMPONENT_TYPE_MISC,
+  "gl",
+  HWLOC_DISC_COMPONENT_TYPE_GLOBAL,
+  hwloc_gl_component_instantiate,
+  10, /* after pci */
+  NULL
+};
+
+static int
+hwloc_gl_component_init(unsigned long flags)
+{
+  if (flags)
+    return -1;
+  if (hwloc_plugin_check_namespace("gl", "hwloc_backend_alloc") < 0)
+    return -1;
+  return 0;
+}
+
+#ifdef HWLOC_INSIDE_PLUGIN
+HWLOC_DECLSPEC extern const struct hwloc_component hwloc_gl_component;
+#endif
+
+const struct hwloc_component hwloc_gl_component = {
+  HWLOC_COMPONENT_ABI,
+  hwloc_gl_component_init, NULL,
+  HWLOC_COMPONENT_TYPE_DISC,
+  0,
+  &hwloc_gl_disc_component
+};
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-hardwired.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-hardwired.c
new file mode 100644
index 0000000000..8c5b0a7285
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-hardwired.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright © 2015-2016 Inria.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+#include <private/autogen/config.h>
+
+#include <hwloc.h>
+#include <private/private.h>
+
+int hwloc_look_hardwired_fujitsu_k(struct hwloc_topology *topology)
+{
+  /* If a broken core gets disabled, its bit disappears and other core bits are NOT shifted towards 0.
+   * Node is not given to user job, not need to handle that case properly.
+   */
+  unsigned i;
+  hwloc_obj_t obj;
+  hwloc_bitmap_t set;
+
+  for(i=0; i<8; i++) {
+    set = hwloc_bitmap_alloc();
+    hwloc_bitmap_set(set, i);
+
+    if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1ICACHE)) {
+      obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L1ICACHE, -1);
+      obj->cpuset = hwloc_bitmap_dup(set);
+      obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
+      obj->attr->cache.depth = 1;
+      obj->attr->cache.size = 32*1024;
+      obj->attr->cache.linesize = 128;
+      obj->attr->cache.associativity = 2;
+      hwloc_insert_object_by_cpuset(topology, obj);
+    }
+    if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1CACHE)) {
+      obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L1CACHE, -1);
+      obj->cpuset = hwloc_bitmap_dup(set);
+      obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
+      obj->attr->cache.depth = 1;
+      obj->attr->cache.size = 32*1024;
+      obj->attr->cache.linesize = 128;
+      obj->attr->cache.associativity = 2;
+      hwloc_insert_object_by_cpuset(topology, obj);
+    }
+    if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) {
+      obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_CORE, i);
+      obj->cpuset = set;
+      hwloc_insert_object_by_cpuset(topology, obj);
+    } else
+      hwloc_bitmap_free(set);
+  }
+
+  set = hwloc_bitmap_alloc();
+  hwloc_bitmap_set_range(set, 0, 7);
+
+  if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L2CACHE)) {
+    obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L2CACHE, -1);
+    obj->cpuset = hwloc_bitmap_dup(set);
+    obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
+    obj->attr->cache.depth = 2;
+    obj->attr->cache.size = 6*1024*1024;
+    obj->attr->cache.linesize = 128;
+    obj->attr->cache.associativity = 12;
+    hwloc_insert_object_by_cpuset(topology, obj);
+  }
+  if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_PACKAGE)) {
+    obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PACKAGE, 0);
+    obj->cpuset = set;
+    hwloc_obj_add_info(obj, "CPUVendor", "Fujitsu");
+    hwloc_obj_add_info(obj, "CPUModel", "SPARC64 VIIIfx");
+    hwloc_insert_object_by_cpuset(topology, obj);
+  } else
+    hwloc_bitmap_free(set);
+
+  hwloc_setup_pu_level(topology, 8);
+
+  return 0;
+}
+
+int hwloc_look_hardwired_fujitsu_fx10(struct hwloc_topology *topology)
+{
+  /* If a broken core gets disabled, its bit disappears and other core bits are NOT shifted towards 0.
+   * Node is not given to user job, no need to handle that case properly.
+   */
+  unsigned i;
+  hwloc_obj_t obj;
+  hwloc_bitmap_t set;
+
+  for(i=0; i<16; i++) {
+    set = hwloc_bitmap_alloc();
+    hwloc_bitmap_set(set, i);
+
+    if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1ICACHE)) {
+      obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L1ICACHE, -1);
+      obj->cpuset = hwloc_bitmap_dup(set);
+      obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
+      obj->attr->cache.depth = 1;
+      obj->attr->cache.size = 32*1024;
+      obj->attr->cache.linesize = 128;
+      obj->attr->cache.associativity = 2;
+      hwloc_insert_object_by_cpuset(topology, obj);
+    }
+    if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1CACHE)) {
+      obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L1CACHE, -1);
+      obj->cpuset = hwloc_bitmap_dup(set);
+      obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
+      obj->attr->cache.depth = 1;
+      obj->attr->cache.size = 32*1024;
+      obj->attr->cache.linesize = 128;
+      obj->attr->cache.associativity = 2;
+      hwloc_insert_object_by_cpuset(topology, obj);
+    }
+    if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) {
+      obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_CORE, i);
+      obj->cpuset = set;
+      hwloc_insert_object_by_cpuset(topology, obj);
+    } else
+      hwloc_bitmap_free(set);
+  }
+
+  set = hwloc_bitmap_alloc();
+  hwloc_bitmap_set_range(set, 0, 15);
+
+  if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L2CACHE)) {
+    obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L2CACHE, -1);
+    obj->cpuset = hwloc_bitmap_dup(set);
+    obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
+    obj->attr->cache.depth = 2;
+    obj->attr->cache.size = 12*1024*1024;
+    obj->attr->cache.linesize = 128;
+    obj->attr->cache.associativity = 24;
+    hwloc_insert_object_by_cpuset(topology, obj);
+  }
+  if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_PACKAGE)) {
+    obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PACKAGE, 0);
+    obj->cpuset = set;
+    hwloc_obj_add_info(obj, "CPUVendor", "Fujitsu");
+    hwloc_obj_add_info(obj, "CPUModel", "SPARC64 IXfx");
+    hwloc_insert_object_by_cpuset(topology, obj);
+  } else
+    hwloc_bitmap_free(set);
+
+  hwloc_setup_pu_level(topology, 16);
+
+  return 0;
+}
+
+int hwloc_look_hardwired_fujitsu_fx100(struct hwloc_topology *topology)
+{
+  /* If a broken core gets disabled, its bit disappears and other core bits are NOT shifted towards 0.
+   * Node is not given to user job, no need to handle that case properly.
+   */
+  unsigned i;
+  hwloc_obj_t obj;
+  hwloc_bitmap_t set;
+
+  for(i=0; i<34; i++) {
+    set = hwloc_bitmap_alloc();
+    hwloc_bitmap_set(set, i);
+
+    if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1ICACHE)) {
+      obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L1ICACHE, -1);
+      obj->cpuset = hwloc_bitmap_dup(set);
+      obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
+      obj->attr->cache.depth = 1;
+      obj->attr->cache.size = 64*1024;
+      obj->attr->cache.linesize = 256;
+      obj->attr->cache.associativity = 4;
+      hwloc_insert_object_by_cpuset(topology, obj);
+    }
+    if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1CACHE)) {
+      obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L1CACHE, -1);
+      obj->cpuset = hwloc_bitmap_dup(set);
+      obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
+      obj->attr->cache.depth = 1;
+      obj->attr->cache.size = 64*1024;
+      obj->attr->cache.linesize = 256;
+      obj->attr->cache.associativity = 4;
+      hwloc_insert_object_by_cpuset(topology, obj);
+    }
+    if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) {
+      obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_CORE, i);
+      obj->cpuset = set;
+      hwloc_insert_object_by_cpuset(topology, obj);
+    } else
+      hwloc_bitmap_free(set);
+  }
+
+  if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L2CACHE)) {
+    obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L2CACHE, -1);
+    obj->cpuset = hwloc_bitmap_alloc();
+    hwloc_bitmap_set_range(obj->cpuset, 0, 15);
+    hwloc_bitmap_set(obj->cpuset, 32);
+    obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
+    obj->attr->cache.depth = 2;
+    obj->attr->cache.size = 12*1024*1024;
+    obj->attr->cache.linesize = 256;
+    obj->attr->cache.associativity = 24;
+    hwloc_insert_object_by_cpuset(topology, obj);
+
+    obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L2CACHE, -1);
+    obj->cpuset = hwloc_bitmap_alloc();
+    hwloc_bitmap_set_range(obj->cpuset, 16, 31);
+    hwloc_bitmap_set(obj->cpuset, 33);
+    obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
+    obj->attr->cache.depth = 2;
+    obj->attr->cache.size = 12*1024*1024;
+    obj->attr->cache.linesize = 256;
+    obj->attr->cache.associativity = 24;
+    hwloc_insert_object_by_cpuset(topology, obj);
+  }
+  if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_PACKAGE)) {
+    obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PACKAGE, 0);
+    obj->cpuset = hwloc_bitmap_alloc();
+    hwloc_bitmap_set_range(obj->cpuset, 0, 33);
+    hwloc_obj_add_info(obj, "CPUVendor", "Fujitsu");
+    hwloc_obj_add_info(obj, "CPUModel", "SPARC64 XIfx");
+    hwloc_insert_object_by_cpuset(topology, obj);
+  }
+
+  hwloc_setup_pu_level(topology, 34);
+
+  return 0;
+}
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-hpux.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-hpux.c
new file mode 100644
index 0000000000..ad5afb4ad3
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-hpux.c
@@ -0,0 +1,312 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2017 Inria.  All rights reserved.
+ * Copyright © 2009-2010, 2013 Université Bordeaux
+ * Copyright © 2011 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/* TODO: psets? (Only for root)
+ * since 11i 1.6:
+   _SC_PSET_SUPPORT
+   pset_create/destroy/assign/setattr
+   pset_ctl/getattr
+   pset_bind()
+   pthread_pset_bind_np()
+ */
+
+#include <private/autogen/config.h>
+
+#include <sys/types.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+
+#include <hwloc.h>
+#include <private/private.h>
+#include <private/debug.h>
+
+#include <sys/mpctl.h>
+#include <sys/mman.h>
+#include <pthread.h>
+
+static ldom_t
+hwloc_hpux_find_ldom(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set)
+{
+  int has_numa = sysconf(_SC_CCNUMA_SUPPORT) == 1;
+  hwloc_obj_t obj;
+
+  if (!has_numa)
+    return -1;
+
+  obj = hwloc_get_first_largest_obj_inside_cpuset(topology, hwloc_set);
+  if (!hwloc_bitmap_isequal(obj->cpuset, hwloc_set))
+    /* Does not correspond to exactly one node */
+    return -1;
+  /* obj is the highest possibly matching object, but some (single) child (with same cpuset) could match too */
+  while (obj->type != HWLOC_OBJ_NUMANODE) {
+    /* try the first child, in case it has the same cpuset */
+    if (!obj->first_child
+	|| !obj->first_child->cpuset
+	|| !hwloc_bitmap_isequal(obj->cpuset, obj->first_child->cpuset))
+      return -1;
+    obj = obj->first_child;
+  }
+
+  return obj->os_index;
+}
+
+static spu_t
+hwloc_hpux_find_spu(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_bitmap_t hwloc_set)
+{
+  spu_t cpu;
+
+  cpu = hwloc_bitmap_first(hwloc_set);
+  if (cpu != -1 && hwloc_bitmap_weight(hwloc_set) == 1)
+    return cpu;
+  return -1;
+}
+
+/* Note: get_cpubind not available on HP-UX */
+static int
+hwloc_hpux_set_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_bitmap_t hwloc_set, int flags)
+{
+  ldom_t ldom;
+  spu_t cpu;
+
+  /* Drop previous binding */
+  mpctl(MPC_SETLDOM, MPC_LDOMFLOAT, pid);
+  mpctl(MPC_SETPROCESS, MPC_SPUFLOAT, pid);
+
+  if (hwloc_bitmap_isequal(hwloc_set, hwloc_topology_get_complete_cpuset(topology)))
+    return 0;
+
+  ldom = hwloc_hpux_find_ldom(topology, hwloc_set);
+  if (ldom != -1)
+    return mpctl(MPC_SETLDOM, ldom, pid);
+
+  cpu = hwloc_hpux_find_spu(topology, hwloc_set);
+  if (cpu != -1)
+    return mpctl((flags & HWLOC_CPUBIND_STRICT) ? MPC_SETPROCESS_FORCE : MPC_SETPROCESS, cpu, pid);
+
+  errno = EXDEV;
+  return -1;
+}
+
+static int
+hwloc_hpux_set_thisproc_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, int flags)
+{
+  return hwloc_hpux_set_proc_cpubind(topology, MPC_SELFPID, hwloc_set, flags);
+}
+
+#ifdef hwloc_thread_t
+static int
+hwloc_hpux_set_thread_cpubind(hwloc_topology_t topology, hwloc_thread_t pthread, hwloc_const_bitmap_t hwloc_set, int flags)
+{
+  ldom_t ldom, ldom2;
+  spu_t cpu, cpu2;
+
+  /* Drop previous binding */
+  pthread_ldom_bind_np(&ldom2, PTHREAD_LDOMFLOAT_NP, pthread);
+  pthread_processor_bind_np(PTHREAD_BIND_ADVISORY_NP, &cpu2, PTHREAD_SPUFLOAT_NP, pthread);
+
+  if (hwloc_bitmap_isequal(hwloc_set, hwloc_topology_get_complete_cpuset(topology)))
+    return 0;
+
+  ldom = hwloc_hpux_find_ldom(topology, hwloc_set);
+  if (ldom != -1)
+    return pthread_ldom_bind_np(&ldom2, ldom, pthread);
+
+  cpu = hwloc_hpux_find_spu(topology, hwloc_set);
+  if (cpu != -1)
+    return pthread_processor_bind_np((flags & HWLOC_CPUBIND_STRICT) ? PTHREAD_BIND_FORCED_NP : PTHREAD_BIND_ADVISORY_NP, &cpu2, cpu, pthread);
+
+  errno = EXDEV;
+  return -1;
+}
+
+static int
+hwloc_hpux_set_thisthread_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, int flags)
+{
+  return hwloc_hpux_set_thread_cpubind(topology, PTHREAD_SELFTID_NP, hwloc_set, flags);
+}
+#endif
+
+/* According to HP docs, HP-UX up to 11iv2 don't support migration */
+
+#ifdef MAP_MEM_FIRST_TOUCH
+static void*
+hwloc_hpux_alloc_membind(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
+{
+  int mmap_flags;
+  void *p;
+
+  /* Can not give a set of nodes.  */
+  if (!hwloc_bitmap_isequal(nodeset, hwloc_topology_get_complete_nodeset(topology))) {
+    errno = EXDEV;
+    return hwloc_alloc_or_fail(topology, len, flags);
+  }
+
+  switch (policy) {
+    case HWLOC_MEMBIND_DEFAULT:
+    case HWLOC_MEMBIND_BIND:
+      mmap_flags = 0;
+      break;
+    case HWLOC_MEMBIND_FIRSTTOUCH:
+      mmap_flags = MAP_MEM_FIRST_TOUCH;
+      break;
+    case HWLOC_MEMBIND_INTERLEAVE:
+      mmap_flags = MAP_MEM_INTERLEAVED;
+      break;
+    default:
+      errno = ENOSYS;
+      return NULL;
+  }
+
+  p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | mmap_flags, -1, 0);
+  return p == MAP_FAILED ? NULL : p;
+}
+#endif /* MAP_MEM_FIRST_TOUCH */
+
+static int
+hwloc_look_hpux(struct hwloc_backend *backend)
+{
+  struct hwloc_topology *topology = backend->topology;
+  int has_numa = sysconf(_SC_CCNUMA_SUPPORT) == 1;
+  hwloc_obj_t *nodes = NULL, obj;
+  spu_t currentcpu;
+  ldom_t currentnode;
+  int i, nbnodes = 0;
+
+  if (topology->levels[0][0]->cpuset)
+    /* somebody discovered things */
+    return -1;
+
+  hwloc_alloc_obj_cpusets(topology->levels[0][0]);
+
+  if (has_numa) {
+    nbnodes = mpctl((topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM) ?
+      MPC_GETNUMLDOMS_SYS : MPC_GETNUMLDOMS, 0, 0);
+
+    hwloc_debug("%d nodes\n", nbnodes);
+
+    nodes = malloc(nbnodes * sizeof(*nodes));
+
+    i = 0;
+    currentnode = mpctl((topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM) ?
+      MPC_GETFIRSTLDOM_SYS : MPC_GETFIRSTLDOM, 0, 0);
+    while (currentnode != -1 && i < nbnodes) {
+      hwloc_debug("node %d is %d\n", i, currentnode);
+      nodes[i] = obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_NUMANODE, currentnode);
+      obj->cpuset = hwloc_bitmap_alloc();
+      obj->nodeset = hwloc_bitmap_alloc();
+      hwloc_bitmap_set(obj->nodeset, currentnode);
+      /* TODO: obj->attr->node.memory_kB */
+      /* TODO: obj->attr->node.huge_page_free */
+
+      currentnode = mpctl((topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM) ?
+        MPC_GETNEXTLDOM_SYS : MPC_GETNEXTLDOM, currentnode, 0);
+      i++;
+    }
+  }
+
+  i = 0;
+  currentcpu = mpctl((topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM) ?
+      MPC_GETFIRSTSPU_SYS : MPC_GETFIRSTSPU, 0,0);
+  while (currentcpu != -1) {
+    obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PU, currentcpu);
+    obj->cpuset = hwloc_bitmap_alloc();
+    hwloc_bitmap_set(obj->cpuset, currentcpu);
+
+    hwloc_debug("cpu %d\n", currentcpu);
+
+    if (nodes) {
+      /* Add this cpu to its node */
+      currentnode = mpctl(MPC_SPUTOLDOM, currentcpu, 0);
+      /* Hopefully it's just the same as previous cpu */
+      if (i >= nbnodes || (ldom_t) nodes[i]->os_index != currentnode)
+        for (i = 0; i < nbnodes; i++)
+          if ((ldom_t) nodes[i]->os_index == currentnode)
+            break;
+      if (i < nbnodes) {
+        hwloc_bitmap_set(nodes[i]->cpuset, currentcpu);
+        hwloc_debug("is in node %d\n", i);
+      } else {
+        hwloc_debug("%s", "is in no node?!\n");
+      }
+    }
+
+    /* Add cpu */
+    hwloc_insert_object_by_cpuset(topology, obj);
+
+    currentcpu = mpctl((topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM) ?
+      MPC_GETNEXTSPU_SYS : MPC_GETNEXTSPU, currentcpu, 0);
+  }
+
+  if (nodes) {
+    /* Add nodes */
+    for (i = 0 ; i < nbnodes ; i++)
+      hwloc_insert_object_by_cpuset(topology, nodes[i]);
+    free(nodes);
+  }
+
+  topology->support.discovery->pu = 1;
+
+  hwloc_obj_add_info(topology->levels[0][0], "Backend", "HP-UX");
+  hwloc_add_uname_info(topology, NULL);
+  return 0;
+}
+
+void
+hwloc_set_hpux_hooks(struct hwloc_binding_hooks *hooks,
+		     struct hwloc_topology_support *support __hwloc_attribute_unused)
+{
+  hooks->set_proc_cpubind = hwloc_hpux_set_proc_cpubind;
+  hooks->set_thisproc_cpubind = hwloc_hpux_set_thisproc_cpubind;
+#ifdef hwloc_thread_t
+  hooks->set_thread_cpubind = hwloc_hpux_set_thread_cpubind;
+  hooks->set_thisthread_cpubind = hwloc_hpux_set_thisthread_cpubind;
+#endif
+#ifdef MAP_MEM_FIRST_TOUCH
+  hooks->alloc_membind = hwloc_hpux_alloc_membind;
+  hooks->alloc = hwloc_alloc_mmap;
+  hooks->free_membind = hwloc_free_mmap;
+  support->membind->firsttouch_membind = 1;
+  support->membind->bind_membind = 1;
+  support->membind->interleave_membind = 1;
+#endif /* MAP_MEM_FIRST_TOUCH */
+}
+
+static struct hwloc_backend *
+hwloc_hpux_component_instantiate(struct hwloc_disc_component *component,
+				 const void *_data1 __hwloc_attribute_unused,
+				 const void *_data2 __hwloc_attribute_unused,
+				 const void *_data3 __hwloc_attribute_unused)
+{
+  struct hwloc_backend *backend;
+  backend = hwloc_backend_alloc(component);
+  if (!backend)
+    return NULL;
+  backend->discover = hwloc_look_hpux;
+  return backend;
+}
+
+static struct hwloc_disc_component hwloc_hpux_disc_component = {
+  HWLOC_DISC_COMPONENT_TYPE_CPU,
+  "hpux",
+  HWLOC_DISC_COMPONENT_TYPE_GLOBAL,
+  hwloc_hpux_component_instantiate,
+  50,
+  NULL
+};
+
+const struct hwloc_component hwloc_hpux_component = {
+  HWLOC_COMPONENT_ABI,
+  NULL, NULL,
+  HWLOC_COMPONENT_TYPE_DISC,
+  0,
+  &hwloc_hpux_disc_component
+};
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-linux.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-linux.c
new file mode 100644
index 0000000000..355b179aad
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-linux.c
@@ -0,0 +1,5790 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2017 Inria.  All rights reserved.
+ * Copyright © 2009-2013, 2015 Université Bordeaux
+ * Copyright © 2009-2014 Cisco Systems, Inc.  All rights reserved.
+ * Copyright © 2015 Intel, Inc.  All rights reserved.
+ * Copyright © 2010 IBM
+ * See COPYING in top-level directory.
+ */
+
+#include <private/autogen/config.h>
+#include <hwloc.h>
+#include <hwloc/linux.h>
+#include <private/misc.h>
+#include <private/private.h>
+#include <private/misc.h>
+#include <private/debug.h>
+
+#include <limits.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <assert.h>
+#ifdef HAVE_DIRENT_H
+#include <dirent.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef HWLOC_HAVE_LIBUDEV
+#include <libudev.h>
+#endif
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sched.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <mntent.h>
+
+/* Private state carried by the Linux discovery backend. */
+struct hwloc_linux_backend_data_s {
+  char *root_path; /* NULL if unused */
+  int root_fd; /* The file descriptor for the file system root, used when browsing, e.g., Linux' sysfs and procfs. */
+  int is_real_fsroot; /* Boolean saying whether root_fd points to the real filesystem root of the system */
+#ifdef HWLOC_HAVE_LIBUDEV
+  struct udev *udev; /* Global udev context */
+#endif
+  char *dumped_hwdata_dirname; /* directory holding hwloc-dump-hwdata output — TODO confirm */
+  enum {
+    HWLOC_LINUX_ARCH_X86, /* x86 32 or 64bits, including k1om (KNC) */
+    HWLOC_LINUX_ARCH_IA64,
+    HWLOC_LINUX_ARCH_ARM,
+    HWLOC_LINUX_ARCH_POWER,
+    HWLOC_LINUX_ARCH_UNKNOWN
+  } arch;
+  int is_knl; /* presumably "is Intel Knights Landing" — verify where it is set */
+  int is_amd_with_CU; /* presumably "AMD with Compute Units" — verify where it is set */
+  struct utsname utsname; /* fields contain \0 when unknown */
+  unsigned fallback_nbprocessors; /* processor count to use when sysfs/procfs discovery fails — TODO confirm */
+  unsigned pagesize;
+};
+
+
+
+/***************************
+ * Misc Abstraction layers *
+ ***************************/
+
+#include <linux/unistd.h>
+
+#if !(defined HWLOC_HAVE_SCHED_SETAFFINITY) && (defined HWLOC_HAVE_SYSCALL)
+/* libc doesn't have support for sched_setaffinity, make system call
+ * ourselves: */
+#    ifndef __NR_sched_setaffinity
+#       ifdef __i386__
+#         define __NR_sched_setaffinity 241
+#       elif defined(__x86_64__)
+#         define __NR_sched_setaffinity 203
+#       elif defined(__ia64__)
+#         define __NR_sched_setaffinity 1231
+#       elif defined(__hppa__)
+#         define __NR_sched_setaffinity 211
+#       elif defined(__alpha__)
+#         define __NR_sched_setaffinity 395
+#       elif defined(__s390__)
+#         define __NR_sched_setaffinity 239
+#       elif defined(__sparc__)
+#         define __NR_sched_setaffinity 261
+#       elif defined(__m68k__)
+#         define __NR_sched_setaffinity 311
+#       elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__powerpc64__) || defined(__ppc64__)
+#         define __NR_sched_setaffinity 222
+#       elif defined(__arm__)
+#         define __NR_sched_setaffinity 241
+#       elif defined(__cris__)
+#         define __NR_sched_setaffinity 241
+/*#       elif defined(__mips__)
+  #         define __NR_sched_setaffinity TODO (32/64/nabi) */
+#       else
+#         warning "don't know the syscall number for sched_setaffinity on this architecture, will not support binding"
+#         define sched_setaffinity(pid, lg, mask) (errno = ENOSYS, -1)
+#       endif
+#    endif
+#    ifndef sched_setaffinity
+#      define sched_setaffinity(pid, lg, mask) syscall(__NR_sched_setaffinity, pid, lg, mask)
+#    endif
+#    ifndef __NR_sched_getaffinity
+#       ifdef __i386__
+#         define __NR_sched_getaffinity 242
+#       elif defined(__x86_64__)
+#         define __NR_sched_getaffinity 204
+#       elif defined(__ia64__)
+#         define __NR_sched_getaffinity 1232
+#       elif defined(__hppa__)
+#         define __NR_sched_getaffinity 212
+#       elif defined(__alpha__)
+#         define __NR_sched_getaffinity 396
+#       elif defined(__s390__)
+#         define __NR_sched_getaffinity 240
+#       elif defined(__sparc__)
+#         define __NR_sched_getaffinity 260
+#       elif defined(__m68k__)
+#         define __NR_sched_getaffinity 312
+#       elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__powerpc64__) || defined(__ppc64__)
+#         define __NR_sched_getaffinity 223
+#       elif defined(__arm__)
+#         define __NR_sched_getaffinity 242
+#       elif defined(__cris__)
+#         define __NR_sched_getaffinity 242
+/*#       elif defined(__mips__)
+  #         define __NR_sched_getaffinity TODO (32/64/nabi) */
+#       else
+#         warning "don't know the syscall number for sched_getaffinity on this architecture, will not support getting binding"
+#         define sched_getaffinity(pid, lg, mask) (errno = ENOSYS, -1)
+#       endif
+#    endif
+#    ifndef sched_getaffinity
+#      define sched_getaffinity(pid, lg, mask) (syscall(__NR_sched_getaffinity, pid, lg, mask) < 0 ? -1 : 0)
+#    endif
+#endif
+
+/* numa syscalls are only in libnuma, but libnuma devel headers aren't widely installed.
+ * just redefine these syscalls to avoid requiring libnuma devel headers just because of these missing syscalls.
+ * __NR_foo should be defined in headers in all modern platforms.
+ * Just redefine the basic ones on important platform when not to hard to detect/define.
+ */
+
+/* mempolicy modes and flags, normally provided by <numaif.h>;
+ * defined here so libnuma devel headers are not required (see note above). */
+#ifndef MPOL_DEFAULT
+# define MPOL_DEFAULT 0
+#endif
+#ifndef MPOL_PREFERRED
+# define MPOL_PREFERRED 1
+#endif
+#ifndef MPOL_BIND
+# define MPOL_BIND 2
+#endif
+#ifndef MPOL_INTERLEAVE
+# define MPOL_INTERLEAVE 3
+#endif
+#ifndef MPOL_F_ADDR
+# define  MPOL_F_ADDR (1<<1)
+#endif
+#ifndef MPOL_MF_STRICT
+# define MPOL_MF_STRICT (1<<0)
+#endif
+#ifndef MPOL_MF_MOVE
+# define MPOL_MF_MOVE (1<<1)
+#endif
+
+#ifndef __NR_mbind
+# ifdef __i386__
+#  define __NR_mbind 274
+# elif defined(__x86_64__)
+#  define __NR_mbind 237
+# elif defined(__ia64__)
+#  define __NR_mbind 1259
+# elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__powerpc64__) || defined(__ppc64__)
+#  define __NR_mbind 259
+# elif defined(__sparc__)
+#  define __NR_mbind 353
+# endif
+#endif
+/* mbind(2) wrapper; fails with ENOSYS when the syscall number is unknown
+ * on this architecture or syscall() is unavailable. */
+static __hwloc_inline long hwloc_mbind(void *addr __hwloc_attribute_unused,
+				       unsigned long len __hwloc_attribute_unused,
+				       int mode __hwloc_attribute_unused,
+				       const unsigned long *nodemask __hwloc_attribute_unused,
+				       unsigned long maxnode __hwloc_attribute_unused,
+				       unsigned flags __hwloc_attribute_unused)
+{
+#if (defined __NR_mbind) && (defined HWLOC_HAVE_SYSCALL)
+  return syscall(__NR_mbind, (long) addr, len, mode, (long)nodemask, maxnode, flags);
+#else
+  errno = ENOSYS;
+  return -1;
+#endif
+}
+
+#ifndef __NR_set_mempolicy
+# ifdef __i386__
+#  define __NR_set_mempolicy 276
+# elif defined(__x86_64__)
+#  define __NR_set_mempolicy 239
+# elif defined(__ia64__)
+#  define __NR_set_mempolicy 1261
+# elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__powerpc64__) || defined(__ppc64__)
+#  define __NR_set_mempolicy 261
+# elif defined(__sparc__)
+#  define __NR_set_mempolicy 305
+# endif
+#endif
+/* set_mempolicy(2) wrapper; ENOSYS fallback as for hwloc_mbind(). */
+static __hwloc_inline long hwloc_set_mempolicy(int mode __hwloc_attribute_unused,
+					       const unsigned long *nodemask __hwloc_attribute_unused,
+					       unsigned long maxnode __hwloc_attribute_unused)
+{
+#if (defined __NR_set_mempolicy) && (defined HWLOC_HAVE_SYSCALL)
+  return syscall(__NR_set_mempolicy, mode, nodemask, maxnode);
+#else
+  errno = ENOSYS;
+  return -1;
+#endif
+}
+
+#ifndef __NR_get_mempolicy
+# ifdef __i386__
+#  define __NR_get_mempolicy 275
+# elif defined(__x86_64__)
+#  define __NR_get_mempolicy 238
+# elif defined(__ia64__)
+#  define __NR_get_mempolicy 1260
+# elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__powerpc64__) || defined(__ppc64__)
+#  define __NR_get_mempolicy 260
+# elif defined(__sparc__)
+#  define __NR_get_mempolicy 304
+# endif
+#endif
+/* get_mempolicy(2) wrapper; ENOSYS fallback as for hwloc_mbind(). */
+static __hwloc_inline long hwloc_get_mempolicy(int *mode __hwloc_attribute_unused,
+					       const unsigned long *nodemask __hwloc_attribute_unused,
+					       unsigned long maxnode __hwloc_attribute_unused,
+					       void *addr __hwloc_attribute_unused,
+					       int flags __hwloc_attribute_unused)
+{
+#if (defined __NR_get_mempolicy) && (defined HWLOC_HAVE_SYSCALL)
+  return syscall(__NR_get_mempolicy, mode, nodemask, maxnode, addr, flags);
+#else
+  errno = ENOSYS;
+  return -1;
+#endif
+}
+
+#ifndef __NR_migrate_pages
+# ifdef __i386__
+#  define __NR_migrate_pages 204
+# elif defined(__x86_64__)
+#  define __NR_migrate_pages 256
+# elif defined(__ia64__)
+#  define __NR_migrate_pages 1280
+# elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__powerpc64__) || defined(__ppc64__)
+#  define __NR_migrate_pages 258
+# elif defined(__sparc__)
+#  define __NR_migrate_pages 302
+# endif
+#endif
+/* migrate_pages(2) wrapper; ENOSYS fallback as for hwloc_mbind(). */
+static __hwloc_inline long hwloc_migrate_pages(int pid __hwloc_attribute_unused,
+					       unsigned long maxnode __hwloc_attribute_unused,
+					       const unsigned long *oldnodes __hwloc_attribute_unused,
+					       const unsigned long *newnodes __hwloc_attribute_unused)
+{
+#if (defined __NR_migrate_pages) && (defined HWLOC_HAVE_SYSCALL)
+  return syscall(__NR_migrate_pages, pid, maxnode, oldnodes, newnodes);
+#else
+  errno = ENOSYS;
+  return -1;
+#endif
+}
+
+#ifndef __NR_move_pages
+# ifdef __i386__
+#  define __NR_move_pages 317
+# elif defined(__x86_64__)
+#  define __NR_move_pages 279
+# elif defined(__ia64__)
+#  define __NR_move_pages 1276
+# elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__powerpc64__) || defined(__ppc64__)
+#  define __NR_move_pages 301
+# elif defined(__sparc__)
+#  define __NR_move_pages 307
+# endif
+#endif
+/* move_pages(2) wrapper; ENOSYS fallback as for hwloc_mbind(). */
+static __hwloc_inline long hwloc_move_pages(int pid __hwloc_attribute_unused,
+					    unsigned long count __hwloc_attribute_unused,
+					    void **pages __hwloc_attribute_unused,
+					    const int *nodes __hwloc_attribute_unused,
+					    int *status __hwloc_attribute_unused,
+					    int flags __hwloc_attribute_unused)
+{
+#if (defined __NR_move_pages) && (defined HWLOC_HAVE_SYSCALL)
+  return syscall(__NR_move_pages, pid, count, pages, nodes, status, flags);
+#else
+  errno = ENOSYS;
+  return -1;
+#endif
+}
+
+
+/* Added for ntohl() */
+#include <arpa/inet.h>
+
+#ifdef HAVE_OPENAT
+/* Use our own filesystem functions if we have openat */
+
+/* Validate fsroot_fd and turn an absolute path into one relative to it
+ * by stripping leading slashes. Returns NULL (errno=EBADF) on a bad fd. */
+static const char *
+hwloc_checkat(const char *path, int fsroot_fd)
+{
+  const char *relative_path;
+  if (fsroot_fd < 0) {
+    errno = EBADF;
+    return NULL;
+  }
+
+  /* Skip leading slashes.  */
+  for (relative_path = path; *relative_path == '/'; relative_path++);
+
+  return relative_path;
+}
+
+/* open(2) relative to the topology fsroot; read-only. */
+static int
+hwloc_openat(const char *path, int fsroot_fd)
+{
+  const char *relative_path;
+
+  relative_path = hwloc_checkat(path, fsroot_fd);
+  if (!relative_path)
+    return -1;
+
+  return openat (fsroot_fd, relative_path, O_RDONLY);
+}
+
+/* fopen(3) relative to the topology fsroot; only mode "r" is supported. */
+static FILE *
+hwloc_fopenat(const char *path, const char *mode, int fsroot_fd)
+{
+  int fd;
+
+  if (strcmp(mode, "r")) {
+    errno = ENOTSUP;
+    return NULL;
+  }
+
+  fd = hwloc_openat (path, fsroot_fd);
+  if (fd == -1)
+    return NULL;
+
+  return fdopen(fd, mode);
+}
+
+/* access(2) relative to the topology fsroot. */
+static int
+hwloc_accessat(const char *path, int mode, int fsroot_fd)
+{
+  const char *relative_path;
+
+  relative_path = hwloc_checkat(path, fsroot_fd);
+  if (!relative_path)
+    return -1;
+
+  return faccessat(fsroot_fd, relative_path, mode, 0);
+}
+
+/* stat-family call relative to the topology fsroot; flags are passed
+ * through to fstatat (e.g. AT_SYMLINK_NOFOLLOW for lstat semantics). */
+static int
+hwloc_fstatat(const char *path, struct stat *st, int flags, int fsroot_fd)
+{
+  const char *relative_path;
+
+  relative_path = hwloc_checkat(path, fsroot_fd);
+  if (!relative_path)
+    return -1;
+
+  return fstatat(fsroot_fd, relative_path, st, flags);
+}
+
+/* opendir(3) relative to the topology fsroot (openat + fdopendir). */
+static DIR*
+hwloc_opendirat(const char *path, int fsroot_fd)
+{
+  int dir_fd;
+  const char *relative_path;
+
+  relative_path = hwloc_checkat(path, fsroot_fd);
+  if (!relative_path)
+    return NULL;
+
+  dir_fd = openat(fsroot_fd, relative_path, O_RDONLY | O_DIRECTORY);
+  if (dir_fd < 0)
+    return NULL;
+
+  return fdopendir(dir_fd);
+}
+
+/* readlink(2) relative to the topology fsroot. */
+static int
+hwloc_readlinkat(const char *path, char *buf, size_t buflen, int fsroot_fd)
+{
+  const char *relative_path;
+
+  relative_path = hwloc_checkat(path, fsroot_fd);
+  if (!relative_path)
+    return -1;
+
+  return readlinkat(fsroot_fd, relative_path, buf, buflen);
+}
+
+#endif /* HAVE_OPENAT */
+
+/* Static inline version of fopen so that we can use openat if we have
+   it, but still preserve compiler parameter checking */
+static __hwloc_inline int
+hwloc_open(const char *p, int d __hwloc_attribute_unused)
+{
+#ifdef HAVE_OPENAT
+    return hwloc_openat(p, d);
+#else
+    return open(p, O_RDONLY);
+#endif
+}
+
+/* fopen() honoring the fsroot fd when openat is available. */
+static __hwloc_inline FILE *
+hwloc_fopen(const char *p, const char *m, int d __hwloc_attribute_unused)
+{
+#ifdef HAVE_OPENAT
+    return hwloc_fopenat(p, m, d);
+#else
+    return fopen(p, m);
+#endif
+}
+
+/* Static inline version of access so that we can use openat if we have
+   it, but still preserve compiler parameter checking */
+static __hwloc_inline int
+hwloc_access(const char *p, int m, int d __hwloc_attribute_unused)
+{
+#ifdef HAVE_OPENAT
+    return hwloc_accessat(p, m, d);
+#else
+    return access(p, m);
+#endif
+}
+
+/* stat() honoring the fsroot fd when openat is available. */
+static __hwloc_inline int
+hwloc_stat(const char *p, struct stat *st, int d __hwloc_attribute_unused)
+{
+#ifdef HAVE_OPENAT
+    return hwloc_fstatat(p, st, 0, d);
+#else
+    return stat(p, st);
+#endif
+}
+
+/* lstat() honoring the fsroot fd when openat is available. */
+static __hwloc_inline int
+hwloc_lstat(const char *p, struct stat *st, int d __hwloc_attribute_unused)
+{
+#ifdef HAVE_OPENAT
+    return hwloc_fstatat(p, st, AT_SYMLINK_NOFOLLOW, d);
+#else
+    return lstat(p, st);
+#endif
+}
+
+/* Static inline version of opendir so that we can use openat if we have
+   it, but still preserve compiler parameter checking */
+static __hwloc_inline DIR *
+hwloc_opendir(const char *p, int d __hwloc_attribute_unused)
+{
+#ifdef HAVE_OPENAT
+    return hwloc_opendirat(p, d);
+#else
+    return opendir(p);
+#endif
+}
+
+/* readlink() honoring the fsroot fd when openat is available. */
+static __hwloc_inline int
+hwloc_readlink(const char *p, char *l, size_t ll, int d __hwloc_attribute_unused)
+{
+#ifdef HAVE_OPENAT
+  return hwloc_readlinkat(p, l, ll, d);
+#else
+  return readlink(p, l, ll);
+#endif
+}
+
+
+/*****************************************
+ ******* Helpers for reading files *******
+ *****************************************/
+
+/* Read up to length-1 bytes from path (relative to fsroot_fd) into string
+ * and NUL-terminate it. Returns 0 on success, -1 on open/read failure or
+ * empty file. A single read() is performed, so longer content is truncated. */
+static __hwloc_inline int
+hwloc_read_path_by_length(const char *path, char *string, size_t length, int fsroot_fd)
+{
+  int fd, ret;
+
+  fd = hwloc_open(path, fsroot_fd);
+  if (fd < 0)
+    return -1;
+
+  ret = read(fd, string, length-1); /* read -1 to put the ending \0 */
+  close(fd);
+
+  if (ret <= 0)
+    return -1;
+
+  string[ret] = 0;
+
+  return 0;
+}
+
+/* Read a small sysfs/procfs file as a decimal int.
+ * NOTE(review): the 11-byte buffer holds 10 digits + NUL; the textual form
+ * of INT_MIN (11 chars) would be truncated — acceptable for sysfs values. */
+static __hwloc_inline int
+hwloc_read_path_as_int(const char *path, int *value, int fsroot_fd)
+{
+  char string[11];
+  if (hwloc_read_path_by_length(path, string, sizeof(string), fsroot_fd) < 0)
+    return -1;
+  *value = atoi(string);
+  return 0;
+}
+
+/* Read a small sysfs/procfs file as a decimal unsigned. */
+static __hwloc_inline int
+hwloc_read_path_as_uint(const char *path, unsigned *value, int fsroot_fd)
+{
+  char string[11];
+  if (hwloc_read_path_by_length(path, string, sizeof(string), fsroot_fd) < 0)
+    return -1;
+  *value = (unsigned) strtoul(string, NULL, 10);
+  return 0;
+}
+
+/* Read everything from fd and save it into a newly allocated buffer
+ * returned in bufferp. Use sizep as a default buffer size, and returned
+ * the actually needed size in sizep.
+ * Returns 0 on success (buffer is NUL-terminated, caller frees it),
+ * -1 on allocation or read failure.
+ */
+static __hwloc_inline int
+hwloc__read_fd(int fd, char **bufferp, size_t *sizep)
+{
+  char *buffer;
+  size_t toread, filesize, totalread;
+  ssize_t ret;
+
+  toread = filesize = *sizep;
+
+  /* Alloc and read +1 so that we get EOF on 2^n without reading once more */
+  buffer = malloc(filesize+1);
+  if (!buffer)
+    return -1;
+
+  ret = read(fd, buffer, toread+1);
+  if (ret < 0) {
+    free(buffer);
+    return -1;
+  }
+
+  totalread = (size_t) ret;
+
+  if (totalread < toread + 1)
+    /* Normal case, a single read got EOF */
+    goto done;
+
+  /* Unexpected case, must extend the buffer and read again.
+   * Only occurs on first invocation and if the kernel ever uses multiple page for a single mask.
+   */
+  do {
+    char *tmp;
+
+    /* double the buffer and append the next chunk right after what we have */
+    toread = filesize;
+    filesize *= 2;
+
+    tmp = realloc(buffer, filesize+1);
+    if (!tmp) {
+      free(buffer);
+      return -1;
+    }
+    buffer = tmp;
+
+    ret = read(fd, buffer+toread+1, toread);
+    if (ret < 0) {
+      free(buffer);
+      return -1;
+    }
+
+    totalread += ret;
+  } while ((size_t) ret == toread); /* a short read means we finally hit EOF */
+
+ done:
+  buffer[totalread] = '\0';
+  *bufferp = buffer;
+  *sizep = filesize;
+  return 0;
+}
+
+/* kernel cpumaps are composed of an array of 32bits cpumasks */
+#define KERNEL_CPU_MASK_BITS 32
+#define KERNEL_CPU_MAP_LEN (KERNEL_CPU_MASK_BITS/4+2)
+
+/* Parse a kernel cpumap file (comma-separated 32-bit hex words, most
+ * significant word first) from fd into set. Returns 0 on success, -1 on
+ * read/allocation failure. The static caches below are best-effort and
+ * only updated with final values to tolerate concurrent callers. */
+static __hwloc_inline int
+hwloc__read_fd_as_cpumask(int fd, hwloc_bitmap_t set)
+{
+  static size_t _filesize = 0; /* will be dynamically initialized to hwloc_get_pagesize(), and increased later if needed */
+  size_t filesize;
+  unsigned long *maps;
+  unsigned long map;
+  int nr_maps = 0;
+  static int _nr_maps_allocated = 8; /* Only compute the power-of-two above the kernel cpumask size once.
+				      * Actually, it may increase multiple times if first read cpumaps start with zeroes.
+				      */
+  int nr_maps_allocated = _nr_maps_allocated;
+  char *buffer, *tmpbuf;
+  int i;
+
+  /* Kernel sysfs files are usually at most one page. 4kB may contain 455 32-bit
+   * masks (followed by comma), enough for 14k PUs. So allocate a page by default for now.
+   *
+   * If we ever need a larger buffer, we'll realloc() the buffer during the first
+   * invocation of this function so that others directly allocate the right size
+   * (all cpumask files have the exact same size).
+   */
+  filesize = _filesize;
+  if (!filesize)
+    filesize = hwloc_getpagesize();
+  if (hwloc__read_fd(fd, &buffer, &filesize) < 0)
+    return -1;
+  /* Only update the static value with the final one,
+   * to avoid sharing intermediate values that we modify,
+   * in case there's ever multiple concurrent calls.
+   */
+  _filesize = filesize;
+
+  maps = malloc(nr_maps_allocated * sizeof(*maps));
+  if (!maps) {
+    free(buffer);
+    return -1;
+  }
+
+  /* reset to zero first */
+  hwloc_bitmap_zero(set);
+
+  /* parse the whole mask */
+  tmpbuf = buffer;
+  while (sscanf(tmpbuf, "%lx", &map) == 1) {
+    /* read one kernel cpu mask and the ending comma */
+    if (nr_maps == nr_maps_allocated) {
+      unsigned long *tmp = realloc(maps, 2*nr_maps_allocated * sizeof(*maps));
+      if (!tmp) {
+	free(buffer);
+	free(maps);
+	return -1;
+      }
+      maps = tmp;
+      nr_maps_allocated *= 2;
+    }
+
+    tmpbuf = strchr(tmpbuf, ',');
+    if (!tmpbuf) {
+      maps[nr_maps++] = map;
+      break;
+    } else
+      tmpbuf++;
+
+    if (!map && !nr_maps)
+      /* ignore the first map if it's empty */
+      continue;
+
+    maps[nr_maps++] = map;
+  }
+
+  free(buffer);
+
+  /* convert into a set: maps[] is most-significant-first, so index from the end */
+#if KERNEL_CPU_MASK_BITS == HWLOC_BITS_PER_LONG
+  for(i=0; i<nr_maps; i++)
+    hwloc_bitmap_set_ith_ulong(set, i, maps[nr_maps-1-i]);
+#else
+  /* ulongs are wider than kernel masks: combine two 32-bit words per ulong */
+  for(i=0; i<(nr_maps+1)/2; i++) {
+    unsigned long mask;
+    mask = maps[nr_maps-2*i-1];
+    if (2*i+1<nr_maps)
+      mask |= maps[nr_maps-2*i-2] << KERNEL_CPU_MASK_BITS;
+    hwloc_bitmap_set_ith_ulong(set, i, mask);
+  }
+#endif
+
+  free(maps);
+
+  /* Only update the static value with the final one,
+   * to avoid sharing intermediate values that we modify,
+   * in case there's ever multiple concurrent calls.
+   */
+  if (nr_maps_allocated > _nr_maps_allocated)
+    _nr_maps_allocated = nr_maps_allocated;
+  return 0;
+}
+
+/* Read the cpumask file at maskpath (relative to fsroot_fd) into set. */
+static __hwloc_inline int
+hwloc__read_path_as_cpumask(const char *maskpath, hwloc_bitmap_t set, int fsroot_fd)
+{
+  int fd, err;
+  fd = hwloc_open(maskpath, fsroot_fd);
+  if (fd < 0)
+    return -1;
+  err = hwloc__read_fd_as_cpumask(fd, set);
+  close(fd);
+  return err;
+}
+
+/* Same as above but allocates the bitmap; returns NULL on any failure. */
+static __hwloc_inline hwloc_bitmap_t
+hwloc__alloc_read_path_as_cpumask(const char *maskpath, int fsroot_fd)
+{
+  hwloc_bitmap_t set;
+  int err;
+  set = hwloc_bitmap_alloc();
+  if (!set)
+    return NULL;
+  err = hwloc__read_path_as_cpumask(maskpath, set, fsroot_fd);
+  if (err < 0) {
+    hwloc_bitmap_free(set);
+    return NULL;
+  } else
+    return set;
+}
+
+/* Public variant: read a cpumask file from the real filesystem root
+ * (plain open(2), no fsroot redirection). */
+int
+hwloc_linux_read_path_as_cpumask(const char *maskpath, hwloc_bitmap_t set)
+{
+  int fd, err;
+  fd = open(maskpath, O_RDONLY);
+  if (fd < 0)
+    return -1;
+  err = hwloc__read_fd_as_cpumask(fd, set);
+  close(fd);
+  return err;
+}
+
+/* set must be full on input */
+/* Parse a kernel cpulist file ("0-3,8,10-11" style) from fd.
+ * Works by CLEARING the ranges between/after listed segments, which is
+ * why the caller must pass a full bitmap. Returns 0 on success, -1 on
+ * read failure. */
+static __hwloc_inline int
+hwloc__read_fd_as_cpulist(int fd, hwloc_bitmap_t set)
+{
+  /* Kernel sysfs files are usually at most one page.
+   * But cpulists can be of very different sizes depending on the fragmentation,
+   * so don't bother remember the actual read size between invocations.
+   * We don't have many invocations anyway.
+   */
+  size_t filesize = hwloc_getpagesize();
+  char *buffer, *current, *comma, *tmp;
+  int prevlast, nextfirst, nextlast; /* beginning/end of enabled-segments */
+
+  if (hwloc__read_fd(fd, &buffer, &filesize) < 0)
+    return -1;
+
+  current = buffer;
+  prevlast = -1;
+
+  while (1) {
+    /* save a pointer to the next comma and erase it to simplify things */
+    comma = strchr(current, ',');
+    if (comma)
+      *comma = '\0';
+
+    /* find current enabled-segment bounds ("a-b" or a single "a") */
+    nextfirst = strtoul(current, &tmp, 0);
+    if (*tmp == '-')
+      nextlast = strtoul(tmp+1, NULL, 0);
+    else
+      nextlast = nextfirst;
+    /* clear the gap between the previous segment and this one */
+    if (prevlast+1 <= nextfirst-1)
+      hwloc_bitmap_clr_range(set, prevlast+1, nextfirst-1);
+
+    /* switch to next enabled-segment */
+    prevlast = nextlast;
+    if (!comma)
+      break;
+    current = comma+1;
+  }
+
+  /* clear everything after the last segment (to infinity) */
+  hwloc_bitmap_clr_range(set, prevlast+1, -1);
+  free(buffer);
+  return 0;
+}
+
+
+/*****************************
+ ******* CpuBind Hooks *******
+ *****************************/
+
+/* Bind thread tid to the CPUs in hwloc_set via sched_setaffinity.
+ * Three paths depending on libc support: dynamic cpu_set_t (CPU_ALLOC),
+ * fixed cpu_set_t, or a raw unsigned-long mask through syscall().
+ * Returns the sched_setaffinity result (0 on success, -1 + errno). */
+int
+hwloc_linux_set_tid_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, pid_t tid __hwloc_attribute_unused, hwloc_const_bitmap_t hwloc_set __hwloc_attribute_unused)
+{
+  /* The resulting binding is always strict */
+
+#if defined(HWLOC_HAVE_CPU_SET_S) && !defined(HWLOC_HAVE_OLD_SCHED_SETAFFINITY)
+  cpu_set_t *plinux_set;
+  unsigned cpu;
+  int last;
+  size_t setsize;
+  int err;
+
+  /* an empty set cannot be bound to */
+  last = hwloc_bitmap_last(hwloc_set);
+  if (last == -1) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  setsize = CPU_ALLOC_SIZE(last+1);
+  plinux_set = CPU_ALLOC(last+1);
+  /* NOTE(review): CPU_ALLOC result is not NULL-checked before CPU_ZERO_S */
+
+  CPU_ZERO_S(setsize, plinux_set);
+  hwloc_bitmap_foreach_begin(cpu, hwloc_set)
+    CPU_SET_S(cpu, setsize, plinux_set);
+  hwloc_bitmap_foreach_end();
+
+  err = sched_setaffinity(tid, setsize, plinux_set);
+
+  CPU_FREE(plinux_set);
+  return err;
+#elif defined(HWLOC_HAVE_CPU_SET)
+  cpu_set_t linux_set;
+  unsigned cpu;
+
+  CPU_ZERO(&linux_set);
+  hwloc_bitmap_foreach_begin(cpu, hwloc_set)
+    CPU_SET(cpu, &linux_set);
+  hwloc_bitmap_foreach_end();
+
+#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
+  return sched_setaffinity(tid, &linux_set);
+#else /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
+  return sched_setaffinity(tid, sizeof(linux_set), &linux_set);
+#endif /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
+#elif defined(HWLOC_HAVE_SYSCALL)
+  /* no cpu_set_t at all: CPUs above the first ulong are silently dropped */
+  unsigned long mask = hwloc_bitmap_to_ulong(hwloc_set);
+
+#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
+  return sched_setaffinity(tid, (void*) &mask);
+#else /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
+  return sched_setaffinity(tid, sizeof(mask), (void*) &mask);
+#endif /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
+#else /* !SYSCALL */
+  errno = ENOSYS;
+  return -1;
+#endif /* !SYSCALL */
+}
+
+#if defined(HWLOC_HAVE_CPU_SET_S) && !defined(HWLOC_HAVE_OLD_SCHED_SETAFFINITY)
+/*
+ * On some kernels, sched_getaffinity requires the output size to be larger
+ * than the kernel cpu_set size (defined by CONFIG_NR_CPUS).
+ * Try sched_affinity on ourself until we find a nr_cpus value that makes
+ * the kernel happy.
+ * Result is cached in a static; computed once per process.
+ */
+static int
+hwloc_linux_find_kernel_nr_cpus(hwloc_topology_t topology)
+{
+  static int _nr_cpus = -1;
+  int nr_cpus = _nr_cpus;
+  int fd;
+
+  if (nr_cpus != -1)
+    /* already computed */
+    return nr_cpus;
+
+  if (topology->levels[0][0]->complete_cpuset)
+    /* start with a nr_cpus that may contain the whole topology */
+    nr_cpus = hwloc_bitmap_last(topology->levels[0][0]->complete_cpuset) + 1;
+  if (nr_cpus <= 0)
+    /* start from scratch, the topology isn't ready yet (complete_cpuset is missing (-1) or empty (0))*/
+    nr_cpus = 1;
+
+  /* refine the starting guess with the kernel's "possible" CPU list */
+  fd = open("/sys/devices/system/cpu/possible", O_RDONLY); /* binding only supported in real fsroot, no need for data->root_fd */
+  if (fd >= 0) {
+    hwloc_bitmap_t possible_bitmap = hwloc_bitmap_alloc_full();
+    if (hwloc__read_fd_as_cpulist(fd, possible_bitmap) == 0) {
+      int max_possible = hwloc_bitmap_last(possible_bitmap);
+      hwloc_debug_bitmap("possible CPUs are %s\n", possible_bitmap);
+
+      if (nr_cpus < max_possible + 1)
+        nr_cpus = max_possible + 1;
+    }
+    close(fd);
+    hwloc_bitmap_free(possible_bitmap);
+  }
+
+  /* double nr_cpus until sched_getaffinity accepts the set size.
+   * NOTE(review): CPU_ALLOC result is not NULL-checked. */
+  while (1) {
+    cpu_set_t *set = CPU_ALLOC(nr_cpus);
+    size_t setsize = CPU_ALLOC_SIZE(nr_cpus);
+    int err = sched_getaffinity(0, setsize, set); /* always works, unless setsize is too small */
+    CPU_FREE(set);
+    nr_cpus = setsize * 8; /* that's the value that was actually tested */
+    if (!err)
+      /* Found it. Only update the static value with the final one,
+       * to avoid sharing intermediate values that we modify,
+       * in case there's ever multiple concurrent calls.
+       */
+      return _nr_cpus = nr_cpus;
+    nr_cpus *= 2;
+  }
+}
+#endif
+
+/* Retrieve the CPU binding of thread tid into hwloc_set via
+ * sched_getaffinity. Mirrors the three implementation paths of
+ * hwloc_linux_set_tid_cpubind. Returns 0 on success, -1 on failure. */
+int
+hwloc_linux_get_tid_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, pid_t tid __hwloc_attribute_unused, hwloc_bitmap_t hwloc_set __hwloc_attribute_unused)
+{
+  int err __hwloc_attribute_unused;
+
+#if defined(HWLOC_HAVE_CPU_SET_S) && !defined(HWLOC_HAVE_OLD_SCHED_SETAFFINITY)
+  cpu_set_t *plinux_set;
+  unsigned cpu;
+  int last;
+  size_t setsize;
+  int kernel_nr_cpus;
+
+  /* find the kernel nr_cpus so as to use a large enough cpu_set size */
+  kernel_nr_cpus = hwloc_linux_find_kernel_nr_cpus(topology);
+  setsize = CPU_ALLOC_SIZE(kernel_nr_cpus);
+  plinux_set = CPU_ALLOC(kernel_nr_cpus);
+
+  err = sched_getaffinity(tid, setsize, plinux_set);
+
+  if (err < 0) {
+    CPU_FREE(plinux_set);
+    return -1;
+  }
+
+  /* only scan up to the last CPU the topology knows about */
+  last = -1;
+  if (topology->levels[0][0]->complete_cpuset)
+    last = hwloc_bitmap_last(topology->levels[0][0]->complete_cpuset);
+  if (last == -1)
+    /* round the maximal support number, the topology isn't ready yet (complete_cpuset is missing or empty)*/
+    last = kernel_nr_cpus-1;
+
+  hwloc_bitmap_zero(hwloc_set);
+  for(cpu=0; cpu<=(unsigned) last; cpu++)
+    if (CPU_ISSET_S(cpu, setsize, plinux_set))
+      hwloc_bitmap_set(hwloc_set, cpu);
+
+  CPU_FREE(plinux_set);
+#elif defined(HWLOC_HAVE_CPU_SET)
+  cpu_set_t linux_set;
+  unsigned cpu;
+
+#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
+  err = sched_getaffinity(tid, &linux_set);
+#else /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
+  err = sched_getaffinity(tid, sizeof(linux_set), &linux_set);
+#endif /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
+  if (err < 0)
+    return -1;
+
+  hwloc_bitmap_zero(hwloc_set);
+  for(cpu=0; cpu<CPU_SETSIZE; cpu++)
+    if (CPU_ISSET(cpu, &linux_set))
+      hwloc_bitmap_set(hwloc_set, cpu);
+#elif defined(HWLOC_HAVE_SYSCALL)
+  /* raw-mask fallback: only the first ulong worth of CPUs is visible */
+  unsigned long mask;
+
+#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
+  err = sched_getaffinity(tid, (void*) &mask);
+#else /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
+  err = sched_getaffinity(tid, sizeof(mask), (void*) &mask);
+#endif /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
+  if (err < 0)
+    return -1;
+
+  hwloc_bitmap_from_ulong(hwloc_set, mask);
+#else /* !SYSCALL */
+  errno = ENOSYS;
+  return -1;
+#endif /* !SYSCALL */
+
+  return 0;
+}
+
+/* Get the array of tids of a process from the task directory in /proc.
+ * On success returns 0 and hands a malloc'ed array to the caller via tidsp
+ * (caller frees); nr_tidsp receives the count. Returns -1/ENOMEM on
+ * allocation failure. The directory is rewound so the function can be
+ * called repeatedly on the same DIR handle. */
+static int
+hwloc_linux_get_proc_tids(DIR *taskdir, unsigned *nr_tidsp, pid_t ** tidsp)
+{
+  struct dirent *dirent;
+  unsigned nr_tids = 0;
+  unsigned max_tids = 32;
+  pid_t *tids;
+  struct stat sb;
+
+  /* take the number of links as a good estimate for the number of tids */
+  if (fstat(dirfd(taskdir), &sb) == 0)
+    max_tids = sb.st_nlink;
+
+  tids = malloc(max_tids*sizeof(pid_t));
+  if (!tids) {
+    errno = ENOMEM;
+    return -1;
+  }
+
+  rewinddir(taskdir);
+
+  while ((dirent = readdir(taskdir)) != NULL) {
+    /* grow the array if the estimate was too small */
+    if (nr_tids == max_tids) {
+      pid_t *newtids;
+      max_tids += 8;
+      newtids = realloc(tids, max_tids*sizeof(pid_t));
+      if (!newtids) {
+        free(tids);
+        errno = ENOMEM;
+        return -1;
+      }
+      tids = newtids;
+    }
+    if (!strcmp(dirent->d_name, ".") || !strcmp(dirent->d_name, ".."))
+      continue;
+    tids[nr_tids++] = atoi(dirent->d_name);
+  }
+
+  *nr_tidsp = nr_tids;
+  *tidsp = tids;
+  return 0;
+}
+
+/* Per-tid callbacks */
+typedef int (*hwloc_linux_foreach_proc_tid_cb_t)(hwloc_topology_t topology, pid_t tid, void *data, int idx);
+
+/* Apply cb to every thread of process pid (pid==0 means /proc/self).
+ * Because the thread list can change while we iterate, the list is re-read
+ * after each pass and the pass is retried (up to 10 times) whenever the
+ * list changed or only some threads failed. Returns 0 on success, -1 with
+ * errno set (EAGAIN after too many retries, or the last callback errno
+ * when all threads failed). */
+static int
+hwloc_linux_foreach_proc_tid(hwloc_topology_t topology,
+			     pid_t pid, hwloc_linux_foreach_proc_tid_cb_t cb,
+			     void *data)
+{
+  char taskdir_path[128];
+  DIR *taskdir;
+  pid_t *tids, *newtids;
+  unsigned i, nr, newnr, failed = 0, failed_errno = 0;
+  unsigned retrynr = 0;
+  int err;
+
+  if (pid)
+    snprintf(taskdir_path, sizeof(taskdir_path), "/proc/%u/task", (unsigned) pid);
+  else
+    snprintf(taskdir_path, sizeof(taskdir_path), "/proc/self/task");
+
+  taskdir = opendir(taskdir_path);
+  if (!taskdir) {
+    if (errno == ENOENT)
+      errno = EINVAL;
+    err = -1;
+    goto out;
+  }
+
+  /* read the current list of threads */
+  err = hwloc_linux_get_proc_tids(taskdir, &nr, &tids);
+  if (err < 0)
+    goto out_with_dir;
+
+ retry:
+  /* apply the callback to all threads */
+  failed=0;
+  for(i=0; i<nr; i++) {
+    err = cb(topology, tids[i], data, i);
+    if (err < 0) {
+      failed++;
+      failed_errno = errno;
+    }
+  }
+
+  /* re-read the list of threads */
+  err = hwloc_linux_get_proc_tids(taskdir, &newnr, &newtids);
+  if (err < 0)
+    goto out_with_tids;
+  /* retry if the list changed in the meantime, or we failed for *some* threads only.
+   * if we're really unlucky, all threads changed but we got the same set of tids. no way to support this.
+   */
+  if (newnr != nr || memcmp(newtids, tids, nr*sizeof(pid_t)) || (failed && failed != nr)) {
+    free(tids);
+    tids = newtids;
+    nr = newnr;
+    if (++retrynr > 10) {
+      /* we tried 10 times, it didn't work, the application is probably creating/destroying many threads, stop trying */
+      errno = EAGAIN;
+      err = -1;
+      goto out_with_tids;
+    }
+    goto retry;
+  } else {
+    free(newtids);
+  }
+
+  /* if all threads failed, return the last errno. */
+  if (failed) {
+    err = -1;
+    errno = failed_errno;
+    goto out_with_tids;
+  }
+
+  err = 0;
+ out_with_tids:
+  free(tids);
+ out_with_dir:
+  closedir(taskdir);
+ out:
+  return err;
+}
+
+/* Per-tid proc_set_cpubind callback and caller.
+ * Callback data is a hwloc_bitmap_t. */
+static int
+hwloc_linux_foreach_proc_tid_set_cpubind_cb(hwloc_topology_t topology, pid_t tid, void *data, int idx __hwloc_attribute_unused)
+{
+  return hwloc_linux_set_tid_cpubind(topology, tid, (hwloc_bitmap_t) data);
+}
+
+/* Bind every thread of process pid to hwloc_set (flags are unused here;
+ * strictness is inherent to sched_setaffinity). */
+static int
+hwloc_linux_set_pid_cpubind(hwloc_topology_t topology, pid_t pid, hwloc_const_bitmap_t hwloc_set, int flags __hwloc_attribute_unused)
+{
+  return hwloc_linux_foreach_proc_tid(topology, pid,
+				      hwloc_linux_foreach_proc_tid_set_cpubind_cb,
+				      (void*) hwloc_set);
+}
+
+/* Per-tid proc_get_cpubind callback data, callback function and caller */
+struct hwloc_linux_foreach_proc_tid_get_cpubind_cb_data_s {
+  hwloc_bitmap_t cpuset; /* output: aggregated binding of the threads seen so far */
+  hwloc_bitmap_t tidset; /* scratch: binding of the thread currently visited */
+  int flags;             /* HWLOC_CPUBIND_* flags; STRICT changes how bindings are combined */
+};
+
+/* Per-tid callback for proc_get_cpubind: fold each thread's binding into
+ * the result (union by default, equality check when STRICT). */
+static int
+hwloc_linux_foreach_proc_tid_get_cpubind_cb(hwloc_topology_t topology, pid_t tid, void *_data, int idx)
+{
+  struct hwloc_linux_foreach_proc_tid_get_cpubind_cb_data_s *data = _data;
+
+  if (hwloc_linux_get_tid_cpubind(topology, tid, data->tidset))
+    return -1;
+
+  if (!idx) {
+    /* first thread: initialize the result with its binding
+     * (equivalent to zeroing then copying/or'ing) */
+    hwloc_bitmap_copy(data->cpuset, data->tidset);
+    return 0;
+  }
+
+  if (data->flags & HWLOC_CPUBIND_STRICT) {
+    /* STRICT requires every thread to have the exact same binding */
+    if (hwloc_bitmap_isequal(data->cpuset, data->tidset))
+      return 0;
+    errno = EXDEV;
+    return -1;
+  }
+
+  /* non-STRICT: accumulate the union of all thread bindings */
+  hwloc_bitmap_or(data->cpuset, data->cpuset, data->tidset);
+  return 0;
+}
+
+/* Get the process-wide binding of @pid by aggregating (or comparing,
+ * when STRICT) the bindings of all its threads. */
+static int
+hwloc_linux_get_pid_cpubind(hwloc_topology_t topology, pid_t pid, hwloc_bitmap_t hwloc_set, int flags)
+{
+  struct hwloc_linux_foreach_proc_tid_get_cpubind_cb_data_s data;
+  hwloc_bitmap_t tidset = hwloc_bitmap_alloc();
+  int ret;
+
+  /* hwloc_bitmap_alloc() may fail; without this check the callback
+   * would operate on a NULL scratch bitmap */
+  if (!tidset) {
+    errno = ENOMEM;
+    return -1;
+  }
+
+  data.cpuset = hwloc_set;
+  data.tidset = tidset;
+  data.flags = flags;
+  ret = hwloc_linux_foreach_proc_tid(topology, pid,
+				     hwloc_linux_foreach_proc_tid_get_cpubind_cb,
+				     (void*) &data);
+  hwloc_bitmap_free(tidset);
+  return ret;
+}
+
+/* set_proc_cpubind hook: pid 0 means the topology's target process;
+ * HWLOC_CPUBIND_THREAD restricts the operation to that single tid. */
+static int
+hwloc_linux_set_proc_cpubind(hwloc_topology_t topology, pid_t pid, hwloc_const_bitmap_t hwloc_set, int flags)
+{
+  pid_t target = pid ? pid : topology->pid;
+  return (flags & HWLOC_CPUBIND_THREAD)
+    ? hwloc_linux_set_tid_cpubind(topology, target, hwloc_set)
+    : hwloc_linux_set_pid_cpubind(topology, target, hwloc_set, flags);
+}
+
+/* get_proc_cpubind hook: pid 0 means the topology's target process;
+ * HWLOC_CPUBIND_THREAD restricts the query to that single tid. */
+static int
+hwloc_linux_get_proc_cpubind(hwloc_topology_t topology, pid_t pid, hwloc_bitmap_t hwloc_set, int flags)
+{
+  pid_t target = pid ? pid : topology->pid;
+  return (flags & HWLOC_CPUBIND_THREAD)
+    ? hwloc_linux_get_tid_cpubind(topology, target, hwloc_set)
+    : hwloc_linux_get_pid_cpubind(topology, target, hwloc_set, flags);
+}
+
+/* set_thisproc_cpubind hook: delegate to the per-pid variant with the
+ * topology's target pid. */
+static int
+hwloc_linux_set_thisproc_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, int flags)
+{
+  return hwloc_linux_set_pid_cpubind(topology, topology->pid, hwloc_set, flags);
+}
+
+/* get_thisproc_cpubind hook: delegate to the per-pid variant with the
+ * topology's target pid. */
+static int
+hwloc_linux_get_thisproc_cpubind(hwloc_topology_t topology, hwloc_bitmap_t hwloc_set, int flags)
+{
+  return hwloc_linux_get_pid_cpubind(topology, topology->pid, hwloc_set, flags);
+}
+
+/* set_thisthread_cpubind hook: bind the calling thread (tid 0 for the
+ * Linux affinity syscalls). Refused when the topology targets another
+ * process (topology->pid != 0). */
+static int
+hwloc_linux_set_thisthread_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, int flags __hwloc_attribute_unused)
+{
+  if (topology->pid) {
+    errno = ENOSYS;
+    return -1;
+  }
+  return hwloc_linux_set_tid_cpubind(topology, 0, hwloc_set);
+}
+
+/* get_thisthread_cpubind hook: query the calling thread (tid 0).
+ * Refused when the topology targets another process (topology->pid != 0). */
+static int
+hwloc_linux_get_thisthread_cpubind(hwloc_topology_t topology, hwloc_bitmap_t hwloc_set, int flags __hwloc_attribute_unused)
+{
+  if (topology->pid) {
+    errno = ENOSYS;
+    return -1;
+  }
+  return hwloc_linux_get_tid_cpubind(topology, 0, hwloc_set);
+}
+
+#if HAVE_DECL_PTHREAD_SETAFFINITY_NP
+#pragma weak pthread_setaffinity_np
+#pragma weak pthread_self
+
+/* set_thread_cpubind hook, built on pthread_setaffinity_np().
+ * pthread symbols are weak so that applications are not forced to
+ * link against libpthread just because hwloc provides this hook. */
+static int
+hwloc_linux_set_thread_cpubind(hwloc_topology_t topology, pthread_t tid, hwloc_const_bitmap_t hwloc_set, int flags __hwloc_attribute_unused)
+{
+  int err;
+
+  if (topology->pid) {
+    /* cannot address another process' threads through pthread_t */
+    errno = ENOSYS;
+    return -1;
+  }
+
+  if (!pthread_self) {
+    /* ?! Application uses set_thread_cpubind, but doesn't link against libpthread ?! */
+    errno = ENOSYS;
+    return -1;
+  }
+  if (tid == pthread_self())
+    return hwloc_linux_set_tid_cpubind(topology, 0, hwloc_set);
+
+  if (!pthread_setaffinity_np) {
+    errno = ENOSYS;
+    return -1;
+  }
+
+#if defined(HWLOC_HAVE_CPU_SET_S) && !defined(HWLOC_HAVE_OLD_SCHED_SETAFFINITY)
+  /* Use a separate block so that we can define specific variable
+     types here */
+  {
+     cpu_set_t *plinux_set;
+     unsigned cpu;
+     int last;
+     size_t setsize;
+
+     last = hwloc_bitmap_last(hwloc_set);
+     if (last == -1) {
+       /* an empty binding set is invalid */
+       errno = EINVAL;
+       return -1;
+     }
+
+     setsize = CPU_ALLOC_SIZE(last+1);
+     plinux_set = CPU_ALLOC(last+1);
+     /* CPU_ALLOC() may return NULL; CPU_ZERO_S() would crash on it */
+     if (!plinux_set) {
+       errno = ENOMEM;
+       return -1;
+     }
+
+     CPU_ZERO_S(setsize, plinux_set);
+     hwloc_bitmap_foreach_begin(cpu, hwloc_set)
+         CPU_SET_S(cpu, setsize, plinux_set);
+     hwloc_bitmap_foreach_end();
+
+     err = pthread_setaffinity_np(tid, setsize, plinux_set);
+
+     CPU_FREE(plinux_set);
+  }
+#elif defined(HWLOC_HAVE_CPU_SET)
+  /* Use a separate block so that we can define specific variable
+     types here */
+  {
+     cpu_set_t linux_set;
+     unsigned cpu;
+
+     CPU_ZERO(&linux_set);
+     hwloc_bitmap_foreach_begin(cpu, hwloc_set)
+         CPU_SET(cpu, &linux_set);
+     hwloc_bitmap_foreach_end();
+
+#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
+     err = pthread_setaffinity_np(tid, &linux_set);
+#else /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
+     err = pthread_setaffinity_np(tid, sizeof(linux_set), &linux_set);
+#endif /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
+  }
+#else /* CPU_SET */
+  /* Use a separate block so that we can define specific variable
+     types here */
+  {
+      unsigned long mask = hwloc_bitmap_to_ulong(hwloc_set);
+
+#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
+      err = pthread_setaffinity_np(tid, (void*) &mask);
+#else /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
+      err = pthread_setaffinity_np(tid, sizeof(mask), (void*) &mask);
+#endif /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
+  }
+#endif /* CPU_SET */
+
+  /* pthread_setaffinity_np() returns an error number instead of setting errno */
+  if (err) {
+    errno = err;
+    return -1;
+  }
+  return 0;
+}
+#endif /* HAVE_DECL_PTHREAD_SETAFFINITY_NP */
+
+#if HAVE_DECL_PTHREAD_GETAFFINITY_NP
+#pragma weak pthread_getaffinity_np
+#pragma weak pthread_self
+
+/* get_thread_cpubind hook, built on pthread_getaffinity_np().
+ * pthread symbols are weak so that applications are not forced to
+ * link against libpthread just because hwloc provides this hook. */
+static int
+hwloc_linux_get_thread_cpubind(hwloc_topology_t topology, pthread_t tid, hwloc_bitmap_t hwloc_set, int flags __hwloc_attribute_unused)
+{
+  int err;
+
+  if (topology->pid) {
+    /* cannot address another process' threads through pthread_t */
+    errno = ENOSYS;
+    return -1;
+  }
+
+  if (!pthread_self) {
+    /* ?! Application uses get_thread_cpubind, but doesn't link against libpthread ?! */
+    errno = ENOSYS;
+    return -1;
+  }
+  if (tid == pthread_self())
+    return hwloc_linux_get_tid_cpubind(topology, 0, hwloc_set);
+
+  if (!pthread_getaffinity_np) {
+    errno = ENOSYS;
+    return -1;
+  }
+
+#if defined(HWLOC_HAVE_CPU_SET_S) && !defined(HWLOC_HAVE_OLD_SCHED_SETAFFINITY)
+  /* Use a separate block so that we can define specific variable
+     types here */
+  {
+     cpu_set_t *plinux_set;
+     unsigned cpu;
+     int last;
+     size_t setsize;
+
+     /* size the kernel mask after the highest PU of the machine */
+     last = hwloc_bitmap_last(topology->levels[0][0]->complete_cpuset);
+     assert (last != -1);
+
+     setsize = CPU_ALLOC_SIZE(last+1);
+     plinux_set = CPU_ALLOC(last+1);
+     /* CPU_ALLOC() may return NULL; don't pass it to the kernel */
+     if (!plinux_set) {
+       errno = ENOMEM;
+       return -1;
+     }
+
+     err = pthread_getaffinity_np(tid, setsize, plinux_set);
+     if (err) {
+        CPU_FREE(plinux_set);
+        errno = err;
+        return -1;
+     }
+
+     hwloc_bitmap_zero(hwloc_set);
+     for(cpu=0; cpu<=(unsigned) last; cpu++)
+       if (CPU_ISSET_S(cpu, setsize, plinux_set))
+	 hwloc_bitmap_set(hwloc_set, cpu);
+
+     CPU_FREE(plinux_set);
+  }
+#elif defined(HWLOC_HAVE_CPU_SET)
+  /* Use a separate block so that we can define specific variable
+     types here */
+  {
+     cpu_set_t linux_set;
+     unsigned cpu;
+
+#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
+     err = pthread_getaffinity_np(tid, &linux_set);
+#else /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
+     err = pthread_getaffinity_np(tid, sizeof(linux_set), &linux_set);
+#endif /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
+     if (err) {
+        errno = err;
+        return -1;
+     }
+
+     hwloc_bitmap_zero(hwloc_set);
+     for(cpu=0; cpu<CPU_SETSIZE; cpu++)
+       if (CPU_ISSET(cpu, &linux_set))
+	 hwloc_bitmap_set(hwloc_set, cpu);
+  }
+#else /* CPU_SET */
+  /* Use a separate block so that we can define specific variable
+     types here */
+  {
+      unsigned long mask;
+
+#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
+      err = pthread_getaffinity_np(tid, (void*) &mask);
+#else /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
+      err = pthread_getaffinity_np(tid, sizeof(mask), (void*) &mask);
+#endif /* HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
+      if (err) {
+        errno = err;
+        return -1;
+      }
+
+     hwloc_bitmap_from_ulong(hwloc_set, mask);
+  }
+#endif /* CPU_SET */
+
+  return 0;
+}
+#endif /* HAVE_DECL_PTHREAD_GETAFFINITY_NP */
+
+int
+hwloc_linux_get_tid_last_cpu_location(hwloc_topology_t topology __hwloc_attribute_unused, pid_t tid, hwloc_bitmap_t set)
+{
+  /* Return in @set the CPU that thread @tid last ran on, parsed from
+   * the "processor" field of /proc/<tid>/stat.
+   *
+   * The second field of stat contains the command name between parentheses,
+   * and the command itself may contain parentheses,
+   * so read the whole line and find the last closing parenthesis to find the third field.
+   */
+  char buf[1024] = "";
+  char name[64];
+  char *tmp;
+  int fd, i, err;
+
+  if (!tid) {
+#ifdef SYS_gettid
+    /* tid 0 means the calling thread */
+    tid = syscall(SYS_gettid);
+#else
+    errno = ENOSYS;
+    return -1;
+#endif
+  }
+
+  snprintf(name, sizeof(name), "/proc/%lu/stat", (unsigned long) tid);
+  fd = open(name, O_RDONLY); /* no fsroot for real /proc */
+  if (fd < 0) {
+    errno = ENOSYS;
+    return -1;
+  }
+  err = read(fd, buf, sizeof(buf)-1); /* read -1 to put the ending \0 */
+  close(fd);
+  if (err <= 0) {
+    errno = ENOSYS;
+    return -1;
+  }
+  buf[err-1] = '\0'; /* drop the last byte read (usually the trailing newline) */
+
+  tmp = strrchr(buf, ')');
+  if (!tmp) {
+    errno = ENOSYS;
+    return -1;
+  }
+  /* skip ') ' to find the actual third argument */
+  tmp += 2;
+
+  /* skip 36 space-separated fields (stat fields 3..38) so that tmp
+   * lands on field 39 — NOTE(review): the loop bound is 36, not 35 as
+   * the old comment claimed */
+  for(i=0; i<36; i++) {
+    tmp = strchr(tmp, ' ');
+    if (!tmp) {
+      errno = ENOSYS;
+      return -1;
+    }
+    /* skip the ' ' itself */
+    tmp++;
+  }
+
+  /* field 39 is "processor", the CPU this task last ran on (see proc(5)) */
+  if (sscanf(tmp, "%d ", &i) != 1) {
+    errno = ENOSYS;
+    return -1;
+  }
+
+  hwloc_bitmap_only(set, i);
+  return 0;
+}
+
+/* Per-tid proc_get_last_cpu_location callback data, callback function and caller */
+struct hwloc_linux_foreach_proc_tid_get_last_cpu_location_cb_data_s {
+  hwloc_bitmap_t cpuset; /* output: union of the locations of the threads seen so far */
+  hwloc_bitmap_t tidset; /* scratch: location of the thread currently visited */
+};
+
+/* Per-tid callback for proc_get_last_cpu_location: accumulate the union
+ * of the last-run CPUs of all threads of the process. */
+static int
+hwloc_linux_foreach_proc_tid_get_last_cpu_location_cb(hwloc_topology_t topology, pid_t tid, void *_data, int idx)
+{
+  struct hwloc_linux_foreach_proc_tid_get_last_cpu_location_cb_data_s *data = _data;
+
+  if (hwloc_linux_get_tid_last_cpu_location(topology, tid, data->tidset))
+    return -1;
+
+  if (!idx) {
+    /* first thread: start from its location (equivalent to zero then or) */
+    hwloc_bitmap_copy(data->cpuset, data->tidset);
+  } else {
+    hwloc_bitmap_or(data->cpuset, data->cpuset, data->tidset);
+  }
+  return 0;
+}
+
+/* Get the last-run CPU location of process @pid as the union of the
+ * locations of all its threads. */
+static int
+hwloc_linux_get_pid_last_cpu_location(hwloc_topology_t topology, pid_t pid, hwloc_bitmap_t hwloc_set, int flags __hwloc_attribute_unused)
+{
+  struct hwloc_linux_foreach_proc_tid_get_last_cpu_location_cb_data_s data;
+  hwloc_bitmap_t tidset = hwloc_bitmap_alloc();
+  int ret;
+
+  /* hwloc_bitmap_alloc() may fail; without this check the callback
+   * would operate on a NULL scratch bitmap */
+  if (!tidset) {
+    errno = ENOMEM;
+    return -1;
+  }
+
+  data.cpuset = hwloc_set;
+  data.tidset = tidset;
+  ret = hwloc_linux_foreach_proc_tid(topology, pid,
+				     hwloc_linux_foreach_proc_tid_get_last_cpu_location_cb,
+				     &data);
+  hwloc_bitmap_free(tidset);
+  return ret;
+}
+
+/* get_proc_last_cpu_location hook: pid 0 means the topology's target
+ * process; HWLOC_CPUBIND_THREAD restricts the query to that single tid. */
+static int
+hwloc_linux_get_proc_last_cpu_location(hwloc_topology_t topology, pid_t pid, hwloc_bitmap_t hwloc_set, int flags)
+{
+  pid_t target = pid ? pid : topology->pid;
+  return (flags & HWLOC_CPUBIND_THREAD)
+    ? hwloc_linux_get_tid_last_cpu_location(topology, target, hwloc_set)
+    : hwloc_linux_get_pid_last_cpu_location(topology, target, hwloc_set, flags);
+}
+
+/* get_thisproc_last_cpu_location hook: delegate to the per-pid variant
+ * with the topology's target pid. */
+static int
+hwloc_linux_get_thisproc_last_cpu_location(hwloc_topology_t topology, hwloc_bitmap_t hwloc_set, int flags)
+{
+  return hwloc_linux_get_pid_last_cpu_location(topology, topology->pid, hwloc_set, flags);
+}
+
+/* get_thisthread_last_cpu_location hook: query the calling thread (tid 0).
+ * Refused when the topology targets another process (topology->pid != 0). */
+static int
+hwloc_linux_get_thisthread_last_cpu_location(hwloc_topology_t topology, hwloc_bitmap_t hwloc_set, int flags __hwloc_attribute_unused)
+{
+  if (topology->pid) {
+    errno = ENOSYS;
+    return -1;
+  }
+  return hwloc_linux_get_tid_last_cpu_location(topology, 0, hwloc_set);
+}
+
+
+
+/***************************
+ ****** Membind hooks ******
+ ***************************/
+
+/* Convert a hwloc membind policy (plus flags) into a Linux MPOL_* value.
+ * Returns -1 with errno set to ENOSYS for unsupported policies. */
+static int
+hwloc_linux_membind_policy_from_hwloc(int *linuxpolicy, hwloc_membind_policy_t policy, int flags)
+{
+  if (policy == HWLOC_MEMBIND_DEFAULT || policy == HWLOC_MEMBIND_FIRSTTOUCH) {
+    *linuxpolicy = MPOL_DEFAULT;
+  } else if (policy == HWLOC_MEMBIND_BIND) {
+    /* only a STRICT binding maps to the hard MPOL_BIND */
+    *linuxpolicy = (flags & HWLOC_MEMBIND_STRICT) ? MPOL_BIND : MPOL_PREFERRED;
+  } else if (policy == HWLOC_MEMBIND_INTERLEAVE) {
+    *linuxpolicy = MPOL_INTERLEAVE;
+  } else {
+    /* TODO: next-touch when (if?) patch applied upstream */
+    errno = ENOSYS;
+    return -1;
+  }
+  return 0;
+}
+
+/* Convert a hwloc nodeset into a Linux nodemask (array of unsigned longs)
+ * suitable for mbind()/set_mempolicy(). On success, stores the mask in
+ * *linuxmaskp (caller frees) and its bit count (rounded up to a multiple
+ * of HWLOC_BITS_PER_LONG) in *max_os_index_p. Returns -1 with errno set
+ * on allocation failure. */
+static int
+hwloc_linux_membind_mask_from_nodeset(hwloc_topology_t topology __hwloc_attribute_unused,
+				      hwloc_const_nodeset_t nodeset,
+				      unsigned *max_os_index_p, unsigned long **linuxmaskp)
+{
+  unsigned max_os_index = 0; /* highest os_index + 1 */
+  unsigned long *linuxmask;
+  unsigned i;
+  hwloc_nodeset_t linux_nodeset = NULL;
+
+  if (hwloc_bitmap_isfull(nodeset)) {
+    /* a full hwloc bitmap is infinite and cannot be converted literally;
+     * use node 0 only — NOTE(review): presumably for machines where the
+     * full set means "no NUMA restriction"; confirm against callers */
+    linux_nodeset = hwloc_bitmap_alloc();
+    hwloc_bitmap_only(linux_nodeset, 0);
+    nodeset = linux_nodeset;
+  }
+
+  max_os_index = hwloc_bitmap_last(nodeset);
+  if (max_os_index == (unsigned) -1)
+    max_os_index = 0; /* empty nodeset */
+  /* add 1 to convert the last os_index into a max_os_index,
+   * and round up to the nearest multiple of BITS_PER_LONG */
+  max_os_index = (max_os_index + 1 + HWLOC_BITS_PER_LONG - 1) & ~(HWLOC_BITS_PER_LONG - 1);
+
+  linuxmask = calloc(max_os_index/HWLOC_BITS_PER_LONG, sizeof(long));
+  if (!linuxmask) {
+    hwloc_bitmap_free(linux_nodeset);
+    errno = ENOMEM;
+    return -1;
+  }
+
+  /* copy the nodeset ulong by ulong into the kernel mask */
+  for(i=0; i<max_os_index/HWLOC_BITS_PER_LONG; i++)
+    linuxmask[i] = hwloc_bitmap_to_ith_ulong(nodeset, i);
+
+  if (linux_nodeset)
+    hwloc_bitmap_free(linux_nodeset);
+
+  *max_os_index_p = max_os_index;
+  *linuxmaskp = linuxmask;
+  return 0;
+}
+
+/* Convert a Linux nodemask of @max_os_index bits back into a hwloc nodeset. */
+static void
+hwloc_linux_membind_mask_to_nodeset(hwloc_topology_t topology __hwloc_attribute_unused,
+				    hwloc_nodeset_t nodeset,
+				    unsigned max_os_index, const unsigned long *linuxmask)
+{
+  unsigned i, nlongs = max_os_index/HWLOC_BITS_PER_LONG;
+
+#ifdef HWLOC_DEBUG
+  /* max_os_index comes from hwloc_linux_find_kernel_max_numnodes() so it's a multiple of HWLOC_BITS_PER_LONG */
+  assert(!(max_os_index%HWLOC_BITS_PER_LONG));
+#endif
+
+  hwloc_bitmap_zero(nodeset);
+  for(i=0; i<nlongs; i++)
+    hwloc_bitmap_set_ith_ulong(nodeset, i, linuxmask[i]);
+}
+
+/* set_area_membind hook: bind the memory range [addr, addr+len) to
+ * @nodeset with @policy via mbind(). The range is first extended
+ * downwards to a page boundary as mbind() requires page alignment. */
+static int
+hwloc_linux_set_area_membind(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
+{
+  unsigned max_os_index; /* highest os_index + 1 */
+  unsigned long *linuxmask;
+  size_t remainder;
+  int linuxpolicy;
+  unsigned linuxflags = 0;
+  int err;
+
+  /* align addr down to a page boundary and grow len accordingly */
+  remainder = (uintptr_t) addr & (hwloc_getpagesize()-1);
+  addr = (char*) addr - remainder;
+  len += remainder;
+
+  err = hwloc_linux_membind_policy_from_hwloc(&linuxpolicy, policy, flags);
+  if (err < 0)
+    return err;
+
+  if (linuxpolicy == MPOL_DEFAULT)
+    /* Some Linux kernels don't like being passed a set */
+    return hwloc_mbind((void *) addr, len, linuxpolicy, NULL, 0, 0);
+
+  err = hwloc_linux_membind_mask_from_nodeset(topology, nodeset, &max_os_index, &linuxmask);
+  if (err < 0)
+    goto out;
+
+  if (flags & HWLOC_MEMBIND_MIGRATE) {
+    /* also move already-allocated pages; STRICT makes failures fatal */
+    linuxflags = MPOL_MF_MOVE;
+    if (flags & HWLOC_MEMBIND_STRICT)
+      linuxflags |= MPOL_MF_STRICT;
+  }
+
+  err = hwloc_mbind((void *) addr, len, linuxpolicy, linuxmask, max_os_index+1, linuxflags);
+  if (err < 0)
+    goto out_with_mask;
+
+  free(linuxmask);
+  return 0;
+
+ out_with_mask:
+  free(linuxmask);
+ out:
+  return -1;
+}
+
+/* alloc_membind hook: mmap a buffer and bind it to @nodeset.
+ * With HWLOC_MEMBIND_STRICT a binding failure aborts the allocation,
+ * otherwise the unbound buffer is returned anyway. */
+static void *
+hwloc_linux_alloc_membind(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
+{
+  void *buffer;
+  int err;
+
+  buffer = hwloc_alloc_mmap(topology, len);
+  if (!buffer)
+    return NULL;
+
+  err = hwloc_linux_set_area_membind(topology, buffer, len, nodeset, policy, flags);
+  /* STRICT is a membind *flag*: the previous code tested it against the
+   * policy enum (policy & HWLOC_MEMBIND_STRICT), matching the wrong
+   * policies instead of the caller's strictness request */
+  if (err < 0 && (flags & HWLOC_MEMBIND_STRICT)) {
+    munmap(buffer, len);
+    return NULL;
+  }
+
+  return buffer;
+}
+
+/* set_thisthread_membind hook: translate the hwloc policy+nodeset and
+ * apply it with set_mempolicy(); optionally migrate existing pages. */
+static int
+hwloc_linux_set_thisthread_membind(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
+{
+  unsigned max_os_index; /* highest os_index + 1 */
+  unsigned long *linuxmask;
+  int linuxpolicy;
+  int err;
+
+  err = hwloc_linux_membind_policy_from_hwloc(&linuxpolicy, policy, flags);
+  if (err < 0)
+    return err;
+
+  if (linuxpolicy == MPOL_DEFAULT)
+    /* Some Linux kernels don't like being passed a set */
+    return hwloc_set_mempolicy(linuxpolicy, NULL, 0);
+
+  err = hwloc_linux_membind_mask_from_nodeset(topology, nodeset, &max_os_index, &linuxmask);
+  if (err < 0)
+    goto out;
+
+  if (flags & HWLOC_MEMBIND_MIGRATE) {
+    unsigned long *fullmask = malloc(max_os_index/HWLOC_BITS_PER_LONG * sizeof(long));
+    if (fullmask) {
+      /* migrate from all nodes: fill every byte with 0xff so all bits are
+       * set (the previous 0xf only set the low nibble of each byte,
+       * silently skipping nodes 4-7, 12-15, ...) */
+      memset(fullmask, 0xff, max_os_index/HWLOC_BITS_PER_LONG * sizeof(long));
+      err = hwloc_migrate_pages(0, max_os_index+1, fullmask, linuxmask);
+      free(fullmask);
+    } else
+      err = -1;
+    /* migration failure only aborts under STRICT */
+    if (err < 0 && (flags & HWLOC_MEMBIND_STRICT))
+      goto out_with_mask;
+  }
+
+  err = hwloc_set_mempolicy(linuxpolicy, linuxmask, max_os_index+1);
+  if (err < 0)
+    goto out_with_mask;
+
+  free(linuxmask);
+  return 0;
+
+ out_with_mask:
+  free(linuxmask);
+ out:
+  return -1;
+}
+
+/*
+ * On some kernels, get_mempolicy requires the output size to be larger
+ * than the kernel MAX_NUMNODES (defined by CONFIG_NODES_SHIFT).
+ * Try get_mempolicy on ourself until we find a max_os_index value that
+ * makes the kernel happy.
+ */
+static int
+hwloc_linux_find_kernel_max_numnodes(hwloc_topology_t topology __hwloc_attribute_unused)
+{
+  static int _max_numnodes = -1, max_numnodes;
+  int linuxpolicy;
+
+  if (_max_numnodes != -1)
+    /* already computed */
+    return _max_numnodes;
+
+  /* start with a single ulong, it's the minimal and it's enough for most machines */
+  max_numnodes = HWLOC_BITS_PER_LONG;
+  while (1) {
+    unsigned long *mask = malloc(max_numnodes / HWLOC_BITS_PER_LONG * sizeof(long));
+    int err;
+    if (!mask)
+      /* out of memory: don't pass NULL to get_mempolicy; return the
+       * current guess without caching it so a later call may retry */
+      return max_numnodes;
+    err = hwloc_get_mempolicy(&linuxpolicy, mask, max_numnodes, 0, 0);
+    free(mask);
+    if (!err || errno != EINVAL)
+      /* Found it. Only update the static value with the final one,
+       * to avoid sharing intermediate values that we modify,
+       * in case there's ever multiple concurrent calls.
+       */
+      return _max_numnodes = max_numnodes;
+    max_numnodes *= 2;
+  }
+}
+
+/* Convert a Linux MPOL_* policy back into a hwloc membind policy.
+ * Returns -1 with errno set to EINVAL for unknown policies. */
+static int
+hwloc_linux_membind_policy_to_hwloc(int linuxpolicy, hwloc_membind_policy_t *policy)
+{
+  if (linuxpolicy == MPOL_DEFAULT) {
+    *policy = HWLOC_MEMBIND_FIRSTTOUCH;
+  } else if (linuxpolicy == MPOL_PREFERRED || linuxpolicy == MPOL_BIND) {
+    /* both the soft and the hard binding map back to BIND */
+    *policy = HWLOC_MEMBIND_BIND;
+  } else if (linuxpolicy == MPOL_INTERLEAVE) {
+    *policy = HWLOC_MEMBIND_INTERLEAVE;
+  } else {
+    errno = EINVAL;
+    return -1;
+  }
+  return 0;
+}
+
+/* get_thisthread_membind hook: query the calling thread's memory policy
+ * with get_mempolicy() and convert it back to hwloc policy+nodeset. */
+static int
+hwloc_linux_get_thisthread_membind(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t *policy, int flags __hwloc_attribute_unused)
+{
+  unsigned max_os_index;
+  unsigned long *linuxmask;
+  int linuxpolicy;
+  int err;
+
+  max_os_index = hwloc_linux_find_kernel_max_numnodes(topology);
+
+  linuxmask = malloc(max_os_index/HWLOC_BITS_PER_LONG * sizeof(long));
+  if (!linuxmask) {
+    errno = ENOMEM;
+    goto out;
+  }
+
+  err = hwloc_get_mempolicy(&linuxpolicy, linuxmask, max_os_index, 0, 0);
+  if (err < 0)
+    goto out_with_mask;
+
+  if (linuxpolicy == MPOL_DEFAULT) {
+    /* DEFAULT means no restriction: report the whole topology nodeset */
+    hwloc_bitmap_copy(nodeset, hwloc_topology_get_topology_nodeset(topology));
+  } else {
+    hwloc_linux_membind_mask_to_nodeset(topology, nodeset, max_os_index, linuxmask);
+  }
+
+  err = hwloc_linux_membind_policy_to_hwloc(linuxpolicy, policy);
+  if (err < 0)
+    goto out_with_mask;
+
+  free(linuxmask);
+  return 0;
+
+ out_with_mask:
+  free(linuxmask);
+ out:
+  return -1;
+}
+
+/* get_area_membind hook: query the memory policy of every page of
+ * [addr, addr+len) and combine the results. Reports HWLOC_MEMBIND_MIXED
+ * when pages disagree on the policy, and the whole topology nodeset when
+ * any page uses MPOL_DEFAULT. */
+static int
+hwloc_linux_get_area_membind(hwloc_topology_t topology, const void *addr, size_t len, hwloc_nodeset_t nodeset, hwloc_membind_policy_t *policy, int flags __hwloc_attribute_unused)
+{
+  unsigned max_os_index;
+  unsigned long *linuxmask, *globallinuxmask;
+  int linuxpolicy = 0, globallinuxpolicy = 0; /* shut-up the compiler */
+  int mixed = 0;
+  int full = 0;
+  int first = 1;
+  int pagesize = hwloc_getpagesize();
+  char *tmpaddr;
+  int err;
+  unsigned i;
+
+  max_os_index = hwloc_linux_find_kernel_max_numnodes(topology);
+
+  linuxmask = malloc(max_os_index/HWLOC_BITS_PER_LONG * sizeof(long));
+  if (!linuxmask) {
+    errno = ENOMEM;
+    goto out;
+  }
+  globallinuxmask = calloc(max_os_index/HWLOC_BITS_PER_LONG, sizeof(long));
+  if (!globallinuxmask) {
+    errno = ENOMEM;
+    goto out_with_masks;
+  }
+
+  /* walk the range one page at a time, starting from the enclosing page boundary */
+  for(tmpaddr = (char *)((unsigned long)addr & ~(pagesize-1));
+      tmpaddr < (char *)addr + len;
+      tmpaddr += pagesize) {
+    err = hwloc_get_mempolicy(&linuxpolicy, linuxmask, max_os_index, tmpaddr, MPOL_F_ADDR);
+    if (err < 0)
+      goto out_with_masks;
+
+    /* use the first found policy. if we find a different one later, set mixed to 1 */
+    if (first)
+      globallinuxpolicy = linuxpolicy;
+    else if (globallinuxpolicy != linuxpolicy)
+      mixed = 1;
+
+    /* aggregate masks, and set full to 1 if we ever find DEFAULT */
+    if (full || linuxpolicy == MPOL_DEFAULT) {
+      full = 1;
+    } else {
+      for(i=0; i<max_os_index/HWLOC_BITS_PER_LONG; i++)
+        globallinuxmask[i] |= linuxmask[i];
+    }
+
+    first = 0;
+  }
+
+  if (mixed) {
+    *policy = HWLOC_MEMBIND_MIXED;
+  } else {
+    err = hwloc_linux_membind_policy_to_hwloc(linuxpolicy, policy);
+    if (err < 0)
+      goto out_with_masks;
+  }
+
+  if (full) {
+    /* some page had no restriction: report the whole topology nodeset */
+    hwloc_bitmap_copy(nodeset, hwloc_topology_get_topology_nodeset(topology));
+  } else {
+    hwloc_linux_membind_mask_to_nodeset(topology, nodeset, max_os_index, globallinuxmask);
+  }
+
+  free(globallinuxmask);
+  free(linuxmask);
+  return 0;
+
+ out_with_masks:
+  free(globallinuxmask);
+  free(linuxmask);
+ out:
+  return -1;
+}
+
+/* get_area_memlocation hook: report in @nodeset the NUMA nodes where the
+ * pages of [addr, addr+len) are currently allocated, using move_pages()
+ * with a NULL target (which only queries page status). */
+static int
+hwloc_linux_get_area_memlocation(hwloc_topology_t topology __hwloc_attribute_unused, const void *addr, size_t len, hwloc_nodeset_t nodeset, int flags __hwloc_attribute_unused)
+{
+  unsigned offset;
+  unsigned long count;
+  void **pages;
+  int *status;
+  int pagesize = hwloc_getpagesize();
+  int ret;
+  unsigned i;
+
+  /* align the range down to a page boundary and count its pages */
+  offset = ((unsigned long) addr) & (pagesize-1);
+  addr = ((char*) addr) - offset;
+  len += offset;
+  count = (len + pagesize-1)/pagesize;
+  pages = malloc(count*sizeof(*pages));
+  status = malloc(count*sizeof(*status));
+  if (!pages || !status) {
+    ret = -1;
+    goto out_with_pages; /* free(NULL) is fine for whichever failed */
+  }
+
+  for(i=0; i<count; i++)
+    pages[i] = ((char*)addr) + i*pagesize;
+
+  /* NULL nodes: only query the current node of each page into status[] */
+  ret = hwloc_move_pages(0, count, pages, NULL, status, 0);
+  if (ret  < 0)
+    goto out_with_pages;
+
+  hwloc_bitmap_zero(nodeset);
+  for(i=0; i<count; i++)
+    if (status[i] >= 0) /* negative status means unallocated or error */
+      hwloc_bitmap_set(nodeset, status[i]);
+  ret = 0;
+
+ out_with_pages:
+  free(pages);
+  free(status);
+  return ret;
+}
+
+static void hwloc_linux__get_allowed_resources(hwloc_topology_t topology, const char *root_path, int root_fd, char **cpuset_namep);
+
+/* get_allowed_resources hook: re-read the allowed cpus/mems (cgroup/cpuset)
+ * from HWLOC_FSROOT (or /) and attach the cgroup name to the root object.
+ * Returns 0 on success, -1 on failure. */
+static int hwloc_linux_get_allowed_resources_hook(hwloc_topology_t topology)
+{
+  const char *fsroot_path;
+  char *cpuset_name;
+  int root_fd = -1;
+
+  fsroot_path = getenv("HWLOC_FSROOT");
+  if (!fsroot_path)
+    fsroot_path = "/";
+
+#ifdef HAVE_OPENAT
+  root_fd = open(fsroot_path, O_RDONLY | O_DIRECTORY);
+  if (root_fd < 0)
+    goto out;
+#else
+  if (strcmp(fsroot_path, "/")) {
+    /* without openat() only the real root is supported */
+    errno = ENOSYS;
+    goto out;
+  }
+#endif
+
+  /* we could also error-out if the current topology doesn't actually match the system,
+   * at least for PUs and NUMA nodes. But it would increase the overhead of loading XMLs.
+   *
+   * Just trust the user when he sets THISSYSTEM=1. It enables hacky
+   * tests such as restricting random XML or synthetic to the current
+   * machine (uses the default cgroup).
+   */
+
+  hwloc_linux__get_allowed_resources(topology, fsroot_path, root_fd, &cpuset_name);
+  if (cpuset_name) {
+    hwloc_obj_add_info(topology->levels[0][0], "LinuxCgroup", cpuset_name);
+    free(cpuset_name);
+  }
+  if (root_fd != -1)
+    close(root_fd);
+  /* success: the previous code fell through to the error label and
+   * returned -1 even when the resources were successfully re-read */
+  return 0;
+
+ out:
+  return -1;
+}
+
+/* Install the Linux cpubind/membind hooks and advertise membind support. */
+void
+hwloc_set_linuxfs_hooks(struct hwloc_binding_hooks *hooks,
+			struct hwloc_topology_support *support)
+{
+  /* NOTE: support was previously tagged __hwloc_attribute_unused although
+   * it is dereferenced below to set the membind support bits */
+  hooks->set_thisthread_cpubind = hwloc_linux_set_thisthread_cpubind;
+  hooks->get_thisthread_cpubind = hwloc_linux_get_thisthread_cpubind;
+  hooks->set_thisproc_cpubind = hwloc_linux_set_thisproc_cpubind;
+  hooks->get_thisproc_cpubind = hwloc_linux_get_thisproc_cpubind;
+  hooks->set_proc_cpubind = hwloc_linux_set_proc_cpubind;
+  hooks->get_proc_cpubind = hwloc_linux_get_proc_cpubind;
+#if HAVE_DECL_PTHREAD_SETAFFINITY_NP
+  hooks->set_thread_cpubind = hwloc_linux_set_thread_cpubind;
+#endif /* HAVE_DECL_PTHREAD_SETAFFINITY_NP */
+#if HAVE_DECL_PTHREAD_GETAFFINITY_NP
+  hooks->get_thread_cpubind = hwloc_linux_get_thread_cpubind;
+#endif /* HAVE_DECL_PTHREAD_GETAFFINITY_NP */
+  hooks->get_thisthread_last_cpu_location = hwloc_linux_get_thisthread_last_cpu_location;
+  hooks->get_thisproc_last_cpu_location = hwloc_linux_get_thisproc_last_cpu_location;
+  hooks->get_proc_last_cpu_location = hwloc_linux_get_proc_last_cpu_location;
+  hooks->set_thisthread_membind = hwloc_linux_set_thisthread_membind;
+  hooks->get_thisthread_membind = hwloc_linux_get_thisthread_membind;
+  hooks->get_area_membind = hwloc_linux_get_area_membind;
+  hooks->set_area_membind = hwloc_linux_set_area_membind;
+  hooks->get_area_memlocation = hwloc_linux_get_area_memlocation;
+  hooks->alloc_membind = hwloc_linux_alloc_membind;
+  hooks->alloc = hwloc_alloc_mmap;
+  hooks->free_membind = hwloc_free_mmap;
+  support->membind->firsttouch_membind = 1;
+  support->membind->bind_membind = 1;
+  support->membind->interleave_membind = 1;
+  support->membind->migrate_membind = 1;
+  hooks->get_allowed_resources = hwloc_linux_get_allowed_resources_hook;
+}
+
+
+/*******************************************
+ *** Misc Helpers for Topology Discovery ***
+ *******************************************/
+
+/* cpuinfo array: one entry per processor line parsed from /proc/cpuinfo */
+struct hwloc_linux_cpuinfo_proc {
+  /* set during hwloc_linux_parse_cpuinfo: physical (OS) processor index */
+  unsigned long Pproc;
+  /* set during hwloc_linux_parse_cpuinfo or -1 if unknown: physical core/package ids */
+  long Pcore, Ppkg;
+  /* set later, or -1 if unknown: logical core/package ids */
+  long Lcore, Lpkg;
+
+  /* custom info, set during hwloc_linux_parse_cpuinfo */
+  struct hwloc_obj_info_s *infos;
+  unsigned infos_count;
+};
+
+/* Scan /proc/mounts (under @root_path if non-NULL) for a cpuset or
+ * cgroup-with-cpuset mount point. On return, at most one of
+ * *cgroup_mntpnt / *cpuset_mntpnt is a strdup'ed path (caller frees);
+ * both are NULL when nothing was found. */
+static void
+hwloc_find_linux_cpuset_mntpnt(char **cgroup_mntpnt, char **cpuset_mntpnt, const char *root_path)
+{
+  char *mount_path;
+  struct mntent mntent;
+  FILE *fd;
+  int err;
+  size_t bufsize;
+  char *buf;
+
+  *cgroup_mntpnt = NULL;
+  *cpuset_mntpnt = NULL;
+
+  if (root_path) {
+    /* setmntent() doesn't support openat(), so use the root_path directly */
+    err = asprintf(&mount_path, "%s/proc/mounts", root_path);
+    if (err < 0)
+      return;
+    fd = setmntent(mount_path, "r");
+    free(mount_path);
+  } else {
+    fd = setmntent("/proc/mounts", "r");
+  }
+  if (!fd)
+    return;
+
+  /* getmntent_r() doesn't actually report an error when the buffer
+   * is too small. It just silently truncates things. So we can't
+   * dynamically resize things.
+   *
+   * Linux limits mount type, string, and options to one page each.
+   * getmntent() limits the line size to 4kB.
+   * so use 4*pagesize to be far above both.
+   */
+  bufsize = hwloc_getpagesize()*4;
+  buf = malloc(bufsize);
+  if (!buf) {
+    /* don't pass a NULL buffer to getmntent_r() */
+    endmntent(fd);
+    return;
+  }
+
+  while (getmntent_r(fd, &mntent, buf, bufsize)) {
+    if (!strcmp(mntent.mnt_type, "cpuset")) {
+      hwloc_debug("Found cpuset mount point on %s\n", mntent.mnt_dir);
+      *cpuset_mntpnt = strdup(mntent.mnt_dir);
+      break;
+    } else if (!strcmp(mntent.mnt_type, "cgroup")) {
+      /* found a cgroup mntpnt */
+      char *opt, *opts = mntent.mnt_opts;
+      int cpuset_opt = 0;
+      int noprefix_opt = 0;
+      /* look at options */
+      while ((opt = strsep(&opts, ",")) != NULL) {
+	if (!strcmp(opt, "cpuset"))
+	  cpuset_opt = 1;
+	else if (!strcmp(opt, "noprefix"))
+	  noprefix_opt = 1;
+      }
+      if (!cpuset_opt)
+	continue;
+      if (noprefix_opt) {
+	hwloc_debug("Found cgroup emulating a cpuset mount point on %s\n", mntent.mnt_dir);
+	*cpuset_mntpnt = strdup(mntent.mnt_dir);
+      } else {
+	hwloc_debug("Found cgroup/cpuset mount point on %s\n", mntent.mnt_dir);
+	*cgroup_mntpnt = strdup(mntent.mnt_dir);
+      }
+      break;
+    }
+  }
+
+  free(buf);
+  endmntent(fd);
+}
+
+/*
+ * Linux cpusets may be managed directly or through cgroup.
+ * If cgroup is used, tasks get a /proc/pid/cgroup which may contain a
+ * single line %d:cpuset:<name>. If cpuset are used they get /proc/pid/cpuset
+ * containing <name>.
+ */
+/* Return the cgroup/cpuset name of @pid (0 for self) as a strdup'ed
+ * string (caller frees), or NULL if neither /proc/<pid>/cgroup nor
+ * /proc/<pid>/cpuset reveals one. See the comment above for the two
+ * file formats. */
+static char *
+hwloc_read_linux_cpuset_name(int fsroot_fd, hwloc_pid_t pid)
+{
+#define CPUSET_NAME_LEN 128
+  char cpuset_name[CPUSET_NAME_LEN];
+  FILE *file;
+  int err;
+  char *tmp;
+
+  /* check whether a cgroup-cpuset is enabled */
+  if (!pid)
+    file = hwloc_fopen("/proc/self/cgroup", "r", fsroot_fd);
+  else {
+    char path[] = "/proc/XXXXXXXXXX/cgroup";
+    snprintf(path, sizeof(path), "/proc/%d/cgroup", pid);
+    file = hwloc_fopen(path, "r", fsroot_fd);
+  }
+  if (file) {
+    /* find a cpuset line */
+#define CGROUP_LINE_LEN 256
+    char line[CGROUP_LINE_LEN];
+    while (fgets(line, sizeof(line), file)) {
+      /* lines look like "%d:cpuset:<name>"; match the ":cpuset:" controller */
+      char *end, *colon = strchr(line, ':');
+      if (!colon)
+	continue;
+      if (strncmp(colon, ":cpuset:", 8))
+	continue;
+
+      /* found a cgroup-cpuset line, return the name */
+      fclose(file);
+      end = strchr(colon, '\n');
+      if (end)
+	*end = '\0';
+      hwloc_debug("Found cgroup-cpuset %s\n", colon+8);
+      return strdup(colon+8);
+    }
+    fclose(file);
+  }
+
+  /* check whether a cpuset is enabled */
+  if (!pid)
+    err = hwloc_read_path_by_length("/proc/self/cpuset", cpuset_name, sizeof(cpuset_name), fsroot_fd);
+  else {
+    char path[] = "/proc/XXXXXXXXXX/cpuset";
+    snprintf(path, sizeof(path), "/proc/%d/cpuset", pid);
+    err = hwloc_read_path_by_length(path, cpuset_name, sizeof(cpuset_name), fsroot_fd);
+  }
+  if (err < 0) {
+    /* found nothing */
+    hwloc_debug("%s", "No cgroup or cpuset found\n");
+    return NULL;
+  }
+
+  /* found a cpuset, return the name (strip the trailing newline) */
+  tmp = strchr(cpuset_name, '\n');
+  if (tmp)
+    *tmp = '\0';
+  hwloc_debug("Found cpuset %s\n", cpuset_name);
+  return strdup(cpuset_name);
+}
+
+/*
+ * Then, the cpuset description is available from either the cgroup or
+ * the cpuset filesystem (usually mounted in / or /dev) where there
+ * are cgroup<name>/cpuset.{cpus,mems} or cpuset<name>/{cpus,mems} files.
+ */
+/* Read the cpus/mems @attr_name of cgroup/cpuset @cpuset_name into
+ * @admin_enabled_cpus_set. The set is left untouched when no file is
+ * found, and filled entirely when the file cannot be parsed. */
+static void
+hwloc_admin_disable_set_from_cpuset(int root_fd,
+				    const char *cgroup_mntpnt, const char *cpuset_mntpnt, const char *cpuset_name,
+				    const char *attr_name,
+				    hwloc_bitmap_t admin_enabled_cpus_set)
+{
+#define CPUSET_FILENAME_LEN 256
+  char cpuset_filename[CPUSET_FILENAME_LEN];
+  int fd;
+  int err;
+
+  if (cgroup_mntpnt) {
+    /* try to read the cpuset from cgroup */
+    snprintf(cpuset_filename, CPUSET_FILENAME_LEN, "%s%s/cpuset.%s", cgroup_mntpnt, cpuset_name, attr_name);
+    hwloc_debug("Trying to read cgroup file <%s>\n", cpuset_filename);
+  } else if (cpuset_mntpnt) {
+    /* try to read the cpuset directly */
+    snprintf(cpuset_filename, CPUSET_FILENAME_LEN, "%s%s/%s", cpuset_mntpnt, cpuset_name, attr_name);
+    hwloc_debug("Trying to read cpuset file <%s>\n", cpuset_filename);
+  } else {
+    /* neither mount point found: the previous code fell through here and
+     * passed an uninitialized cpuset_filename to hwloc_open() (UB) */
+    return;
+  }
+
+  fd = hwloc_open(cpuset_filename, root_fd);
+  if (fd < 0) {
+    /* found no cpuset description, ignore it */
+    hwloc_debug("Couldn't find cpuset <%s> description, ignoring\n", cpuset_name);
+    return;
+  }
+
+  err = hwloc__read_fd_as_cpulist(fd, admin_enabled_cpus_set);
+  close(fd);
+
+  if (err < 0)
+    hwloc_bitmap_fill(admin_enabled_cpus_set);
+  else
+    hwloc_debug_bitmap("cpuset includes %s\n", admin_enabled_cpus_set);
+}
+
+/* Parse a meminfo-style file at `path' (machine-wide /proc/meminfo, or a
+ * node-specific sysfs meminfo).
+ * Sets *local_memory (bytes, from the "MemTotal:" line given in kB) and,
+ * unless `onlytotal', *meminfo_hugepages_size (bytes) and
+ * *meminfo_hugepages_count.
+ * The hugepage fields are searched only after the MemTotal match,
+ * relying on the kernel's field ordering in these files.
+ * NOTE(review): outputs are left untouched when the file is unreadable
+ * or a field is missing -- callers must pre-initialize them; confirm
+ * all callers do.
+ */
+static void
+hwloc_parse_meminfo_info(struct hwloc_linux_backend_data_s *data,
+			 const char *path,
+			 uint64_t *local_memory,
+			 uint64_t *meminfo_hugepages_count,
+			 uint64_t *meminfo_hugepages_size,
+			 int onlytotal)
+{
+  char *tmp;
+  char buffer[4096];
+  unsigned long long number;
+
+  if (hwloc_read_path_by_length(path, buffer, sizeof(buffer), data->root_fd) < 0)
+    return;
+
+  tmp = strstr(buffer, "MemTotal: "); /* MemTotal: %llu kB */
+  if (tmp) {
+    number = strtoull(tmp+10, NULL, 10);
+    *local_memory = number << 10; /* kB -> bytes */
+
+    if (onlytotal)
+      return;
+
+    tmp = strstr(tmp, "Hugepagesize: "); /* Hugepagesize: %llu */
+    if (tmp) {
+      number = strtoull(tmp+14, NULL, 10);
+      *meminfo_hugepages_size = number << 10; /* kB -> bytes */
+
+      tmp = strstr(tmp, "HugePages_Free: "); /* HugePages_Free: %llu */
+      if (tmp) {
+	number = strtoull(tmp+16, NULL, 10);
+	*meminfo_hugepages_count = number;
+      }
+    }
+  }
+}
+
+#define SYSFS_NUMA_NODE_PATH_LEN 128
+
+/* Scan `dirpath' (a sysfs "hugepages" directory) for hugepages-<size>kB
+ * subdirectories and record each size/count into memory->page_types[].
+ * Slot 0 is reserved for the normal page size, hence index_ starts at 1.
+ * *remaining_local_memory is reduced by the memory used by huge pages.
+ * The caller must have allocated page_types with enough slots (one per
+ * subdirectory plus the normal-page slot).
+ */
+static void
+hwloc_parse_hugepages_info(struct hwloc_linux_backend_data_s *data,
+			   const char *dirpath,
+			   struct hwloc_obj_memory_s *memory,
+			   uint64_t *remaining_local_memory)
+{
+  DIR *dir;
+  struct dirent *dirent;
+  unsigned long index_ = 1;
+  char line[64];
+  char path[SYSFS_NUMA_NODE_PATH_LEN];
+
+  dir = hwloc_opendir(dirpath, data->root_fd);
+  if (dir) {
+    while ((dirent = readdir(dir)) != NULL) {
+      int err;
+      if (strncmp(dirent->d_name, "hugepages-", 10))
+        continue;
+      /* the directory name encodes the page size in kB */
+      memory->page_types[index_].size = strtoul(dirent->d_name+10, NULL, 0) * 1024ULL;
+      err = snprintf(path, sizeof(path), "%s/%s/nr_hugepages", dirpath, dirent->d_name);
+      if ((size_t) err < sizeof(path)
+	  && !hwloc_read_path_by_length(path, line, sizeof(line), data->root_fd)) {
+	/* these are the actual total amount of huge pages */
+	memory->page_types[index_].count = strtoull(line, NULL, 0);
+	*remaining_local_memory -= memory->page_types[index_].count * memory->page_types[index_].size;
+	index_++;
+      }
+    }
+    closedir(dir);
+    memory->page_types_len = index_;
+  }
+}
+
+/* Fill Machine-level memory info: total memory from /proc/meminfo, and
+ * page types (normal + huge pages) from sysfs or meminfo.
+ * page_types is only allocated when the normal page size can be known:
+ * on the local system, or when HWLOC_DEBUG_PAGESIZE is set for testing.
+ */
+static void
+hwloc_get_procfs_meminfo_info(struct hwloc_topology *topology,
+			      struct hwloc_linux_backend_data_s *data,
+			      struct hwloc_obj_memory_s *memory)
+{
+  /* NOTE(review): meminfo_hugepages_count is only written by
+   * hwloc_parse_meminfo_info() when meminfo has a HugePages_Free line;
+   * otherwise it may be read uninitialized below -- confirm. */
+  uint64_t meminfo_hugepages_count, meminfo_hugepages_size = 0;
+  struct stat st;
+  int has_sysfs_hugepages = 0;
+  const char *pagesize_env = getenv("HWLOC_DEBUG_PAGESIZE");
+  int types = 2;
+  int err;
+
+  err = hwloc_stat("/sys/kernel/mm/hugepages", &st, data->root_fd);
+  if (!err) {
+    /* one slot per hugepages-* subdirectory (st_nlink-2 subdirectories
+     * on typical Linux filesystems) plus one for the normal page size */
+    types = 1 + st.st_nlink-2;
+    has_sysfs_hugepages = 1;
+  }
+
+  if (topology->is_thissystem || pagesize_env) {
+    /* we cannot report any page_type info unless we have the page size.
+     * we'll take it either from the system if local, or from the debug env variable
+     */
+    memory->page_types_len = types;
+    memory->page_types = calloc(types, sizeof(*memory->page_types));
+  }
+
+  if (topology->is_thissystem) {
+    /* Get the page and hugepage sizes from sysconf */
+#if HAVE_DECL__SC_LARGE_PAGESIZE
+    memory->page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
+#endif
+    memory->page_types[0].size = data->pagesize; /* might be overwritten later by /proc/meminfo or sysfs */
+  }
+
+  hwloc_parse_meminfo_info(data, "/proc/meminfo",
+			   &memory->local_memory,
+			   &meminfo_hugepages_count, &meminfo_hugepages_size,
+			   memory->page_types == NULL);
+
+  if (memory->page_types) {
+    uint64_t remaining_local_memory = memory->local_memory;
+    if (has_sysfs_hugepages) {
+      /* read from node%d/hugepages/hugepages-%skB/nr_hugepages */
+      hwloc_parse_hugepages_info(data, "/sys/kernel/mm/hugepages", memory, &remaining_local_memory);
+    } else {
+      /* use what we found in meminfo */
+      if (meminfo_hugepages_size) {
+        memory->page_types[1].size = meminfo_hugepages_size;
+        memory->page_types[1].count = meminfo_hugepages_count;
+        remaining_local_memory -= meminfo_hugepages_count * meminfo_hugepages_size;
+      } else {
+        /* no hugepage info at all, drop the hugepage slot */
+        memory->page_types_len = 1;
+      }
+    }
+
+    if (pagesize_env) {
+      /* We cannot get the pagesize if not thissystem, use the env-given one to experience the code during make check */
+      memory->page_types[0].size = strtoull(pagesize_env, NULL, 10);
+      /* If failed, use 4kB */
+      if (!memory->page_types[0].size)
+	memory->page_types[0].size = 4096;
+    }
+    assert(memory->page_types[0].size); /* from sysconf if local or from the env */
+    /* memory->page_types[1].size from sysconf if local, or from /proc/meminfo, or from sysfs,
+     * may be 0 if no hugepage support in the kernel */
+
+    /* whatever is not used by huge pages is counted as normal pages */
+    memory->page_types[0].count = remaining_local_memory / memory->page_types[0].size;
+  }
+}
+
+/* Fill per-NUMA-node memory info from sysfs: total memory from
+ * node%d/meminfo, hugepages from node%d/hugepages when present.
+ * Node meminfo carries no hugepage size, so the machine-wide size found
+ * earlier by hwloc_get_procfs_meminfo_info() is reused.
+ */
+static void
+hwloc_sysfs_node_meminfo_info(struct hwloc_topology *topology,
+			      struct hwloc_linux_backend_data_s *data,
+			      const char *syspath, int node,
+			      struct hwloc_obj_memory_s *memory)
+{
+  char path[SYSFS_NUMA_NODE_PATH_LEN];
+  char meminfopath[SYSFS_NUMA_NODE_PATH_LEN];
+  uint64_t meminfo_hugepages_count = 0;
+  uint64_t meminfo_hugepages_size = 0;
+  struct stat st;
+  int has_sysfs_hugepages = 0;
+  int types = 2;
+  int err;
+
+  sprintf(path, "%s/node%d/hugepages", syspath, node);
+  err = hwloc_stat(path, &st, data->root_fd);
+  if (!err) {
+    /* one slot per hugepages-* subdirectory plus the normal-page slot */
+    types = 1 + st.st_nlink-2;
+    has_sysfs_hugepages = 1;
+  }
+
+  if (topology->is_thissystem) {
+    memory->page_types_len = types;
+    /* NOTE(review): malloc result is not checked before memset -- would
+     * crash on allocation failure; confirm whether that is acceptable. */
+    memory->page_types = malloc(types*sizeof(*memory->page_types));
+    memset(memory->page_types, 0, types*sizeof(*memory->page_types));
+  }
+
+  sprintf(meminfopath, "%s/node%d/meminfo", syspath, node);
+  hwloc_parse_meminfo_info(data, meminfopath,
+			   &memory->local_memory,
+			   &meminfo_hugepages_count, NULL /* no hugepage size in node-specific meminfo */,
+			   memory->page_types == NULL);
+
+  if (memory->page_types) {
+    uint64_t remaining_local_memory = memory->local_memory;
+    if (has_sysfs_hugepages) {
+      /* read from node%d/hugepages/hugepages-%skB/nr_hugepages */
+      hwloc_parse_hugepages_info(data, path, memory, &remaining_local_memory);
+    } else {
+      /* get hugepage size from machine-specific meminfo since there is no size in node-specific meminfo,
+       * hwloc_get_procfs_meminfo_info must have been called earlier */
+      meminfo_hugepages_size = topology->levels[0][0]->memory.page_types[1].size;
+      /* use what we found in meminfo */
+      if (meminfo_hugepages_size) {
+        memory->page_types[1].count = meminfo_hugepages_count;
+        memory->page_types[1].size = meminfo_hugepages_size;
+        remaining_local_memory -= meminfo_hugepages_count * meminfo_hugepages_size;
+      } else {
+        memory->page_types_len = 1;
+      }
+    }
+    /* update what's remaining as normal pages */
+    memory->page_types[0].size = data->pagesize;
+    memory->page_types[0].count = remaining_local_memory / memory->page_types[0].size;
+  }
+}
+
+/* Read the sysfs node%u/distance files of the nbnodes nodes whose OS
+ * indexes are listed in `indexes', filling the nbnodes*nbnodes matrix
+ * `distances' row by row (row i = distances from node indexes[i]).
+ * Returns 0 on success, -1 on allocation, read or parse failure.
+ */
+static int
+hwloc_parse_nodes_distances(const char *path, unsigned nbnodes, unsigned *indexes, uint64_t *distances, int fsroot_fd)
+{
+  size_t len = (10+1)*nbnodes; /* one space-separated %u (at most 10 digits) per node */
+  uint64_t *curdist = distances;
+  char *string;
+  unsigned i;
+
+  string = malloc(len); /* space-separated %d */
+  if (!string)
+    goto out;
+
+  for(i=0; i<nbnodes; i++) {
+    unsigned osnode = indexes[i];
+    char distancepath[SYSFS_NUMA_NODE_PATH_LEN];
+    char *tmp, *next;
+    unsigned found;
+
+    /* Linux nodeX/distance file contains distance from X to other localities (from ACPI SLIT table or so),
+     * store them in slots X*N...X*N+N-1 */
+    sprintf(distancepath, "%s/node%u/distance", path, osnode);
+    if (hwloc_read_path_by_length(distancepath, string, len, fsroot_fd) < 0)
+      goto out_with_string;
+
+    tmp = string;
+    found = 0;
+    while (tmp) {
+      unsigned distance = strtoul(tmp, &next, 0); /* stored as a %d */
+      if (next == tmp)
+	break;
+      *curdist = (uint64_t) distance;
+      curdist++;
+      found++;
+      if (found == nbnodes)
+	break;
+      tmp = next+1; /* skip the separator */
+    }
+    /* a partial row means a truncated or garbled file, abort */
+    if (found != nbnodes)
+      goto out_with_string;
+  }
+
+  free(string);
+  return 0;
+
+ out_with_string:
+  free(string);
+ out:
+  return -1;
+}
+
+/* Read one DMI sysfs attribute: `dmi_name' is appended to `path' at
+ * offset `pathlen' (the caller guarantees enough room), and the first
+ * line of the file, if non-empty, is attached to `obj' as info key
+ * `hwloc_name'. */
+static void
+hwloc__get_dmi_id_one_info(struct hwloc_linux_backend_data_s *data,
+			   hwloc_obj_t obj,
+			   char *path, unsigned pathlen,
+			   const char *dmi_name, const char *hwloc_name)
+{
+  char dmi_line[64];
+
+  strcpy(path+pathlen, dmi_name);
+  if (hwloc_read_path_by_length(path, dmi_line, sizeof(dmi_line), data->root_fd) < 0)
+    return;
+
+  if (dmi_line[0] != '\0') {
+    char *tmp = strchr(dmi_line, '\n');
+    if (tmp)
+      *tmp = '\0'; /* strip the trailing newline */
+    hwloc_debug("found %s '%s'\n", hwloc_name, dmi_line);
+    hwloc_obj_add_info(obj, hwloc_name, dmi_line);
+  }
+}
+
+/* Annotate `obj' with DMI product/board/chassis/bios strings from
+ * /sys/devices/virtual/dmi/id, falling back to the older
+ * /sys/class/dmi/id location.  The directory is opened only to check
+ * that it exists; pathlen is the strlen of the chosen directory. */
+static void
+hwloc__get_dmi_id_info(struct hwloc_linux_backend_data_s *data, hwloc_obj_t obj)
+{
+  char path[128];
+  unsigned pathlen;
+  DIR *dir;
+
+  strcpy(path, "/sys/devices/virtual/dmi/id");
+  dir = hwloc_opendir(path, data->root_fd);
+  if (dir) {
+    pathlen = 27; /* strlen of the path above */
+  } else {
+    strcpy(path, "/sys/class/dmi/id");
+    dir = hwloc_opendir(path, data->root_fd);
+    if (dir)
+      pathlen = 17; /* strlen of the path above */
+    else
+      return;
+  }
+  closedir(dir);
+
+  path[pathlen++] = '/';
+
+  hwloc__get_dmi_id_one_info(data, obj, path, pathlen, "product_name", "DMIProductName");
+  hwloc__get_dmi_id_one_info(data, obj, path, pathlen, "product_version", "DMIProductVersion");
+  hwloc__get_dmi_id_one_info(data, obj, path, pathlen, "product_serial", "DMIProductSerial");
+  hwloc__get_dmi_id_one_info(data, obj, path, pathlen, "product_uuid", "DMIProductUUID");
+  hwloc__get_dmi_id_one_info(data, obj, path, pathlen, "board_vendor", "DMIBoardVendor");
+  hwloc__get_dmi_id_one_info(data, obj, path, pathlen, "board_name", "DMIBoardName");
+  hwloc__get_dmi_id_one_info(data, obj, path, pathlen, "board_version", "DMIBoardVersion");
+  hwloc__get_dmi_id_one_info(data, obj, path, pathlen, "board_serial", "DMIBoardSerial");
+  hwloc__get_dmi_id_one_info(data, obj, path, pathlen, "board_asset_tag", "DMIBoardAssetTag");
+  hwloc__get_dmi_id_one_info(data, obj, path, pathlen, "chassis_vendor", "DMIChassisVendor");
+  hwloc__get_dmi_id_one_info(data, obj, path, pathlen, "chassis_type", "DMIChassisType");
+  hwloc__get_dmi_id_one_info(data, obj, path, pathlen, "chassis_version", "DMIChassisVersion");
+  hwloc__get_dmi_id_one_info(data, obj, path, pathlen, "chassis_serial", "DMIChassisSerial");
+  hwloc__get_dmi_id_one_info(data, obj, path, pathlen, "chassis_asset_tag", "DMIChassisAssetTag");
+  hwloc__get_dmi_id_one_info(data, obj, path, pathlen, "bios_vendor", "DMIBIOSVendor");
+  hwloc__get_dmi_id_one_info(data, obj, path, pathlen, "bios_version", "DMIBIOSVersion");
+  hwloc__get_dmi_id_one_info(data, obj, path, pathlen, "bios_date", "DMIBIOSDate");
+  hwloc__get_dmi_id_one_info(data, obj, path, pathlen, "sys_vendor", "DMISysVendor");
+}
+
+
+/***********************************
+ ****** Device tree Discovery ******
+ ***********************************/
+
+/* Reads the entire file and returns bytes read if bytes_read != NULL
+ * Returned pointer can be freed by using free().  */
+/* The buffer is sized from fstat(); a short read is not treated as an
+ * error, *bytes_read reports the actual count.  Returns NULL on
+ * open/stat/read/malloc failure. */
+static void *
+hwloc_read_raw(const char *p, const char *p1, size_t *bytes_read, int root_fd)
+{
+  char fname[256];
+  char *ret = NULL;
+  struct stat fs;
+  int file = -1;
+
+  snprintf(fname, sizeof(fname), "%s/%s", p, p1);
+
+  file = hwloc_open(fname, root_fd);
+  if (-1 == file) {
+      goto out_no_close;
+  }
+  if (fstat(file, &fs)) {
+    goto out;
+  }
+
+  ret = (char *) malloc(fs.st_size);
+  if (NULL != ret) {
+    ssize_t cb = read(file, ret, fs.st_size);
+    if (cb == -1) {
+      free(ret);
+      ret = NULL;
+    } else {
+      if (NULL != bytes_read)
+        *bytes_read = cb;
+    }
+  }
+
+ out:
+  close(file);
+ out_no_close:
+  return ret;
+}
+
+/* Reads the entire file and returns it as a 0-terminated string
+ * Returned pointer can be freed by using free().  */
+/* If the content is not already NUL-terminated, the buffer is grown by
+ * one byte to append the terminator. */
+static char *
+hwloc_read_str(const char *p, const char *p1, int root_fd)
+{
+  size_t cb = 0;
+  char *ret = hwloc_read_raw(p, p1, &cb, root_fd);
+  if ((NULL != ret) && (0 < cb) && (0 != ret[cb-1])) {
+    char *tmp = realloc(ret, cb + 1);
+    if (!tmp) {
+      free(ret);
+      return NULL;
+    }
+    ret = tmp;
+    ret[cb] = 0;
+  }
+  return ret;
+}
+
+/* Reads first 32bit bigendian value */
+/* Read property `p1' under directory `p' as a single 32-bit big-endian
+ * value, storing the host-order result in *buf.
+ * Returns sizeof(uint32_t) on success, or -1 with errno set to EINVAL
+ * when the file does not contain exactly 4 bytes.
+ * NOTE(review): htonl is used where ntohl would be the natural choice;
+ * they behave identically on the platforms involved -- confirm. */
+static ssize_t
+hwloc_read_unit32be(const char *p, const char *p1, uint32_t *buf, int root_fd)
+{
+  size_t cb = 0;
+  uint32_t *tmp = hwloc_read_raw(p, p1, &cb, root_fd);
+  if (sizeof(*buf) != cb) {
+    errno = EINVAL;
+    free(tmp); /* tmp is either NULL or contains useless things */
+    return -1;
+  }
+  *buf = htonl(*tmp);
+  free(tmp);
+  return sizeof(*buf);
+}
+
+/* Growable list of device-tree cpu/cache entries: each records the node
+ * name, its phandle, the phandle of its next-level ("l2") cache, and the
+ * cpuset (NULL for cache-only nodes). */
+typedef struct {
+  unsigned int n, allocated;
+  struct {
+    hwloc_bitmap_t cpuset;
+    uint32_t phandle;
+    uint32_t l2_cache;
+    char *name;
+  } *p;
+} device_tree_cpus_t;
+
+/* Append one entry to `cpus', growing the array geometrically (starting
+ * at 64 slots).  The cpuset is duplicated; on realloc failure the entry
+ * is silently dropped. */
+static void
+add_device_tree_cpus_node(device_tree_cpus_t *cpus, hwloc_bitmap_t cpuset,
+    uint32_t l2_cache, uint32_t phandle, const char *name)
+{
+  if (cpus->n == cpus->allocated) {
+    void *tmp;
+    unsigned allocated;
+    if (!cpus->allocated)
+      allocated = 64;
+    else
+      allocated = 2 * cpus->allocated;
+    tmp = realloc(cpus->p, allocated * sizeof(cpus->p[0]));
+    if (!tmp)
+      return; /* failed to realloc, ignore this entry */
+    cpus->p = tmp;
+    cpus->allocated = allocated;
+  }
+  cpus->p[cpus->n].phandle = phandle;
+  cpus->p[cpus->n].cpuset = (NULL == cpuset)?NULL:hwloc_bitmap_dup(cpuset);
+  cpus->p[cpus->n].l2_cache = l2_cache;
+  cpus->p[cpus->n].name = strdup(name);
+  ++cpus->n;
+}
+
+/* Walks over the cache list in order to detect nested caches and CPU mask for each */
+/* Recursively OR into `cpuset' the cpusets of all cpus whose (possibly
+ * indirect) cache chain goes through the cache with phandle `phandle',
+ * incrementing *level once per extra nesting depth.  Returns 0 when at
+ * least one cpuset was found, -1 otherwise or on invalid arguments. */
+static int
+look_powerpc_device_tree_discover_cache(device_tree_cpus_t *cpus,
+    uint32_t phandle, unsigned int *level, hwloc_bitmap_t cpuset)
+{
+  unsigned int i;
+  int ret = -1;
+  if ((NULL == level) || (NULL == cpuset) || phandle == (uint32_t) -1)
+    return ret;
+  for (i = 0; i < cpus->n; ++i) {
+    if (phandle != cpus->p[i].l2_cache)
+      continue;
+    if (NULL != cpus->p[i].cpuset) {
+      /* a cpu points at this cache, take its cpuset */
+      hwloc_bitmap_or(cpuset, cpuset, cpus->p[i].cpuset);
+      ret = 0;
+    } else {
+      /* a lower cache points at this cache, recurse one level deeper */
+      ++(*level);
+      if (0 == look_powerpc_device_tree_discover_cache(cpus,
+            cpus->p[i].phandle, level, cpuset))
+        ret = 0;
+    }
+  }
+  return ret;
+}
+
+/* Insert one cache object of the given depth/type with the given
+ * geometry, unless cache_size is 0, the depth/type combination is
+ * unknown, or the object type is filtered out.  cache_sets == 1 is
+ * treated as unknown associativity (likely a bogus firmware value). */
+static void
+try__add_cache_from_device_tree_cpu(struct hwloc_topology *topology,
+				    unsigned int level, hwloc_obj_cache_type_t ctype,
+				    uint32_t cache_line_size, uint32_t cache_size, uint32_t cache_sets,
+				    hwloc_bitmap_t cpuset)
+{
+  struct hwloc_obj *c = NULL;
+  hwloc_obj_type_t otype;
+
+  if (0 == cache_size)
+    return;
+
+  otype = hwloc_cache_type_by_depth_type(level, ctype);
+  if (otype == HWLOC_OBJ_TYPE_NONE)
+    return;
+  if (!hwloc_filter_check_keep_object_type(topology, otype))
+    return;
+
+  c = hwloc_alloc_setup_object(topology, otype, -1);
+  c->attr->cache.depth = level;
+  c->attr->cache.linesize = cache_line_size;
+  c->attr->cache.size = cache_size;
+  c->attr->cache.type = ctype;
+  if (cache_sets == 1)
+    /* likely wrong, make it unknown */
+    cache_sets = 0;
+  if (cache_sets && cache_line_size)
+    c->attr->cache.associativity = cache_size / (cache_sets * cache_line_size);
+  else
+    c->attr->cache.associativity = 0;
+  c->cpuset = hwloc_bitmap_dup(cpuset);
+  hwloc_debug_2args_bitmap("cache (%s) depth %u has cpuset %s\n",
+			   ctype == HWLOC_OBJ_CACHE_UNIFIED ? "unified" : (ctype == HWLOC_OBJ_CACHE_DATA ? "data" : "instruction"),
+			   level, c->cpuset);
+  hwloc_insert_object_by_cpuset(topology, c);
+}
+
+/* Read the cache description properties of device-tree cpu directory
+ * `cpu' and insert the corresponding cache object(s) at depth `level':
+ * one unified cache when the "cache-unified" property exists, otherwise
+ * separate data and instruction caches. */
+static void
+try_add_cache_from_device_tree_cpu(struct hwloc_topology *topology,
+				   struct hwloc_linux_backend_data_s *data,
+				   const char *cpu, unsigned int level, hwloc_bitmap_t cpuset)
+{
+  /* d-cache-block-size - ignore */
+  /* d-cache-line-size - to read, in bytes */
+  /* d-cache-sets - ignore */
+  /* d-cache-size - to read, in bytes */
+  /* i-cache, same for instruction */
+  /* cache-unified only exist if data and instruction caches are unified */
+  /* d-tlb-sets - ignore */
+  /* d-tlb-size - ignore, always 0 on power6 */
+  /* i-tlb-*, same */
+  uint32_t d_cache_line_size = 0, d_cache_size = 0, d_cache_sets = 0;
+  uint32_t i_cache_line_size = 0, i_cache_size = 0, i_cache_sets = 0;
+  char unified_path[1024];
+  struct stat statbuf;
+  int unified;
+
+  snprintf(unified_path, sizeof(unified_path), "%s/cache-unified", cpu);
+  unified = (hwloc_stat(unified_path, &statbuf, data->root_fd) == 0);
+
+  /* read failures leave the values at 0, which makes
+   * try__add_cache_from_device_tree_cpu() skip that cache */
+  hwloc_read_unit32be(cpu, "d-cache-line-size", &d_cache_line_size,
+      data->root_fd);
+  hwloc_read_unit32be(cpu, "d-cache-size", &d_cache_size,
+      data->root_fd);
+  hwloc_read_unit32be(cpu, "d-cache-sets", &d_cache_sets,
+      data->root_fd);
+  hwloc_read_unit32be(cpu, "i-cache-line-size", &i_cache_line_size,
+      data->root_fd);
+  hwloc_read_unit32be(cpu, "i-cache-size", &i_cache_size,
+      data->root_fd);
+  hwloc_read_unit32be(cpu, "i-cache-sets", &i_cache_sets,
+      data->root_fd);
+
+  if (!unified)
+    try__add_cache_from_device_tree_cpu(topology, level, HWLOC_OBJ_CACHE_INSTRUCTION,
+					i_cache_line_size, i_cache_size, i_cache_sets, cpuset);
+  try__add_cache_from_device_tree_cpu(topology, level, unified ? HWLOC_OBJ_CACHE_UNIFIED : HWLOC_OBJ_CACHE_DATA,
+				      d_cache_line_size, d_cache_size, d_cache_sets, cpuset);
+}
+
+/*
+ * Discovers L1/L2/L3 cache information on IBM PowerPC systems for old kernels (RHEL5.*)
+ * which provide NUMA nodes information without any details
+ */
+static void
+look_powerpc_device_tree(struct hwloc_topology *topology,
+			 struct hwloc_linux_backend_data_s *data)
+{
+  device_tree_cpus_t cpus;
+  const char ofroot[] = "/proc/device-tree/cpus";
+  unsigned int i;
+  int root_fd = data->root_fd;
+  DIR *dt = hwloc_opendir(ofroot, root_fd);
+  struct dirent *dirent;
+
+  if (NULL == dt)
+    return;
+
+  /* only works for Power so far, and not useful on ARM */
+  if (data->arch != HWLOC_LINUX_ARCH_POWER) {
+    closedir(dt);
+    return;
+  }
+
+  cpus.n = 0;
+  cpus.p = NULL;
+  cpus.allocated = 0;
+
+  /* first pass: collect every cpu and cache node of the device-tree */
+  while (NULL != (dirent = readdir(dt))) {
+    char cpu[256];
+    char *device_type;
+    uint32_t reg = -1, l2_cache = -1, phandle = -1; /* -1 means "not found" */
+    int err;
+
+    if ('.' == dirent->d_name[0])
+      continue;
+
+    err = snprintf(cpu, sizeof(cpu), "%s/%s", ofroot, dirent->d_name);
+    if ((size_t) err >= sizeof(cpu))
+      continue;
+
+    device_type = hwloc_read_str(cpu, "device_type", root_fd);
+    if (NULL == device_type)
+      continue;
+
+    hwloc_read_unit32be(cpu, "reg", &reg, root_fd);
+    /* the next-level cache and the phandle appear under several legacy property names */
+    if (hwloc_read_unit32be(cpu, "next-level-cache", &l2_cache, root_fd) == -1)
+      hwloc_read_unit32be(cpu, "l2-cache", &l2_cache, root_fd);
+    if (hwloc_read_unit32be(cpu, "phandle", &phandle, root_fd) == -1)
+      if (hwloc_read_unit32be(cpu, "ibm,phandle", &phandle, root_fd) == -1)
+        hwloc_read_unit32be(cpu, "linux,phandle", &phandle, root_fd);
+
+    if (0 == strcmp(device_type, "cache")) {
+      add_device_tree_cpus_node(&cpus, NULL, l2_cache, phandle, dirent->d_name);
+    }
+    else if (0 == strcmp(device_type, "cpu")) {
+      /* Found CPU */
+      hwloc_bitmap_t cpuset = NULL;
+      size_t cb = 0;
+      uint32_t *threads = hwloc_read_raw(cpu, "ibm,ppc-interrupt-server#s", &cb, root_fd);
+      uint32_t nthreads = cb / sizeof(threads[0]);
+
+      if (NULL != threads) {
+        cpuset = hwloc_bitmap_alloc();
+        for (i = 0; i < nthreads; ++i) {
+          /* only keep threads that the kernel actually reports */
+          if (hwloc_bitmap_isset(topology->levels[0][0]->complete_cpuset, ntohl(threads[i])))
+            hwloc_bitmap_set(cpuset, ntohl(threads[i]));
+        }
+        free(threads);
+      } else if ((unsigned int)-1 != reg) {
+        /* Doesn't work on ARM because cpu "reg" do not start at 0.
+	 * We know the first cpu "reg" is the lowest. The others are likely
+	 * in order assuming the device-tree shows objects in order.
+	 */
+        cpuset = hwloc_bitmap_alloc();
+        hwloc_bitmap_set(cpuset, reg);
+      }
+
+      if (NULL == cpuset) {
+        hwloc_debug("%s has no \"reg\" property, skipping\n", cpu);
+      } else {
+        struct hwloc_obj *core = NULL;
+        add_device_tree_cpus_node(&cpus, cpuset, l2_cache, phandle, dirent->d_name);
+
+	if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) {
+	  /* Add core */
+	  core = hwloc_alloc_setup_object(topology, HWLOC_OBJ_CORE, reg);
+	  core->cpuset = hwloc_bitmap_dup(cpuset);
+	  hwloc_insert_object_by_cpuset(topology, core);
+	}
+
+        /* Add L1 cache */
+        try_add_cache_from_device_tree_cpu(topology, data, cpu, 1, cpuset);
+
+        hwloc_bitmap_free(cpuset);
+      }
+    }
+    free(device_type);
+  }
+  closedir(dt);
+
+  /* No cores and L2 cache were found, exiting */
+  if (0 == cpus.n) {
+    hwloc_debug("No cores and L2 cache were found in %s, exiting\n", ofroot);
+    return;
+  }
+
+#ifdef HWLOC_DEBUG
+  for (i = 0; i < cpus.n; ++i) {
+    hwloc_debug("%u: %s  ibm,phandle=%08X l2_cache=%08X ",
+      i, cpus.p[i].name, cpus.p[i].phandle, cpus.p[i].l2_cache);
+    if (NULL == cpus.p[i].cpuset) {
+      hwloc_debug("%s\n", "no cpuset");
+    } else {
+      hwloc_debug_bitmap("cpuset %s\n", cpus.p[i].cpuset);
+    }
+  }
+#endif
+
+  /* Scan L2/L3/... caches */
+  for (i = 0; i < cpus.n; ++i) {
+    unsigned int level = 2;
+    hwloc_bitmap_t cpuset;
+    /* Skip real CPUs */
+    if (NULL != cpus.p[i].cpuset)
+      continue;
+
+    /* Calculate cache level and CPU mask */
+    cpuset = hwloc_bitmap_alloc();
+    if (0 == look_powerpc_device_tree_discover_cache(&cpus,
+          cpus.p[i].phandle, &level, cpuset)) {
+      char cpu[256];
+      snprintf(cpu, sizeof(cpu), "%s/%s", ofroot, cpus.p[i].name);
+      try_add_cache_from_device_tree_cpu(topology, data, cpu, level, cpuset);
+    }
+    hwloc_bitmap_free(cpuset);
+  }
+
+  /* Do cleanup */
+  for (i = 0; i < cpus.n; ++i) {
+    hwloc_bitmap_free(cpus.p[i].cpuset);
+    free(cpus.p[i].name);
+  }
+  free(cpus.p);
+}
+
+/* KNL properties parsed from the hwloc-dump-hwdata output file. */
+struct knl_hwdata {
+  char memory_mode[32];
+  char cluster_mode[32];
+  long long int mcdram_cache_size; /* mcdram_cache_* is valid only if size > 0 */
+  int mcdram_cache_associativity;
+  int mcdram_cache_inclusiveness;
+  int mcdram_cache_line_size;
+};
+
+/* Try to handle knl hwdata properties
+ * Returns 0 on success and -1 otherwise */
+/* Parse <dumped_hwdata_dirname>/knl_memoryside_cache for the MCDRAM
+ * cache geometry and (format version >= 2) the cluster/memory modes.
+ * Returns 0 when the file was read and had a version header (individual
+ * fields may still be invalid: mcdram_cache_size is reset to -1 when the
+ * cache description is incomplete), -1 when the file is missing,
+ * unreadable, or lacks the version header. */
+static int hwloc_linux_try_handle_knl_hwdata_properties(struct hwloc_linux_backend_data_s *data,
+							struct knl_hwdata *hwdata)
+{
+  char *knl_cache_file;
+  int version = 0;
+  char buffer[512] = {0};
+  char *data_beg = NULL;
+
+  /* mark all fields as "not found" first */
+  hwdata->memory_mode[0] = '\0';
+  hwdata->cluster_mode[0] = '\0';
+  hwdata->mcdram_cache_size = -1;
+  hwdata->mcdram_cache_associativity = -1;
+  hwdata->mcdram_cache_inclusiveness = -1;
+  hwdata->mcdram_cache_line_size = -1;
+
+  if (asprintf(&knl_cache_file, "%s/knl_memoryside_cache", data->dumped_hwdata_dirname) < 0)
+    return -1;
+
+  hwloc_debug("Reading knl cache data from: %s\n", knl_cache_file);
+  if (hwloc_read_path_by_length(knl_cache_file, buffer, sizeof(buffer), data->root_fd) < 0) {
+    hwloc_debug("Unable to open KNL data file `%s' (%s)\n", knl_cache_file, strerror(errno));
+    free(knl_cache_file);
+    return -1;
+  }
+  free(knl_cache_file);
+
+  data_beg = &buffer[0];
+
+  /* file must start with version information */
+  if (sscanf(data_beg, "version: %d", &version) != 1) {
+    fprintf(stderr, "Invalid knl_memoryside_cache header, expected \"version: <int>\".\n");
+    return -1;
+  }
+
+  /* parse the remaining "key: value" lines, gated by format version */
+  while (1) {
+    char *line_end = strstr(data_beg, "\n");
+    if (!line_end)
+        break;
+    if (version >= 1) {
+      /* NOTE(review): strlen("cache_size") omits the ':' unlike the
+       * checks below -- harmless prefix match, but inconsistent */
+      if (!strncmp("cache_size:", data_beg, strlen("cache_size"))) {
+          sscanf(data_beg, "cache_size: %lld", &hwdata->mcdram_cache_size);
+          hwloc_debug("read cache_size=%lld\n", hwdata->mcdram_cache_size);
+      } else if (!strncmp("line_size:", data_beg, strlen("line_size:"))) {
+          sscanf(data_beg, "line_size: %d", &hwdata->mcdram_cache_line_size);
+          hwloc_debug("read line_size=%d\n", hwdata->mcdram_cache_line_size);
+      } else if (!strncmp("inclusiveness:", data_beg, strlen("inclusiveness:"))) {
+          sscanf(data_beg, "inclusiveness: %d", &hwdata->mcdram_cache_inclusiveness);
+          hwloc_debug("read inclusiveness=%d\n", hwdata->mcdram_cache_inclusiveness);
+      } else if (!strncmp("associativity:", data_beg, strlen("associativity:"))) {
+          sscanf(data_beg, "associativity: %d\n", &hwdata->mcdram_cache_associativity);
+          hwloc_debug("read associativity=%d\n", hwdata->mcdram_cache_associativity);
+      }
+    }
+    if (version >= 2) {
+      if (!strncmp("cluster_mode: ", data_beg, strlen("cluster_mode: "))) {
+	size_t length;
+	data_beg += strlen("cluster_mode: ");
+	/* copy the value, truncated to fit the destination buffer */
+	length = line_end-data_beg;
+	if (length > sizeof(hwdata->cluster_mode)-1)
+	  length = sizeof(hwdata->cluster_mode)-1;
+	memcpy(hwdata->cluster_mode, data_beg, length);
+	hwdata->cluster_mode[length] = '\0';
+        hwloc_debug("read cluster_mode=%s\n", hwdata->cluster_mode);
+      } else if (!strncmp("memory_mode: ", data_beg, strlen("memory_mode: "))) {
+	size_t length;
+	data_beg += strlen("memory_mode: ");
+	length = line_end-data_beg;
+	if (length > sizeof(hwdata->memory_mode)-1)
+	  length = sizeof(hwdata->memory_mode)-1;
+	memcpy(hwdata->memory_mode, data_beg, length);
+	hwdata->memory_mode[length] = '\0';
+        hwloc_debug("read memory_mode=%s\n", hwdata->memory_mode);
+      }
+    }
+
+    data_beg = line_end + 1;
+  }
+
+  if (hwdata->mcdram_cache_size == -1
+      || hwdata->mcdram_cache_line_size == -1
+      || hwdata->mcdram_cache_associativity == -1
+      || hwdata->mcdram_cache_inclusiveness == -1) {
+    hwloc_debug("Incorrect file format cache_size=%lld line_size=%d associativity=%d inclusiveness=%d\n",
+		hwdata->mcdram_cache_size,
+		hwdata->mcdram_cache_line_size,
+		hwdata->mcdram_cache_associativity,
+		hwdata->mcdram_cache_inclusiveness);
+    hwdata->mcdram_cache_size = -1; /* mark cache as invalid */
+  }
+
+  return 0;
+}
+
+
+
+/**************************************
+ ****** Sysfs Topology Discovery ******
+ **************************************/
+
+static int
+look_sysfsnode(struct hwloc_topology *topology,
+	       struct hwloc_linux_backend_data_s *data,
+	       const char *path, unsigned *found)
+{
+  unsigned osnode;
+  unsigned nbnodes = 0;
+  DIR *dir;
+  struct dirent *dirent;
+  hwloc_bitmap_t nodeset;
+
+  /* NUMA nodes cannot be filtered out */
+
+  *found = 0;
+
+  /* Get the list of nodes first */
+  dir = hwloc_opendir(path, data->root_fd);
+  if (dir)
+    {
+      nodeset = hwloc_bitmap_alloc();
+      while ((dirent = readdir(dir)) != NULL)
+	{
+	  if (strncmp(dirent->d_name, "node", 4))
+	    continue;
+	  osnode = strtoul(dirent->d_name+4, NULL, 0);
+	  hwloc_bitmap_set(nodeset, osnode);
+	  nbnodes++;
+	}
+      closedir(dir);
+    }
+  else
+    return -1;
+
+  if (!nbnodes) {
+    hwloc_bitmap_free(nodeset);
+    return 0;
+  }
+
+  /* For convenience, put these declarations inside a block. */
+
+  {
+      hwloc_obj_t * nodes = calloc(nbnodes, sizeof(hwloc_obj_t));
+      unsigned *indexes = calloc(nbnodes, sizeof(unsigned));
+      uint64_t * distances = NULL;
+      struct knl_hwdata knl_hwdata;
+      int failednodes = 0;
+      unsigned index_;
+
+      if (NULL == nodes || NULL == indexes) {
+          free(nodes);
+          free(indexes);
+          hwloc_bitmap_free(nodeset);
+          nbnodes = 0;
+          goto out;
+      }
+
+      /* Unsparsify node indexes.
+       * We'll need them later because Linux groups sparse distances
+       * and keeps them in order in the sysfs distance files.
+       * It'll simplify things in the meantime.
+       */
+      index_ = 0;
+      hwloc_bitmap_foreach_begin (osnode, nodeset) {
+	indexes[index_] = osnode;
+	index_++;
+      } hwloc_bitmap_foreach_end();
+      hwloc_bitmap_free(nodeset);
+
+#ifdef HWLOC_DEBUG
+      hwloc_debug("%s", "NUMA indexes: ");
+      for (index_ = 0; index_ < nbnodes; index_++) {
+	hwloc_debug(" %u", indexes[index_]);
+      }
+      hwloc_debug("%s", "\n");
+#endif
+
+      /* Create NUMA objects */
+      for (index_ = 0; index_ < nbnodes; index_++) {
+          hwloc_obj_t node, res_obj;
+	  int annotate;
+
+	  osnode = indexes[index_];
+
+	  node = hwloc_get_numanode_obj_by_os_index(topology, osnode);
+	  annotate = (node != NULL);
+	  if (!annotate) {
+	    /* create a new node */
+	    char nodepath[SYSFS_NUMA_NODE_PATH_LEN];
+	    hwloc_bitmap_t cpuset;
+	    sprintf(nodepath, "%s/node%u/cpumap", path, osnode);
+	    cpuset = hwloc__alloc_read_path_as_cpumask(nodepath, data->root_fd);
+	    if (!cpuset) {
+	      /* This NUMA object won't be inserted, we'll ignore distances */
+	      failednodes++;
+	      continue;
+	    }
+
+	    node = hwloc_alloc_setup_object(topology, HWLOC_OBJ_NUMANODE, osnode);
+	    node->cpuset = cpuset;
+	    node->nodeset = hwloc_bitmap_alloc();
+	    hwloc_bitmap_set(node->nodeset, osnode);
+	  }
+          hwloc_sysfs_node_meminfo_info(topology, data, path, osnode, &node->memory);
+
+          hwloc_debug_1arg_bitmap("os node %u has cpuset %s\n",
+                                  osnode, node->cpuset);
+
+	  if (annotate) {
+	    nodes[index_] = node;
+	  } else {
+	    res_obj = hwloc_insert_object_by_cpuset(topology, node);
+	    if (node == res_obj) {
+	      nodes[index_] = node;
+	    } else {
+	      /* We got merged somehow, could be a buggy BIOS reporting wrong NUMA node cpuset.
+	       * This object disappeared, we'll ignore distances */
+	      failednodes++;
+	    }
+	  }
+      }
+
+      if (failednodes) {
+	/* failed to read/create some nodes, don't bother reading/fixing
+	 * a distance matrix that would likely be wrong anyway.
+	 */
+	nbnodes -= failednodes;
+      } else if (nbnodes > 1) {
+	distances = malloc(nbnodes*nbnodes*sizeof(*distances));
+      }
+
+      if (distances && hwloc_parse_nodes_distances(path, nbnodes, indexes, distances, data->root_fd) < 0) {
+	free(nodes);
+	free(distances);
+	free(indexes);
+	goto out;
+      }
+
+      free(indexes);
+
+      if (data->is_knl) {
+	char *env = getenv("HWLOC_KNL_NUMA_QUIRK");
+	int noquirk = (env && !atoi(env)) || !distances || !hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_GROUP);
+	int mscache;
+	unsigned i, j, closest;
+
+	hwloc_linux_try_handle_knl_hwdata_properties(data, &knl_hwdata);
+	mscache = knl_hwdata.mcdram_cache_size > 0 && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L3CACHE);
+
+	if (knl_hwdata.cluster_mode[0])
+	  hwloc_obj_add_info(topology->levels[0][0], "ClusterMode", knl_hwdata.cluster_mode);
+	if (knl_hwdata.memory_mode[0])
+	  hwloc_obj_add_info(topology->levels[0][0], "MemoryMode", knl_hwdata.memory_mode);
+
+	for(i=0; i<nbnodes; i++) {
+	  if (!hwloc_bitmap_iszero(nodes[i]->cpuset)) {
+	    /* DDR, see if there's a MCDRAM cache to add */
+	    if (mscache) {
+	      hwloc_obj_t cache = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L3CACHE, -1);
+	      if (cache) {
+		cache->attr->cache.depth = 3;
+		cache->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
+		cache->attr->cache.size = knl_hwdata.mcdram_cache_size;
+		cache->attr->cache.linesize = knl_hwdata.mcdram_cache_line_size;
+		cache->attr->cache.associativity = knl_hwdata.mcdram_cache_associativity;
+		hwloc_obj_add_info(cache, "Inclusive", knl_hwdata.mcdram_cache_inclusiveness ? "1" : "0");
+		cache->cpuset = hwloc_bitmap_dup(nodes[i]->cpuset);
+		cache->subtype = strdup("MemorySideCache");
+		hwloc_insert_object_by_cpuset(topology, cache);
+	      }
+	    }
+	    /* nothing else to do for DDR */
+	    continue;
+	  }
+	  /* MCDRAM */
+	  nodes[i]->subtype = strdup("MCDRAM");
+
+	  if (noquirk)
+	    continue;
+
+	  /* DDR is the closest node with CPUs */
+	  closest = (unsigned)-1;
+	  for(j=0; j<nbnodes; j++) {
+	    if (j==i)
+	      continue;
+	    if (hwloc_bitmap_iszero(nodes[j]->cpuset))
+	      /* nodes without CPU, that's another MCDRAM, skip it */
+	      continue;
+	    if (closest == (unsigned)-1 || distances[i*nbnodes+j]<distances[i*nbnodes+closest])
+	      closest = j;
+	  }
+	  if (closest != (unsigned) -1) {
+	    /* Add a Group for Cluster containing this MCDRAM + DDR */
+	    hwloc_obj_t cluster = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, -1);
+	    hwloc_obj_add_other_obj_sets(cluster, nodes[i]);
+	    hwloc_obj_add_other_obj_sets(cluster, nodes[closest]);
+	    cluster->subtype = strdup("Cluster");
+	    cluster->attr->group.kind = HWLOC_GROUP_KIND_INTEL_SUBNUMA_CLUSTER;
+	    hwloc_insert_object_by_cpuset(topology, cluster);
+	  }
+	}
+	if (!noquirk) {
+	  /* drop the distance matrix, it contradicts the above NUMA layout groups */
+	  free(distances);
+          free(nodes);
+          goto out;
+	}
+      }
+
+      if (distances)
+	hwloc_internal_distances_add(topology, nbnodes, nodes, distances,
+				     HWLOC_DISTANCES_KIND_FROM_OS|HWLOC_DISTANCES_KIND_MEANS_LATENCY,
+				     HWLOC_DISTANCES_FLAG_GROUP);
+      else
+	free(nodes);
+  }
+
+ out:
+  *found = nbnodes;
+  return 0;
+}
+
+/* Look at Linux' /sys/devices/system/cpu/cpu%d/topology/ */
+/* Builds one PU object per online cpu found under `path`, plus Package,
+ * Core, Book (s390, as a Group) and cache objects when the corresponding
+ * sysfs files exist and the object type is not filtered out.
+ * cpuinfo_Lprocs (may be NULL) carries /proc/cpuinfo key/value infos that
+ * get attached to the matching Package objects.
+ * Returns -1 if `path` cannot be opened, 0 otherwise. */
+static int
+look_sysfscpu(struct hwloc_topology *topology,
+	      struct hwloc_linux_backend_data_s *data,
+	      const char *path,
+	      struct hwloc_linux_cpuinfo_proc * cpuinfo_Lprocs, unsigned cpuinfo_numprocs)
+{
+  hwloc_bitmap_t cpuset; /* Set of cpus for which we have topology information */
+  hwloc_bitmap_t unknownset; /* Set of cpus to clear from kernel cpusets so that we know when first meeting an object */
+#define CPU_TOPOLOGY_STR_LEN 128
+  char str[CPU_TOPOLOGY_STR_LEN];
+  DIR *dir;
+  int i,j;
+  unsigned caches_added, merge_buggy_core_siblings;
+  hwloc_obj_t packages = NULL; /* temporary list of packages before actual insert in the tree */
+  int threadwithcoreid = data->is_amd_with_CU ? -1 : 0; /* -1 means we don't know yet if threads have their own coreids within thread_siblings */
+
+  /* fill the cpuset of interesting cpus */
+  dir = hwloc_opendir(path, data->root_fd);
+  if (!dir)
+    return -1;
+  else {
+    struct dirent *dirent;
+    cpuset = hwloc_bitmap_alloc();
+    unknownset = hwloc_bitmap_alloc();
+
+    while ((dirent = readdir(dir)) != NULL) {
+      unsigned long cpu;
+      char online[2];
+
+      if (strncmp(dirent->d_name, "cpu", 3))
+	continue;
+      cpu = strtoul(dirent->d_name+3, NULL, 0);
+
+      /* Maybe we don't have topology information but at least it exists */
+      hwloc_bitmap_set(topology->levels[0][0]->complete_cpuset, cpu);
+
+      /* check whether this processor is online */
+      sprintf(str, "%s/cpu%lu/online", path, cpu);
+      /* NOTE(review): only the leading digit of "online" is read/parsed by atoi;
+       * assumes the file starts with 0 or 1 — confirm for exotic kernels */
+      if (hwloc_read_path_by_length(str, online, sizeof(online), data->root_fd) == 0) {
+	if (!atoi(online)) {
+	  hwloc_debug("os proc %lu is offline\n", cpu);
+	  hwloc_bitmap_set(unknownset, cpu);
+	  continue;
+	}
+      }
+
+      /* check whether the kernel exports topology information for this cpu */
+      sprintf(str, "%s/cpu%lu/topology", path, cpu);
+      if (hwloc_access(str, X_OK, data->root_fd) < 0 && errno == ENOENT) {
+	hwloc_debug("os proc %lu has no accessible %s/cpu%lu/topology\n",
+		   cpu, path, cpu);
+	hwloc_bitmap_set(unknownset, cpu);
+	continue;
+      }
+
+      hwloc_bitmap_set(cpuset, cpu);
+    }
+    closedir(dir);
+  }
+
+  topology->support.discovery->pu = 1;
+  hwloc_debug_1arg_bitmap("found %d cpu topologies, cpuset %s\n",
+	     hwloc_bitmap_weight(cpuset), cpuset);
+
+  merge_buggy_core_siblings = (data->arch == HWLOC_LINUX_ARCH_X86);
+  caches_added = 0;
+  hwloc_bitmap_foreach_begin(i, cpuset) {
+    hwloc_bitmap_t packageset, coreset, bookset, threadset;
+    unsigned mypackageid, mycoreid, mybookid;
+    int tmpint;
+
+    if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_PACKAGE)) {
+      /* look at the package */
+      sprintf(str, "%s/cpu%d/topology/core_siblings", path, i);
+      packageset = hwloc__alloc_read_path_as_cpumask(str, data->root_fd);
+      if (packageset) {
+	hwloc_bitmap_andnot(packageset, packageset, unknownset);
+	if (hwloc_bitmap_first(packageset) == i) {
+	  /* first cpu in this package, add the package */
+	  struct hwloc_obj *package;
+
+	  mypackageid = (unsigned) -1;
+	  sprintf(str, "%s/cpu%d/topology/physical_package_id", path, i); /* contains %d at least up to 4.9 */
+	  if (hwloc_read_path_as_int(str, &tmpint, data->root_fd) == 0)
+	    mypackageid = (unsigned) tmpint;
+
+	  if (merge_buggy_core_siblings) {
+	    /* check for another package with same physical_package_id */
+	    hwloc_obj_t curpackage = packages;
+	    while (curpackage) {
+	      if (curpackage->os_index == mypackageid) {
+		/* found another package with same physical_package_id but different core_siblings.
+		 * looks like a buggy kernel on Intel Xeon E5 v3 processor with two rings.
+		 * merge these core_siblings to extend the existing first package object.
+		 */
+		static int reported = 0;
+		if (!reported && !hwloc_hide_errors()) {
+		  char *a, *b;
+		  hwloc_bitmap_asprintf(&a, curpackage->cpuset);
+		  hwloc_bitmap_asprintf(&b, packageset);
+		  fprintf(stderr, "****************************************************************************\n");
+		  fprintf(stderr, "* hwloc %s has detected buggy sysfs package information: Two packages have\n", HWLOC_VERSION);
+		  fprintf(stderr, "* the same physical package id %u but different core_siblings %s and %s\n",
+			  mypackageid, a, b);
+		  fprintf(stderr, "* hwloc is merging these packages into a single one assuming your Linux kernel\n");
+		  fprintf(stderr, "* does not support this processor correctly.\n");
+		  fprintf(stderr, "* You may hide this warning by setting HWLOC_HIDE_ERRORS=1 in the environment.\n");
+		  fprintf(stderr, "*\n");
+		  fprintf(stderr, "* If hwloc does not report the right number of packages,\n");
+		  fprintf(stderr, "* please report this error message to the hwloc user's mailing list,\n");
+		  fprintf(stderr, "* along with the output+tarball generated by the hwloc-gather-topology script.\n");
+		  fprintf(stderr, "****************************************************************************\n");
+		  reported = 1;
+		  free(a);
+		  free(b);
+		}
+		hwloc_bitmap_or(curpackage->cpuset, curpackage->cpuset, packageset);
+		goto package_done;
+	      }
+	      curpackage = curpackage->next_cousin;
+	    }
+	  }
+
+	  /* no package with same physical_package_id, create a new one */
+	  package = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PACKAGE, mypackageid);
+	  package->cpuset = packageset;
+	  hwloc_debug_1arg_bitmap("os package %u has cpuset %s\n",
+				  mypackageid, packageset);
+	  /* add cpuinfo */
+	  if (cpuinfo_Lprocs) {
+	    for(j=0; j<(int) cpuinfo_numprocs; j++)
+	      if ((int) cpuinfo_Lprocs[j].Pproc == i) {
+		hwloc__move_infos(&package->infos, &package->infos_count,
+				  &cpuinfo_Lprocs[j].infos, &cpuinfo_Lprocs[j].infos_count);
+	      }
+	  }
+	  /* insert in a temporary list in case we have to modify the cpuset by merging other core_siblings later.
+	   * we'll actually insert the tree at the end of the entire sysfs cpu loop.
+	   */
+	  package->next_cousin = packages;
+	  packages = package;
+
+	  packageset = NULL; /* don't free it */
+	}
+      package_done:
+	hwloc_bitmap_free(packageset);
+      }
+    }
+
+    if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) {
+      /* look at the core */
+      sprintf(str, "%s/cpu%d/topology/thread_siblings", path, i);
+      coreset = hwloc__alloc_read_path_as_cpumask(str, data->root_fd);
+      if (coreset) {
+	int gotcoreid = 0; /* to avoid reading the coreid twice */
+	hwloc_bitmap_andnot(coreset, coreset, unknownset);
+	if (hwloc_bitmap_weight(coreset) > 1 && threadwithcoreid == -1) {
+	  /* check if this is hyper-threading or different coreids */
+	  unsigned siblingid, siblingcoreid;
+
+	  mycoreid = (unsigned) -1;
+	  sprintf(str, "%s/cpu%d/topology/core_id", path, i); /* contains %d at least up to 4.9 */
+	  if (hwloc_read_path_as_int(str, &tmpint, data->root_fd) == 0)
+	    mycoreid = (unsigned) tmpint;
+	  gotcoreid = 1;
+
+	  siblingid = hwloc_bitmap_first(coreset);
+	  if (siblingid == (unsigned) i)
+	    siblingid = hwloc_bitmap_next(coreset, i);
+	  siblingcoreid = (unsigned) -1;
+	  sprintf(str, "%s/cpu%u/topology/core_id", path, siblingid); /* contains %d at least up to 4.9 */
+	  if (hwloc_read_path_as_int(str, &tmpint, data->root_fd) == 0)
+	    siblingcoreid = (unsigned) tmpint;
+	  threadwithcoreid = (siblingcoreid != mycoreid);
+	}
+	/* create the Core when meeting its first thread, or once per thread
+	 * when each thread has its own core id (AMD Compute Unit case) */
+	if (hwloc_bitmap_first(coreset) == i || threadwithcoreid) {
+	  /* regular core */
+	  struct hwloc_obj *core;
+
+	  if (!gotcoreid) {
+	    mycoreid = (unsigned) -1;
+	    sprintf(str, "%s/cpu%d/topology/core_id", path, i); /* contains %d at least up to 4.9 */
+	    if (hwloc_read_path_as_int(str, &tmpint, data->root_fd) == 0)
+	      mycoreid = (unsigned) tmpint;
+	  }
+
+	  core = hwloc_alloc_setup_object(topology, HWLOC_OBJ_CORE, mycoreid);
+	  if (threadwithcoreid)
+	    /* amd multicore compute-unit, create one core per thread */
+	    hwloc_bitmap_only(coreset, i);
+	  core->cpuset = coreset;
+	  hwloc_debug_1arg_bitmap("os core %u has cpuset %s\n",
+				  mycoreid, core->cpuset);
+	  hwloc_insert_object_by_cpuset(topology, core);
+	  coreset = NULL; /* don't free it */
+	}
+	hwloc_bitmap_free(coreset);
+      }
+    }
+
+    if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_GROUP)) {
+      /* look at the books */
+      sprintf(str, "%s/cpu%d/topology/book_siblings", path, i);
+      bookset = hwloc__alloc_read_path_as_cpumask(str, data->root_fd);
+      if (bookset) {
+	hwloc_bitmap_andnot(bookset, bookset, unknownset);
+	if (hwloc_bitmap_first(bookset) == i) {
+	  struct hwloc_obj *book;
+
+	  mybookid = (unsigned) -1;
+	  sprintf(str, "%s/cpu%d/topology/book_id", path, i); /* contains %d at least up to 4.9 */
+	  /* the Book is only created when book_id could be read */
+	  if (hwloc_read_path_as_int(str, &tmpint, data->root_fd) == 0) {
+	    mybookid = (unsigned) tmpint;
+
+	  book = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, mybookid);
+          book->cpuset = bookset;
+          hwloc_debug_1arg_bitmap("os book %u has cpuset %s\n",
+                       mybookid, bookset);
+          book->subtype = strdup("Book");
+	  book->attr->group.kind = HWLOC_GROUP_KIND_S390_BOOK;
+          hwloc_insert_object_by_cpuset(topology, book);
+          bookset = NULL; /* don't free it */
+	  }
+        }
+	hwloc_bitmap_free(bookset);
+      }
+    }
+
+    /* PU cannot be filtered-out */
+    {
+      /* look at the thread */
+      struct hwloc_obj *thread = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PU, i);
+      threadset = hwloc_bitmap_alloc();
+      hwloc_bitmap_only(threadset, i);
+      thread->cpuset = threadset;
+      hwloc_debug_1arg_bitmap("thread %d has cpuset %s\n",
+		 i, threadset);
+      hwloc_insert_object_by_cpuset(topology, thread);
+    }
+
+    /* look at the caches */
+    for(j=0; j<10; j++) {
+      char str2[20]; /* enough for a level number (one digit) or a type (Data/Instruction/Unified) */
+      hwloc_bitmap_t cacheset;
+
+      sprintf(str, "%s/cpu%d/cache/index%d/shared_cpu_map", path, i, j);
+      cacheset = hwloc__alloc_read_path_as_cpumask(str, data->root_fd);
+      if (cacheset) {
+	if (hwloc_bitmap_iszero(cacheset)) {
+	  /* ia64 returning empty L3 and L2i? use the core set instead */
+	  hwloc_bitmap_t tmpset;
+	  sprintf(str, "%s/cpu%d/topology/thread_siblings", path, i);
+	  tmpset = hwloc__alloc_read_path_as_cpumask(str, data->root_fd);
+	  /* only use it if we actually got something */
+	  if (tmpset) {
+	    hwloc_bitmap_free(cacheset);
+	    cacheset = tmpset;
+	  }
+	}
+	hwloc_bitmap_andnot(cacheset, cacheset, unknownset);
+
+	/* only create the cache object when meeting its first PU */
+	if (hwloc_bitmap_first(cacheset) == i) {
+	  unsigned kB;
+	  unsigned linesize;
+	  unsigned sets, lines_per_tag;
+	  unsigned depth; /* 1 for L1, .... */
+	  hwloc_obj_cache_type_t ctype = HWLOC_OBJ_CACHE_UNIFIED; /* default */
+	  hwloc_obj_type_t otype;
+	  struct hwloc_obj *cache;
+
+	  /* get the cache level depth */
+	  sprintf(str, "%s/cpu%d/cache/index%d/level", path, i, j); /* contains %u at least up to 4.9 */
+	  if (hwloc_read_path_as_uint(str, &depth, data->root_fd) < 0) {
+	    hwloc_bitmap_free(cacheset);
+	    continue;
+	  }
+
+	  /* cache type */
+	  sprintf(str, "%s/cpu%d/cache/index%d/type", path, i, j);
+	  if (hwloc_read_path_by_length(str, str2, sizeof(str2), data->root_fd) == 0) {
+	    if (!strncmp(str2, "Data", 4))
+	      ctype = HWLOC_OBJ_CACHE_DATA;
+	    else if (!strncmp(str2, "Unified", 7))
+	      ctype = HWLOC_OBJ_CACHE_UNIFIED;
+	    else if (!strncmp(str2, "Instruction", 11))
+	      ctype = HWLOC_OBJ_CACHE_INSTRUCTION;
+	  }
+
+	  otype = hwloc_cache_type_by_depth_type(depth, ctype);
+	  if (otype == HWLOC_OBJ_TYPE_NONE
+	      || !hwloc_filter_check_keep_object_type(topology, otype)) {
+	    hwloc_bitmap_free(cacheset);
+	    continue;
+	  }
+
+	  /* FIXME: if Bulldozer/Piledriver, add compute unit Groups when L2/L1i filtered-out */
+	  /* FIXME: if KNL, add tile Groups when L2/L1i filtered-out */
+
+	  /* get the cache size */
+	  kB = 0;
+	  sprintf(str, "%s/cpu%d/cache/index%d/size", path, i, j); /* contains %uK at least up to 4.9 */
+	  hwloc_read_path_as_uint(str, &kB, data->root_fd);
+	  /* KNL reports L3 with size=0 and full cpuset in cpuid.
+	   * Let hwloc_linux_try_add_knl_mcdram_cache() detect it better.
+	   */
+	  if (!kB && otype == HWLOC_OBJ_L3CACHE && data->is_knl) {
+	    hwloc_bitmap_free(cacheset);
+	    continue;
+	  }
+
+	  /* get the line size */
+	  linesize = 0;
+	  sprintf(str, "%s/cpu%d/cache/index%d/coherency_line_size", path, i, j); /* contains %u at least up to 4.9 */
+	  hwloc_read_path_as_uint(str, &linesize, data->root_fd);
+
+	  /* get the number of sets and lines per tag.
+	   * don't take the associativity directly in "ways_of_associativity" because
+	   * some archs (ia64, ppc) put 0 there when fully-associative, while others (x86) put something like -1 there.
+	   */
+	  sets = 0;
+	  sprintf(str, "%s/cpu%d/cache/index%d/number_of_sets", path, i, j); /* contains %u at least up to 4.9 */
+	  hwloc_read_path_as_uint(str, &sets, data->root_fd);
+
+	  lines_per_tag = 1;
+	  sprintf(str, "%s/cpu%d/cache/index%d/physical_line_partition", path, i, j); /* contains %u at least up to 4.9 */
+	  hwloc_read_path_as_uint(str, &lines_per_tag, data->root_fd);
+
+	  /* first cpu in this cache, add the cache */
+	  cache = hwloc_alloc_setup_object(topology, otype, -1);
+	  cache->attr->cache.size = ((uint64_t)kB) << 10;
+	  cache->attr->cache.depth = depth;
+	  cache->attr->cache.linesize = linesize;
+	  cache->attr->cache.type = ctype;
+	  if (!linesize || !lines_per_tag || !sets)
+	    cache->attr->cache.associativity = 0; /* unknown */
+	  else if (sets == 1)
+	    cache->attr->cache.associativity = 0; /* likely wrong, make it unknown */
+	  else
+	    cache->attr->cache.associativity = (kB << 10) / linesize / lines_per_tag / sets;
+	  cache->cpuset = cacheset;
+	  hwloc_debug_1arg_bitmap("cache depth %u has cpuset %s\n",
+				  depth, cacheset);
+	  hwloc_insert_object_by_cpuset(topology, cache);
+	  cacheset = NULL; /* don't free it */
+	  ++caches_added;
+	}
+      }
+      hwloc_bitmap_free(cacheset);
+     }
+
+  } hwloc_bitmap_foreach_end();
+
+  /* actually insert in the tree now that package cpusets have been fixed-up */
+  while (packages) {
+    hwloc_obj_t next = packages->next_cousin;
+    packages->next_cousin = NULL;
+    hwloc_insert_object_by_cpuset(topology, packages);
+    packages = next;
+  }
+
+  if (0 == caches_added)
+    look_powerpc_device_tree(topology, data);
+
+  hwloc_bitmap_free(cpuset);
+  hwloc_bitmap_free(unknownset);
+
+  return 0;
+}
+
+
+
+/****************************************
+ ****** cpuinfo Topology Discovery ******
+ ****************************************/
+
+/* /proc/cpuinfo parsing hook for x86: maps the interesting cpuinfo
+ * line prefixes to hwloc info names.  Always returns 0. */
+static int
+hwloc_linux_parse_cpuinfo_x86(const char *prefix, const char *value,
+			      struct hwloc_obj_info_s **infos, unsigned *infos_count,
+			      int is_global __hwloc_attribute_unused)
+{
+  static const char *keymap[][2] = {
+    { "vendor_id",  "CPUVendor" },
+    { "model name", "CPUModel" },
+    { "model",      "CPUModelNumber" },
+    { "cpu family", "CPUFamilyNumber" },
+    { "stepping",   "CPUStepping" },
+  };
+  unsigned k;
+
+  for (k = 0; k < sizeof(keymap)/sizeof(keymap[0]); k++)
+    if (!strcmp(keymap[k][0], prefix)) {
+      hwloc__add_info(infos, infos_count, keymap[k][1], value);
+      break;
+    }
+  return 0;
+}
+
+/* /proc/cpuinfo parsing hook for ia64: maps the interesting cpuinfo
+ * line prefixes to hwloc info names.  Always returns 0. */
+static int
+hwloc_linux_parse_cpuinfo_ia64(const char *prefix, const char *value,
+			       struct hwloc_obj_info_s **infos, unsigned *infos_count,
+			       int is_global __hwloc_attribute_unused)
+{
+  static const char *keymap[][2] = {
+    { "vendor",     "CPUVendor" },
+    { "model name", "CPUModel" },
+    { "model",      "CPUModelNumber" },
+    { "family",     "CPUFamilyNumber" },
+  };
+  unsigned k;
+
+  for (k = 0; k < sizeof(keymap)/sizeof(keymap[0]); k++)
+    if (!strcmp(keymap[k][0], prefix)) {
+      hwloc__add_info(infos, infos_count, keymap[k][1], value);
+      break;
+    }
+  return 0;
+}
+
+/* /proc/cpuinfo parsing hook for ARM: maps the interesting cpuinfo
+ * line prefixes to hwloc info names.  Always returns 0. */
+static int
+hwloc_linux_parse_cpuinfo_arm(const char *prefix, const char *value,
+			      struct hwloc_obj_info_s **infos, unsigned *infos_count,
+			      int is_global __hwloc_attribute_unused)
+{
+  static const char *keymap[][2] = {
+    { "Processor",        "CPUModel" }, /* old kernels with one Processor header */
+    { "model name",       "CPUModel" }, /* new kernels with one model name per core */
+    { "CPU implementer",  "CPUImplementer" },
+    { "CPU architecture", "CPUArchitecture" },
+    { "CPU variant",      "CPUVariant" },
+    { "CPU part",         "CPUPart" },
+    { "CPU revision",     "CPURevision" },
+    { "Hardware",         "HardwareName" },
+    { "Revision",         "HardwareRevision" },
+    { "Serial",           "HardwareSerial" },
+  };
+  unsigned k;
+
+  for (k = 0; k < sizeof(keymap)/sizeof(keymap[0]); k++)
+    if (!strcmp(keymap[k][0], prefix)) {
+      hwloc__add_info(infos, infos_count, keymap[k][1], value);
+      break;
+    }
+  return 0;
+}
+
+/* /proc/cpuinfo parsing hook for Power/PowerPC platforms: maps the
+ * interesting cpuinfo line prefixes to hwloc info names.
+ * Always returns 0. */
+static int
+hwloc_linux_parse_cpuinfo_ppc(const char *prefix, const char *value,
+			      struct hwloc_obj_info_s **infos, unsigned *infos_count,
+			      int is_global)
+{
+  /* common fields */
+  if (!strcmp("cpu", prefix)) {
+    hwloc__add_info(infos, infos_count, "CPUModel", value);
+    return 0;
+  }
+  if (!strcmp("platform", prefix)) {
+    hwloc__add_info(infos, infos_count, "PlatformName", value);
+    return 0;
+  }
+  if (!strcmp("model", prefix)) {
+    hwloc__add_info(infos, infos_count, "PlatformModel", value);
+    return 0;
+  }
+  /* platform-specific fields */
+  if (!strcasecmp("vendor", prefix)) {
+    hwloc__add_info(infos, infos_count, "PlatformVendor", value);
+    return 0;
+  }
+  if (!strcmp("Board ID", prefix)) {
+    hwloc__add_info(infos, infos_count, "PlatformBoardID", value);
+    return 0;
+  }
+  if (!strcmp("Board", prefix)
+      || !strcasecmp("Machine", prefix)) {
+    /* machine and board are similar (and often more precise) than model above */
+    char **slot = hwloc__find_info_slot(infos, infos_count, "PlatformModel");
+    free(*slot);
+    *slot = strdup(value);
+    return 0;
+  }
+  if (!strcasecmp("Revision", prefix)
+      || !strcmp("Hardware rev", prefix)) {
+    hwloc__add_info(infos, infos_count, is_global ? "PlatformRevision" : "CPURevision", value);
+    return 0;
+  }
+  if (!strcmp("SVR", prefix)) {
+    hwloc__add_info(infos, infos_count, "SystemVersionRegister", value);
+    return 0;
+  }
+  if (!strcmp("PVR", prefix)) {
+    hwloc__add_info(infos, infos_count, "ProcessorVersionRegister", value);
+    return 0;
+  }
+  /* don't match 'board*' because there's also "board l2" on some platforms */
+  return 0;
+}
+
+/*
+ * avr32: "chip type\t:"			=> OK
+ * blackfin: "model name\t:"			=> OK
+ * h8300: "CPU:"				=> OK
+ * m68k: "CPU:"					=> OK
+ * mips: "cpu model\t\t:"			=> OK
+ * openrisc: "CPU:"				=> OK
+ * sparc: "cpu\t\t:"				=> OK
+ * tile: "model name\t:"			=> OK
+ * unicore32: "Processor\t:"			=> OK
+ * alpha: "cpu\t\t\t: Alpha" + "cpu model\t\t:"	=> "cpu" overwritten by "cpu model", no processor indexes
+ * cris: "cpu\t\t:" + "cpu model\t:"		=> only "cpu"
+ * frv: "CPU-Core:" + "CPU:"			=> only "CPU"
+ * mn10300: "cpu core   :" + "model name :"	=> only "model name"
+ * parisc: "cpu family\t:" + "cpu\t\t:"		=> only "cpu"
+ *
+ * not supported because of conflicts with other arch minor lines:
+ * m32r: "cpu family\t:"			=> KO (adding "cpu family" would break "blackfin")
+ * microblaze: "CPU-Family:"			=> KO
+ * sh: "cpu family\t:" + "cpu type\t:"		=> KO
+ * xtensa: "model\t\t:"				=> KO
+ */
+/* Default /proc/cpuinfo parsing hook for architectures without a
+ * dedicated one.  Records the last model-like line seen into the
+ * CPUModel info, overwriting any previous value.  Always returns 0. */
+static int
+hwloc_linux_parse_cpuinfo_generic(const char *prefix, const char *value,
+				  struct hwloc_obj_info_s **infos, unsigned *infos_count,
+				  int is_global __hwloc_attribute_unused)
+{
+  static const char *model_keys[] = {
+    "model name", "Processor", "chip type", "cpu model"
+  };
+  unsigned k;
+  int matched = !strcasecmp("cpu", prefix);
+
+  for (k = 0; !matched && k < sizeof(model_keys)/sizeof(model_keys[0]); k++)
+    matched = !strcmp(model_keys[k], prefix);
+
+  if (matched) {
+    /* keep the last one, assume it's more precise than the first one.
+     * we should have the Architecture keypair for basic information anyway.
+     */
+    char **slot = hwloc__find_info_slot(infos, infos_count, "CPUModel");
+    free(*slot);
+    *slot = strdup(value);
+  }
+  return 0;
+}
+
+/* Parse the /proc/cpuinfo file found at `path`.
+ * Returns the number of "processor" entries found and stores a malloc'ed
+ * array of that many entries in *Lprocs_p (Lprocs_p set to NULL unless
+ * returns > 0).  Architecture-specific key/value pairs seen outside of a
+ * per-processor section are appended to *global_infos/*global_infos_count.
+ * Returns -1 on error (unreadable file, unparsable number, or out of
+ * memory); *Lprocs_p is NULL in that case. */
+static int
+hwloc_linux_parse_cpuinfo(struct hwloc_linux_backend_data_s *data,
+			  const char *path,
+			  struct hwloc_linux_cpuinfo_proc ** Lprocs_p,
+			  struct hwloc_obj_info_s **global_infos, unsigned *global_infos_count)
+{
+  FILE *fd;
+  char *str = NULL;
+  char *endptr;
+  unsigned len;
+  unsigned allocated_Lprocs = 0;
+  struct hwloc_linux_cpuinfo_proc * Lprocs = NULL;
+  unsigned numprocs = 0;
+  int curproc = -1;
+  int (*parse_cpuinfo_func)(const char *, const char *, struct hwloc_obj_info_s **, unsigned *, int) = NULL;
+
+  if (!(fd=hwloc_fopen(path,"r", data->root_fd)))
+    {
+      hwloc_debug("could not open %s\n", path);
+      return -1;
+    }
+
+#      define PROCESSOR	"processor"
+#      define PACKAGEID "physical id" /* the longest one */
+#      define COREID "core id"
+  len = 128; /* vendor/model can be very long */
+  str = malloc(len);
+  if (!str)
+    /* out of memory, nothing gathered yet */
+    goto err;
+  hwloc_debug("\n\n * Topology extraction from %s *\n\n", path);
+  while (fgets(str,len,fd)!=NULL) {
+    unsigned long Ppkg, Pcore, Pproc;
+    char *end, *colon, *prefix, *value;
+    int noend = 0;
+
+    /* remove the ending \n */
+    end = strchr(str, '\n');
+    if (end)
+      *end = 0;
+    else
+      noend = 1;
+    /* if empty line, skip and reset curproc */
+    if (!*str) {
+      curproc = -1;
+      continue;
+    }
+    /* skip lines without a colon separator */
+    colon = strchr(str, ':');
+    if (!colon)
+      continue;
+    /* skip lines not starting with a letter */
+    if ((*str > 'z' || *str < 'a')
+	&& (*str > 'Z' || *str < 'A'))
+      continue;
+
+    /* mark the end of the prefix */
+    prefix = str;
+    end = colon;
+    while (end[-1] == ' ' || end[-1] == '	') end--; /* need a strrspn() */
+    *end = 0;
+    /* find beginning of value, its end is already marked */
+    value = colon+1 + strspn(colon+1, " 	");
+
+    /* defines for parsing numbers */
+#   define getprocnb_begin(field, var)					\
+    if (!strcmp(field,prefix)) {					\
+      var = strtoul(value,&endptr,0);					\
+      if (endptr==value) {						\
+	hwloc_debug("no number in "field" field of %s\n", path);	\
+	goto err;							\
+      } else if (var==ULONG_MAX) {					\
+	hwloc_debug("too big "field" number in %s\n", path); 		\
+	goto err;							\
+      }									\
+      hwloc_debug(field " %lu\n", var)
+#   define getprocnb_end()						\
+    }
+    /* actually parse numbers */
+    getprocnb_begin(PROCESSOR, Pproc);
+    if (numprocs == allocated_Lprocs) {
+      /* grow the array before claiming the slot so that a failed realloc
+       * leaves only fully-initialized entries behind for cleanup */
+      struct hwloc_linux_cpuinfo_proc * tmp;
+      unsigned newcount = allocated_Lprocs ? 2 * allocated_Lprocs : 8;
+      tmp = realloc(Lprocs, newcount * sizeof(*Lprocs));
+      if (!tmp)
+	goto err;
+      Lprocs = tmp;
+      allocated_Lprocs = newcount;
+    }
+    curproc = numprocs++;
+    Lprocs[curproc].Pproc = Pproc;
+    Lprocs[curproc].Pcore = -1;
+    Lprocs[curproc].Ppkg = -1;
+    Lprocs[curproc].Lcore = -1;
+    Lprocs[curproc].Lpkg = -1;
+    Lprocs[curproc].infos = NULL;
+    Lprocs[curproc].infos_count = 0;
+    getprocnb_end() else
+    getprocnb_begin(PACKAGEID, Ppkg);
+    /* ignore a "physical id" line seen before any "processor" line,
+     * it would otherwise write out of bounds at Lprocs[-1] */
+    if (curproc >= 0)
+      Lprocs[curproc].Ppkg = Ppkg;
+    getprocnb_end() else
+    getprocnb_begin(COREID, Pcore);
+    if (curproc >= 0)
+      Lprocs[curproc].Pcore = Pcore;
+    getprocnb_end() else {
+
+      /* architecture specific or default routine for parsing cpumodel */
+      switch (data->arch) {
+      case HWLOC_LINUX_ARCH_X86:
+	parse_cpuinfo_func = hwloc_linux_parse_cpuinfo_x86;
+	break;
+      case HWLOC_LINUX_ARCH_ARM:
+	parse_cpuinfo_func = hwloc_linux_parse_cpuinfo_arm;
+	break;
+      case HWLOC_LINUX_ARCH_POWER:
+	parse_cpuinfo_func = hwloc_linux_parse_cpuinfo_ppc;
+	break;
+      case HWLOC_LINUX_ARCH_IA64:
+	parse_cpuinfo_func = hwloc_linux_parse_cpuinfo_ia64;
+	break;
+      default:
+	parse_cpuinfo_func = hwloc_linux_parse_cpuinfo_generic;
+      }
+
+      /* we can't assume that we already got a processor index line:
+       * alpha/frv/h8300/m68k/microblaze/sparc have no processor lines at all, only a global entry.
+       * tile has a global section with model name before the list of processor lines.
+       */
+      parse_cpuinfo_func(prefix, value,
+			 curproc >= 0 ? &Lprocs[curproc].infos : global_infos,
+			 curproc >= 0 ? &Lprocs[curproc].infos_count : global_infos_count,
+			 curproc < 0);
+    }
+
+    if (noend) {
+      /* ignore end of line */
+      if (fscanf(fd,"%*[^\n]") == EOF)
+	break;
+      getc(fd);
+    }
+  }
+  fclose(fd);
+  free(str);
+
+  *Lprocs_p = Lprocs;
+  return numprocs;
+
+ err:
+  fclose(fd);
+  free(str);
+  if (Lprocs) {
+    /* also release the per-processor infos gathered so far */
+    unsigned k;
+    for(k=0; k<numprocs; k++)
+      hwloc__free_infos(Lprocs[k].infos, Lprocs[k].infos_count);
+    free(Lprocs);
+  }
+  *Lprocs_p = NULL;
+  return -1;
+}
+
+/* Release everything produced by hwloc_linux_parse_cpuinfo():
+ * the per-processor info arrays, the Lprocs array itself (if any),
+ * and the global info array. */
+static void
+hwloc_linux_free_cpuinfo(struct hwloc_linux_cpuinfo_proc * Lprocs, unsigned numprocs,
+			 struct hwloc_obj_info_s *global_infos, unsigned global_infos_count)
+{
+  unsigned idx;
+
+  if (Lprocs != NULL) {
+    for (idx = 0; idx < numprocs; idx++)
+      hwloc__free_infos(Lprocs[idx].infos, Lprocs[idx].infos_count);
+    free(Lprocs);
+  }
+  hwloc__free_infos(global_infos, global_infos_count);
+}
+
+/* Fallback CPU topology discovery from the /proc/cpuinfo entries in Lprocs
+ * (used when sysfs topology information is not usable).
+ * Creates one PU per entry, then Package and Core objects from the
+ * "physical id"/"core id" fields when every processor provided them.
+ * Returns 0 on success, -1 on allocation failure. */
+static int
+look_cpuinfo(struct hwloc_topology *topology,
+	     struct hwloc_linux_cpuinfo_proc * Lprocs,
+	     unsigned numprocs)
+{
+  /* P for physical/OS index, L for logical (e.g. in the order we get them, not in the final hwloc logical order) */
+  unsigned *Lcore_to_Pcore;
+  unsigned *Lcore_to_Ppkg; /* needed because Lcore is equivalent to Pcore+Ppkg, not to Pcore alone */
+  unsigned *Lpkg_to_Ppkg;
+  unsigned numpkgs=0;
+  unsigned numcores=0;
+  unsigned long Lproc;
+  unsigned missingpkg;
+  unsigned missingcore;
+  unsigned i,j;
+
+  /* initialize misc arrays, there can be at most numprocs entries */
+  Lcore_to_Pcore = malloc(numprocs * sizeof(*Lcore_to_Pcore));
+  Lcore_to_Ppkg = malloc(numprocs * sizeof(*Lcore_to_Ppkg));
+  Lpkg_to_Ppkg = malloc(numprocs * sizeof(*Lpkg_to_Ppkg));
+  if (!Lcore_to_Pcore || !Lcore_to_Ppkg || !Lpkg_to_Ppkg) {
+    /* out of memory: fail instead of dereferencing NULL below */
+    free(Lcore_to_Pcore);
+    free(Lcore_to_Ppkg);
+    free(Lpkg_to_Ppkg);
+    return -1;
+  }
+  for (i = 0; i < numprocs; i++) {
+    Lcore_to_Pcore[i] = -1;
+    Lcore_to_Ppkg[i] = -1;
+    Lpkg_to_Ppkg[i] = -1;
+  }
+
+  /* create PU objects */
+  for(Lproc=0; Lproc<numprocs; Lproc++) {
+    unsigned long Pproc = Lprocs[Lproc].Pproc;
+    hwloc_obj_t obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PU, Pproc);
+    obj->cpuset = hwloc_bitmap_alloc();
+    hwloc_bitmap_only(obj->cpuset, Pproc);
+    hwloc_debug_2args_bitmap("cpu %lu (os %lu) has cpuset %s\n",
+			     Lproc, Pproc, obj->cpuset);
+    hwloc_insert_object_by_cpuset(topology, obj);
+  }
+
+  topology->support.discovery->pu = 1;
+
+  hwloc_debug("%s", "\n * Topology summary *\n");
+  hwloc_debug("%u processors\n", numprocs);
+
+  /* fill Lprocs[].Lpkg and Lpkg_to_Ppkg */
+  for(Lproc=0; Lproc<numprocs; Lproc++) {
+    long Ppkg = Lprocs[Lproc].Ppkg;
+    if (Ppkg != -1) {
+      unsigned long Pproc = Lprocs[Lproc].Pproc;
+      for (i=0; i<numpkgs; i++)
+	if ((unsigned) Ppkg == Lpkg_to_Ppkg[i])
+	  break;
+      Lprocs[Lproc].Lpkg = i;
+      hwloc_debug("%lu on package %u (%lx)\n", Pproc, i, (unsigned long) Ppkg);
+      if (i==numpkgs) {
+	Lpkg_to_Ppkg[numpkgs] = Ppkg;
+	numpkgs++;
+      }
+    }
+  }
+  /* Some buggy Linuxes don't provide numbers for processor 0, which makes us
+   * provide bogus information. We should rather drop it. */
+  missingpkg=0;
+  for(j=0; j<numprocs; j++)
+    if (Lprocs[j].Ppkg == -1) {
+      missingpkg=1;
+      break;
+    }
+  /* create package objects */
+  hwloc_debug("%u pkgs%s\n", numpkgs, missingpkg ? ", but some missing package" : "");
+  if (!missingpkg && numpkgs>0) {
+    for (i = 0; i < numpkgs; i++) {
+      struct hwloc_obj *obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PACKAGE, Lpkg_to_Ppkg[i]);
+      int doneinfos = 0;
+      obj->cpuset = hwloc_bitmap_alloc();
+      for(j=0; j<numprocs; j++)
+	if ((unsigned) Lprocs[j].Lpkg == i) {
+	  hwloc_bitmap_set(obj->cpuset, Lprocs[j].Pproc);
+	  if (!doneinfos) {
+	    /* attach the cpuinfo infos of the first member proc to the package */
+	    hwloc__move_infos(&obj->infos, &obj->infos_count, &Lprocs[j].infos, &Lprocs[j].infos_count);
+	    doneinfos = 1;
+	  }
+	}
+      hwloc_debug_1arg_bitmap("Package %u has cpuset %s\n", i, obj->cpuset);
+      hwloc_insert_object_by_cpuset(topology, obj);
+    }
+    hwloc_debug("%s", "\n");
+  }
+
+  /* fill Lprocs[].Lcore, Lcore_to_Ppkg and Lcore_to_Pcore */
+  for(Lproc=0; Lproc<numprocs; Lproc++) {
+    long Pcore = Lprocs[Lproc].Pcore;
+    if (Pcore != -1) {
+      for (i=0; i<numcores; i++)
+	if ((unsigned) Pcore == Lcore_to_Pcore[i] && (unsigned) Lprocs[Lproc].Ppkg == Lcore_to_Ppkg[i])
+	  break;
+      Lprocs[Lproc].Lcore = i;
+      if (i==numcores) {
+	Lcore_to_Ppkg[numcores] = Lprocs[Lproc].Ppkg;
+	Lcore_to_Pcore[numcores] = Pcore;
+	numcores++;
+      }
+    }
+  }
+  /* Some buggy Linuxes don't provide numbers for processor 0, which makes us
+   * provide bogus information. We should rather drop it. */
+  missingcore=0;
+  for(j=0; j<numprocs; j++)
+    if (Lprocs[j].Pcore == -1) {
+      missingcore=1;
+      break;
+    }
+  /* create Core objects */
+  hwloc_debug("%u cores%s\n", numcores, missingcore ? ", but some missing core" : "");
+  if (!missingcore && numcores>0) {
+    for (i = 0; i < numcores; i++) {
+      struct hwloc_obj *obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_CORE, Lcore_to_Pcore[i]);
+      obj->cpuset = hwloc_bitmap_alloc();
+      for(j=0; j<numprocs; j++)
+	if ((unsigned) Lprocs[j].Lcore == i)
+	  hwloc_bitmap_set(obj->cpuset, Lprocs[j].Pproc);
+      hwloc_debug_1arg_bitmap("Core %u has cpuset %s\n", i, obj->cpuset);
+      hwloc_insert_object_by_cpuset(topology, obj);
+    }
+    hwloc_debug("%s", "\n");
+  }
+
+  free(Lcore_to_Pcore);
+  free(Lcore_to_Ppkg);
+  free(Lpkg_to_Ppkg);
+  return 0;
+}
+
+
+
+/*************************************
+ ****** Main Topology Discovery ******
+ *************************************/
+
+/* Record the MIC card serial number as a "MICSerialNumber" info attribute
+ * on the topology root. /proc/elog is expected to start with
+ * "Card <serial>:..." ; silently does nothing if the file is missing
+ * or does not match that format. */
+static void
+hwloc__linux_get_mic_sn(struct hwloc_topology *topology, struct hwloc_linux_backend_data_s *data)
+{
+  char line[64], *tmp, *end;
+  if (hwloc_read_path_by_length("/proc/elog", line, sizeof(line), data->root_fd) < 0)
+    return;
+  if (strncmp(line, "Card ", 5))
+    return;
+  tmp = line + 5;
+  end = strchr(tmp, ':');
+  if (!end)
+    return;
+  /* keep only the serial number between "Card " and the first ':' */
+  *end = '\0';
+  hwloc_obj_add_info(hwloc_get_root_obj(topology), "MICSerialNumber", tmp);
+}
+
+/* Gather basic platform information (uname fields, fallback processor
+ * count, page size) into the backend data.
+ * Order matters: sane defaults first, then real values when running on
+ * this system, then optional overrides from /proc/hwloc-nofile-info
+ * (used when replaying a gathered fsroot).  Finally the arch quirk enum
+ * is derived from configure defines or the utsname machine string. */
+static void
+hwloc_gather_system_info(struct hwloc_topology *topology,
+			 struct hwloc_linux_backend_data_s *data)
+{
+  FILE *file;
+  char line[128]; /* enough for utsname fields */
+  const char *env;
+
+  /* initialize to something sane, in case !is_thissystem and we can't find things in /proc/hwloc-nofile-info */
+  memset(&data->utsname, 0, sizeof(data->utsname));
+  data->fallback_nbprocessors = 1;
+  data->pagesize = 4096;
+
+  /* read thissystem info */
+  if (topology->is_thissystem) {
+    uname(&data->utsname);
+    data->fallback_nbprocessors = hwloc_fallback_nbprocessors(topology);
+    data->pagesize = hwloc_getpagesize();
+  }
+
+  /* overwrite with optional /proc/hwloc-nofile-info */
+  file = hwloc_fopen("/proc/hwloc-nofile-info", "r", data->root_fd);
+  if (file) {
+    while (fgets(line, sizeof(line), file)) {
+      char *tmp = strchr(line, '\n');
+      /* each strncpy below is followed by an explicit NUL write because
+       * strncpy does not terminate on truncation */
+      if (!strncmp("OSName: ", line, 8)) {
+	if (tmp)
+	  *tmp = '\0';
+	strncpy(data->utsname.sysname, line+8, sizeof(data->utsname.sysname));
+	data->utsname.sysname[sizeof(data->utsname.sysname)-1] = '\0';
+      } else if (!strncmp("OSRelease: ", line, 11)) {
+	if (tmp)
+	  *tmp = '\0';
+	strncpy(data->utsname.release, line+11, sizeof(data->utsname.release));
+	data->utsname.release[sizeof(data->utsname.release)-1] = '\0';
+      } else if (!strncmp("OSVersion: ", line, 11)) {
+	if (tmp)
+	  *tmp = '\0';
+	strncpy(data->utsname.version, line+11, sizeof(data->utsname.version));
+	data->utsname.version[sizeof(data->utsname.version)-1] = '\0';
+      } else if (!strncmp("HostName: ", line, 10)) {
+	if (tmp)
+	  *tmp = '\0';
+	strncpy(data->utsname.nodename, line+10, sizeof(data->utsname.nodename));
+	data->utsname.nodename[sizeof(data->utsname.nodename)-1] = '\0';
+      } else if (!strncmp("Architecture: ", line, 14)) {
+	if (tmp)
+	  *tmp = '\0';
+	strncpy(data->utsname.machine, line+14, sizeof(data->utsname.machine));
+	data->utsname.machine[sizeof(data->utsname.machine)-1] = '\0';
+      } else if (!strncmp("FallbackNbProcessors: ", line, 22)) {
+	if (tmp)
+	  *tmp = '\0';
+	data->fallback_nbprocessors = atoi(line+22);
+      } else if (!strncmp("PageSize: ", line, 10)) {
+	if (tmp)
+	 *tmp = '\0';
+	data->pagesize = strtoull(line+10, NULL, 10);
+      } else {
+	hwloc_debug("ignored /proc/hwloc-nofile-info line %s\n", line);
+	/* ignored */
+      }
+    }
+    fclose(file);
+  }
+
+  /* optionally dump the gathered values, in the same key/value format as
+   * /proc/hwloc-nofile-info above, to the file named by the env var */
+  env = getenv("HWLOC_DUMP_NOFILE_INFO");
+  if (env && *env) {
+    file = fopen(env, "w");
+    if (file) {
+      if (*data->utsname.sysname)
+	fprintf(file, "OSName: %s\n", data->utsname.sysname);
+      if (*data->utsname.release)
+	fprintf(file, "OSRelease: %s\n", data->utsname.release);
+      if (*data->utsname.version)
+	fprintf(file, "OSVersion: %s\n", data->utsname.version);
+      if (*data->utsname.nodename)
+	fprintf(file, "HostName: %s\n", data->utsname.nodename);
+      if (*data->utsname.machine)
+	fprintf(file, "Architecture: %s\n", data->utsname.machine);
+      fprintf(file, "FallbackNbProcessors: %u\n", data->fallback_nbprocessors);
+      fprintf(file, "PageSize: %llu\n", (unsigned long long) data->pagesize);
+      fclose(file);
+    }
+  }
+
+  /* detect arch for quirks, using configure #defines if possible, or uname */
+#if (defined HWLOC_X86_32_ARCH) || (defined HWLOC_X86_64_ARCH) /* does not cover KNC */
+  if (topology->is_thissystem)
+    data->arch = HWLOC_LINUX_ARCH_X86;
+#endif
+  /* NOTE(review): assumes data->arch was set to HWLOC_LINUX_ARCH_UNKNOWN
+   * by the caller before this function runs -- confirm (done in
+   * hwloc_linux_component_instantiate below) */
+  if (data->arch == HWLOC_LINUX_ARCH_UNKNOWN && *data->utsname.machine) {
+    if (!strcmp(data->utsname.machine, "x86_64")
+	|| (data->utsname.machine[0] == 'i' && !strcmp(data->utsname.machine+2, "86"))
+	|| !strcmp(data->utsname.machine, "k1om"))
+      data->arch = HWLOC_LINUX_ARCH_X86;
+    else if (!strncmp(data->utsname.machine, "arm", 3))
+      data->arch = HWLOC_LINUX_ARCH_ARM;
+    else if (!strncmp(data->utsname.machine, "ppc", 3)
+	     || !strncmp(data->utsname.machine, "power", 5))
+      data->arch = HWLOC_LINUX_ARCH_POWER;
+    else if (!strcmp(data->utsname.machine, "ia64"))
+      data->arch = HWLOC_LINUX_ARCH_IA64;
+  }
+}
+
+/* Try to use a hardwired topology for known-broken platforms.
+ * Returns 0 on success, -1 on non-match or error during hardwired load.
+ * Can be disabled entirely with the HWLOC_NO_HARDWIRED_TOPOLOGY env var. */
+static int
+hwloc_linux_try_hardwired_cpuinfo(struct hwloc_backend *backend)
+{
+  struct hwloc_topology *topology = backend->topology;
+  struct hwloc_linux_backend_data_s *data = backend->private_data;
+
+  if (getenv("HWLOC_NO_HARDWIRED_TOPOLOGY"))
+    return -1;
+
+  if (!strcmp(data->utsname.machine, "s64fx")) {
+    char line[128];
+    /* Fujitsu K-computer, FX10, and FX100 use specific processors
+     * whose Linux topology support is broken until 4.1
+     * (kernel commit acc455cffa75070d55e74fc7802b49edbc080e92) and
+     * existing machines will likely never be fixed by kernel upgrade.
+     */
+
+    /* /proc/cpuinfo starts with one of these lines:
+     * "cpu             : Fujitsu SPARC64 VIIIfx"
+     * "cpu             : Fujitsu SPARC64 XIfx"
+     * "cpu             : Fujitsu SPARC64 IXfx"
+     */
+    if (hwloc_read_path_by_length("/proc/cpuinfo", line, sizeof(line), data->root_fd) < 0)
+      return -1;
+
+    /* first line must be "cpu" followed by a tab */
+    if (strncmp(line, "cpu	", 4))
+      return -1;
+
+    /* dispatch on the exact model string (FX100 reports uppercase "FUJITSU") */
+    if (strstr(line, "Fujitsu SPARC64 VIIIfx"))
+      return hwloc_look_hardwired_fujitsu_k(topology);
+    else if (strstr(line, "Fujitsu SPARC64 IXfx"))
+      return hwloc_look_hardwired_fujitsu_fx10(topology);
+    else if (strstr(line, "FUJITSU SPARC64 XIfx"))
+      return hwloc_look_hardwired_fujitsu_fx100(topology);
+  }
+  return -1;
+}
+
+/* Restrict the root object's allowed cpuset/nodeset according to the
+ * cgroup/cpuset the target process belongs to.
+ * On return *cpuset_namep holds the malloc'ed cpuset name (caller frees),
+ * or NULL if no cgroup/cpuset mount or name was found. */
+static void hwloc_linux__get_allowed_resources(hwloc_topology_t topology, const char *root_path, int root_fd, char **cpuset_namep)
+{
+  char *cpuset_mntpnt, *cgroup_mntpnt, *cpuset_name = NULL;
+  hwloc_find_linux_cpuset_mntpnt(&cgroup_mntpnt, &cpuset_mntpnt, root_path);
+  if (cgroup_mntpnt || cpuset_mntpnt) {
+    cpuset_name = hwloc_read_linux_cpuset_name(root_fd, topology->pid);
+    if (cpuset_name) {
+      hwloc_admin_disable_set_from_cpuset(root_fd, cgroup_mntpnt, cpuset_mntpnt, cpuset_name, "cpus", topology->levels[0][0]->allowed_cpuset);
+      hwloc_admin_disable_set_from_cpuset(root_fd, cgroup_mntpnt, cpuset_mntpnt, cpuset_name, "mems", topology->levels[0][0]->allowed_nodeset);
+    }
+    free(cgroup_mntpnt);
+    free(cpuset_mntpnt);
+  }
+  *cpuset_namep = cpuset_name;
+}
+
+/* Main discovery callback of the Linux backend.
+ * Gathers platform info, parses /proc/cpuinfo, applies cgroup/cpuset
+ * restrictions, discovers memory (NUMA nodes) and then CPUs (hardwired
+ * table, sysfs, or cpuinfo fallback), and finally annotates the root
+ * object (DMI, Backend, LinuxCgroup, MIC serial, uname).
+ * Returns 0 (memory annotation is attempted even when PUs already exist). */
+static int
+hwloc_look_linuxfs(struct hwloc_backend *backend)
+{
+  struct hwloc_topology *topology = backend->topology;
+  struct hwloc_linux_backend_data_s *data = backend->private_data;
+  unsigned nbnodes;
+  char *cpuset_name;
+  struct hwloc_linux_cpuinfo_proc * Lprocs = NULL;
+  struct hwloc_obj_info_s *global_infos = NULL;
+  unsigned global_infos_count = 0;
+  int numprocs;
+  int already_pus;
+  int already_numanodes;
+  int err;
+
+  already_pus = (topology->levels[0][0]->complete_cpuset != NULL
+		 && !hwloc_bitmap_iszero(topology->levels[0][0]->complete_cpuset));
+  /* if there are PUs, still look at memory information
+   * since x86 misses NUMA node information (unless the processor supports topoext)
+   * memory size.
+   */
+  already_numanodes = (topology->levels[0][0]->complete_nodeset != NULL
+		       && !hwloc_bitmap_iszero(topology->levels[0][0]->complete_nodeset));
+  /* if there are already NUMA nodes, we'll just annotate them with memory information,
+   * which requires the NUMA level to be connected.
+   */
+  if (already_numanodes)
+    hwloc_topology_reconnect(topology, 0);
+
+  /* allocate root sets in case not done yet */
+  hwloc_alloc_obj_cpusets(topology->levels[0][0]);
+
+  /*********************************
+   * Platform information for later
+   */
+  hwloc_gather_system_info(topology, data);
+
+  /**********************
+   * /proc/cpuinfo
+   */
+  numprocs = hwloc_linux_parse_cpuinfo(data, "/proc/cpuinfo", &Lprocs, &global_infos, &global_infos_count);
+  if (numprocs < 0)
+    numprocs = 0;
+
+  /**************************
+   * detect model for quirks
+   */
+  if (data->arch == HWLOC_LINUX_ARCH_X86 && numprocs > 0) {
+      unsigned i;
+      const char *cpuvendor = NULL, *cpufamilynumber = NULL, *cpumodelnumber = NULL;
+      for(i=0; i<Lprocs[0].infos_count; i++) {
+	if (!strcmp(Lprocs[0].infos[i].name, "CPUVendor")) {
+	  cpuvendor = Lprocs[0].infos[i].value;
+	} else if (!strcmp(Lprocs[0].infos[i].name, "CPUFamilyNumber")) {
+	  cpufamilynumber = Lprocs[0].infos[i].value;
+	} else if (!strcmp(Lprocs[0].infos[i].name, "CPUModelNumber")) {
+	  cpumodelnumber = Lprocs[0].infos[i].value;
+	}
+      }
+      /* Intel family 6 models 87/133: Knights Landing/Mill quirk flag */
+      if (cpuvendor && !strcmp(cpuvendor, "GenuineIntel")
+	  && cpufamilynumber && !strcmp(cpufamilynumber, "6")
+	  && cpumodelnumber && (!strcmp(cpumodelnumber, "87")
+	  || !strcmp(cpumodelnumber, "133")))
+	data->is_knl = 1;
+      /* AMD families 21/22 share FP units between Compute-Unit cores */
+      if (cpuvendor && !strcmp(cpuvendor, "AuthenticAMD")
+	  && cpufamilynumber
+	  && (!strcmp(cpufamilynumber, "21")
+	      || !strcmp(cpufamilynumber, "22")))
+	data->is_amd_with_CU = 1;
+  }
+
+  /**********************
+   * Gather the list of admin-disabled cpus and mems
+   */
+  hwloc_linux__get_allowed_resources(topology, data->root_path, data->root_fd, &cpuset_name);
+
+  /*********************
+   * Memory information
+   */
+
+  /* Get the machine memory attributes */
+  hwloc_get_procfs_meminfo_info(topology, data, &topology->levels[0][0]->memory);
+
+  /* Gather NUMA information. Must be after hwloc_get_procfs_meminfo_info so that the hugepage size is known */
+  /* NOTE(review): assumes look_sysfsnode always stores into nbnodes even on
+   * failure, since nbnodes is read unconditionally below -- confirm */
+  if (look_sysfsnode(topology, data, "/sys/bus/node/devices", &nbnodes) < 0)
+    look_sysfsnode(topology, data, "/sys/devices/system/node", &nbnodes);
+
+  /* if we found some numa nodes, the machine object has no local memory */
+  if (nbnodes) {
+    unsigned i;
+    topology->levels[0][0]->memory.local_memory = 0;
+    if (topology->levels[0][0]->memory.page_types)
+      for(i=0; i<topology->levels[0][0]->memory.page_types_len; i++)
+	topology->levels[0][0]->memory.page_types[i].count = 0;
+  }
+
+  /**********************
+   * CPU information
+   */
+
+  /* Don't rediscover CPU resources if already done */
+  if (already_pus)
+    goto done;
+
+  /* Gather the list of cpus now */
+  err = hwloc_linux_try_hardwired_cpuinfo(backend);
+  if (!err)
+    goto done;
+
+  /* setup root info */
+  hwloc__move_infos(&hwloc_get_root_obj(topology)->infos, &hwloc_get_root_obj(topology)->infos_count,
+		    &global_infos, &global_infos_count);
+
+  if (getenv("HWLOC_LINUX_USE_CPUINFO")
+      || (hwloc_access("/sys/devices/system/cpu/cpu0/topology/core_siblings", R_OK, data->root_fd) < 0
+	  && hwloc_access("/sys/devices/system/cpu/cpu0/topology/thread_siblings", R_OK, data->root_fd) < 0
+	  && hwloc_access("/sys/bus/cpu/devices/cpu0/topology/thread_siblings", R_OK, data->root_fd) < 0
+	  && hwloc_access("/sys/bus/cpu/devices/cpu0/topology/core_siblings", R_OK, data->root_fd) < 0)) {
+    /* revert to reading cpuinfo only if /sys/.../topology unavailable (before 2.6.16)
+     * or not containing anything interesting */
+    if (numprocs > 0)
+      err = look_cpuinfo(topology, Lprocs, numprocs);
+    else
+      err = -1;
+    if (err < 0)
+      hwloc_setup_pu_level(topology, data->fallback_nbprocessors);
+    look_powerpc_device_tree(topology, data);
+
+  } else {
+    /* sysfs */
+    if (look_sysfscpu(topology, data, "/sys/bus/cpu/devices", Lprocs, numprocs) < 0)
+      if (look_sysfscpu(topology, data, "/sys/devices/system/cpu", Lprocs, numprocs) < 0)
+	/* sysfs but we failed to read cpu topology, fallback */
+	hwloc_setup_pu_level(topology, data->fallback_nbprocessors);
+  }
+
+ done:
+
+  /**********************
+   * Misc
+   */
+
+  /* Gather DMI info */
+  hwloc__get_dmi_id_info(data, topology->levels[0][0]);
+
+  hwloc_obj_add_info(topology->levels[0][0], "Backend", "Linux");
+  if (cpuset_name) {
+    hwloc_obj_add_info(topology->levels[0][0], "LinuxCgroup", cpuset_name);
+    free(cpuset_name);
+  }
+
+  hwloc__linux_get_mic_sn(topology, data);
+
+  /* data->utsname was filled with real uname or \0, we can safely pass it */
+  hwloc_add_uname_info(topology, &data->utsname);
+
+  hwloc_linux_free_cpuinfo(Lprocs, numprocs, global_infos, global_infos_count);
+  return 0;
+}
+
+
+
+/****************************************
+ ***** Linux PCI backend callbacks ******
+ ****************************************/
+
+/*
+ * Backend callback for retrieving the locality of a PCI device:
+ * read the sysfs local_cpus mask for the given busid into cpuset.
+ * Returns 0 on success, -1 if the file is missing/unreadable or the
+ * mask is empty (an empty mask carries no locality information).
+ */
+static int
+hwloc_linux_backend_get_pci_busid_cpuset(struct hwloc_backend *backend,
+					 struct hwloc_pcidev_attr_s *busid, hwloc_bitmap_t cpuset)
+{
+  struct hwloc_linux_backend_data_s *data = backend->private_data;
+  char path[256];
+  int err;
+
+  snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/local_cpus",
+	   busid->domain, busid->bus,
+	   busid->dev, busid->func);
+  err = hwloc__read_path_as_cpumask(path, cpuset, data->root_fd);
+  if (!err && !hwloc_bitmap_iszero(cpuset))
+    return 0;
+  return -1;
+}
+
+
+
+/*******************************
+ ******* Linux component *******
+ *******************************/
+
+/* Backend destructor: release the fsroot fd/path, the udev context,
+ * and the private data itself (mirrors what instantiate allocated). */
+static void
+hwloc_linux_backend_disable(struct hwloc_backend *backend)
+{
+  struct hwloc_linux_backend_data_s *data = backend->private_data;
+#ifdef HAVE_OPENAT
+  free(data->root_path);
+  close(data->root_fd);
+#endif
+#ifdef HWLOC_HAVE_LIBUDEV
+  if (data->udev)
+    udev_unref(data->udev);
+#endif
+  free(data);
+}
+
+/* Instantiate the Linux discovery backend: allocate backend + private
+ * data, open the filesystem root (HWLOC_FSROOT env var or "/"), and set
+ * up default quirk flags and the udev context.
+ * Returns NULL on failure (with partial resources released). */
+static struct hwloc_backend *
+hwloc_linux_component_instantiate(struct hwloc_disc_component *component,
+				  const void *_data1 __hwloc_attribute_unused,
+				  const void *_data2 __hwloc_attribute_unused,
+				  const void *_data3 __hwloc_attribute_unused)
+{
+  struct hwloc_backend *backend;
+  struct hwloc_linux_backend_data_s *data;
+  const char * fsroot_path;
+  int flags, root = -1;
+
+  backend = hwloc_backend_alloc(component);
+  if (!backend)
+    goto out;
+
+  data = malloc(sizeof(*data));
+  if (!data) {
+    errno = ENOMEM;
+    goto out_with_backend;
+  }
+
+  backend->private_data = data;
+  backend->discover = hwloc_look_linuxfs;
+  backend->get_pci_busid_cpuset = hwloc_linux_backend_get_pci_busid_cpuset;
+  backend->disable = hwloc_linux_backend_disable;
+
+  /* default values */
+  data->arch = HWLOC_LINUX_ARCH_UNKNOWN;
+  data->is_knl = 0;
+  data->is_amd_with_CU = 0;
+  data->is_real_fsroot = 1;
+  data->root_path = NULL;
+  fsroot_path = getenv("HWLOC_FSROOT");
+  if (!fsroot_path)
+    fsroot_path = "/";
+
+#ifdef HAVE_OPENAT
+  root = open(fsroot_path, O_RDONLY | O_DIRECTORY);
+  if (root < 0)
+    goto out_with_data;
+
+  /* a non-"/" fsroot means we are replaying a gathered topology, not
+   * describing this system */
+  if (strcmp(fsroot_path, "/")) {
+    backend->is_thissystem = 0;
+    data->is_real_fsroot = 0;
+    data->root_path = strdup(fsroot_path);
+  }
+
+  /* Since this fd stays open after hwloc returns, mark it as
+     close-on-exec so that children don't inherit it.  Stevens says
+     that we should GETFD before we SETFD, so we do. */
+  flags = fcntl(root, F_GETFD, 0);
+  if (-1 == flags ||
+      -1 == fcntl(root, F_SETFD, FD_CLOEXEC | flags)) {
+      close(root);
+      root = -1;
+      goto out_with_data;
+  }
+#else
+  /* without openat() we cannot chroot-like into a custom fsroot */
+  if (strcmp(fsroot_path, "/")) {
+    errno = ENOSYS;
+    goto out_with_data;
+  }
+#endif
+  data->root_fd = root;
+
+#ifdef HWLOC_HAVE_LIBUDEV
+  data->udev = NULL;
+  /* udev only makes sense when reading the real / (not a gathered fsroot) */
+  if (data->is_real_fsroot) {
+    data->udev = udev_new();
+  }
+#endif
+
+  data->dumped_hwdata_dirname = getenv("HWLOC_DUMPED_HWDATA_DIR");
+  if (!data->dumped_hwdata_dirname)
+    data->dumped_hwdata_dirname = RUNSTATEDIR "/hwloc/";
+
+  return backend;
+
+ out_with_data:
+#ifdef HAVE_OPENAT
+  free(data->root_path);
+#endif
+  free(data);
+ out_with_backend:
+  free(backend);
+ out:
+  return NULL;
+}
+
+/* Registration record for the Linux CPU discovery component.
+ * Field order follows struct hwloc_disc_component; 50 is presumably the
+ * component priority and the GLOBAL type the set of excluded component
+ * types -- confirm against hwloc/plugins.h. */
+static struct hwloc_disc_component hwloc_linux_disc_component = {
+  HWLOC_DISC_COMPONENT_TYPE_CPU,
+  "linux",
+  HWLOC_DISC_COMPONENT_TYPE_GLOBAL,
+  hwloc_linux_component_instantiate,
+  50,
+  NULL
+};
+
+/* Public component descriptor exported to the hwloc plugin loader.
+ * The two NULLs are presumably optional init/finalize callbacks and the
+ * trailing pointer the discovery component above -- confirm against
+ * hwloc/plugins.h. */
+const struct hwloc_component hwloc_linux_component = {
+  HWLOC_COMPONENT_ABI,
+  NULL, NULL,
+  HWLOC_COMPONENT_TYPE_DISC,
+  0,
+  &hwloc_linux_disc_component
+};
+
+
+
+
+#ifdef HWLOC_HAVE_LINUXIO
+
+/***********************************
+ ******* Linux I/O component *******
+ ***********************************/
+
+/* Find the topology object an OS device (given by its sysfs path) should
+ * be attached under.  Tries, in order: the PCI device from the busid
+ * embedded in the sysfs symlink target, the NUMA node from
+ * device/numa_node, the cpuset from device/local_cpus, and finally the
+ * topology root.  Virtual devices are rejected unless allowvirtual. */
+static hwloc_obj_t
+hwloc_linuxfs_find_osdev_parent(struct hwloc_backend *backend, int root_fd,
+				const char *osdevpath, int allowvirtual)
+{
+  struct hwloc_topology *topology = backend->topology;
+  char path[256], buf[10];
+  int fd;
+  int foundpci;
+  unsigned pcidomain = 0, pcibus = 0, pcidev = 0, pcifunc = 0;
+  unsigned _pcidomain, _pcibus, _pcidev, _pcifunc;
+  hwloc_bitmap_t cpuset;
+  const char *tmp;
+  hwloc_obj_t parent;
+  int err;
+
+  err = hwloc_readlink(osdevpath, path, sizeof(path), root_fd);
+  if (err < 0) {
+    /* /sys/class/<class>/<name> is a directory instead of a symlink on old kernels (at least around 2.6.18 and 2.6.25).
+     * The link to parse can be found in /sys/class/<class>/<name>/device instead, at least for "/pci..."
+     */
+    char olddevpath[256];
+    snprintf(olddevpath, sizeof(olddevpath), "%s/device", osdevpath);
+    err = hwloc_readlink(olddevpath, path, sizeof(path), root_fd);
+    if (err < 0)
+      return NULL;
+  }
+  /* readlink does not NUL-terminate; err is the link length */
+  path[err] = '\0';
+
+  if (!allowvirtual) {
+    if (strstr(path, "/virtual/"))
+      return NULL;
+  }
+
+  tmp = strstr(path, "/pci");
+  if (!tmp)
+    goto nopci;
+  tmp = strchr(tmp+4, '/');
+  if (!tmp)
+    goto nopci;
+  tmp++;
+
+  /* iterate through busid to find the last one (previous ones are bridges) */
+  foundpci = 0;
+ nextpci:
+  /* full busid "dddd:bb:dd.f" with leading '/': 13 chars, hence tmp += 13 */
+  if (sscanf(tmp+1, "%x:%x:%x.%x", &_pcidomain, &_pcibus, &_pcidev, &_pcifunc) == 4) {
+    foundpci = 1;
+    pcidomain = _pcidomain;
+    pcibus = _pcibus;
+    pcidev = _pcidev;
+    pcifunc = _pcifunc;
+    tmp += 13;
+    goto nextpci;
+  }
+  /* domain-less busid "bb:dd.f" with leading '/': 8 chars, hence tmp += 8 */
+  if (sscanf(tmp+1, "%x:%x.%x", &_pcibus, &_pcidev, &_pcifunc) == 3) {
+    foundpci = 1;
+    pcidomain = 0;
+    pcibus = _pcibus;
+    pcidev = _pcidev;
+    pcifunc = _pcifunc;
+    tmp += 8;
+    goto nextpci;
+  }
+
+  if (foundpci) {
+    /* attach to a PCI parent */
+    parent = hwloc_pci_belowroot_find_by_busid(topology, pcidomain, pcibus, pcidev, pcifunc);
+    if (parent)
+      return parent;
+    /* attach to a normal (non-I/O) parent found by PCI affinity */
+    parent = hwloc_pci_find_busid_parent(topology, pcidomain, pcibus, pcidev, pcifunc);
+    if (parent)
+      return parent;
+  }
+
+ nopci:
+  /* attach directly to the right NUMA node */
+  snprintf(path, sizeof(path), "%s/device/numa_node", osdevpath);
+  fd = hwloc_open(path, root_fd);
+  if (fd >= 0) {
+    /* NOTE(review): read() does not NUL-terminate buf; if the file fills
+     * all 10 bytes, atoi() may read past the buffer -- confirm/fix upstream */
+    err = read(fd, buf, sizeof(buf));
+    close(fd);
+    if (err > 0) {
+      int node = atoi(buf);
+      if (node >= 0) {
+	parent = hwloc_get_numanode_obj_by_os_index(topology, node);
+	if (parent)
+	  return parent;
+      }
+    }
+  }
+
+  /* attach directly to the right cpuset */
+  snprintf(path, sizeof(path), "%s/device/local_cpus", osdevpath);
+  cpuset = hwloc__alloc_read_path_as_cpumask(path, root_fd);
+  if (cpuset) {
+    parent = hwloc_find_insert_io_parent_by_complete_cpuset(topology, cpuset);
+    hwloc_bitmap_free(cpuset);
+    if (parent)
+      return parent;
+  }
+
+  /* FIXME: {numa_node,local_cpus} may be missing when the device link points to a subdirectory.
+   * For instance, device of scsi blocks may point to foo/ata1/host0/target0:0:0/0:0:0:0/ instead of foo/
+   * In such case, we should look for device/../../../../{numa_node,local_cpus} instead of device/{numa_node,local_cpus}
+   * Not needed yet since scsi blocks use the PCI locality above.
+   */
+
+  /* fallback to the root object */
+  return hwloc_get_root_obj(topology);
+}
+
+/* Create an OS device object of the given type/name and insert it under
+ * the given parent.  Returns the new object (still valid after insertion,
+ * see comment below). */
+static hwloc_obj_t
+hwloc_linux_add_os_device(struct hwloc_backend *backend, struct hwloc_obj *pcidev, hwloc_obj_osdev_type_t type, const char *name)
+{
+  struct hwloc_topology *topology = backend->topology;
+  struct hwloc_obj *obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_OS_DEVICE, -1);
+  obj->name = strdup(name);
+  obj->logical_index = -1;
+  obj->attr->osdev.type = type;
+
+  hwloc_insert_object_by_parent(topology, pcidev, obj);
+  /* insert_object_by_parent() doesn't merge during insert, so obj is still valid */
+
+  return obj;
+}
+
+/* Annotate a block OS device with Size/SectorSize/LinuxDeviceID infos
+ * from sysfs, then Vendor/Model/Revision/SerialNumber and a subtype from
+ * udev (either libudev when available and on a real fsroot, or the
+ * /run/udev/data/b<maj>:<min> database file otherwise). */
+static void
+hwloc_linuxfs_block_class_fillinfos(struct hwloc_backend *backend __hwloc_attribute_unused, int root_fd,
+				    struct hwloc_obj *obj, const char *osdevpath)
+{
+#ifdef HWLOC_HAVE_LIBUDEV
+  struct hwloc_linux_backend_data_s *data = backend->private_data;
+#endif
+  FILE *file;
+  char path[256];
+  char line[128];
+  char vendor[64] = "";
+  char model[64] = "";
+  char serial[64] = "";
+  char revision[64] = "";
+  char blocktype[64] = "";
+  unsigned sectorsize = 0;
+  unsigned major_id, minor_id;
+  char *tmp;
+
+  snprintf(path, sizeof(path), "%s/size", osdevpath);
+  if (!hwloc_read_path_by_length(path, line, sizeof(line), root_fd)) {
+    unsigned long long sectors = strtoull(line, NULL, 10);
+    /* linux always reports size in 512-byte units, we want kB */
+    snprintf(line, sizeof(line), "%llu", sectors / 2);
+    hwloc_obj_add_info(obj, "Size", line);
+  }
+
+  snprintf(path, sizeof(path), "%s/queue/hw_sector_size", osdevpath);
+  if (!hwloc_read_path_by_length(path, line, sizeof(line), root_fd)) {
+    sectorsize = strtoul(line, NULL, 10);
+  }
+
+  /* pmem have device/devtype containing "nd_btt" (sectors)
+   * or "nd_namespace_io" (byte-granularity).
+   * Note that device/sector_size in btt devices includes integrity metadata
+   * (512/4096 block + 0/N) while queue/hw_sector_size above is the user sectorsize
+   * without metadata.
+   */
+  snprintf(path, sizeof(path), "%s/device/devtype", osdevpath);
+  if (!hwloc_read_path_by_length(path, line, sizeof(line), root_fd)) {
+    if (!strncmp(line, "nd_", 3)) {
+      strcpy(blocktype, "NVDIMM"); /* Save the blocktype now since udev reports "" so far */
+      if (!strcmp(line, "nd_namespace_io"))
+	sectorsize = 1;
+    }
+  }
+  if (sectorsize) {
+    snprintf(line, sizeof(line), "%u", sectorsize);
+    hwloc_obj_add_info(obj, "SectorSize", line);
+  }
+
+  /* major:minor is required for the udev database lookup below */
+  snprintf(path, sizeof(path), "%s/dev", osdevpath);
+  if (hwloc_read_path_by_length(path, line, sizeof(line), root_fd) < 0)
+    return;
+  if (sscanf(line, "%u:%u", &major_id, &minor_id) != 2)
+    return;
+  tmp = strchr(line, '\n');
+  if (tmp)
+    *tmp = '\0';
+  hwloc_obj_add_info(obj, "LinuxDeviceID", line);
+
+#ifdef HWLOC_HAVE_LIBUDEV
+  if (data->udev) {
+    struct udev_device *dev;
+    const char *prop;
+    dev = udev_device_new_from_subsystem_sysname(data->udev, "block", obj->name);
+    if (!dev)
+      return;
+    prop = udev_device_get_property_value(dev, "ID_VENDOR");
+    if (prop) {
+      strncpy(vendor, prop, sizeof(vendor));
+      vendor[sizeof(vendor)-1] = '\0';
+    }
+    prop = udev_device_get_property_value(dev, "ID_MODEL");
+    if (prop) {
+      strncpy(model, prop, sizeof(model));
+      model[sizeof(model)-1] = '\0';
+    }
+    prop = udev_device_get_property_value(dev, "ID_REVISION");
+    if (prop) {
+      strncpy(revision, prop, sizeof(revision));
+      revision[sizeof(revision)-1] = '\0';
+    }
+    prop = udev_device_get_property_value(dev, "ID_SERIAL_SHORT");
+    if (prop) {
+      strncpy(serial, prop, sizeof(serial));
+      serial[sizeof(serial)-1] = '\0';
+    }
+    prop = udev_device_get_property_value(dev, "ID_TYPE");
+    if (prop) {
+      strncpy(blocktype, prop, sizeof(blocktype));
+      blocktype[sizeof(blocktype)-1] = '\0';
+    }
+
+    udev_device_unref(dev);
+  } else
+    /* fallback to reading files, works with any fsroot */
+#endif
+ {
+  snprintf(path, sizeof(path), "/run/udev/data/b%u:%u", major_id, minor_id);
+  file = hwloc_fopen(path, "r", root_fd);
+  if (!file)
+    return;
+
+  while (NULL != fgets(line, sizeof(line), file)) {
+    tmp = strchr(line, '\n');
+    if (tmp)
+      *tmp = '\0';
+    if (!strncmp(line, "E:ID_VENDOR=", strlen("E:ID_VENDOR="))) {
+      strncpy(vendor, line+strlen("E:ID_VENDOR="), sizeof(vendor));
+      vendor[sizeof(vendor)-1] = '\0';
+    } else if (!strncmp(line, "E:ID_MODEL=", strlen("E:ID_MODEL="))) {
+      strncpy(model, line+strlen("E:ID_MODEL="), sizeof(model));
+      model[sizeof(model)-1] = '\0';
+    } else if (!strncmp(line, "E:ID_REVISION=", strlen("E:ID_REVISION="))) {
+      strncpy(revision, line+strlen("E:ID_REVISION="), sizeof(revision));
+      revision[sizeof(revision)-1] = '\0';
+    } else if (!strncmp(line, "E:ID_SERIAL_SHORT=", strlen("E:ID_SERIAL_SHORT="))) {
+      strncpy(serial, line+strlen("E:ID_SERIAL_SHORT="), sizeof(serial));
+      serial[sizeof(serial)-1] = '\0';
+    } else if (!strncmp(line, "E:ID_TYPE=", strlen("E:ID_TYPE="))) {
+      strncpy(blocktype, line+strlen("E:ID_TYPE="), sizeof(blocktype));
+      blocktype[sizeof(blocktype)-1] = '\0';
+    }
+  }
+  fclose(file);
+ }
+
+  /* clear fake "ATA" vendor name */
+  if (!strcasecmp(vendor, "ATA"))
+    *vendor = '\0';
+  /* overwrite vendor name from model when possible */
+  if (!*vendor) {
+    if (!strncasecmp(model, "wd", 2))
+      strcpy(vendor, "Western Digital");
+    else if (!strncasecmp(model, "st", 2))
+      strcpy(vendor, "Seagate");
+    else if (!strncasecmp(model, "samsung", 7))
+      strcpy(vendor, "Samsung");
+    else if (!strncasecmp(model, "sandisk", 7))
+      strcpy(vendor, "SanDisk");
+    else if (!strncasecmp(model, "toshiba", 7))
+      strcpy(vendor, "Toshiba");
+  }
+
+  if (*vendor)
+    hwloc_obj_add_info(obj, "Vendor", vendor);
+  if (*model)
+    hwloc_obj_add_info(obj, "Model", model);
+  if (*revision)
+    hwloc_obj_add_info(obj, "Revision", revision);
+  if (*serial)
+    hwloc_obj_add_info(obj, "SerialNumber", serial);
+
+  if (!strcmp(blocktype, "disk"))
+    obj->subtype = strdup("Disk");
+  else if (!strcmp(blocktype, "NVDIMM")) /* FIXME: set by us above, to workaround udev returning "" so far */
+    obj->subtype = strdup("NVDIMM");
+  else if (!strcmp(blocktype, "tape"))
+    obj->subtype = strdup("Tape");
+  else if (!strcmp(blocktype, "cd") || !strcmp(blocktype, "floppy") || !strcmp(blocktype, "optical"))
+    obj->subtype = strdup("Removable Media Device");
+  else {
+    /* generic, usb mass storage/rbc, usb mass storage/scsi */
+  }
+}
+
+/* Enumerate /sys/class/block and create one BLOCK OS device per whole
+ * disk (partitions are skipped), attached to its locality parent.
+ * Returns 0 even when the directory is missing. */
+static int
+hwloc_linuxfs_lookup_block_class(struct hwloc_backend *backend)
+{
+  struct hwloc_linux_backend_data_s *data = backend->private_data;
+  int root_fd = data->root_fd;
+  DIR *dir;
+  struct dirent *dirent;
+
+  dir = hwloc_opendir("/sys/class/block", root_fd);
+  if (!dir)
+    return 0;
+
+  while ((dirent = readdir(dir)) != NULL) {
+    char path[256];
+    struct stat stbuf;
+    hwloc_obj_t obj, parent;
+    int err;
+
+    if (!strcmp(dirent->d_name, ".") || !strcmp(dirent->d_name, ".."))
+      continue;
+
+    /* ignore partitions */
+    err = snprintf(path, sizeof(path), "/sys/class/block/%s/partition", dirent->d_name);
+    if ((size_t) err < sizeof(path)
+	&& hwloc_stat(path, &stbuf, root_fd) >= 0)
+      continue;
+
+    /* skip names too long to fit in path (snprintf reports truncation) */
+    err = snprintf(path, sizeof(path), "/sys/class/block/%s", dirent->d_name);
+    if ((size_t) err >= sizeof(path))
+      continue;
+    parent = hwloc_linuxfs_find_osdev_parent(backend, root_fd, path, 0 /* no virtual */);
+    if (!parent)
+      continue;
+
+    /* USB device are created here but removed later when USB PCI devices get filtered out
+     * (unless WHOLE_IO is enabled).
+     */
+
+    obj = hwloc_linux_add_os_device(backend, parent, HWLOC_OBJ_OSDEV_BLOCK, dirent->d_name);
+
+    hwloc_linuxfs_block_class_fillinfos(backend, root_fd, obj, path);
+  }
+
+  closedir(dir);
+
+  return 0;
+}
+
+/* Annotate a network OS device with its hardware "Address" and, for
+ * InfiniBand-backed interfaces (device/infiniband exists), its 1-based
+ * "Port" number derived from dev_id. */
+static void
+hwloc_linuxfs_net_class_fillinfos(int root_fd,
+				  struct hwloc_obj *obj, const char *osdevpath)
+{
+  struct stat st;
+  char path[256];
+  char address[128];
+  snprintf(path, sizeof(path), "%s/address", osdevpath);
+  if (!hwloc_read_path_by_length(path, address, sizeof(address), root_fd)) {
+    char *eol = strchr(address, '\n');
+    if (eol)
+      *eol = 0;
+    hwloc_obj_add_info(obj, "Address", address);
+  }
+  snprintf(path, sizeof(path), "%s/device/infiniband", osdevpath);
+  if (!hwloc_stat(path, &st, root_fd)) {
+    char hexid[16];
+    snprintf(path, sizeof(path), "%s/dev_id", osdevpath);
+    if (!hwloc_read_path_by_length(path, hexid, sizeof(hexid), root_fd)) {
+      char *eoid;
+      unsigned long port;
+      port = strtoul(hexid, &eoid, 0);
+      if (eoid != hexid) {
+	char portstr[16];
+	/* dev_id is 0-based, the exported Port info is 1-based */
+	snprintf(portstr, sizeof(portstr), "%lu", port+1);
+	hwloc_obj_add_info(obj, "Port", portstr);
+      }
+    }
+  }
+}
+
+/* Enumerate /sys/class/net and create one NETWORK OS device per
+ * non-virtual interface, attached to its locality parent.
+ * Returns 0 even when the directory is missing. */
+static int
+hwloc_linuxfs_lookup_net_class(struct hwloc_backend *backend)
+{
+  struct hwloc_linux_backend_data_s *data = backend->private_data;
+  int root_fd = data->root_fd;
+  DIR *dir;
+  struct dirent *dirent;
+
+  dir = hwloc_opendir("/sys/class/net", root_fd);
+  if (!dir)
+    return 0;
+
+  while ((dirent = readdir(dir)) != NULL) {
+    char path[256];
+    hwloc_obj_t obj, parent;
+    int err;
+
+    if (!strcmp(dirent->d_name, ".") || !strcmp(dirent->d_name, ".."))
+      continue;
+
+    /* skip names too long to fit in path (snprintf reports truncation) */
+    err = snprintf(path, sizeof(path), "/sys/class/net/%s", dirent->d_name);
+    if ((size_t) err >= sizeof(path))
+      continue;
+    parent = hwloc_linuxfs_find_osdev_parent(backend, root_fd, path, 0 /* no virtual */);
+    if (!parent)
+      continue;
+
+    obj = hwloc_linux_add_os_device(backend, parent, HWLOC_OBJ_OSDEV_NETWORK, dirent->d_name);
+
+    hwloc_linuxfs_net_class_fillinfos(root_fd, obj, path);
+  }
+
+  closedir(dir);
+
+  return 0;
+}
+
+/* Annotate an InfiniBand OS device with NodeGUID/SysImageGUID and, for
+ * each port (1-based, until a missing ports/<i>/state file ends the
+ * scan), PortNState/PortNLID/PortNLMC and the initialized PortNGIDM
+ * entries. */
+static void
+hwloc_linuxfs_infiniband_class_fillinfos(int root_fd,
+					 struct hwloc_obj *obj, const char *osdevpath)
+{
+  char path[256];
+  char guidvalue[20];
+  unsigned i,j;
+
+  snprintf(path, sizeof(path), "%s/node_guid", osdevpath);
+  if (!hwloc_read_path_by_length(path, guidvalue, sizeof(guidvalue), root_fd)) {
+    size_t len;
+    /* truncate at the first character not part of a hex GUID */
+    len = strspn(guidvalue, "0123456789abcdefx:");
+    guidvalue[len] = '\0';
+    hwloc_obj_add_info(obj, "NodeGUID", guidvalue);
+  }
+
+  snprintf(path, sizeof(path), "%s/sys_image_guid", osdevpath);
+  if (!hwloc_read_path_by_length(path, guidvalue, sizeof(guidvalue), root_fd)) {
+    size_t len;
+    len = strspn(guidvalue, "0123456789abcdefx:");
+    guidvalue[len] = '\0';
+    hwloc_obj_add_info(obj, "SysImageGUID", guidvalue);
+  }
+
+  for(i=1; ; i++) {
+    char statevalue[2];
+    char lidvalue[11];
+    char gidvalue[40];
+
+    snprintf(path, sizeof(path), "%s/ports/%u/state", osdevpath, i);
+    if (!hwloc_read_path_by_length(path, statevalue, sizeof(statevalue), root_fd)) {
+      char statename[32];
+      statevalue[1] = '\0'; /* only keep the first byte/digit */
+      snprintf(statename, sizeof(statename), "Port%uState", i);
+      hwloc_obj_add_info(obj, statename, statevalue);
+    } else {
+      /* no such port */
+      break;
+    }
+
+    snprintf(path, sizeof(path), "%s/ports/%u/lid", osdevpath, i);
+    if (!hwloc_read_path_by_length(path, lidvalue, sizeof(lidvalue), root_fd)) {
+      char lidname[32];
+      size_t len;
+      len = strspn(lidvalue, "0123456789abcdefx");
+      lidvalue[len] = '\0';
+      snprintf(lidname, sizeof(lidname), "Port%uLID", i);
+      hwloc_obj_add_info(obj, lidname, lidvalue);
+    }
+
+    snprintf(path, sizeof(path), "%s/ports/%u/lid_mask_count", osdevpath, i);
+    if (!hwloc_read_path_by_length(path, lidvalue, sizeof(lidvalue), root_fd)) {
+      char lidname[32];
+      size_t len;
+      len = strspn(lidvalue, "0123456789");
+      lidvalue[len] = '\0';
+      snprintf(lidname, sizeof(lidname), "Port%uLMC", i);
+      hwloc_obj_add_info(obj, lidname, lidvalue);
+    }
+
+    for(j=0; ; j++) {
+      snprintf(path, sizeof(path), "%s/ports/%u/gids/%u", osdevpath, i, j);
+      if (!hwloc_read_path_by_length(path, gidvalue, sizeof(gidvalue), root_fd)) {
+	char gidname[32];
+	size_t len;
+	len = strspn(gidvalue, "0123456789abcdefx:");
+	gidvalue[len] = '\0';
+	/* offset 20 is the lower 64 bits of "xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx" */
+	if (strncmp(gidvalue+20, "0000:0000:0000:0000", 19)) {
+	  /* only keep initialized GIDs */
+	  snprintf(gidname, sizeof(gidname), "Port%uGID%u", i, j);
+	  hwloc_obj_add_info(obj, gidname, gidvalue);
+	}
+      } else {
+	/* no such port */
+	break;
+      }
+    }
+  }
+}
+
+static int
+hwloc_linuxfs_lookup_infiniband_class(struct hwloc_backend *backend)
+{
+  struct hwloc_linux_backend_data_s *data = backend->private_data;
+  int root_fd = data->root_fd;
+  DIR *dir;
+  struct dirent *dirent;
+
+  dir = hwloc_opendir("/sys/class/infiniband", root_fd);
+  if (!dir)
+    return 0;
+
+  while ((dirent = readdir(dir)) != NULL) {
+    char path[256];
+    hwloc_obj_t obj, parent;
+    int err;
+
+    if (!strcmp(dirent->d_name, ".") || !strcmp(dirent->d_name, ".."))
+      continue;
+
+    /* blocklist scif* fake devices */
+    if (!strncmp(dirent->d_name, "scif", 4))
+      continue;
+
+    err = snprintf(path, sizeof(path), "/sys/class/infiniband/%s", dirent->d_name);
+    if ((size_t) err > sizeof(path))
+      continue;
+    parent = hwloc_linuxfs_find_osdev_parent(backend, root_fd, path, 0 /* no virtual */);
+    if (!parent)
+      continue;
+
+    obj = hwloc_linux_add_os_device(backend, parent, HWLOC_OBJ_OSDEV_OPENFABRICS, dirent->d_name);
+
+    hwloc_linuxfs_infiniband_class_fillinfos(root_fd, obj, path);
+  }
+
+  closedir(dir);
+
+  return 0;
+}
+
+static void
+hwloc_linuxfs_mic_class_fillinfos(int root_fd,
+				  struct hwloc_obj *obj, const char *osdevpath)
+{
+  char path[256];
+  char family[64];
+  char sku[64];
+  char sn[64];
+  char string[20];
+
+  obj->subtype = strdup("MIC");
+
+  snprintf(path, sizeof(path), "%s/family", osdevpath);
+  if (!hwloc_read_path_by_length(path, family, sizeof(family), root_fd)) {
+    char *eol = strchr(family, '\n');
+    if (eol)
+      *eol = 0;
+    hwloc_obj_add_info(obj, "MICFamily", family);
+  }
+
+  snprintf(path, sizeof(path), "%s/sku", osdevpath);
+  if (!hwloc_read_path_by_length(path, sku, sizeof(sku), root_fd)) {
+    char *eol = strchr(sku, '\n');
+    if (eol)
+      *eol = 0;
+    hwloc_obj_add_info(obj, "MICSKU", sku);
+  }
+
+  snprintf(path, sizeof(path), "%s/serialnumber", osdevpath);
+  if (!hwloc_read_path_by_length(path, sn, sizeof(sn), root_fd)) {
+    char *eol;
+    eol = strchr(sn, '\n');
+    if (eol)
+      *eol = 0;
+    hwloc_obj_add_info(obj, "MICSerialNumber", sn);
+  }
+
+  snprintf(path, sizeof(path), "%s/active_cores", osdevpath);
+  if (!hwloc_read_path_by_length(path, string, sizeof(string), root_fd)) {
+    unsigned long count = strtoul(string, NULL, 16);
+    snprintf(string, sizeof(string), "%lu", count);
+    hwloc_obj_add_info(obj, "MICActiveCores", string);
+  }
+
+  snprintf(path, sizeof(path), "%s/memsize", osdevpath);
+  if (!hwloc_read_path_by_length(path, string, sizeof(string), root_fd)) {
+    unsigned long count = strtoul(string, NULL, 16);
+    snprintf(string, sizeof(string), "%lu", count);
+    hwloc_obj_add_info(obj, "MICMemorySize", string);
+  }
+}
+
+static int
+hwloc_linuxfs_lookup_mic_class(struct hwloc_backend *backend)
+{
+  struct hwloc_linux_backend_data_s *data = backend->private_data;
+  int root_fd = data->root_fd;
+  unsigned idx;
+  DIR *dir;
+  struct dirent *dirent;
+
+  dir = hwloc_opendir("/sys/class/mic", root_fd);
+  if (!dir)
+    return 0;
+
+  while ((dirent = readdir(dir)) != NULL) {
+    char path[256];
+    hwloc_obj_t obj, parent;
+
+    if (!strcmp(dirent->d_name, ".") || !strcmp(dirent->d_name, ".."))
+      continue;
+    if (sscanf(dirent->d_name, "mic%u", &idx) != 1)
+      continue;
+
+    snprintf(path, sizeof(path), "/sys/class/mic/mic%u", idx);
+    parent = hwloc_linuxfs_find_osdev_parent(backend, root_fd, path, 0 /* no virtual */);
+    if (!parent)
+      continue;
+
+    obj = hwloc_linux_add_os_device(backend, parent, HWLOC_OBJ_OSDEV_COPROC, dirent->d_name);
+
+    hwloc_linuxfs_mic_class_fillinfos(root_fd, obj, path);
+  }
+
+  closedir(dir);
+
+  return 0;
+}
+
+static int
+hwloc_linuxfs_lookup_drm_class(struct hwloc_backend *backend)
+{
+  struct hwloc_linux_backend_data_s *data = backend->private_data;
+  int root_fd = data->root_fd;
+  DIR *dir;
+  struct dirent *dirent;
+
+  dir = hwloc_opendir("/sys/class/drm", root_fd);
+  if (!dir)
+    return 0;
+
+  while ((dirent = readdir(dir)) != NULL) {
+    char path[256];
+    hwloc_obj_t parent;
+    struct stat stbuf;
+    int err;
+
+    if (!strcmp(dirent->d_name, ".") || !strcmp(dirent->d_name, ".."))
+      continue;
+
+    /* only keep main devices, not subdevices for outputs */
+    err = snprintf(path, sizeof(path), "/sys/class/drm/%s/dev", dirent->d_name);
+    if ((size_t) err < sizeof(path)
+	&& hwloc_stat(path, &stbuf, root_fd) < 0)
+      continue;
+
+    /* FIXME: only keep cardX ? */
+    /* FIXME: drop cardX for proprietary drivers that get CUDA/OpenCL devices? */
+
+    err = snprintf(path, sizeof(path), "/sys/class/drm/%s", dirent->d_name);
+    if ((size_t) err >= sizeof(path))
+      continue;
+    parent = hwloc_linuxfs_find_osdev_parent(backend, root_fd, path, 0 /* no virtual */);
+    if (!parent)
+      continue;
+
+    hwloc_linux_add_os_device(backend, parent, HWLOC_OBJ_OSDEV_GPU, dirent->d_name);
+  }
+
+  closedir(dir);
+
+  return 0;
+}
+
+static int
+hwloc_linuxfs_lookup_dma_class(struct hwloc_backend *backend)
+{
+  struct hwloc_linux_backend_data_s *data = backend->private_data;
+  int root_fd = data->root_fd;
+  DIR *dir;
+  struct dirent *dirent;
+
+  dir = hwloc_opendir("/sys/class/dma", root_fd);
+  if (!dir)
+    return 0;
+
+  while ((dirent = readdir(dir)) != NULL) {
+    char path[256];
+    hwloc_obj_t parent;
+    int err;
+
+    if (!strcmp(dirent->d_name, ".") || !strcmp(dirent->d_name, ".."))
+      continue;
+
+    err = snprintf(path, sizeof(path), "/sys/class/dma/%s", dirent->d_name);
+    if ((size_t) err >= sizeof(path))
+      continue;
+    parent = hwloc_linuxfs_find_osdev_parent(backend, root_fd, path, 0 /* no virtual */);
+    if (!parent)
+      continue;
+
+    hwloc_linux_add_os_device(backend, parent, HWLOC_OBJ_OSDEV_DMA, dirent->d_name);
+  }
+
+  closedir(dir);
+
+  return 0;
+}
+
/* Fixed part of an SMBIOS type-17 ("Memory Device") entry, as read from
 * /sys/firmware/dmi/entries/17-<N>/raw.  Multi-byte fields are kept as
 * byte arrays to avoid any padding/endianness assumption; only `length`
 * and the *_str_num string indexes are actually used below.
 */
struct hwloc_firmware_dmi_mem_device_header {
  unsigned char type;
  unsigned char length; /* total length of the formatted area; strings follow it */
  unsigned char handle[2];
  unsigned char phy_mem_handle[2];
  unsigned char mem_err_handle[2];
  unsigned char tot_width[2];
  unsigned char dat_width[2];
  unsigned char size[2];
  unsigned char ff;
  unsigned char dev_set;
  unsigned char dev_loc_str_num;  /* 1-based index of the device-locator string */
  unsigned char bank_loc_str_num; /* 1-based index of the bank-locator string */
  unsigned char mem_type;
  unsigned char type_detail[2];
  unsigned char speed[2];
  unsigned char manuf_str_num;    /* 1-based index of the manufacturer string */
  unsigned char serial_str_num;   /* 1-based index of the serial-number string */
  unsigned char asset_tag_str_num;
  unsigned char part_num_str_num;
  /* don't include the following fields since we don't need them,
   * some old implementations may miss them.
   */
};
+
/* Return 1 when a DMI string carries real content: it must be non-empty
 * and contain at least one non-space character (at least Dell pads empty
 * memory slots with spaces). */
static int check_dmi_entry(const char *buffer)
{
  size_t len = strlen(buffer);
  return len != 0 && strspn(buffer, " ") < len;
}
+
+static int
+hwloc__get_firmware_dmi_memory_info_one(struct hwloc_topology *topology,
+					unsigned idx, const char *path, FILE *fd,
+					struct hwloc_firmware_dmi_mem_device_header *header)
+{
+  unsigned slen;
+  char buffer[256]; /* enough for memory device strings, or at least for each of them */
+  unsigned foff; /* offset in raw file */
+  unsigned boff; /* offset in buffer read from raw file */
+  unsigned i;
+  struct hwloc_obj_info_s *infos = NULL;
+  unsigned infos_count = 0;
+  hwloc_obj_t misc;
+  int foundinfo = 0;
+
+  /* start after the header */
+  foff = header->length;
+  i = 1;
+  while (1) {
+    /* read one buffer */
+    if (fseek(fd, foff, SEEK_SET) < 0)
+      break;
+    if (!fgets(buffer, sizeof(buffer), fd))
+      break;
+    /* read string at the beginning of the buffer */
+    boff = 0;
+    while (1) {
+      /* stop on empty string */
+      if (!buffer[boff])
+        goto done;
+      /* stop if this string goes to the end of the buffer */
+      slen = strlen(buffer+boff);
+      if (boff + slen+1 == sizeof(buffer))
+        break;
+      /* string didn't get truncated, should be OK */
+      if (i == header->manuf_str_num) {
+	if (check_dmi_entry(buffer+boff)) {
+	  hwloc__add_info(&infos, &infos_count, "Vendor", buffer+boff);
+	  foundinfo = 1;
+	}
+      }	else if (i == header->serial_str_num) {
+	if (check_dmi_entry(buffer+boff)) {
+	  hwloc__add_info(&infos, &infos_count, "SerialNumber", buffer+boff);
+	  foundinfo = 1;
+	}
+      } else if (i == header->asset_tag_str_num) {
+	if (check_dmi_entry(buffer+boff)) {
+	  hwloc__add_info(&infos, &infos_count, "AssetTag", buffer+boff);
+	  foundinfo = 1;
+	}
+      } else if (i == header->part_num_str_num) {
+	if (check_dmi_entry(buffer+boff)) {
+	  hwloc__add_info(&infos, &infos_count, "PartNumber", buffer+boff);
+	  foundinfo = 1;
+	}
+      } else if (i == header->dev_loc_str_num) {
+	if (check_dmi_entry(buffer+boff)) {
+	  hwloc__add_info(&infos, &infos_count, "DeviceLocation", buffer+boff);
+	  /* only a location, not an actual info about the device */
+	}
+      } else if (i == header->bank_loc_str_num) {
+	if (check_dmi_entry(buffer+boff)) {
+	  hwloc__add_info(&infos, &infos_count, "BankLocation", buffer+boff);
+	  /* only a location, not an actual info about the device */
+	}
+      } else {
+	goto done;
+      }
+      /* next string in buffer */
+      boff += slen+1;
+      i++;
+    }
+    /* couldn't read a single full string from that buffer, we're screwed */
+    if (!boff) {
+      fprintf(stderr, "hwloc could read a DMI firmware entry #%u in %s\n",
+	      i, path);
+      break;
+    }
+    /* reread buffer after previous string */
+    foff += boff;
+  }
+
+done:
+  if (!foundinfo) {
+    /* found no actual info about the device. if there's only location info, the slot may be empty */
+    goto out_with_infos;
+  }
+
+  misc = hwloc_alloc_setup_object(topology, HWLOC_OBJ_MISC, idx);
+  if (!misc)
+    goto out_with_infos;
+
+  misc->subtype = strdup("MemoryModule");
+
+  hwloc__move_infos(&misc->infos, &misc->infos_count, &infos, &infos_count);
+  /* FIXME: find a way to identify the corresponding NUMA node and attach these objects there.
+   * but it means we need to parse DeviceLocation=DIMM_B4 but these vary significantly
+   * with the vendor, and it's hard to be 100% sure 'B' is second socket.
+   * Examples at http://sourceforge.net/p/edac-utils/code/HEAD/tree/trunk/src/etc/labels.db
+   * or https://github.com/grondo/edac-utils/blob/master/src/etc/labels.db
+   */
+  hwloc_insert_object_by_parent(topology, hwloc_get_root_obj(topology), misc);
+  return 1;
+
+ out_with_infos:
+  hwloc__free_infos(infos, infos_count);
+  return 0;
+}
+
+static int
+hwloc__get_firmware_dmi_memory_info(struct hwloc_topology *topology,
+				    struct hwloc_linux_backend_data_s *data)
+{
+  char path[128];
+  unsigned i;
+
+  for(i=0; ; i++) {
+    FILE *fd;
+    struct hwloc_firmware_dmi_mem_device_header header;
+    int err;
+
+    snprintf(path, sizeof(path), "/sys/firmware/dmi/entries/17-%u/raw", i);
+    fd = hwloc_fopen(path, "r", data->root_fd);
+    if (!fd)
+      break;
+
+    err = fread(&header, sizeof(header), 1, fd);
+    if (err != 1) {
+      fclose(fd);
+      break;
+    }
+    if (header.length < sizeof(header)) {
+      /* invalid, or too old entry/spec that doesn't contain what we need */
+      fclose(fd);
+      break;
+    }
+
+    hwloc__get_firmware_dmi_memory_info_one(topology, i, path, fd, &header);
+
+    fclose(fd);
+  }
+
+  return 0;
+}
+
+#ifdef HWLOC_HAVE_LINUXPCI
+
#define HWLOC_PCI_REVISION_ID 0x08 /* offset of the Revision ID byte in PCI config space */
#define HWLOC_PCI_CAP_ID_EXP 0x10 /* PCI Express capability ID */
#define HWLOC_PCI_CLASS_NOT_DEFINED 0x0000 /* fallback class when sysfs "class" is unreadable */
+
/* Scan /sys/bus/pci/devices/ and build the PCI object tree.
 * For each device directory (named "dddd:bb:dd.f") we read the config
 * space and a few sysfs attribute files, create a PCI-device or bridge
 * object, insert it into a temporary tree sorted by bus id, and finally
 * attach that tree below the topology root.
 */
static int
hwloc_linuxfs_pci_look_pcidevices(struct hwloc_backend *backend)
{
  struct hwloc_linux_backend_data_s *data = backend->private_data;
  struct hwloc_topology *topology = backend->topology;
  hwloc_obj_t tree = NULL;
  int root_fd = data->root_fd;
  DIR *dir;
  struct dirent *dirent;

  /* We could lookup /sys/devices/pci.../.../busid1/.../busid2 recursively
   * to build the hierarchy of bridges/devices directly.
   * But that would require readdirs in all bridge sysfs subdirectories.
   * Do a single readdir in the linear list in /sys/bus/pci/devices/...
   * and build the hierarchy manually instead.
   */
  dir = hwloc_opendir("/sys/bus/pci/devices/", root_fd);
  if (!dir)
    return 0;

  while ((dirent = readdir(dir)) != NULL) {
#define CONFIG_SPACE_CACHESIZE 256
    unsigned char config_space_cache[CONFIG_SPACE_CACHESIZE];
    unsigned domain, bus, dev, func;
    unsigned short class_id;
    hwloc_obj_type_t type;
    hwloc_obj_t obj;
    struct hwloc_pcidev_attr_s *attr;
    unsigned offset;
    char path[64];
    char value[16];
    size_t ret;
    int fd, err;

    /* entry names are PCI bus ids such as 0000:03:00.1 */
    if (sscanf(dirent->d_name, "%04x:%02x:%02x.%01x", &domain, &bus, &dev, &func) != 4)
      continue;

    /* initialize the config space in case we fail to read it (missing permissions, etc). */
    memset(config_space_cache, 0xff, CONFIG_SPACE_CACHESIZE);
    err = snprintf(path, sizeof(path), "/sys/bus/pci/devices/%s/config", dirent->d_name);
    if ((size_t) err < sizeof(path)) {
      /* don't use hwloc_read_path_by_length() because we don't want the ending \0 */
      fd = hwloc_open(path, root_fd);
      if (fd >= 0) {
	ret = read(fd, config_space_cache, CONFIG_SPACE_CACHESIZE);
	(void) ret; /* we initialized config_space_cache in case we don't read enough, ignore the read length */
	close(fd);
      }
    }

    /* sysfs "class" holds a 24-bit value; the >>8 drops the low byte
     * to keep the 16-bit class+subclass */
    class_id = HWLOC_PCI_CLASS_NOT_DEFINED;
    err = snprintf(path, sizeof(path), "/sys/bus/pci/devices/%s/class", dirent->d_name);
    if ((size_t) err < sizeof(path)
	&& !hwloc_read_path_by_length(path, value, sizeof(value), root_fd))
      class_id = strtoul(value, NULL, 16) >> 8;

    type = hwloc_pci_check_bridge_type(class_id, config_space_cache);

    /* filtered? */
    if (type == HWLOC_OBJ_PCI_DEVICE) {
      enum hwloc_type_filter_e filter;
      hwloc_topology_get_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, &filter);
      if (filter == HWLOC_TYPE_FILTER_KEEP_NONE)
	continue;
      if (filter == HWLOC_TYPE_FILTER_KEEP_IMPORTANT
	  && !hwloc_filter_check_pcidev_subtype_important(class_id))
	continue;
    } else if (type == HWLOC_OBJ_BRIDGE) {
      enum hwloc_type_filter_e filter;
      hwloc_topology_get_type_filter(topology, HWLOC_OBJ_BRIDGE, &filter);
      if (filter == HWLOC_TYPE_FILTER_KEEP_NONE)
	continue;
      /* HWLOC_TYPE_FILTER_KEEP_IMPORTANT filtered later in the core */
    }

    obj = hwloc_alloc_setup_object(topology, type, -1);
    if (!obj)
      break;
    attr = &obj->attr->pcidev;

    attr->domain = domain;
    attr->bus = bus;
    attr->dev = dev;
    attr->func = func;

    /* default (unknown) values */
    attr->vendor_id = 0;
    attr->device_id = 0;
    attr->class_id = class_id;
    attr->revision = 0;
    attr->subvendor_id = 0;
    attr->subdevice_id = 0;
    attr->linkspeed = 0;

    err = snprintf(path, sizeof(path), "/sys/bus/pci/devices/%s/vendor", dirent->d_name);
    if ((size_t) err < sizeof(path)
	&& !hwloc_read_path_by_length(path, value, sizeof(value), root_fd))
      attr->vendor_id = strtoul(value, NULL, 16);

    err = snprintf(path, sizeof(path), "/sys/bus/pci/devices/%s/device", dirent->d_name);
    if ((size_t) err < sizeof(path)
	&& !hwloc_read_path_by_length(path, value, sizeof(value), root_fd))
      attr->device_id = strtoul(value, NULL, 16);

    err = snprintf(path, sizeof(path), "/sys/bus/pci/devices/%s/subsystem_vendor", dirent->d_name);
    if ((size_t) err < sizeof(path)
	&& !hwloc_read_path_by_length(path, value, sizeof(value), root_fd))
      attr->subvendor_id = strtoul(value, NULL, 16);

    err = snprintf(path, sizeof(path), "/sys/bus/pci/devices/%s/subsystem_device", dirent->d_name);
    if ((size_t) err < sizeof(path)
	&& !hwloc_read_path_by_length(path, value, sizeof(value), root_fd))
      attr->subdevice_id = strtoul(value, NULL, 16);

    /* bridge specific attributes */
    if (type == HWLOC_OBJ_BRIDGE) {
      /* NOTE(review): on failure obj is neither inserted nor freed here —
       * looks like a leak; confirm whether an unlinked-object free is needed */
      if (hwloc_pci_setup_bridge_attr(obj, config_space_cache) < 0)
	continue;
    }

    /* get the revision */
    attr->revision = config_space_cache[HWLOC_PCI_REVISION_ID];

    /* try to get the link speed */
    offset = hwloc_pci_find_cap(config_space_cache, HWLOC_PCI_CAP_ID_EXP);
    if (offset > 0 && offset + 20 /* size of PCI express block up to link status */ <= CONFIG_SPACE_CACHESIZE)
      hwloc_pci_find_linkspeed(config_space_cache, offset, &attr->linkspeed);

    hwloc_pci_tree_insert_by_busid(&tree, obj);
  }

  closedir(dir);

  hwloc_pci_tree_attach_belowroot(backend->topology, tree);
  return 0;
}
+
/* Search the PCI object tree for the object matching domain:bus:dev.0,
 * recursing below bridges whose secondary..subordinate bus range contains
 * the target bus.  Returns NULL when no such object exists.
 */
static hwloc_obj_t
hwloc_linuxfs_pci_find_pcislot_obj(struct hwloc_obj *tree,
				   unsigned domain, unsigned bus, unsigned dev)
{
  for ( ; tree; tree = tree->next_sibling) {
    if (tree->type == HWLOC_OBJ_PCI_DEVICE
	|| (tree->type == HWLOC_OBJ_BRIDGE
	    && tree->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI)) {
      if (tree->attr->pcidev.domain == domain
	  && tree->attr->pcidev.bus == bus
	  && tree->attr->pcidev.dev == dev
	  && tree->attr->pcidev.func == 0)
	/* that's the right bus id */
	return tree;
      /* siblings are walked in bus-id order, so we may stop early */
      if (tree->attr->pcidev.domain > domain
	  || (tree->attr->pcidev.domain == domain
	      && tree->attr->pcidev.bus > bus))
	/* bus id too high, won't find anything later */
	return NULL;
      if (tree->type == HWLOC_OBJ_BRIDGE
	  && tree->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI
	  && tree->attr->bridge.downstream.pci.domain == domain
	  && tree->attr->bridge.downstream.pci.secondary_bus <= bus
	  && tree->attr->bridge.downstream.pci.subordinate_bus >= bus)
	/* not the right bus id, but it's included in the bus below that bridge */
	return hwloc_linuxfs_pci_find_pcislot_obj(tree->io_first_child, domain, bus, dev);

    } else if (tree->type == HWLOC_OBJ_BRIDGE
	       && tree->attr->bridge.upstream_type != HWLOC_OBJ_BRIDGE_PCI
	       && tree->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI
	       /* non-PCI to PCI bridge, just look at the subordinate bus */
	       && tree->attr->bridge.downstream.pci.domain == domain
	       && tree->attr->bridge.downstream.pci.secondary_bus <= bus
	       && tree->attr->bridge.downstream.pci.subordinate_bus >= bus) {
      /* contains our bus, recurse */
      return hwloc_linuxfs_pci_find_pcislot_obj(tree->io_first_child, domain, bus, dev);
    }
  }
  return NULL;
}
+
/* Annotate PCI objects with a "PCISlot" info read from /sys/bus/pci/slots/.
 * Each slot directory has an "address" file holding a domain:bus:dev
 * triple; the matching object and its following same-dev siblings
 * (presumably the other functions of that device) receive the slot name.
 */
static int
hwloc_linuxfs_pci_look_pcislots(struct hwloc_backend *backend)
{
  struct hwloc_topology *topology = backend->topology;
  struct hwloc_linux_backend_data_s *data = backend->private_data;
  int root_fd = data->root_fd;
  DIR *dir;
  struct dirent *dirent;

  dir = hwloc_opendir("/sys/bus/pci/slots/", root_fd);
  if (dir) {
    while ((dirent = readdir(dir)) != NULL) {
      char path[64];
      char buf[64];
      unsigned domain, bus, dev;
      int err;

      if (dirent->d_name[0] == '.')
	continue;
      err = snprintf(path, sizeof(path), "/sys/bus/pci/slots/%s/address", dirent->d_name);
      if ((size_t) err < sizeof(path)
	  && !hwloc_read_path_by_length(path, buf, sizeof(buf), root_fd)
	  && sscanf(buf, "%x:%x:%x", &domain, &bus, &dev) == 3) {
	hwloc_obj_t obj = hwloc_linuxfs_pci_find_pcislot_obj(hwloc_get_root_obj(topology)->io_first_child, domain, bus, dev);
	if (obj) {
	  /* tag every function of the device */
	  while (obj && obj->attr->pcidev.dev == dev /* sibling have same domain+bus */) {
	    hwloc_obj_add_info(obj, "PCISlot", dirent->d_name);
	    obj = obj->next_sibling;
	  }
	}
      }
    }
    closedir(dir);
  }

  return 0;
}
+#endif /* HWLOC_HAVE_LINUXPCI */
+
/* Discovery callback of the linuxio backend: adds PCI devices/bridges and
 * OS devices (block, net, infiniband, mic, drm, dma) plus DMI memory-module
 * Misc objects, depending on the topology's type filters.
 */
static int
hwloc_look_linuxfs_io(struct hwloc_backend *backend)
{
  struct hwloc_topology *topology = backend->topology;
  struct hwloc_linux_backend_data_s *data = NULL;
  struct hwloc_backend *tmpbackend;
  enum hwloc_type_filter_e pfilter, bfilter, ofilter, mfilter;
  int root_fd = -1;
#ifdef HWLOC_HAVE_LINUXPCI
  struct hwloc_obj *tmp;
  int needpcidiscovery;
#endif

  /* nothing to do if every I/O and Misc type is filtered out */
  hwloc_topology_get_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, &pfilter);
  hwloc_topology_get_type_filter(topology, HWLOC_OBJ_BRIDGE, &bfilter);
  hwloc_topology_get_type_filter(topology, HWLOC_OBJ_OS_DEVICE, &ofilter);
  hwloc_topology_get_type_filter(topology, HWLOC_OBJ_MISC, &mfilter);
 if (bfilter == HWLOC_TYPE_FILTER_KEEP_NONE
      && pfilter == HWLOC_TYPE_FILTER_KEEP_NONE
      && ofilter == HWLOC_TYPE_FILTER_KEEP_NONE
      && mfilter == HWLOC_TYPE_FILTER_KEEP_NONE)
    return 0;

  /* hackily find the linux backend to steal its private_data (for fsroot) */
  tmpbackend = topology->backends;
  while (tmpbackend) {
    if (tmpbackend->component == &hwloc_linux_disc_component) {
      data = tmpbackend->private_data;
      break;
    }
    tmpbackend = tmpbackend->next;
  }
  if (!data) {
    hwloc_debug("linuxio failed to find linux backend private_data, aborting its discovery()\n");
    return -1;
  }
  backend->private_data = data;
  root_fd = data->root_fd;
  hwloc_debug("linuxio backend stole linux backend root_fd %d\n", root_fd);

  if (bfilter != HWLOC_TYPE_FILTER_KEEP_NONE
      || pfilter != HWLOC_TYPE_FILTER_KEEP_NONE) {
#ifdef HWLOC_HAVE_LINUXPCI
  /* don't rediscover PCI devices if another backend did it
   * (they are attached to root until later in the core discovery)
   */
  needpcidiscovery = 1;
  tmp = hwloc_get_root_obj(topology)->io_first_child;
  while (tmp) {
    if (tmp->type == HWLOC_OBJ_PCI_DEVICE
	|| (tmp->type == HWLOC_OBJ_BRIDGE && tmp->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI)) {
      hwloc_debug("%s", "PCI objects already added, ignoring linuxio PCI discovery.\n");
      needpcidiscovery = 0;
      break;
    }
    tmp = tmp->next_sibling;
  }

  if (needpcidiscovery)
    hwloc_linuxfs_pci_look_pcidevices(backend);

  hwloc_linuxfs_pci_look_pcislots(backend);
#endif /* HWLOC_HAVE_LINUXPCI */
  }

  if (ofilter != HWLOC_TYPE_FILTER_KEEP_NONE) {
      hwloc_linuxfs_lookup_block_class(backend);
      hwloc_linuxfs_lookup_net_class(backend);
      hwloc_linuxfs_lookup_infiniband_class(backend);
      hwloc_linuxfs_lookup_mic_class(backend);
      if (ofilter != HWLOC_TYPE_FILTER_KEEP_IMPORTANT) {
	/* DRM and DMA devices are only added when keeping all OS devices */
	hwloc_linuxfs_lookup_drm_class(backend);
	hwloc_linuxfs_lookup_dma_class(backend);
      }
  }
  if (mfilter != HWLOC_TYPE_FILTER_KEEP_NONE) {
    hwloc__get_firmware_dmi_memory_info(topology, data);
  }

  return 0;
}
+
+static struct hwloc_backend *
+hwloc_linuxio_component_instantiate(struct hwloc_disc_component *component,
+				    const void *_data1 __hwloc_attribute_unused,
+				    const void *_data2 __hwloc_attribute_unused,
+				    const void *_data3 __hwloc_attribute_unused)
+{
+  struct hwloc_backend *backend;
+
+  backend = hwloc_backend_alloc(component);
+  if (!backend)
+    return NULL;
+  backend->discover = hwloc_look_linuxfs_io;
+
+  /* backend->is_thissystem should be what the linux backend has,
+   * but it's actually useless since both backends will change the main topology->is_thissystem in the same way.
+   */
+
+  /* backend->private_data will point to the main linux private_data after load(),
+   * once the main linux component is instantiated for sure.
+   * it remains valid until the main linux component gets disabled during topology destroy.
+   */
+  return backend;
+}
+
/* Registration record of the "linuxio" discovery component (positional
 * initializers; see struct hwloc_disc_component in the private headers
 * for the field meanings). */
static struct hwloc_disc_component hwloc_linuxio_disc_component = {
  HWLOC_DISC_COMPONENT_TYPE_MISC,
  "linuxio",
  HWLOC_DISC_COMPONENT_TYPE_GLOBAL,
  hwloc_linuxio_component_instantiate,
  19, /* after pci */
  NULL
};

/* Generic component wrapper exported to the hwloc core. */
const struct hwloc_component hwloc_linuxio_component = {
  HWLOC_COMPONENT_ABI,
  NULL, NULL,
  HWLOC_COMPONENT_TYPE_DISC,
  0,
  &hwloc_linuxio_disc_component
};
+
+#endif /* HWLOC_HAVE_LINUXIO */
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-netbsd.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-netbsd.c
new file mode 100644
index 0000000000..db6c7aa832
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-netbsd.c
@@ -0,0 +1,213 @@
+/*
+ * Copyright © 2012 Aleksej Saushev, The NetBSD Foundation
+ * Copyright © 2009-2015 Inria.  All rights reserved.
+ * Copyright © 2009-2010 Université Bordeaux
+ * Copyright © 2011 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+#define _NETBSD_SOURCE /* request "_np" functions */
+
+#include <private/autogen/config.h>
+
+#include <sys/types.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <sys/param.h>
+#include <pthread.h>
+#include <sched.h>
+#ifdef HAVE_SYS_SYSCTL_H
+#include <sys/sysctl.h>
+#endif
+
+#include <hwloc.h>
+#include <private/private.h>
+#include <private/debug.h>
+
+static void
+hwloc_netbsd_bsd2hwloc(hwloc_bitmap_t hwloc_cpuset, const cpuset_t *cpuset)
+{
+  unsigned cpu, cpulimit;
+  int found = 0;
+  hwloc_bitmap_zero(hwloc_cpuset);
+  cpulimit = cpuset_size(cpuset) * CHAR_BIT;
+  for (cpu = 0; cpu < cpulimit; cpu++)
+    if (cpuset_isset(cpu, cpuset)) {
+      hwloc_bitmap_set(hwloc_cpuset, cpu);
+      found++;
+    }
+  /* when never bound, it returns an empty set, fill it instead */
+  if (!found)
+    hwloc_bitmap_fill(hwloc_cpuset);
+}
+
+static void
+hwloc_netbsd_hwloc2bsd(hwloc_const_bitmap_t hwloc_cpuset, cpuset_t *cpuset)
+{
+  unsigned cpu, cpulimit;
+  cpuset_zero(cpuset);
+  cpulimit = cpuset_size(cpuset) * CHAR_BIT;
+  for (cpu = 0; cpu < cpulimit; cpu++)
+    if (hwloc_bitmap_isset(hwloc_cpuset, cpu))
+      cpuset_set(cpu, cpuset);
+}
+
+static int
+hwloc_netbsd_set_proc_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_pid_t pid, hwloc_const_bitmap_t hwloc_cpuset, int flags __hwloc_attribute_unused)
+{
+  int status;
+  cpuset_t *cpuset = cpuset_create();
+  hwloc_netbsd_hwloc2bsd(hwloc_cpuset, cpuset);
+  status = sched_setaffinity_np(pid, cpuset_size(cpuset), cpuset);
+  cpuset_destroy(cpuset);
+  return status;
+}
+
+static int
+hwloc_netbsd_get_proc_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_pid_t pid, hwloc_bitmap_t hwloc_cpuset, int flags __hwloc_attribute_unused)
+{
+  int status;
+  cpuset_t *cpuset = cpuset_create();
+  status = sched_getaffinity_np(pid, cpuset_size(cpuset), cpuset);
+  hwloc_netbsd_bsd2hwloc(hwloc_cpuset, cpuset);
+  cpuset_destroy(cpuset);
+  return status;
+}
+
+
/* Current-process variants: pass pid 0, i.e. (presumably) the calling
 * process for sched_{set,get}affinity_np() — NetBSD convention. */
static int
hwloc_netbsd_set_thisproc_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_cpuset, int flags)
{
  return hwloc_netbsd_set_proc_cpubind(topology, 0, hwloc_cpuset, flags);
}

static int
hwloc_netbsd_get_thisproc_cpubind(hwloc_topology_t topology, hwloc_bitmap_t hwloc_cpuset, int flags)
{
  return hwloc_netbsd_get_proc_cpubind(topology, 0, hwloc_cpuset, flags);
}
+
+
+static int
+hwloc_netbsd_set_thread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_thread_t tid, hwloc_const_bitmap_t hwloc_cpuset, int flags __hwloc_attribute_unused)
+{
+  int status;
+  cpuset_t *cpuset = cpuset_create();
+  hwloc_netbsd_hwloc2bsd(hwloc_cpuset, cpuset);
+  status = pthread_setaffinity_np(tid, cpuset_size(cpuset), cpuset);
+  cpuset_destroy(cpuset);
+
+  if (status) {
+    errno = status;
+    return -1;
+  }
+  return 0;
+}
+
+static int
+hwloc_netbsd_get_thread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_thread_t tid, hwloc_bitmap_t hwloc_cpuset, int flags __hwloc_attribute_unused)
+{
+  int status;
+  cpuset_t *cpuset = cpuset_create();
+  status = pthread_getaffinity_np(tid, cpuset_size(cpuset), cpuset);
+  hwloc_netbsd_bsd2hwloc(hwloc_cpuset, cpuset);
+  cpuset_destroy(cpuset);
+
+  if (status) {
+    errno = status;
+    return -1;
+  }
+  return 0;
+}
+
+
/* Current-thread variants: delegate with pthread_self() as the target. */
static int
hwloc_netbsd_set_thisthread_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_cpuset, int flags)
{
  return hwloc_netbsd_set_thread_cpubind(topology, pthread_self(), hwloc_cpuset, flags);
}

static int
hwloc_netbsd_get_thisthread_cpubind(hwloc_topology_t topology, hwloc_bitmap_t hwloc_cpuset, int flags)
{
  return hwloc_netbsd_get_thread_cpubind(topology, pthread_self(), hwloc_cpuset, flags);
}
+
#if (defined HAVE_SYSCTL) && (defined HAVE_SYS_SYSCTL_H)
/* Query the total physical memory via sysctl(HW_PHYSMEM64) and store it
 * as the root object's local memory.  HW_PHYSMEM64 is a 64-bit quantity,
 * so it must be read into a uint64_t even on 32-bit ABIs (the previous
 * `unsigned long` would make the call fail there and then store an
 * uninitialized value, since the return was unchecked). */
static void
hwloc_netbsd_node_meminfo_info(struct hwloc_topology *topology)
{
  int mib[2] = { CTL_HW, HW_PHYSMEM64 };
  uint64_t physmem;
  size_t len = sizeof(physmem);
  if (sysctl(mib, 2, &physmem, &len, NULL, 0) == 0 && len == sizeof(physmem))
    topology->levels[0][0]->memory.local_memory = physmem;
}
#endif
+
/* Main NetBSD discovery: create the root cpusets and one PU per OS
 * processor when no other backend has done so, then add the memory size
 * (when sysctl is available), a Backend info and uname infos. */
static int
hwloc_look_netbsd(struct hwloc_backend *backend)
{
  struct hwloc_topology *topology = backend->topology;
  unsigned nbprocs = hwloc_fallback_nbprocessors(topology);

  if (!topology->levels[0][0]->cpuset) {
    /* Nobody (even the x86 backend) created objects yet, setup basic objects */
    hwloc_alloc_obj_cpusets(topology->levels[0][0]);
    hwloc_setup_pu_level(topology, nbprocs);
  }

  /* Add NetBSD specific information */
#if (defined HAVE_SYSCTL) && (defined HAVE_SYS_SYSCTL_H)
  hwloc_netbsd_node_meminfo_info(topology);
#endif
  hwloc_obj_add_info(topology->levels[0][0], "Backend", "NetBSD");
  hwloc_add_uname_info(topology, NULL);
  return 0;
}
+
/* Install the NetBSD binding hooks: process- and thread-level CPU binding
 * only; no memory-binding hooks are set here. */
void
hwloc_set_netbsd_hooks(struct hwloc_binding_hooks *hooks __hwloc_attribute_unused,
                        struct hwloc_topology_support *support __hwloc_attribute_unused)
{
  hooks->set_proc_cpubind = hwloc_netbsd_set_proc_cpubind;
  hooks->get_proc_cpubind = hwloc_netbsd_get_proc_cpubind;
  hooks->set_thisproc_cpubind = hwloc_netbsd_set_thisproc_cpubind;
  hooks->get_thisproc_cpubind = hwloc_netbsd_get_thisproc_cpubind;

  hooks->set_thread_cpubind = hwloc_netbsd_set_thread_cpubind;
  hooks->get_thread_cpubind = hwloc_netbsd_get_thread_cpubind;
  hooks->set_thisthread_cpubind = hwloc_netbsd_set_thisthread_cpubind;
  hooks->get_thisthread_cpubind = hwloc_netbsd_get_thisthread_cpubind;
}
+
+static struct hwloc_backend *
+hwloc_netbsd_component_instantiate(struct hwloc_disc_component *component,
+				   const void *_data1 __hwloc_attribute_unused,
+				   const void *_data2 __hwloc_attribute_unused,
+				   const void *_data3 __hwloc_attribute_unused)
+{
+  struct hwloc_backend *backend;
+  backend = hwloc_backend_alloc(component);
+  if (!backend)
+    return NULL;
+  backend->discover = hwloc_look_netbsd;
+  return backend;
+}
+
/* Registration record of the "netbsd" CPU discovery component (positional
 * initializers; see struct hwloc_disc_component in the private headers). */
static struct hwloc_disc_component hwloc_netbsd_disc_component = {
  HWLOC_DISC_COMPONENT_TYPE_CPU,
  "netbsd",
  HWLOC_DISC_COMPONENT_TYPE_GLOBAL,
  hwloc_netbsd_component_instantiate,
  50, /* priority */
  NULL
};

/* Generic component wrapper exported to the hwloc core. */
const struct hwloc_component hwloc_netbsd_component = {
  HWLOC_COMPONENT_ABI,
  NULL, NULL,
  HWLOC_COMPONENT_TYPE_DISC,
  0,
  &hwloc_netbsd_disc_component
};
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-noos.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-noos.c
new file mode 100644
index 0000000000..dbfe6cf2c5
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-noos.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2015 Inria.  All rights reserved.
+ * Copyright © 2009-2012 Université Bordeaux
+ * Copyright © 2009-2011 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+#include <private/autogen/config.h>
+#include <hwloc.h>
+#include <private/private.h>
+
+static int
+hwloc_look_noos(struct hwloc_backend *backend)
+{
+  struct hwloc_topology *topology = backend->topology;
+
+  if (topology->levels[0][0]->cpuset)
+    /* somebody discovered things */
+    return -1;
+
+  hwloc_alloc_obj_cpusets(topology->levels[0][0]);
+  hwloc_setup_pu_level(topology, hwloc_fallback_nbprocessors(topology));
+  hwloc_add_uname_info(topology, NULL);
+  return 0;
+}
+
+static struct hwloc_backend *
+hwloc_noos_component_instantiate(struct hwloc_disc_component *component,
+				 const void *_data1 __hwloc_attribute_unused,
+				 const void *_data2 __hwloc_attribute_unused,
+				 const void *_data3 __hwloc_attribute_unused)
+{
+  struct hwloc_backend *backend;
+  backend = hwloc_backend_alloc(component);
+  if (!backend)
+    return NULL;
+  backend->discover = hwloc_look_noos;
+  return backend;
+}
+
+static struct hwloc_disc_component hwloc_noos_disc_component = {
+  HWLOC_DISC_COMPONENT_TYPE_CPU,
+  "no_os",
+  HWLOC_DISC_COMPONENT_TYPE_GLOBAL,
+  hwloc_noos_component_instantiate,
+  40, /* lower than native OS component, higher than globals */
+  NULL
+};
+
+const struct hwloc_component hwloc_noos_component = {
+  HWLOC_COMPONENT_ABI,
+  NULL, NULL,
+  HWLOC_COMPONENT_TYPE_DISC,
+  0,
+  &hwloc_noos_disc_component
+};
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-nvml.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-nvml.c
new file mode 100644
index 0000000000..e904e6b98a
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-nvml.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright © 2012-2017 Inria.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+#include <private/autogen/config.h>
+#include <hwloc.h>
+#include <hwloc/plugins.h>
+
+/* private headers allowed for convenience because this plugin is built within hwloc */
+#include <private/misc.h>
+#include <private/debug.h>
+
+#include <nvml.h>
+
+static int
+hwloc_nvml_discover(struct hwloc_backend *backend)
+{
+  struct hwloc_topology *topology = backend->topology;
+  enum hwloc_type_filter_e filter;
+  nvmlReturn_t ret;
+  unsigned nb, i;
+
+  hwloc_topology_get_type_filter(topology, HWLOC_OBJ_OS_DEVICE, &filter);
+  if (filter == HWLOC_TYPE_FILTER_KEEP_NONE)
+    return 0;
+
+  ret = nvmlInit();
+  if (NVML_SUCCESS != ret)
+    return -1;
+  ret = nvmlDeviceGetCount(&nb);
+  if (NVML_SUCCESS != ret || !nb) {
+    nvmlShutdown();
+    return 0;
+  }
+
+  for(i=0; i<nb; i++) {
+    nvmlPciInfo_t pci;
+    nvmlDevice_t device;
+    hwloc_obj_t osdev, parent;
+    char buffer[64];
+
+    ret = nvmlDeviceGetHandleByIndex(i, &device);
+    assert(ret == NVML_SUCCESS);
+
+    osdev = hwloc_alloc_setup_object(topology, HWLOC_OBJ_OS_DEVICE, -1);
+    snprintf(buffer, sizeof(buffer), "nvml%u", i);
+    osdev->name = strdup(buffer);
+    osdev->depth = (unsigned) HWLOC_TYPE_DEPTH_UNKNOWN;
+    osdev->attr->osdev.type = HWLOC_OBJ_OSDEV_GPU;
+
+    hwloc_obj_add_info(osdev, "Backend", "NVML");
+    hwloc_obj_add_info(osdev, "GPUVendor", "NVIDIA Corporation");
+
+    buffer[0] = '\0';
+    ret = nvmlDeviceGetName(device, buffer, sizeof(buffer));
+    hwloc_obj_add_info(osdev, "GPUModel", buffer);
+
+    /* these may fail with NVML_ERROR_NOT_SUPPORTED on old devices */
+    buffer[0] = '\0';
+    ret = nvmlDeviceGetSerial(device, buffer, sizeof(buffer));
+    if (buffer[0] != '\0')
+      hwloc_obj_add_info(osdev, "NVIDIASerial", buffer);
+
+    buffer[0] = '\0';
+    ret = nvmlDeviceGetUUID(device, buffer, sizeof(buffer));
+    if (buffer[0] != '\0')
+      hwloc_obj_add_info(osdev, "NVIDIAUUID", buffer);
+
+    parent = NULL;
+    if (NVML_SUCCESS == nvmlDeviceGetPciInfo(device, &pci)) {
+      parent = hwloc_pci_belowroot_find_by_busid(topology, pci.domain, pci.bus, pci.device, 0);
+      if (!parent)
+	parent = hwloc_pci_find_busid_parent(topology, pci.domain, pci.bus, pci.device, 0);
+#if HAVE_DECL_NVMLDEVICEGETMAXPCIELINKGENERATION
+      if (parent && parent->type == HWLOC_OBJ_PCI_DEVICE) {
+	unsigned maxwidth = 0, maxgen = 0;
+	float lanespeed;
+	nvmlDeviceGetMaxPcieLinkWidth(device, &maxwidth);
+	nvmlDeviceGetMaxPcieLinkGeneration(device, &maxgen);
+	/* PCIe Gen1 = 2.5GT/s signal-rate per lane with 8/10 encoding    = 0.25GB/s data-rate per lane
+	 * PCIe Gen2 = 5  GT/s signal-rate per lane with 8/10 encoding    = 0.5 GB/s data-rate per lane
+	 * PCIe Gen3 = 8  GT/s signal-rate per lane with 128/130 encoding = 1   GB/s data-rate per lane
+	 */
+	lanespeed = maxgen <= 2 ? 2.5 * maxgen * 0.8 : 8.0 * 128/130; /* Gbit/s per lane */
+	if (lanespeed * maxwidth != 0.)
+	  /* we found the max link speed, replace the current link speed found by pci (or none) */
+	  parent->attr->pcidev.linkspeed = lanespeed * maxwidth / 8; /* GB/s */
+      }
+#endif
+    }
+    if (!parent)
+      parent = hwloc_get_root_obj(topology);
+
+    hwloc_insert_object_by_parent(topology, parent, osdev);
+  }
+
+  nvmlShutdown();
+  return 0;
+}
+
+static struct hwloc_backend *
+hwloc_nvml_component_instantiate(struct hwloc_disc_component *component,
+				 const void *_data1 __hwloc_attribute_unused,
+				 const void *_data2 __hwloc_attribute_unused,
+				 const void *_data3 __hwloc_attribute_unused)
+{
+  struct hwloc_backend *backend;
+
+  backend = hwloc_backend_alloc(component);
+  if (!backend)
+    return NULL;
+  backend->discover = hwloc_nvml_discover;
+  return backend;
+}
+
+static struct hwloc_disc_component hwloc_nvml_disc_component = {
+  HWLOC_DISC_COMPONENT_TYPE_MISC,
+  "nvml",
+  HWLOC_DISC_COMPONENT_TYPE_GLOBAL,
+  hwloc_nvml_component_instantiate,
+  5, /* after pci, and after cuda since likely less useful */
+  NULL
+};
+
+static int
+hwloc_nvml_component_init(unsigned long flags)
+{
+  if (flags)
+    return -1;
+  if (hwloc_plugin_check_namespace("nvml", "hwloc_backend_alloc") < 0)
+    return -1;
+  return 0;
+}
+
+#ifdef HWLOC_INSIDE_PLUGIN
+HWLOC_DECLSPEC extern const struct hwloc_component hwloc_nvml_component;
+#endif
+
+const struct hwloc_component hwloc_nvml_component = {
+  HWLOC_COMPONENT_ABI,
+  hwloc_nvml_component_init, NULL,
+  HWLOC_COMPONENT_TYPE_DISC,
+  0,
+  &hwloc_nvml_disc_component
+};
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-opencl.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-opencl.c
new file mode 100644
index 0000000000..815644157a
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-opencl.c
@@ -0,0 +1,203 @@
+/*
+ * Copyright © 2012-2017 Inria.  All rights reserved.
+ * Copyright © 2013 Université Bordeaux.  All right reserved.
+ * See COPYING in top-level directory.
+ */
+
+#include <private/autogen/config.h>
+#include <hwloc.h>
+#include <hwloc/plugins.h>
+
+/* private headers allowed for convenience because this plugin is built within hwloc */
+#include <private/misc.h>
+#include <private/debug.h>
+
+#include <CL/cl_ext.h>
+
+static int
+hwloc_opencl_discover(struct hwloc_backend *backend)
+{
+  struct hwloc_topology *topology = backend->topology;
+  enum hwloc_type_filter_e filter;
+  cl_platform_id *platform_ids = NULL;
+  cl_uint nr_platforms;
+  cl_int clret;
+  unsigned j;
+
+  hwloc_topology_get_type_filter(topology, HWLOC_OBJ_OS_DEVICE, &filter);
+  if (filter == HWLOC_TYPE_FILTER_KEEP_NONE)
+    return 0;
+
+  clret = clGetPlatformIDs(0, NULL, &nr_platforms);
+  if (CL_SUCCESS != clret || !nr_platforms)
+    return -1;
+  hwloc_debug("%u OpenCL platforms\n", nr_platforms);
+  platform_ids = malloc(nr_platforms * sizeof(*platform_ids));
+  if (!platform_ids)
+    return -1;
+  clret = clGetPlatformIDs(nr_platforms, platform_ids, &nr_platforms);
+  if (CL_SUCCESS != clret || !nr_platforms) {
+    free(platform_ids);
+    return -1;
+  }
+
+  for(j=0; j<nr_platforms; j++) {
+    cl_device_id *device_ids = NULL;
+    cl_uint nr_devices;
+    unsigned i;
+
+    clret = clGetDeviceIDs(platform_ids[j], CL_DEVICE_TYPE_ALL, 0, NULL, &nr_devices);
+    if (CL_SUCCESS != clret)
+      continue;
+    device_ids = malloc(nr_devices * sizeof(*device_ids));
+    clret = clGetDeviceIDs(platform_ids[j], CL_DEVICE_TYPE_ALL, nr_devices, device_ids, &nr_devices);
+    if (CL_SUCCESS != clret) {
+      free(device_ids);
+      continue;
+    }
+
+    for(i=0; i<nr_devices; i++) {
+      cl_platform_id platform_id = 0;
+      cl_device_type type;
+#ifdef CL_DEVICE_TOPOLOGY_AMD
+      cl_device_topology_amd amdtopo;
+#endif
+      cl_ulong globalmemsize;
+      cl_uint computeunits;
+      hwloc_obj_t osdev, parent;
+      char buffer[64];
+
+      hwloc_debug("This is opencl%ud%u\n", j, i);
+
+#ifdef CL_DEVICE_TOPOLOGY_AMD
+      clret = clGetDeviceInfo(device_ids[i], CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL);
+      if (CL_SUCCESS != clret) {
+	hwloc_debug("no AMD-specific device information: %d\n", clret);
+	continue;
+      } else if (CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD != amdtopo.raw.type) {
+	hwloc_debug("AMD-specific device topology reports non-PCIe device type: %u\n", amdtopo.raw.type);
+	continue;
+      }
+#else
+      continue;
+#endif
+
+      osdev = hwloc_alloc_setup_object(topology, HWLOC_OBJ_OS_DEVICE, -1);
+      snprintf(buffer, sizeof(buffer), "opencl%ud%u", j, i);
+      osdev->name = strdup(buffer);
+      osdev->depth = (unsigned) HWLOC_TYPE_DEPTH_UNKNOWN;
+      osdev->attr->osdev.type = HWLOC_OBJ_OSDEV_COPROC;
+
+      osdev->subtype = strdup("OpenCL");
+      hwloc_obj_add_info(osdev, "Backend", "OpenCL");
+
+      clGetDeviceInfo(device_ids[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
+      if (type == CL_DEVICE_TYPE_GPU)
+	hwloc_obj_add_info(osdev, "OpenCLDeviceType", "GPU");
+      else if (type == CL_DEVICE_TYPE_ACCELERATOR)
+	hwloc_obj_add_info(osdev, "OpenCLDeviceType", "Accelerator");
+      else if (type == CL_DEVICE_TYPE_CPU)
+	hwloc_obj_add_info(osdev, "OpenCLDeviceType", "CPU");
+      else if (type == CL_DEVICE_TYPE_CUSTOM)
+	hwloc_obj_add_info(osdev, "OpenCLDeviceType", "Custom");
+      else
+	hwloc_obj_add_info(osdev, "OpenCLDeviceType", "Unknown");
+
+      buffer[0] = '\0';
+      clGetDeviceInfo(device_ids[i], CL_DEVICE_VENDOR, sizeof(buffer), buffer, NULL);
+      if (buffer[0] != '\0')
+	hwloc_obj_add_info(osdev, "GPUVendor", buffer);
+
+      buffer[0] = '\0';
+#ifdef CL_DEVICE_BOARD_NAME_AMD
+      clGetDeviceInfo(device_ids[i], CL_DEVICE_BOARD_NAME_AMD, sizeof(buffer), buffer, NULL);
+#else
+      clGetDeviceInfo(device_ids[i], CL_DEVICE_NAME, sizeof(buffer), buffer, NULL);
+#endif
+      if (buffer[0] != '\0')
+	hwloc_obj_add_info(osdev, "GPUModel", buffer);
+
+      snprintf(buffer, sizeof(buffer), "%u", j);
+      hwloc_obj_add_info(osdev, "OpenCLPlatformIndex", buffer);
+
+      buffer[0] = '\0';
+      clret = clGetDeviceInfo(device_ids[i], CL_DEVICE_PLATFORM, sizeof(platform_id), &platform_id, NULL);
+      if (CL_SUCCESS == clret) {
+	clGetPlatformInfo(platform_id, CL_PLATFORM_NAME, sizeof(buffer), buffer, NULL);
+	if (buffer[0] != '\0')
+	  hwloc_obj_add_info(osdev, "OpenCLPlatformName", buffer);
+      }
+
+      snprintf(buffer, sizeof(buffer), "%u", i);
+      hwloc_obj_add_info(osdev, "OpenCLPlatformDeviceIndex", buffer);
+
+      clGetDeviceInfo(device_ids[i], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(computeunits), &computeunits, NULL);
+      snprintf(buffer, sizeof(buffer), "%u", computeunits);
+      hwloc_obj_add_info(osdev, "OpenCLComputeUnits", buffer);
+
+      clGetDeviceInfo(device_ids[i], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(globalmemsize), &globalmemsize, NULL);
+      snprintf(buffer, sizeof(buffer), "%llu", (unsigned long long) globalmemsize / 1024);
+      hwloc_obj_add_info(osdev, "OpenCLGlobalMemorySize", buffer);
+
+      parent = NULL;
+#ifdef CL_DEVICE_TOPOLOGY_AMD
+      parent = hwloc_pci_belowroot_find_by_busid(topology, 0, amdtopo.pcie.bus, amdtopo.pcie.device, amdtopo.pcie.function);
+      if (!parent)
+	parent = hwloc_pci_find_busid_parent(topology, 0, amdtopo.pcie.bus, amdtopo.pcie.device, amdtopo.pcie.function);
+#endif
+      if (!parent)
+	parent = hwloc_get_root_obj(topology);
+
+      hwloc_insert_object_by_parent(topology, parent, osdev);
+    }
+    free(device_ids);
+  }
+  free(platform_ids);
+  return 0;
+}
+
+static struct hwloc_backend *
+hwloc_opencl_component_instantiate(struct hwloc_disc_component *component,
+				   const void *_data1 __hwloc_attribute_unused,
+				   const void *_data2 __hwloc_attribute_unused,
+				   const void *_data3 __hwloc_attribute_unused)
+{
+  struct hwloc_backend *backend;
+
+  backend = hwloc_backend_alloc(component);
+  if (!backend)
+    return NULL;
+  backend->discover = hwloc_opencl_discover;
+  return backend;
+}
+
+static struct hwloc_disc_component hwloc_opencl_disc_component = {
+  HWLOC_DISC_COMPONENT_TYPE_MISC,
+  "opencl",
+  HWLOC_DISC_COMPONENT_TYPE_GLOBAL,
+  hwloc_opencl_component_instantiate,
+  10, /* after pci */
+  NULL
+};
+
+static int
+hwloc_opencl_component_init(unsigned long flags)
+{
+  if (flags)
+    return -1;
+  if (hwloc_plugin_check_namespace("opencl", "hwloc_backend_alloc") < 0)
+    return -1;
+  return 0;
+}
+
+#ifdef HWLOC_INSIDE_PLUGIN
+HWLOC_DECLSPEC extern const struct hwloc_component hwloc_opencl_component;
+#endif
+
+const struct hwloc_component hwloc_opencl_component = {
+  HWLOC_COMPONENT_ABI,
+  hwloc_opencl_component_init, NULL,
+  HWLOC_COMPONENT_TYPE_DISC,
+  0,
+  &hwloc_opencl_disc_component
+};
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-pci.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-pci.c
new file mode 100644
index 0000000000..78c0548eef
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-pci.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2017 Inria.  All rights reserved.
+ * Copyright © 2009-2011, 2013 Université Bordeaux
+ * Copyright © 2014 Cisco Systems, Inc.  All rights reserved.
+ * Copyright © 2015      Research Organization for Information Science
+ *                       and Technology (RIST). All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+#include <private/autogen/config.h>
+#include <hwloc.h>
+#include <hwloc/helper.h>
+#include <hwloc/plugins.h>
+
+/* private headers allowed for convenience because this plugin is built within hwloc */
+#include <private/debug.h>
+#include <private/misc.h>
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <string.h>
+#include <assert.h>
+#include <stdarg.h>
+#ifdef HWLOC_LINUX_SYS
+#include <dirent.h>
+#endif
+
+#include <pciaccess.h>
+
+#ifndef PCI_HEADER_TYPE
+#define PCI_HEADER_TYPE 0x0e
+#endif
+#ifndef PCI_HEADER_TYPE_BRIDGE
+#define PCI_HEADER_TYPE_BRIDGE 1
+#endif
+
+#ifndef PCI_CLASS_DEVICE
+#define PCI_CLASS_DEVICE 0x0a
+#endif
+#ifndef PCI_CLASS_BRIDGE_PCI
+#define PCI_CLASS_BRIDGE_PCI 0x0604
+#endif
+
+#ifndef PCI_REVISION_ID
+#define PCI_REVISION_ID 0x08
+#endif
+
+#ifndef PCI_SUBSYSTEM_VENDOR_ID
+#define PCI_SUBSYSTEM_VENDOR_ID 0x2c
+#endif
+#ifndef PCI_SUBSYSTEM_ID
+#define PCI_SUBSYSTEM_ID 0x2e
+#endif
+
+#ifndef PCI_PRIMARY_BUS
+#define PCI_PRIMARY_BUS 0x18
+#endif
+#ifndef PCI_SECONDARY_BUS
+#define PCI_SECONDARY_BUS 0x19
+#endif
+#ifndef PCI_SUBORDINATE_BUS
+#define PCI_SUBORDINATE_BUS 0x1a
+#endif
+
+#ifndef PCI_CAP_ID_EXP
+#define PCI_CAP_ID_EXP 0x10
+#endif
+
+#ifndef PCI_CAP_NORMAL
+#define PCI_CAP_NORMAL 1
+#endif
+
+#define CONFIG_SPACE_CACHESIZE 256
+
+
+static int
+hwloc_look_pci(struct hwloc_backend *backend)
+{
+  struct hwloc_topology *topology = backend->topology;
+  enum hwloc_type_filter_e pfilter, bfilter;
+  struct hwloc_obj *tree = NULL, *tmp;
+  int ret;
+  struct pci_device_iterator *iter;
+  struct pci_device *pcidev;
+
+  hwloc_topology_get_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, &pfilter);
+  hwloc_topology_get_type_filter(topology, HWLOC_OBJ_BRIDGE, &bfilter);
+  if (bfilter == HWLOC_TYPE_FILTER_KEEP_NONE
+      && pfilter == HWLOC_TYPE_FILTER_KEEP_NONE)
+    return 0;
+
+  /* don't do anything if another backend attached PCI already
+   * (they are attached to root until later in the core discovery)
+   */
+  tmp = hwloc_get_root_obj(topology)->io_first_child;
+  while (tmp) {
+    if (tmp->type == HWLOC_OBJ_PCI_DEVICE
+	|| (tmp->type == HWLOC_OBJ_BRIDGE && tmp->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI)) {
+      hwloc_debug("%s", "PCI objects already added, ignoring linuxpci backend.\n");
+      return 0;
+    }
+    tmp = tmp->next_sibling;
+  }
+
+  hwloc_debug("%s", "\nScanning PCI buses...\n");
+
+  /* initialize PCI scanning */
+  ret = pci_system_init();
+  if (ret) {
+    hwloc_debug("%s", "Can not initialize libpciaccess\n");
+    return -1;
+  }
+
+  iter = pci_slot_match_iterator_create(NULL);
+
+  /* iterate over devices */
+  for (pcidev = pci_device_next(iter);
+       pcidev;
+       pcidev = pci_device_next(iter))
+  {
+    const char *vendorname, *devicename;
+    unsigned char config_space_cache[CONFIG_SPACE_CACHESIZE];
+    hwloc_obj_type_t type;
+    struct hwloc_obj *obj;
+    unsigned domain;
+    unsigned device_class;
+    unsigned short tmp16;
+    unsigned offset;
+
+    /* initialize the config space in case we fail to read it (missing permissions, etc). */
+    memset(config_space_cache, 0xff, CONFIG_SPACE_CACHESIZE);
+    pci_device_probe(pcidev);
+    pci_device_cfg_read(pcidev, config_space_cache, 0, CONFIG_SPACE_CACHESIZE, NULL);
+
+    /* try to read the domain */
+    domain = pcidev->domain;
+
+    /* try to read the device_class */
+    device_class = pcidev->device_class >> 8;
+
+    /* bridge or pci dev? */
+    type = hwloc_pci_check_bridge_type(device_class, config_space_cache);
+
+    /* filtered? */
+    if (type == HWLOC_OBJ_PCI_DEVICE) {
+      enum hwloc_type_filter_e filter;
+      hwloc_topology_get_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, &filter);
+      if (filter == HWLOC_TYPE_FILTER_KEEP_NONE)
+	continue;
+      if (filter == HWLOC_TYPE_FILTER_KEEP_IMPORTANT
+	  && !hwloc_filter_check_pcidev_subtype_important(device_class))
+	continue;
+    } else if (type == HWLOC_OBJ_BRIDGE) {
+      enum hwloc_type_filter_e filter;
+      hwloc_topology_get_type_filter(topology, HWLOC_OBJ_BRIDGE, &filter);
+      if (filter == HWLOC_TYPE_FILTER_KEEP_NONE)
+	continue;
+      /* HWLOC_TYPE_FILTER_KEEP_IMPORTANT filtered later in the core */
+    }
+
+    /* fixup SR-IOV buggy VF device/vendor IDs */
+    if (0xffff == pcidev->vendor_id && 0xffff == pcidev->device_id) {
+      /* SR-IOV puts ffff:ffff in Virtual Function config space.
+       * The actual VF device ID is stored at a special (dynamic) location in the Physical Function config space.
+       * VF and PF have the same vendor ID.
+       *
+       * libpciaccess just returns ffff:ffff, needs to be fixed.
+       * linuxpci is OK because sysfs files are already fixed in the kernel.
+       * (pciutils is OK when it uses those Linux sysfs files.)
+       *
+       * Reading these files is an easy way to work around the libpciaccess issue on Linux,
+       * but we have no way to know if this is caused by SR-IOV or not.
+       *
+       * TODO:
+       *  If PF has CAP_ID_PCIX or CAP_ID_EXP (offset>0),
+       *  look for extended capability PCI_EXT_CAP_ID_SRIOV (need extended config space (more than 256 bytes)),
+       *  then read the VF device ID after it (PCI_IOV_DID bytes later).
+       *  Needs access to extended config space (needs root on Linux).
+       * TODO:
+       *  Add string info attributes in VF and PF objects?
+       */
+#ifdef HWLOC_LINUX_SYS
+      /* Workaround for Linux (the kernel returns the VF device/vendor IDs). */
+      char path[64];
+      char value[16];
+      FILE *file;
+      size_t read;
+
+      snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/vendor",
+	       domain, pcidev->bus, pcidev->dev, pcidev->func);
+      file = fopen(path, "r");
+      if (file) {
+	read = fread(value, 1, sizeof(value), file);
+	fclose(file);
+	if (read)
+	  /* fixup the pciaccess struct so that pci_device_get_vendor_name() is correct later. */
+          pcidev->vendor_id = strtoul(value, NULL, 16);
+      }
+
+      snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/device",
+	       domain, pcidev->bus, pcidev->dev, pcidev->func);
+      file = fopen(path, "r");
+      if (file) {
+	read = fread(value, 1, sizeof(value), file);
+	fclose(file);
+	if (read)
+	  /* fixup the pciaccess struct so that pci_device_get_device_name() is correct later. */
+          pcidev->device_id = strtoul(value, NULL, 16);
+      }
+#endif
+    }
+
+    obj = hwloc_alloc_setup_object(topology, type, -1);
+    obj->attr->pcidev.domain = domain;
+    obj->attr->pcidev.bus = pcidev->bus;
+    obj->attr->pcidev.dev = pcidev->dev;
+    obj->attr->pcidev.func = pcidev->func;
+    obj->attr->pcidev.vendor_id = pcidev->vendor_id;
+    obj->attr->pcidev.device_id = pcidev->device_id;
+    obj->attr->pcidev.class_id = device_class;
+    obj->attr->pcidev.revision = config_space_cache[PCI_REVISION_ID];
+
+    obj->attr->pcidev.linkspeed = 0; /* unknown */
+    offset = hwloc_pci_find_cap(config_space_cache, PCI_CAP_ID_EXP);
+
+    if (offset > 0 && offset + 20 /* size of PCI express block up to link status */ <= CONFIG_SPACE_CACHESIZE)
+      hwloc_pci_find_linkspeed(config_space_cache, offset, &obj->attr->pcidev.linkspeed);
+
+    if (type == HWLOC_OBJ_BRIDGE) {
+      if (hwloc_pci_setup_bridge_attr(obj, config_space_cache) < 0)
+	continue;
+    }
+
+    if (obj->type == HWLOC_OBJ_PCI_DEVICE) {
+      memcpy(&tmp16, &config_space_cache[PCI_SUBSYSTEM_VENDOR_ID], sizeof(tmp16));
+      obj->attr->pcidev.subvendor_id = tmp16;
+      memcpy(&tmp16, &config_space_cache[PCI_SUBSYSTEM_ID], sizeof(tmp16));
+      obj->attr->pcidev.subdevice_id = tmp16;
+    } else {
+      /* TODO:
+       * bridge must lookup PCI_CAP_ID_SSVID and then look at offset+PCI_SSVID_VENDOR/DEVICE_ID
+       * cardbus must look at PCI_CB_SUBSYSTEM_VENDOR_ID and PCI_CB_SUBSYSTEM_ID
+       */
+    }
+
+    /* get the vendor name */
+    vendorname = pci_device_get_vendor_name(pcidev);
+    if (vendorname && *vendorname)
+      hwloc_obj_add_info(obj, "PCIVendor", vendorname);
+
+    /* get the device name */
+    devicename = pci_device_get_device_name(pcidev);
+    if (devicename && *devicename)
+      hwloc_obj_add_info(obj, "PCIDevice", devicename);
+
+    hwloc_debug("  %04x:%02x:%02x.%01x %04x %04x:%04x %s %s\n",
+		domain, pcidev->bus, pcidev->dev, pcidev->func,
+		device_class, pcidev->vendor_id, pcidev->device_id,
+		vendorname && *vendorname ? vendorname : "??",
+		devicename && *devicename ? devicename : "??");
+
+    hwloc_pci_tree_insert_by_busid(&tree, obj);
+  }
+
+  /* finalize device scanning */
+  pci_iterator_destroy(iter);
+  pci_system_cleanup();
+
+  hwloc_pci_tree_attach_belowroot(topology, tree);
+  return 0;
+}
+
+static struct hwloc_backend *
+hwloc_pci_component_instantiate(struct hwloc_disc_component *component,
+				   const void *_data1 __hwloc_attribute_unused,
+				   const void *_data2 __hwloc_attribute_unused,
+				   const void *_data3 __hwloc_attribute_unused)
+{
+  struct hwloc_backend *backend;
+
+#ifdef HWLOC_SOLARIS_SYS
+  if ((uid_t)0 != geteuid())
+    return NULL;
+#endif
+
+  backend = hwloc_backend_alloc(component);
+  if (!backend)
+    return NULL;
+  backend->discover = hwloc_look_pci;
+  return backend;
+}
+
+static struct hwloc_disc_component hwloc_pci_disc_component = {
+  HWLOC_DISC_COMPONENT_TYPE_MISC,
+  "pci",
+  HWLOC_DISC_COMPONENT_TYPE_GLOBAL,
+  hwloc_pci_component_instantiate,
+  20,
+  NULL
+};
+
+static int
+hwloc_pci_component_init(unsigned long flags)
+{
+  if (flags)
+    return -1;
+  if (hwloc_plugin_check_namespace("pci", "hwloc_backend_alloc") < 0)
+    return -1;
+  return 0;
+}
+
+#ifdef HWLOC_INSIDE_PLUGIN
+HWLOC_DECLSPEC extern const struct hwloc_component hwloc_pci_component;
+#endif
+
+const struct hwloc_component hwloc_pci_component = {
+  HWLOC_COMPONENT_ABI,
+  hwloc_pci_component_init, NULL,
+  HWLOC_COMPONENT_TYPE_DISC,
+  0,
+  &hwloc_pci_disc_component
+};
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-solaris-chiptype.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-solaris-chiptype.c
new file mode 100644
index 0000000000..9a1b7288ac
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-solaris-chiptype.c
@@ -0,0 +1,346 @@
+/*
+ * Copyright © 2009-2010 Oracle and/or its affiliates.  All rights reserved.
+ * Copyright © 2013 Université Bordeaux.  All rights reserved.
+ * Copyright © 2016 Inria.  All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include <private/autogen/config.h>
+#include <hwloc.h>
+#include <private/solaris-chiptype.h>
+#include <stdlib.h>
+#include <strings.h>
+
+#ifdef HAVE_PICL_H
+#include <sys/systeminfo.h>
+#include <picl.h>
+
+/*****************************************************************************
+   Order of this list is important for the assign_value and
+   assign_string_value routines
+*****************************************************************************/
+
+static const char* items[] = {
+  "clock-frequency",
+  "cpu-mhz",
+  "ecache-size",
+  "l2-cache-size",
+  "sectored-l2-cache-size",
+  "implementation#",
+  "manufacturer#",
+  "compatible",
+  "ProcessorType",
+  "vendor-id",
+  "brand-string"
+};
+
+#define NUM_ITEMS (sizeof(items) / sizeof(items[0]))
+
+/*****************************************************************************
+SPARC strings for chip modes and implementation
+*****************************************************************************/
+static const char* sparc_modes[] = {
+    "UNKNOWN",
+    "SPITFIRE",
+    "BLACKBIRD",
+    "CHEETAH",
+    "SPARC64_VI",
+    "T1",
+    "T2",
+    "SPARC64_VII",
+    "ROCK",
+    "T5"
+/* needs T4, T3 and T2+ ? */
+};
+
+/*****************************************************************************
+Default values are for Unknown so we can build up from there.
+*****************************************************************************/
+
+static long dss_chip_mode         = MODE_UNKNOWN;
+static long dss_chip_impl         = IMPL_SPITFIRE;
+static long dss_chip_cache        = TWO_MEG_CACHE;
+static long dss_chip_manufacturer = TI_MANUFACTURER;
+static long long dss_chip_speed   = SPITFIRE_SPEED;
+static char dss_chip_type[PICL_PROPNAMELEN_MAX];
+static char dss_chip_model[PICL_PROPNAMELEN_MAX];
+static int  called_cpu_probe      = 0;
+
+/*****************************************************************************
+Assigns values based on the value of index.  For this reason, the order of
+the items array is important.
+*****************************************************************************/
+static void assign_value(int index, long long val) {
+  if (index == 0) {  /* clock-frequency */
+    dss_chip_speed = val;
+  }
+  if (index == 1) {  /* cpu-mhz */
+    dss_chip_speed = val * 1000000; /* Scale since value was in MHz */
+  }
+  else if ((index >= 2) && (index <= 4)) {
+    /* ecache-size, l2-cache-size, sectored-l2-cache-size */
+    dss_chip_cache = val;
+  }
+  else if (index == 5) {
+    /* implementation#  T1, T2, and Rock do not have this, see RFE 6615268 */
+    dss_chip_impl = val;
+    if (dss_chip_impl == IMPL_SPITFIRE) {
+      dss_chip_mode = 1;
+    }
+    else if ((dss_chip_impl >= IMPL_BLACKBIRD) &&
+             (dss_chip_impl <= IMPL_HUMMINGBIRD)) {
+      dss_chip_mode = 2;
+    }
+    else if ((dss_chip_impl >= IMPL_CHEETAH) &&
+             (dss_chip_impl <= IMPL_PANTHER)) {
+      dss_chip_mode = 3;
+    }
+    else if (dss_chip_impl == IMPL_SPARC64_VI) {
+      dss_chip_mode = 4;
+    }
+    else if (dss_chip_impl == IMPL_NIAGARA) {
+      dss_chip_mode = 5;
+    }
+    else if (dss_chip_impl == IMPL_NIAGARA_2) {
+      dss_chip_mode = 6;
+    }
+    else if (dss_chip_impl == IMPL_SPARC64_VII) {
+      dss_chip_mode = 7;
+    }
+    else if (dss_chip_impl == IMPL_ROCK) {
+      dss_chip_mode = 8;
+    }
+  }
+  else if (index == 6) { /* manufacturer# */
+    dss_chip_manufacturer = val;
+  }
+}
+
+/*****************************************************************************
+Assigns values based on the value of index.  For this reason, the order of
+the items array is important.
+*****************************************************************************/
+static void assign_string_value(int index, char* string_val) {
+  if (index == 7) { /* compatible */
+    if (strncasecmp(string_val, "FJSV,SPARC64-VI",
+                    PICL_PROPNAMELEN_MAX) == 0) {
+      dss_chip_mode = 4;
+    }
+    else if (strncasecmp(string_val, "SUNW,UltraSPARC-T1",
+                         PICL_PROPNAMELEN_MAX) == 0) {
+      dss_chip_mode = 5;
+    }
+    else if (strncasecmp(string_val, "SUNW,UltraSPARC-T2",
+                         PICL_PROPNAMELEN_MAX) == 0) {
+      dss_chip_mode = 6;
+    }
+    else if (strncasecmp(string_val, "FJSV,SPARC64-VII",
+                         PICL_PROPNAMELEN_MAX) == 0) {
+      dss_chip_mode = 7;
+    }
+    else if (strncasecmp(string_val, "SUNW,Rock",
+                         PICL_PROPNAMELEN_MAX) == 0) {
+      dss_chip_mode = 8;
+    }
+    else if (strncasecmp(string_val, "SPARC-T5",
+			 PICL_PROPNAMELEN_MAX) == 0) {
+      dss_chip_mode = 9;
+    }
+  } else if (index == 8) {  /* ProcessorType */
+      strncpy(&dss_chip_type[0], string_val, PICL_PROPNAMELEN_MAX);
+  } else if (index == 10) { /* brand-string */
+      strncpy(&dss_chip_model[0], string_val, PICL_PROPNAMELEN_MAX);
+  }
+
+}
+
+/*****************************************************************************
+Gets called by probe_cpu.  Cycles through the table values until we find
+what we are looking for.
+*****************************************************************************/
+static void search_table(int index, picl_prophdl_t table_hdl) {
+
+  picl_prophdl_t  col_hdl;
+  picl_prophdl_t  row_hdl;
+  picl_propinfo_t p_info;
+  int             val;
+  char            string_val[PICL_PROPNAMELEN_MAX];
+
+  for (val = picl_get_next_by_col(table_hdl, &row_hdl); val != PICL_ENDOFLIST;
+       val = picl_get_next_by_col(row_hdl, &row_hdl)) {
+    if (val == PICL_SUCCESS) {
+      for (col_hdl = row_hdl; val != PICL_ENDOFLIST;
+           val = picl_get_next_by_row(col_hdl, &col_hdl)) {
+        if (val == PICL_SUCCESS) {
+          val = picl_get_propinfo(col_hdl, &p_info);
+          if (val == PICL_SUCCESS) {
+            if (p_info.type == PICL_PTYPE_CHARSTRING) {
+              val = picl_get_propval(col_hdl, &string_val, sizeof(string_val));
+              if (val == PICL_SUCCESS) {
+                assign_string_value(index, string_val);
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+/*****************************************************************************
+Gets called by picl_walk_tree_by_class.  Then it cycles through the properties
+until we find what we are looking for.  Once we are done, we return
+PICL_WALK_TERMINATE to stop picl_walk_tree_by_class from traversing the tree.
+
+Note that PICL_PTYPE_UNSIGNED_INT and PICL_PTYPE_INT can either be 4-bytes
+or 8-bytes.
+*****************************************************************************/
+static int probe_cpu(picl_nodehdl_t node_hdl, void* dummy_arg __hwloc_attribute_unused) {
+
+  picl_prophdl_t  p_hdl;
+  picl_prophdl_t  table_hdl;
+  picl_propinfo_t p_info;
+  long long       long_long_val;
+  unsigned int    uint_val;
+  unsigned int    index;
+  int             int_val;
+  int             val;
+  char            string_val[PICL_PROPNAMELEN_MAX];
+
+  val = picl_get_first_prop(node_hdl, &p_hdl);
+  while (val == PICL_SUCCESS) {
+    called_cpu_probe = 1;
+    val = picl_get_propinfo(p_hdl, &p_info);
+    if (val == PICL_SUCCESS) {
+      for (index = 0; index < NUM_ITEMS; index++) {
+        if (strcasecmp(p_info.name, items[index]) == 0) {
+          if (p_info.type == PICL_PTYPE_UNSIGNED_INT) {
+            if (p_info.size == sizeof(uint_val)) {
+              val = picl_get_propval(p_hdl, &uint_val, sizeof(uint_val));
+              if (val == PICL_SUCCESS) {
+                long_long_val = uint_val;
+                assign_value(index, long_long_val);
+              }
+            }
+            else if (p_info.size == sizeof(long_long_val)) {
+              val = picl_get_propval(p_hdl, &long_long_val,
+                                     sizeof(long_long_val));
+              if (val == PICL_SUCCESS) {
+                assign_value(index, long_long_val);
+              }
+            }
+          }
+          else if (p_info.type == PICL_PTYPE_INT) {
+            if (p_info.size == sizeof(int_val)) {
+              val = picl_get_propval(p_hdl, &int_val, sizeof(int_val));
+              if (val == PICL_SUCCESS) {
+                long_long_val = int_val;
+                assign_value(index, long_long_val);
+              }
+            }
+            else if (p_info.size == sizeof(long_long_val)) {
+              val = picl_get_propval(p_hdl, &long_long_val,
+                                     sizeof(long_long_val));
+              if (val == PICL_SUCCESS) {
+                assign_value(index, long_long_val);
+              }
+            }
+          }
+          else if (p_info.type == PICL_PTYPE_CHARSTRING) {
+            val = picl_get_propval(p_hdl, &string_val, sizeof(string_val));
+            if (val == PICL_SUCCESS) {
+              assign_string_value(index, string_val);
+            }
+          }
+          else if (p_info.type == PICL_PTYPE_TABLE) {
+            val = picl_get_propval(p_hdl, &table_hdl, p_info.size);
+            if (val == PICL_SUCCESS) {
+              search_table(index, table_hdl);
+            }
+          }
+          break;
+        } else if (index == NUM_ITEMS-1) {
+	  if (p_info.type == PICL_PTYPE_CHARSTRING) {
+            val = picl_get_propval(p_hdl, &string_val, sizeof(string_val));
+            if (val == PICL_SUCCESS) {
+            }
+	  }
+	}
+      }
+    }
+
+    val = picl_get_next_prop(p_hdl, &p_hdl);
+  }
+  return PICL_WALK_TERMINATE;
+}
+
+
+/*****************************************************************************
+Initializes, gets the root, then walks the picl tree looking for information
+
+Currently, the "core" class is only needed for OPL systems
+*****************************************************************************/
+char* hwloc_solaris_get_chip_type(void) {
+  picl_nodehdl_t root;
+  int            val;
+  static char chip_type[PICL_PROPNAMELEN_MAX];
+
+  val = picl_initialize();
+  if (val != PICL_SUCCESS) { /* Can't initialize session with PICL daemon */
+      return(NULL);
+  }
+  val = picl_get_root(&root);
+  if (val != PICL_SUCCESS) {  /* Failed to get root node of the PICL tree */
+      return(NULL);
+  }
+  val = picl_walk_tree_by_class(root, "cpu", (void *)NULL, probe_cpu);
+  val = picl_walk_tree_by_class(root, "core", (void *)NULL, probe_cpu);
+  picl_shutdown();
+
+  if (called_cpu_probe) {
+#if (defined HWLOC_X86_64_ARCH) || (defined HWLOC_X86_32_ARCH)
+      /* PICL returns some corrupted chip_type strings on x86,
+       * and CPUType is only used on SPARC anyway, at least for now.
+       * So we just ignore this attribute on x86. */
+#else
+      strncpy(chip_type, dss_chip_type, PICL_PROPNAMELEN_MAX);
+#endif
+  } else {
+      /* no picl information on machine available */
+      sysinfo(SI_HW_PROVIDER, chip_type, PICL_PROPNAMELEN_MAX);
+  }
+  return(chip_type);
+}
+
+/*****************************************************************************
+Returns the chip model string.  Uses the chip mode cached by the earlier
+PICL probe (probe_cpu) when available; otherwise falls back to
+sysinfo(SI_PLATFORM) since no PICL information is available.
+*****************************************************************************/
+char *hwloc_solaris_get_chip_model(void) {
+
+    if (called_cpu_probe) {
+	if (dss_chip_mode != MODE_UNKNOWN) { /* SPARC chip */
+	    strncpy(dss_chip_model, sparc_modes[dss_chip_mode],
+		    PICL_PROPNAMELEN_MAX);
+	}
+    } else {
+	/* no picl information on machine available */
+	sysinfo(SI_PLATFORM, dss_chip_model, PICL_PROPNAMELEN_MAX);
+    }
+    return(dss_chip_model);
+}
+
+#else
+char* hwloc_solaris_get_chip_type(void) {
+  return NULL;
+}
+char *hwloc_solaris_get_chip_model(void) {
+  return NULL;
+}
+#endif
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-solaris.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-solaris.c
new file mode 100644
index 0000000000..cba94ff727
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-solaris.c
@@ -0,0 +1,817 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2017 Inria.  All rights reserved.
+ * Copyright © 2009-2011 Université Bordeaux
+ * Copyright © 2011 Cisco Systems, Inc.  All rights reserved.
+ * Copyright © 2011      Oracle and/or its affiliates.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+#include <private/autogen/config.h>
+#include <hwloc.h>
+#include <private/private.h>
+#include <private/debug.h>
+#include <private/solaris-chiptype.h>
+
+#include <stdio.h>
+#include <errno.h>
+#ifdef HAVE_DIRENT_H
+#include <dirent.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <sys/types.h>
+#include <sys/processor.h>
+#include <sys/procset.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+
+#ifdef HAVE_LIBLGRP
+#  include <sys/lgrp_user.h>
+#endif
+
+/* TODO: use psets? (only for root)
+ * TODO: get cache info from prtdiag? (it is setgid sys to be able to read from
+ * crw-r-----   1 root     sys       88,  0 nov   3 14:35 /devices/pseudo/devinfo@0:devinfo
+ * and run (apparently undocumented) ioctls on it.
+ */
+
+static int
+hwloc_solaris_set_sth_cpubind(hwloc_topology_t topology, idtype_t idtype, id_t id, hwloc_const_bitmap_t hwloc_set, int flags)
+{
+  unsigned target_cpu;
+
+  /* The resulting binding is always strict */
+
+  if (hwloc_bitmap_isequal(hwloc_set, hwloc_topology_get_complete_cpuset(topology))) {
+    if (processor_bind(idtype, id, PBIND_NONE, NULL) != 0)
+      return -1;
+#ifdef HAVE_LIBLGRP
+    if (!(flags & HWLOC_CPUBIND_NOMEMBIND)) {
+      int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
+      int n, i;
+      assert (depth >= 0);
+      n = hwloc_get_nbobjs_by_depth(topology, depth);
+      for (i = 0; i < n; i++) {
+	hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);
+	lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_NONE);
+      }
+    }
+#endif /* HAVE_LIBLGRP */
+    return 0;
+  }
+
+#ifdef HAVE_LIBLGRP
+  if (!(flags & HWLOC_CPUBIND_NOMEMBIND)) {
+    int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
+    int n, i, ok;
+    assert(depth >= 0);
+    n = hwloc_get_nbobjs_by_depth(topology, depth);
+    hwloc_bitmap_t target = hwloc_bitmap_alloc();
+    for (i = 0; i < n; i++) {
+      hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);
+      if (hwloc_bitmap_isincluded(obj->cpuset, hwloc_set))
+	hwloc_bitmap_or(target, target, obj->cpuset);
+    }
+
+    ok = hwloc_bitmap_isequal(target, hwloc_set);
+    hwloc_bitmap_free(target);
+
+    if (ok) {
+      /* Ok, managed to achieve hwloc_set by just combining NUMA nodes */
+
+      for (i = 0; i < n; i++) {
+        hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);
+
+        if (hwloc_bitmap_isincluded(obj->cpuset, hwloc_set)) {
+          lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_STRONG);
+        } else {
+          if (flags & HWLOC_CPUBIND_STRICT)
+            lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_NONE);
+          else
+            lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_WEAK);
+        }
+      }
+
+      return 0;
+    }
+  }
+#endif /* HAVE_LIBLGRP */
+
+  if (hwloc_bitmap_weight(hwloc_set) != 1) {
+    errno = EXDEV;
+    return -1;
+  }
+
+  target_cpu = hwloc_bitmap_first(hwloc_set);
+
+  if (processor_bind(idtype, id,
+		     (processorid_t) (target_cpu), NULL) != 0)
+    return -1;
+
+  return 0;
+}
+
+static int
+hwloc_solaris_set_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_bitmap_t hwloc_set, int flags)
+{
+  return hwloc_solaris_set_sth_cpubind(topology, P_PID, pid, hwloc_set, flags);
+}
+
+static int
+hwloc_solaris_set_thisproc_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, int flags)
+{
+  return hwloc_solaris_set_sth_cpubind(topology, P_PID, P_MYID, hwloc_set, flags);
+}
+
+static int
+hwloc_solaris_set_thisthread_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, int flags)
+{
+  return hwloc_solaris_set_sth_cpubind(topology, P_LWPID, P_MYID, hwloc_set, flags);
+}
+
+#ifdef HAVE_LIBLGRP
+static int
+hwloc_solaris_get_sth_cpubind(hwloc_topology_t topology, idtype_t idtype, id_t id, hwloc_bitmap_t hwloc_set, int flags __hwloc_attribute_unused)
+{
+  processorid_t binding;
+  int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
+  int n;
+  int i;
+
+  assert(depth >= 0);
+
+  /* first check if processor_bind() was used to bind to a single processor rather than to an lgroup */
+  if ( processor_bind(idtype, id, PBIND_QUERY, &binding) == 0 && binding != PBIND_NONE ) {
+    hwloc_bitmap_only(hwloc_set, binding);
+    return 0;
+  }
+
+  /* if not, check lgroups */
+  hwloc_bitmap_zero(hwloc_set);
+  n = hwloc_get_nbobjs_by_depth(topology, depth);
+  for (i = 0; i < n; i++) {
+    hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);
+    lgrp_affinity_t aff = lgrp_affinity_get(idtype, id, obj->os_index);
+
+    if (aff == LGRP_AFF_STRONG)
+      hwloc_bitmap_or(hwloc_set, hwloc_set, obj->cpuset);
+  }
+
+  if (hwloc_bitmap_iszero(hwloc_set))
+    hwloc_bitmap_copy(hwloc_set, hwloc_topology_get_complete_cpuset(topology));
+
+  return 0;
+}
+
+static int
+hwloc_solaris_get_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_bitmap_t hwloc_set, int flags)
+{
+  return hwloc_solaris_get_sth_cpubind(topology, P_PID, pid, hwloc_set, flags);
+}
+
+static int
+hwloc_solaris_get_thisproc_cpubind(hwloc_topology_t topology, hwloc_bitmap_t hwloc_set, int flags)
+{
+  return hwloc_solaris_get_sth_cpubind(topology, P_PID, P_MYID, hwloc_set, flags);
+}
+
+static int
+hwloc_solaris_get_thisthread_cpubind(hwloc_topology_t topology, hwloc_bitmap_t hwloc_set, int flags)
+{
+  return hwloc_solaris_get_sth_cpubind(topology, P_LWPID, P_MYID, hwloc_set, flags);
+}
+#endif /* HAVE_LIBLGRP */
+
+/* TODO: given thread, probably not easy because of the historical n:m implementation */
+#ifdef HAVE_LIBLGRP
+static int
+hwloc_solaris_set_sth_membind(hwloc_topology_t topology, idtype_t idtype, id_t id, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
+{
+  int depth;
+  int n, i;
+
+  switch (policy) {
+    case HWLOC_MEMBIND_DEFAULT:
+    case HWLOC_MEMBIND_BIND:
+      break;
+    default:
+      errno = ENOSYS;
+      return -1;
+  }
+
+  if (flags & HWLOC_MEMBIND_NOCPUBIND) {
+    errno = ENOSYS;
+    return -1;
+  }
+
+  depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
+  assert(depth >= 0);
+  n = hwloc_get_nbobjs_by_depth(topology, depth);
+
+  for (i = 0; i < n; i++) {
+    hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);
+    if (hwloc_bitmap_isset(nodeset, obj->os_index)) {
+      lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_STRONG);
+    } else {
+      if (flags & HWLOC_CPUBIND_STRICT)
+	lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_NONE);
+      else
+	lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_WEAK);
+    }
+  }
+
+  return 0;
+}
+
+static int
+hwloc_solaris_set_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
+{
+  return hwloc_solaris_set_sth_membind(topology, P_PID, pid, nodeset, policy, flags);
+}
+
+static int
+hwloc_solaris_set_thisproc_membind(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
+{
+  return hwloc_solaris_set_sth_membind(topology, P_PID, P_MYID, nodeset, policy, flags);
+}
+
+static int
+hwloc_solaris_set_thisthread_membind(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
+{
+  return hwloc_solaris_set_sth_membind(topology, P_LWPID, P_MYID, nodeset, policy, flags);
+}
+
+static int
+hwloc_solaris_get_sth_membind(hwloc_topology_t topology, idtype_t idtype, id_t id, hwloc_nodeset_t nodeset, hwloc_membind_policy_t *policy, int flags __hwloc_attribute_unused)
+{
+  int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
+  int n;
+  int i;
+
+  assert(depth >= 0);
+
+  hwloc_bitmap_zero(nodeset);
+  n = hwloc_get_nbobjs_by_depth(topology, depth);
+
+  for (i = 0; i < n; i++) {
+    hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);
+    lgrp_affinity_t aff = lgrp_affinity_get(idtype, id, obj->os_index);
+
+    if (aff == LGRP_AFF_STRONG)
+      hwloc_bitmap_set(nodeset, obj->os_index);
+  }
+
+  if (hwloc_bitmap_iszero(nodeset))
+    hwloc_bitmap_copy(nodeset, hwloc_topology_get_complete_nodeset(topology));
+
+  *policy = HWLOC_MEMBIND_BIND;
+  return 0;
+}
+
+static int
+hwloc_solaris_get_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_nodeset_t nodeset, hwloc_membind_policy_t *policy, int flags)
+{
+  return hwloc_solaris_get_sth_membind(topology, P_PID, pid, nodeset, policy, flags);
+}
+
+static int
+hwloc_solaris_get_thisproc_membind(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t *policy, int flags)
+{
+  return hwloc_solaris_get_sth_membind(topology, P_PID, P_MYID, nodeset, policy, flags);
+}
+
+static int
+hwloc_solaris_get_thisthread_membind(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t *policy, int flags)
+{
+  return hwloc_solaris_get_sth_membind(topology, P_LWPID, P_MYID, nodeset, policy, flags);
+}
+#endif /* HAVE_LIBLGRP */
+
+
+#ifdef MADV_ACCESS_LWP
+static int
+hwloc_solaris_set_area_membind(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags __hwloc_attribute_unused)
+{
+  int advice;
+  size_t remainder;
+
+  /* Cannot give a specific set of nodes for just an area; only the complete nodeset is accepted.  */
+  if (!hwloc_bitmap_isequal(nodeset, hwloc_topology_get_complete_nodeset(topology))) {
+    errno = EXDEV;
+    return -1;
+  }
+
+  switch (policy) {
+    case HWLOC_MEMBIND_DEFAULT:
+    case HWLOC_MEMBIND_BIND:
+      advice = MADV_ACCESS_DEFAULT;
+      break;
+    case HWLOC_MEMBIND_FIRSTTOUCH:
+    case HWLOC_MEMBIND_NEXTTOUCH:
+      advice = MADV_ACCESS_LWP;
+      break;
+    case HWLOC_MEMBIND_INTERLEAVE:
+      advice = MADV_ACCESS_MANY;
+      break;
+    default:
+      errno = ENOSYS;
+      return -1;
+  }
+
+  remainder = (uintptr_t) addr & (sysconf(_SC_PAGESIZE)-1);
+  addr = (char*) addr - remainder;
+  len += remainder;
+  return madvise((void*) addr, len, advice);
+}
+#endif
+
+#ifdef HAVE_LIBLGRP
+static void
+browse(struct hwloc_topology *topology, lgrp_cookie_t cookie, lgrp_id_t lgrp, hwloc_obj_t *glob_lgrps, unsigned *curlgrp)
+{
+  int n;
+  hwloc_obj_t obj;
+  lgrp_mem_size_t mem_size;
+
+  n = lgrp_cpus(cookie, lgrp, NULL, 0, LGRP_CONTENT_HIERARCHY);
+  if (n == -1)
+    return;
+
+  /* Is this lgrp a NUMA node? */
+  if ((mem_size = lgrp_mem_size(cookie, lgrp, LGRP_MEM_SZ_INSTALLED, LGRP_CONTENT_DIRECT)) > 0)
+  {
+    int i;
+    processorid_t *cpuids;
+    cpuids = malloc(sizeof(processorid_t) * n);
+    assert(cpuids != NULL);
+
+    obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_NUMANODE, lgrp);
+    obj->nodeset = hwloc_bitmap_alloc();
+    hwloc_bitmap_set(obj->nodeset, lgrp);
+    obj->cpuset = hwloc_bitmap_alloc();
+    glob_lgrps[(*curlgrp)++] = obj;
+
+    lgrp_cpus(cookie, lgrp, cpuids, n, LGRP_CONTENT_HIERARCHY);
+    for (i = 0; i < n ; i++) {
+      hwloc_debug("node %ld's cpu %d is %d\n", lgrp, i, cpuids[i]);
+      hwloc_bitmap_set(obj->cpuset, cpuids[i]);
+    }
+    hwloc_debug_1arg_bitmap("node %ld has cpuset %s\n",
+	lgrp, obj->cpuset);
+
+    /* or LGRP_MEM_SZ_FREE */
+    hwloc_debug("node %ld has %lldkB\n", lgrp, mem_size/1024);
+    obj->memory.local_memory = mem_size;
+    obj->memory.page_types_len = 2;
+    obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types));
+    memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types));
+    obj->memory.page_types[0].size = hwloc_getpagesize();
+#if HAVE_DECL__SC_LARGE_PAGESIZE
+    obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
+#endif
+    hwloc_insert_object_by_cpuset(topology, obj);
+    free(cpuids);
+  }
+
+  n = lgrp_children(cookie, lgrp, NULL, 0);
+  {
+    lgrp_id_t *lgrps;
+    int i;
+
+    lgrps = malloc(sizeof(lgrp_id_t) * n);
+    assert(lgrps != NULL);
+    lgrp_children(cookie, lgrp, lgrps, n);
+    hwloc_debug("lgrp %ld has %d children\n", lgrp, n);
+    for (i = 0; i < n ; i++)
+      {
+	browse(topology, cookie, lgrps[i], glob_lgrps, curlgrp);
+      }
+    hwloc_debug("lgrp %ld's children done\n", lgrp);
+    free(lgrps);
+  }
+}
+
+static void
+hwloc_look_lgrp(struct hwloc_topology *topology)
+{
+  lgrp_cookie_t cookie;
+  unsigned curlgrp = 0;
+  int nlgrps;
+  lgrp_id_t root;
+
+  if ((topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM))
+    cookie = lgrp_init(LGRP_VIEW_OS);
+  else
+    cookie = lgrp_init(LGRP_VIEW_CALLER);
+  if (cookie == LGRP_COOKIE_NONE)
+    {
+      hwloc_debug("lgrp_init failed: %s\n", strerror(errno));
+      return;
+    }
+  nlgrps = lgrp_nlgrps(cookie);
+  root = lgrp_root(cookie);
+  if (nlgrps > 0) {
+    hwloc_obj_t *glob_lgrps = calloc(nlgrps, sizeof(hwloc_obj_t));
+    browse(topology, cookie, root, glob_lgrps, &curlgrp);
+#if HAVE_DECL_LGRP_LATENCY_COOKIE
+    if (nlgrps > 1) {
+      uint64_t *distances = calloc(curlgrp*curlgrp, sizeof(uint64_t));
+      unsigned i, j;
+      for (i = 0; i < curlgrp; i++) {
+	for (j = 0; j < curlgrp; j++)
+          distances[i*curlgrp+j] = (uint64_t) lgrp_latency_cookie(cookie, glob_lgrps[i]->os_index, glob_lgrps[j]->os_index, LGRP_LAT_CPU_TO_MEM);
+      }
+      hwloc_internal_distances_add(topology, curlgrp, glob_lgrps, distances,
+				   HWLOC_DISTANCES_KIND_FROM_OS|HWLOC_DISTANCES_KIND_MEANS_LATENCY,
+				   HWLOC_DISTANCES_FLAG_GROUP);
+    } else
+#endif /* HAVE_DECL_LGRP_LATENCY_COOKIE */
+      free(glob_lgrps);
+  }
+  lgrp_fini(cookie);
+}
+#endif /* HAVE_LIBLGRP */
+
+#ifdef HAVE_LIBKSTAT
+#include <kstat.h>
+static int
+hwloc_look_kstat(struct hwloc_topology *topology)
+{
+  /* FIXME this assumes that all packages are identical */
+  char *CPUType = hwloc_solaris_get_chip_type();
+  char *CPUModel = hwloc_solaris_get_chip_model();
+
+  kstat_ctl_t *kc = kstat_open();
+  kstat_t *ksp;
+  kstat_named_t *stat;
+  unsigned look_cores = 1, look_chips = 1;
+
+  unsigned Pproc_max = 0;
+  unsigned Pproc_alloc = 256;
+  struct hwloc_solaris_Pproc {
+    unsigned Lpkg, Ppkg, Lcore, Lproc;
+  } * Pproc = malloc(Pproc_alloc * sizeof(*Pproc));
+
+  unsigned Lproc_num = 0;
+  unsigned Lproc_alloc = 256;
+  struct hwloc_solaris_Lproc {
+    unsigned Pproc;
+  } * Lproc = malloc(Lproc_alloc * sizeof(*Lproc));
+
+  unsigned Lcore_num = 0;
+  unsigned Lcore_alloc = 256;
+  struct hwloc_solaris_Lcore {
+    unsigned Pcore, Ppkg;
+  } * Lcore = malloc(Lcore_alloc * sizeof(*Lcore));
+
+  unsigned Lpkg_num = 0;
+  unsigned Lpkg_alloc = 256;
+  struct hwloc_solaris_Lpkg {
+    unsigned Ppkg;
+  } * Lpkg = malloc(Lpkg_alloc * sizeof(*Lpkg));
+
+  unsigned pkgid, coreid, cpuid;
+  unsigned i;
+
+  for (i = 0; i < Pproc_alloc; i++) {
+    Pproc[i].Lproc = -1;
+    Pproc[i].Lpkg = -1;
+    Pproc[i].Ppkg = -1;
+    Pproc[i].Lcore = -1;
+  }
+
+  if (!kc) {
+    hwloc_debug("kstat_open failed: %s\n", strerror(errno));
+    free(Pproc);
+    free(Lproc);
+    free(Lcore);
+    free(Lpkg);
+    return 0;
+  }
+
+  for (ksp = kc->kc_chain; ksp; ksp = ksp->ks_next)
+    {
+      if (strncmp("cpu_info", ksp->ks_module, 8))
+	continue;
+
+      cpuid = ksp->ks_instance;
+
+      if (kstat_read(kc, ksp, NULL) == -1)
+	{
+	  fprintf(stderr, "kstat_read failed for CPU%u: %s\n", cpuid, strerror(errno));
+	  continue;
+	}
+
+      hwloc_debug("cpu%u\n", cpuid);
+      hwloc_bitmap_set(topology->levels[0][0]->complete_cpuset, cpuid);
+
+      stat = (kstat_named_t *) kstat_data_lookup(ksp, "state");
+      if (!stat)
+          hwloc_debug("could not read state for CPU%u: %s\n", cpuid, strerror(errno));
+      else if (stat->data_type != KSTAT_DATA_CHAR)
+          hwloc_debug("unknown kstat type %d for cpu state\n", stat->data_type);
+      else
+        {
+          hwloc_debug("cpu%u's state is %s\n", cpuid, stat->value.c);
+          if (strcmp(stat->value.c, "on-line")) {
+            /* Not online.
+	     * It was marked as existing in complete_cpuset above, ignore everything else.
+	     * We wouldn't get all the topology information about parents anyway.
+	     */
+	    continue;
+	  }
+        }
+
+      if (cpuid >= Pproc_alloc) {
+	struct hwloc_solaris_Pproc *tmp = realloc(Pproc, 2*Pproc_alloc * sizeof(*Pproc));
+	if (!tmp)
+	  goto err;
+	Pproc = tmp;
+	Pproc_alloc *= 2;
+	for(i = Pproc_alloc/2; i < Pproc_alloc; i++) {
+	  Pproc[i].Lproc = -1;
+	  Pproc[i].Lpkg = -1;
+	  Pproc[i].Ppkg = -1;
+	  Pproc[i].Lcore = -1;
+	}
+      }
+      Pproc[cpuid].Lproc = Lproc_num;
+
+      if (Lproc_num >= Lproc_alloc) {
+	struct hwloc_solaris_Lproc *tmp = realloc(Lproc, 2*Lproc_alloc * sizeof(*Lproc));
+	if (!tmp)
+	  goto err;
+	Lproc = tmp;
+	Lproc_alloc *= 2;
+      }
+      Lproc[Lproc_num].Pproc = cpuid;
+      Lproc_num++;
+
+      if (cpuid >= Pproc_max)
+        Pproc_max = cpuid + 1;
+
+      if (look_chips) do {
+	/* Get Chip ID */
+	stat = (kstat_named_t *) kstat_data_lookup(ksp, "chip_id");
+	if (!stat)
+	  {
+	    if (Lpkg_num)
+	      fprintf(stderr, "could not read package id for CPU%u: %s\n", cpuid, strerror(errno));
+	    else
+	      hwloc_debug("could not read package id for CPU%u: %s\n", cpuid, strerror(errno));
+	    look_chips = 0;
+	    continue;
+	  }
+	switch (stat->data_type) {
+	  case KSTAT_DATA_INT32:
+	    pkgid = stat->value.i32;
+	    break;
+	  case KSTAT_DATA_UINT32:
+	    pkgid = stat->value.ui32;
+	    break;
+#ifdef _INT64_TYPE
+	  case KSTAT_DATA_UINT64:
+	    pkgid = stat->value.ui64;
+	    break;
+	  case KSTAT_DATA_INT64:
+	    pkgid = stat->value.i64;
+	    break;
+#endif
+	  default:
+	    fprintf(stderr, "chip_id type %d unknown\n", stat->data_type);
+	    look_chips = 0;
+	    continue;
+	}
+	Pproc[cpuid].Ppkg = pkgid;
+	for (i = 0; i < Lpkg_num; i++)
+	  if (pkgid == Lpkg[i].Ppkg)
+	    break;
+	Pproc[cpuid].Lpkg = i;
+	hwloc_debug("%u on package %u (%u)\n", cpuid, i, pkgid);
+	if (i == Lpkg_num) {
+	  if (Lpkg_num == Lpkg_alloc) {
+	    struct hwloc_solaris_Lpkg *tmp = realloc(Lpkg, 2*Lpkg_alloc * sizeof(*Lpkg));
+	    if (!tmp)
+	      goto err;
+	    Lpkg = tmp;
+	    Lpkg_alloc *= 2;
+	  }
+	  Lpkg[Lpkg_num++].Ppkg = pkgid;
+	}
+      } while(0);
+
+      if (look_cores) do {
+	/* Get Core ID */
+	stat = (kstat_named_t *) kstat_data_lookup(ksp, "core_id");
+	if (!stat)
+	  {
+	    if (Lcore_num)
+	      fprintf(stderr, "could not read core id for CPU%u: %s\n", cpuid, strerror(errno));
+	    else
+	      hwloc_debug("could not read core id for CPU%u: %s\n", cpuid, strerror(errno));
+	    look_cores = 0;
+	    continue;
+	  }
+	switch (stat->data_type) {
+	  case KSTAT_DATA_INT32:
+	    coreid = stat->value.i32;
+	    break;
+	  case KSTAT_DATA_UINT32:
+	    coreid = stat->value.ui32;
+	    break;
+#ifdef _INT64_TYPE
+	  case KSTAT_DATA_UINT64:
+	    coreid = stat->value.ui64;
+	    break;
+	  case KSTAT_DATA_INT64:
+	    coreid = stat->value.i64;
+	    break;
+#endif
+	  default:
+	    fprintf(stderr, "core_id type %d unknown\n", stat->data_type);
+	    look_cores = 0;
+	    continue;
+	}
+	for (i = 0; i < Lcore_num; i++)
+	  if (coreid == Lcore[i].Pcore && Pproc[cpuid].Ppkg == Lcore[i].Ppkg)
+	    break;
+	Pproc[cpuid].Lcore = i;
+	hwloc_debug("%u on core %u (%u)\n", cpuid, i, coreid);
+	if (i == Lcore_num) {
+	  if (Lcore_num == Lcore_alloc) {
+	    struct hwloc_solaris_Lcore *tmp = realloc(Lcore, 2*Lcore_alloc * sizeof(*Lcore));
+	    if (!tmp)
+	      goto err;
+	    Lcore = tmp;
+	    Lcore_alloc *= 2;
+	  }
+	  Lcore[Lcore_num].Ppkg = Pproc[cpuid].Ppkg;
+	  Lcore[Lcore_num++].Pcore = coreid;
+	}
+      } while(0);
+
+      /* Note: there is also clog_id for the Thread ID (not unique) and
+       * pkg_core_id for the core ID (not unique).  They are not useful to us
+       * however. */
+    }
+
+  if (look_chips
+      && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_PACKAGE)) {
+    struct hwloc_obj *obj;
+    unsigned j,k;
+    hwloc_debug("%u Packages\n", Lpkg_num);
+    for (j = 0; j < Lpkg_num; j++) {
+      obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PACKAGE, Lpkg[j].Ppkg);
+      if (CPUType)
+	hwloc_obj_add_info(obj, "CPUType", CPUType);
+      if (CPUModel)
+	hwloc_obj_add_info(obj, "CPUModel", CPUModel);
+      obj->cpuset = hwloc_bitmap_alloc();
+      for(k=0; k<Pproc_max; k++)
+	if (Pproc[k].Lpkg == j)
+	  hwloc_bitmap_set(obj->cpuset, k);
+      hwloc_debug_1arg_bitmap("Package %u has cpuset %s\n", j, obj->cpuset);
+      hwloc_insert_object_by_cpuset(topology, obj);
+    }
+    hwloc_debug("%s", "\n");
+  }
+
+  if (look_cores
+      && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) {
+    struct hwloc_obj *obj;
+    unsigned j,k;
+    hwloc_debug("%u Cores\n", Lcore_num);
+    for (j = 0; j < Lcore_num; j++) {
+      obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_CORE, Lcore[j].Pcore);
+      obj->cpuset = hwloc_bitmap_alloc();
+      for(k=0; k<Pproc_max; k++)
+	if (Pproc[k].Lcore == j)
+	  hwloc_bitmap_set(obj->cpuset, k);
+      hwloc_debug_1arg_bitmap("Core %u has cpuset %s\n", j, obj->cpuset);
+      hwloc_insert_object_by_cpuset(topology, obj);
+    }
+    hwloc_debug("%s", "\n");
+  }
+
+  if (Lproc_num) {
+    struct hwloc_obj *obj;
+    unsigned j,k;
+    hwloc_debug("%u PUs\n", Lproc_num);
+    for (j = 0; j < Lproc_num; j++) {
+      obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PU, Lproc[j].Pproc);
+      obj->cpuset = hwloc_bitmap_alloc();
+      for(k=0; k<Pproc_max; k++)
+	if (Pproc[k].Lproc == j)
+	  hwloc_bitmap_set(obj->cpuset, k);
+      hwloc_debug_1arg_bitmap("PU %u has cpuset %s\n", j, obj->cpuset);
+      hwloc_insert_object_by_cpuset(topology, obj);
+    }
+    hwloc_debug("%s", "\n");
+  }
+
+  kstat_close(kc);
+
+  free(Pproc);
+  free(Lproc);
+  free(Lcore);
+  free(Lpkg);
+  return Lproc_num > 0;
+
+ err:
+  kstat_close(kc);
+
+  free(Pproc);
+  free(Lproc);
+  free(Lcore);
+  free(Lpkg);
+  return 0;
+}
+#endif /* HAVE_LIBKSTAT */
+
+static int
+hwloc_look_solaris(struct hwloc_backend *backend)
+{
+  struct hwloc_topology *topology = backend->topology;
+  unsigned nbprocs = hwloc_fallback_nbprocessors (topology);
+  int alreadypus = 0;
+
+  if (topology->levels[0][0]->cpuset)
+    /* somebody discovered things */
+    return -1;
+
+  hwloc_alloc_obj_cpusets(topology->levels[0][0]);
+
+#ifdef HAVE_LIBLGRP
+  hwloc_look_lgrp(topology);
+#endif /* HAVE_LIBLGRP */
+#ifdef HAVE_LIBKSTAT
+  if (hwloc_look_kstat(topology) > 0)
+    alreadypus = 1;
+#endif /* HAVE_LIBKSTAT */
+  if (!alreadypus)
+    hwloc_setup_pu_level(topology, nbprocs);
+
+  hwloc_obj_add_info(topology->levels[0][0], "Backend", "Solaris");
+  hwloc_add_uname_info(topology, NULL);
+  return 0;
+}
+
+void
+hwloc_set_solaris_hooks(struct hwloc_binding_hooks *hooks,
+			struct hwloc_topology_support *support __hwloc_attribute_unused)
+{
+  hooks->set_proc_cpubind = hwloc_solaris_set_proc_cpubind;
+  hooks->set_thisproc_cpubind = hwloc_solaris_set_thisproc_cpubind;
+  hooks->set_thisthread_cpubind = hwloc_solaris_set_thisthread_cpubind;
+#ifdef HAVE_LIBLGRP
+  hooks->get_proc_cpubind = hwloc_solaris_get_proc_cpubind;
+  hooks->get_thisproc_cpubind = hwloc_solaris_get_thisproc_cpubind;
+  hooks->get_thisthread_cpubind = hwloc_solaris_get_thisthread_cpubind;
+  hooks->set_proc_membind = hwloc_solaris_set_proc_membind;
+  hooks->set_thisproc_membind = hwloc_solaris_set_thisproc_membind;
+  hooks->set_thisthread_membind = hwloc_solaris_set_thisthread_membind;
+  hooks->get_proc_membind = hwloc_solaris_get_proc_membind;
+  hooks->get_thisproc_membind = hwloc_solaris_get_thisproc_membind;
+  hooks->get_thisthread_membind = hwloc_solaris_get_thisthread_membind;
+#endif /* HAVE_LIBLGRP */
+#ifdef MADV_ACCESS_LWP
+  hooks->set_area_membind = hwloc_solaris_set_area_membind;
+  support->membind->firsttouch_membind = 1;
+  support->membind->bind_membind = 1;
+  support->membind->interleave_membind = 1;
+  support->membind->nexttouch_membind = 1;
+#endif
+}
+
+static struct hwloc_backend *
+hwloc_solaris_component_instantiate(struct hwloc_disc_component *component,
+				    const void *_data1 __hwloc_attribute_unused,
+				    const void *_data2 __hwloc_attribute_unused,
+				    const void *_data3 __hwloc_attribute_unused)
+{
+  struct hwloc_backend *backend;
+  backend = hwloc_backend_alloc(component);
+  if (!backend)
+    return NULL;
+  backend->discover = hwloc_look_solaris;
+  return backend;
+}
+
+static struct hwloc_disc_component hwloc_solaris_disc_component = {
+  HWLOC_DISC_COMPONENT_TYPE_CPU,
+  "solaris",
+  HWLOC_DISC_COMPONENT_TYPE_GLOBAL,
+  hwloc_solaris_component_instantiate,
+  50,
+  NULL
+};
+
+const struct hwloc_component hwloc_solaris_component = {
+  HWLOC_COMPONENT_ABI,
+  NULL, NULL,
+  HWLOC_COMPONENT_TYPE_DISC,
+  0,
+  &hwloc_solaris_disc_component
+};
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-synthetic.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-synthetic.c
new file mode 100644
index 0000000000..db003f042e
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-synthetic.c
@@ -0,0 +1,1215 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2017 Inria.  All rights reserved.
+ * Copyright © 2009-2010 Université Bordeaux
+ * Copyright © 2009-2011 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+#include <private/autogen/config.h>
+#include <hwloc.h>
+#include <private/private.h>
+#include <private/misc.h>
+#include <private/debug.h>
+
+#include <limits.h>
+#include <assert.h>
+#ifdef HAVE_STRINGS_H
+#include <strings.h>
+#endif
+
+struct hwloc_synthetic_level_data_s {
+  unsigned arity;
+  unsigned long totalwidth;
+  hwloc_obj_type_t type;
+  unsigned depth; /* For caches/groups */
+  hwloc_obj_cache_type_t cachetype; /* For caches */
+  hwloc_uint64_t memorysize; /* For caches/memory */
+
+  /* the indexes= attribute before parsing */
+  const char *index_string;
+  unsigned long index_string_length;
+  /* the array of explicit indexes after parsing */
+  unsigned *index_array;
+
+  /* used while filling the topology */
+  unsigned next_os_index; /* id of the next object for that level */
+};
+
+struct hwloc_synthetic_backend_data_s {
+  /* synthetic backend parameters */
+  char *string;
+#define HWLOC_SYNTHETIC_MAX_DEPTH 128
+  struct hwloc_synthetic_level_data_s level[HWLOC_SYNTHETIC_MAX_DEPTH];
+};
+
+struct hwloc_synthetic_intlv_loop_s {
+  unsigned step;
+  unsigned nb;
+  unsigned level_depth;
+};
+
+static void
+hwloc_synthetic_process_level_indexes(struct hwloc_synthetic_backend_data_s *data,
+				      unsigned curleveldepth,
+				      int verbose)
+{
+  struct hwloc_synthetic_level_data_s *curlevel = &data->level[curleveldepth];
+  unsigned long total = curlevel->totalwidth;
+  const char *attr = curlevel->index_string;
+  unsigned long length = curlevel->index_string_length;
+  unsigned *array = NULL;
+  struct hwloc_synthetic_intlv_loop_s * loops = NULL;
+  size_t i;
+
+  if (!attr)
+    return;
+
+  array = calloc(total, sizeof(*array));
+  if (!array) {
+    if (verbose)
+      fprintf(stderr, "Failed to allocate synthetic index array of size %lu\n", total);
+    goto out;
+  }
+
+  i = strspn(attr, "0123456789,");
+  if (i == length) {
+    /* explicit array of indexes */
+
+    for(i=0; i<total; i++) {
+      const char *next;
+      unsigned idx = strtoul(attr, (char **) &next, 10);
+      if (next == attr) {
+	if (verbose)
+	  fprintf(stderr, "Failed to read synthetic index #%lu at '%s'\n", (unsigned long) i, attr);
+	goto out_with_array;
+      }
+
+      array[i] = idx;
+      if (i != total-1) {
+	if (*next != ',') {
+	  if (verbose)
+	    fprintf(stderr, "Missing comma after synthetic index #%lu at '%s'\n", (unsigned long) i, attr);
+	  goto out_with_array;
+	}
+	attr = next+1;
+      } else {
+	attr = next;
+      }
+    }
+    curlevel->index_array = array;
+
+  } else {
+    /* interleaving */
+    unsigned nr_loops = 1, cur_loop;
+    unsigned minstep = total;
+    unsigned long nbs = 1;
+    unsigned j, mul;
+    const char *tmp;
+
+    tmp = attr;
+    while (tmp) {
+      tmp = strchr(tmp, ':');
+      if (!tmp || tmp >= attr+length)
+	break;
+      nr_loops++;
+      tmp++;
+    }
+    /* nr_loops colon-separated fields, but we may need one more at the end */
+    loops = malloc((nr_loops+1)*sizeof(*loops));
+    if (!loops) {
+      if (verbose)
+	fprintf(stderr, "Failed to allocate synthetic index interleave loop array of size %u\n", nr_loops);
+      goto out_with_array;
+    }
+
+    if (*attr >= '0' && *attr <= '9') {
+      /* interleaving as x*y:z*t:... */
+      unsigned step, nb;
+
+      tmp = attr;
+      cur_loop = 0;
+      while (tmp) {
+	char *tmp2, *tmp3;
+	step = (unsigned) strtol(tmp, &tmp2, 0);
+	if (tmp2 == tmp || *tmp2 != '*') {
+	  if (verbose)
+	    fprintf(stderr, "Failed to read synthetic index interleaving loop '%s' without number before '*'\n", tmp);
+	  goto out_with_loops;
+	}
+	if (!step) {
+	  if (verbose)
+	    fprintf(stderr, "Invalid interleaving loop with step 0 at '%s'\n", tmp);
+	  goto out_with_loops;
+	}
+	tmp2++;
+	nb = (unsigned) strtol(tmp2, &tmp3, 0);
+	if (tmp3 == tmp2 || (*tmp3 && *tmp3 != ':' && *tmp3 != ')' && *tmp3 != ' ')) {
+	  if (verbose)
+	    fprintf(stderr, "Failed to read synthetic index interleaving loop '%s' without number between '*' and ':'\n", tmp);
+	  goto out_with_loops;
+	}
+	if (!nb) {
+	  if (verbose)
+	    fprintf(stderr, "Invalid interleaving loop with number 0 at '%s'\n", tmp2);
+	  goto out_with_loops;
+	}
+	loops[cur_loop].step = step;
+	loops[cur_loop].nb = nb;
+	if (step < minstep)
+	  minstep = step;
+	nbs *= nb;
+	cur_loop++;
+	if (*tmp3 == ')' || *tmp3 == ' ')
+	  break;
+	tmp = (const char*) (tmp3+1);
+      }
+
+    } else {
+      /* interleaving as type1:type2:... */
+      hwloc_obj_type_t type;
+      union hwloc_obj_attr_u attrs;
+      int err;
+
+      /* find level depths for each interleaving loop */
+      tmp = attr;
+      cur_loop = 0;
+      while (tmp) {
+	err = hwloc_type_sscanf(tmp, &type, &attrs, sizeof(attrs));
+	if (err < 0) {
+	  if (verbose)
+	    fprintf(stderr, "Failed to read synthetic index interleaving loop type '%s'\n", tmp);
+	  goto out_with_loops;
+	}
+	if (type == HWLOC_OBJ_MISC || type == HWLOC_OBJ_BRIDGE || type == HWLOC_OBJ_PCI_DEVICE || type == HWLOC_OBJ_OS_DEVICE) {
+	  if (verbose)
+	    fprintf(stderr, "Misc object type disallowed in synthetic index interleaving loop type '%s'\n", tmp);
+	  goto out_with_loops;
+	}
+	for(i=0; i<curleveldepth; i++) {
+	  if (type != data->level[i].type)
+	    continue;
+	  if (type == HWLOC_OBJ_GROUP
+	      && attrs.group.depth != (unsigned) -1
+	      && attrs.group.depth != data->level[i].depth)
+	    continue;
+	  loops[cur_loop].level_depth = (unsigned)i;
+	  break;
+	}
+	if (i == curleveldepth) {
+	  if (verbose)
+	    fprintf(stderr, "Failed to find level for synthetic index interleaving loop type '%s' above '%s'\n",
+		    tmp, hwloc_type_name(curlevel->type));
+	  goto out_with_loops;
+	}
+	tmp = strchr(tmp, ':');
+	if (!tmp || tmp > attr+length)
+	  break;
+	tmp++;
+	cur_loop++;
+      }
+
+      /* compute actual loop step/nb */
+      for(cur_loop=0; cur_loop<nr_loops; cur_loop++) {
+	unsigned mydepth = loops[cur_loop].level_depth;
+	unsigned prevdepth = 0;
+	unsigned step, nb;
+	for(i=0; i<nr_loops; i++) {
+	  if (loops[i].level_depth == mydepth && i != cur_loop) {
+	    if (verbose)
+	      fprintf(stderr, "Invalid duplicate interleaving loop type in synthetic index '%s'\n", attr);
+	    goto out_with_loops;
+	  }
+	  if (loops[i].level_depth < mydepth
+	      && loops[i].level_depth > prevdepth)
+	    prevdepth = loops[i].level_depth;
+	}
+	step = curlevel->totalwidth / data->level[mydepth].totalwidth; /* number of objects below us */
+	nb = data->level[mydepth].totalwidth / data->level[prevdepth].totalwidth; /* number of us within parent */
+
+	loops[cur_loop].step = step;
+	loops[cur_loop].nb = nb;
+	assert(nb);
+	assert(step);
+	if (step < minstep)
+	  minstep = step;
+	nbs *= nb;
+      }
+    }
+    assert(nbs);
+
+    if (nbs != total) {
+      /* one loop of total/nbs steps is missing, add it if it's just the smallest one */
+      if (minstep == total/nbs) {
+	loops[nr_loops].step = 1;
+	loops[nr_loops].nb = total/nbs;
+	nr_loops++;
+      } else {
+	if (verbose)
+	  fprintf(stderr, "Invalid index interleaving total width %lu instead of %lu\n", nbs, total);
+	goto out_with_loops;
+      }
+    }
+
+    /* generate the array of indexes */
+    mul = 1;
+    for(i=0; i<nr_loops; i++) {
+      unsigned step = loops[i].step;
+      unsigned nb = loops[i].nb;
+      for(j=0; j<total; j++)
+	array[j] += ((j / step) % nb) * mul;
+      mul *= nb;
+    }
+
+    /* check that we have the right values (cannot pass total, cannot give duplicate 0) */
+    for(j=0; j<total; j++) {
+      if (array[j] >= total) {
+	if (verbose)
+	  fprintf(stderr, "Invalid index interleaving generates out-of-range index %u\n", array[j]);
+	goto out_with_loops;
+      }
+      if (!array[j] && j) {
+	if (verbose)
+	  fprintf(stderr, "Invalid index interleaving generates duplicate index values\n");
+	goto out_with_loops;
+      }
+    }
+
+    free(loops);
+    curlevel->index_array = array;
+  }
+
+  return;
+
+ out_with_loops:
+  free(loops);
+ out_with_array:
+  free(array);
+ out:
+  return;
+}
+
+static hwloc_uint64_t
+hwloc_synthetic_parse_memory_attr(const char *attr, const char **endp)
+{
+  const char *endptr;
+  hwloc_uint64_t size;
+  size = strtoull(attr, (char **) &endptr, 0);
+  if (!hwloc_strncasecmp(endptr, "TB", 2)) {
+    size <<= 40;
+    endptr += 2;
+  } else if (!hwloc_strncasecmp(endptr, "GB", 2)) {
+    size <<= 30;
+    endptr += 2;
+  } else if (!hwloc_strncasecmp(endptr, "MB", 2)) {
+    size <<= 20;
+    endptr += 2;
+  } else if (!hwloc_strncasecmp(endptr, "kB", 2)) {
+    size <<= 10;
+    endptr += 2;
+  }
+  *endp = endptr;
+  return size;
+}
+
+static int
+hwloc_synthetic_parse_level_attrs(const char *attrs, const char **next_posp,
+				  struct hwloc_synthetic_level_data_s *curlevel,
+				  int verbose)
+{
+  hwloc_obj_type_t type = curlevel->type;
+  const char *next_pos;
+  hwloc_uint64_t memorysize = 0;
+  const char *index_string = NULL;
+  size_t index_string_length = 0;
+
+  next_pos = (const char *) strchr(attrs, ')');
+  if (!next_pos) {
+    if (verbose)
+      fprintf(stderr, "Missing attribute closing bracket in synthetic string doesn't have a number of objects at '%s'\n", attrs);
+    errno = EINVAL;
+    return -1;
+  }
+
+  while (')' != *attrs) {
+    int iscache = hwloc_obj_type_is_cache(type);
+
+    if (iscache && !strncmp("size=", attrs, 5)) {
+      memorysize = hwloc_synthetic_parse_memory_attr(attrs+5, &attrs);
+
+    } else if (!iscache && !strncmp("memory=", attrs, 7)) {
+      memorysize = hwloc_synthetic_parse_memory_attr(attrs+7, &attrs);
+
+    } else if (!strncmp("indexes=", attrs, 8)) {
+      index_string = attrs+8;
+      attrs += 8;
+      index_string_length = strcspn(attrs, " )");
+      attrs += index_string_length;
+
+    } else {
+      if (verbose)
+	fprintf(stderr, "Unknown attribute at '%s'\n", attrs);
+      errno = EINVAL;
+      return -1;
+    }
+
+    if (' ' == *attrs)
+      attrs++;
+    else if (')' != *attrs) {
+      if (verbose)
+	fprintf(stderr, "Missing parameter separator at '%s'\n", attrs);
+      errno = EINVAL;
+      return -1;
+    }
+  }
+
+  curlevel->memorysize = memorysize;
+  curlevel->index_string = index_string;
+  curlevel->index_string_length = (unsigned long)index_string_length;
+  *next_posp = next_pos+1;
+  return 0;
+}
+
+/* Read from description a series of integers describing a symmetrical
+   topology and update the hwloc_synthetic_backend_data_s accordingly.  On
+   success, return zero.  */
+static int
+hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
+			     const char *description)
+{
+  const char *pos, *next_pos;
+  unsigned long item, count;
+  unsigned i;
+  int type_count[HWLOC_OBJ_TYPE_MAX];
+  unsigned unset;
+  int verbose = 0;
+  const char *env = getenv("HWLOC_SYNTHETIC_VERBOSE");
+  int err;
+  unsigned long totalarity = 1;
+
+  if (env)
+    verbose = atoi(env);
+
+  /* default values before we add root attributes */
+  data->level[0].totalwidth = 1;
+  data->level[0].type = HWLOC_OBJ_MACHINE;
+  data->level[0].index_string = NULL;
+  data->level[0].index_array = NULL;
+  data->level[0].memorysize = 0;
+  if (*description == '(') {
+    err = hwloc_synthetic_parse_level_attrs(description+1, &description, &data->level[0], verbose);
+    if (err < 0)
+      return err;
+  }
+
+  for (pos = description, count = 1; *pos; pos = next_pos) {
+    hwloc_obj_type_t type = HWLOC_OBJ_TYPE_NONE;
+    union hwloc_obj_attr_u attrs;
+
+    /* initialize parent arity to 0 so that the levels are not infinite */
+    data->level[count-1].arity = 0;
+
+    while (*pos == ' ')
+      pos++;
+
+    if (!*pos)
+      break;
+
+    if (*pos < '0' || *pos > '9') {
+      if (hwloc_type_sscanf(pos, &type, &attrs, sizeof(attrs)) < 0) {
+	/* FIXME: allow generic "Cache" string? would require to deal with possibly duplicate cache levels */
+	if (verbose)
+	  fprintf(stderr, "Synthetic string with unknown object type at '%s'\n", pos);
+	errno = EINVAL;
+	goto error;
+      }
+      if (type == HWLOC_OBJ_SYSTEM || type == HWLOC_OBJ_MISC || type == HWLOC_OBJ_BRIDGE || type == HWLOC_OBJ_PCI_DEVICE || type == HWLOC_OBJ_OS_DEVICE) {
+	if (verbose)
+	  fprintf(stderr, "Synthetic string with disallowed object type at '%s'\n", pos);
+	errno = EINVAL;
+	goto error;
+      }
+
+      next_pos = strchr(pos, ':');
+      if (!next_pos) {
+	if (verbose)
+	  fprintf(stderr,"Synthetic string doesn't have a `:' after object type at '%s'\n", pos);
+	errno = EINVAL;
+	goto error;
+      }
+      pos = next_pos + 1;
+    }
+    data->level[count].type = type;
+    data->level[count].depth = (unsigned) -1;
+    data->level[count].cachetype = (hwloc_obj_cache_type_t) -1;
+    if (hwloc_obj_type_is_cache(type)) {
+      /* these are always initialized */
+      data->level[count].depth = attrs.cache.depth;
+      data->level[count].cachetype = attrs.cache.type;
+    } else if (type == HWLOC_OBJ_GROUP) {
+      /* could be -1 but will be set below */
+      data->level[count].depth = attrs.group.depth;
+    }
+
+    item = strtoul(pos, (char **)&next_pos, 0);
+    if (next_pos == pos) {
+      if (verbose)
+	fprintf(stderr,"Synthetic string doesn't have a number of objects at '%s'\n", pos);
+      errno = EINVAL;
+      goto error;
+    }
+    if (!item) {
+      if (verbose)
+	fprintf(stderr,"Synthetic string with disallow 0 number of objects at '%s'\n", pos);
+      errno = EINVAL;
+      goto error;
+    }
+    data->level[count-1].arity = (unsigned)item;
+
+    totalarity *= item;
+    data->level[count].totalwidth = totalarity;
+    data->level[count].index_string = NULL;
+    data->level[count].index_array = NULL;
+    data->level[count].memorysize = 0;
+    if (*next_pos == '(') {
+      err = hwloc_synthetic_parse_level_attrs(next_pos+1, &next_pos, &data->level[count], verbose);
+      if (err < 0)
+	goto error;
+    }
+
+    if (count + 1 >= HWLOC_SYNTHETIC_MAX_DEPTH) {
+      if (verbose)
+	fprintf(stderr,"Too many synthetic levels, max %d\n", HWLOC_SYNTHETIC_MAX_DEPTH);
+      errno = EINVAL;
+      goto error;
+    }
+    if (item > UINT_MAX) {
+      if (verbose)
+	fprintf(stderr,"Too big arity, max %u\n", UINT_MAX);
+      errno = EINVAL;
+      goto error;
+    }
+
+    count++;
+  }
+
+  if (count <= 0) {
+    if (verbose)
+      fprintf(stderr, "Synthetic string doesn't contain any object\n");
+    errno = EINVAL;
+    goto error;
+  }
+
+  if (data->level[count-1].type != HWLOC_OBJ_TYPE_NONE && data->level[count-1].type != HWLOC_OBJ_PU) {
+    if (verbose)
+      fprintf(stderr, "Synthetic string cannot use non-PU type for last level\n");
+    errno = EINVAL;
+    return -1;
+  }
+  data->level[count-1].type = HWLOC_OBJ_PU;
+
+  for(i=0; i<HWLOC_OBJ_TYPE_MAX; i++) {
+    type_count[i] = 0;
+  }
+  for(i=count-1; i>0; i--) {
+    hwloc_obj_type_t type = data->level[i].type;
+    if (type != HWLOC_OBJ_TYPE_NONE) {
+      type_count[type]++;
+    }
+  }
+
+  /* sanity checks */
+  if (!type_count[HWLOC_OBJ_PU]) {
+    if (verbose)
+      fprintf(stderr, "Synthetic string missing ending number of PUs\n");
+    errno = EINVAL;
+    return -1;
+  } else if (type_count[HWLOC_OBJ_PU] > 1) {
+    if (verbose)
+      fprintf(stderr, "Synthetic string cannot have several PU levels\n");
+    errno = EINVAL;
+    return -1;
+  }
+  if (type_count[HWLOC_OBJ_PACKAGE] > 1) {
+    if (verbose)
+      fprintf(stderr, "Synthetic string cannot have several package levels\n");
+    errno = EINVAL;
+    return -1;
+  }
+  if (type_count[HWLOC_OBJ_NUMANODE] > 1) {
+    if (verbose)
+      fprintf(stderr, "Synthetic string cannot have several NUMA node levels\n");
+    errno = EINVAL;
+    return -1;
+  }
+  if (type_count[HWLOC_OBJ_CORE] > 1) {
+    if (verbose)
+      fprintf(stderr, "Synthetic string cannot have several core levels\n");
+    errno = EINVAL;
+    return -1;
+  }
+  if (type_count[HWLOC_OBJ_MACHINE] > 1) {
+    if (verbose)
+      fprintf(stderr, "Synthetic string cannot have several machine levels\n");
+    errno = EINVAL;
+    return -1;
+  }
+
+  /* initialize the top level (not specified in the string) */
+  if (type_count[HWLOC_OBJ_MACHINE] == 1) {
+    data->level[0].type = HWLOC_OBJ_SYSTEM;
+    type_count[HWLOC_OBJ_SYSTEM] = 1;
+  } else {
+    data->level[0].type = HWLOC_OBJ_MACHINE;
+    type_count[HWLOC_OBJ_MACHINE] = 1;
+  }
+
+  /* deal with missing intermediate levels */
+  unset = 0;
+  for(i=1; i<count-1; i++) {
+    if (data->level[i].type == HWLOC_OBJ_TYPE_NONE)
+      unset++;
+  }
+  if (unset && unset != count-2) {
+    if (verbose)
+      fprintf(stderr, "Synthetic string cannot mix unspecified and specified types for levels\n");
+    errno = EINVAL;
+    return -1;
+  }
+  if (unset) {
+    /* we want in priority: numa, package, core, up to 3 caches, groups */
+    unsigned neednuma = count >= 3;
+    unsigned needpack = count >= 4;
+    unsigned needcore = count >= 5;
+    unsigned needcaches = count <= 5 ? 0 : count >= 9 ? 4 : count-5;
+    unsigned needgroups = count-2-neednuma-needpack-needcore-needcaches;
+    /* we place them in order: groups, package, numa, caches, core */
+    for(i = 0; i < needgroups; i++) {
+      unsigned depth = 1 + i;
+      data->level[depth].type = HWLOC_OBJ_GROUP;
+      type_count[HWLOC_OBJ_GROUP]++;
+    }
+    if (needpack) {
+      unsigned depth = 1 + needgroups;
+      data->level[depth].type = HWLOC_OBJ_PACKAGE;
+      type_count[HWLOC_OBJ_PACKAGE] = 1;
+    }
+    if (neednuma) {
+      unsigned depth = 1 + needgroups + needpack;
+      data->level[depth].type = HWLOC_OBJ_NUMANODE;
+      type_count[HWLOC_OBJ_NUMANODE] = 1;
+    }
+    if (needcaches) {
+      /* priority: l2, l1, l3, l1i */
+      /* order: l3, l2, l1, l1i */
+      unsigned l3depth = 1 + needgroups + needpack + neednuma;
+      unsigned l2depth = l3depth + (needcaches >= 3);
+      unsigned l1depth = l2depth + 1;
+      unsigned l1idepth = l1depth + 1;
+      if (needcaches >= 3) {
+	data->level[l3depth].type = HWLOC_OBJ_L3CACHE;
+	data->level[l3depth].depth = 3;
+	data->level[l3depth].cachetype = HWLOC_OBJ_CACHE_UNIFIED;
+	type_count[HWLOC_OBJ_L3CACHE] = 1;
+      }
+      data->level[l2depth].type = HWLOC_OBJ_L2CACHE;
+      data->level[l2depth].depth = 2;
+      data->level[l2depth].cachetype = HWLOC_OBJ_CACHE_UNIFIED;
+      type_count[HWLOC_OBJ_L2CACHE] = 1;
+      if (needcaches >= 2) {
+	data->level[l1depth].type = HWLOC_OBJ_L1CACHE;
+	data->level[l1depth].depth = 1;
+	data->level[l1depth].cachetype = HWLOC_OBJ_CACHE_DATA;
+	type_count[HWLOC_OBJ_L1CACHE] = 1;
+      }
+      if (needcaches >= 4) {
+	data->level[l1idepth].type = HWLOC_OBJ_L1ICACHE;
+	data->level[l1idepth].depth = 1;
+	data->level[l1idepth].cachetype = HWLOC_OBJ_CACHE_INSTRUCTION;
+	type_count[HWLOC_OBJ_L1ICACHE] = 1;
+      }
+    }
+    if (needcore) {
+      unsigned depth = 1 + needgroups + needpack + neednuma + needcaches;
+      data->level[depth].type = HWLOC_OBJ_CORE;
+      type_count[HWLOC_OBJ_CORE] = 1;
+    }
+  }
+
+  /* enforce a NUMA level */
+  if (!type_count[HWLOC_OBJ_NUMANODE]) {
+    /* insert a NUMA level and the machine level */
+    if (data->level[1].type == HWLOC_OBJ_MACHINE)
+      /* there's an explicit machine level after the automatic system root, insert below both */
+      i = 2;
+    else
+      /* insert below the automatic machine root */
+      i = 1;
+    if (verbose)
+      fprintf(stderr, "Inserting a NUMA level with a single object at depth %u\n", i);
+    /* move existing levels by one */
+    memmove(&data->level[i+1], &data->level[i], (count-i)*sizeof(struct hwloc_synthetic_level_data_s));
+    data->level[i].type = HWLOC_OBJ_NUMANODE;
+    data->level[i].index_string = NULL;
+    data->level[i].index_array = NULL;
+    data->level[i].memorysize = 0;
+    data->level[i].totalwidth = data->level[i-1].totalwidth;
+    /* update arity to insert a single NUMA node per parent */
+    data->level[i].arity = data->level[i-1].arity;
+    data->level[i-1].arity = 1;
+    count++;
+  }
+
+  for (i=0; i<count; i++) {
+    struct hwloc_synthetic_level_data_s *curlevel = &data->level[i];
+    hwloc_obj_type_t type = curlevel->type;
+
+    if (type == HWLOC_OBJ_GROUP) {
+      if (curlevel->depth == (unsigned)-1)
+	curlevel->depth = type_count[HWLOC_OBJ_GROUP]--;
+
+    } else if (hwloc_obj_type_is_cache(type)) {
+      if (!curlevel->memorysize) {
+	if (1 == curlevel->depth)
+	  /* 32KiB default for L1-level caches */
+	  curlevel->memorysize = 32*1024;
+	else
+	  /* x4 per level: 256KiB << (2*depth), i.e. 4MiB for L2, unified */
+	  curlevel->memorysize = 256*1024 << (2*curlevel->depth);
+      }
+
+    } else if (type == HWLOC_OBJ_NUMANODE && !curlevel->memorysize) {
+      /* 1GB in memory nodes. */
+      curlevel->memorysize = 1024*1024*1024;
+    }
+
+    hwloc_synthetic_process_level_indexes(data, i, verbose);
+  }
+
+  data->string = strdup(description);
+  data->level[count-1].arity = 0;
+  return 0;
+
+ error:
+  for(i=0; i<HWLOC_SYNTHETIC_MAX_DEPTH; i++) {
+    struct hwloc_synthetic_level_data_s *curlevel = &data->level[i];
+    free(curlevel->index_array);
+    if (!curlevel->arity)
+      break;
+  }
+  return -1;
+}
+
+static void
+hwloc_synthetic__post_look_hooks(struct hwloc_synthetic_level_data_s *curlevel,
+				 hwloc_obj_t obj)
+{
+  switch (obj->type) {
+  case HWLOC_OBJ_GROUP:
+    obj->attr->group.kind = HWLOC_GROUP_KIND_SYNTHETIC;
+    obj->attr->group.subkind = curlevel->depth-1;
+    break;
+  case HWLOC_OBJ_SYSTEM:
+    break;
+  case HWLOC_OBJ_MACHINE:
+    break;
+  case HWLOC_OBJ_NUMANODE:
+    break;
+  case HWLOC_OBJ_PACKAGE:
+    break;
+  case HWLOC_OBJ_L1CACHE:
+  case HWLOC_OBJ_L2CACHE:
+  case HWLOC_OBJ_L3CACHE:
+  case HWLOC_OBJ_L4CACHE:
+  case HWLOC_OBJ_L5CACHE:
+  case HWLOC_OBJ_L1ICACHE:
+  case HWLOC_OBJ_L2ICACHE:
+  case HWLOC_OBJ_L3ICACHE:
+    obj->attr->cache.depth = curlevel->depth;
+    obj->attr->cache.linesize = 64;
+    obj->attr->cache.type = curlevel->cachetype;
+    obj->attr->cache.size = curlevel->memorysize;
+    break;
+  case HWLOC_OBJ_CORE:
+    break;
+  case HWLOC_OBJ_PU:
+    break;
+  case HWLOC_OBJ_BRIDGE:
+  case HWLOC_OBJ_PCI_DEVICE:
+  case HWLOC_OBJ_OS_DEVICE:
+  case HWLOC_OBJ_MISC:
+  case HWLOC_OBJ_TYPE_MAX:
+    /* Should never happen */
+    assert(0);
+    break;
+  }
+  if (curlevel->memorysize && !hwloc_obj_type_is_cache(obj->type)) {
+    obj->memory.local_memory = curlevel->memorysize;
+    obj->memory.page_types_len = 1;
+    obj->memory.page_types = malloc(sizeof(*obj->memory.page_types));
+    memset(obj->memory.page_types, 0, sizeof(*obj->memory.page_types));
+    obj->memory.page_types[0].size = 4096;
+    obj->memory.page_types[0].count = curlevel->memorysize / 4096;
+  }
+}
+
+/*
+ * Recursively build objects for synthetic level 'level':
+ * - data->level[level] gives the type, arity and optional index array
+ * - next_os_index in each level hands out unique OS indexes per level
+ * - each generated cpuset is OR'ed into parent_cpuset
+ * - a leaf level (arity 0) sets the single bit for its own os_index
+ * - objects rejected by the type filter are not inserted
+ */
+static void
+hwloc__look_synthetic(struct hwloc_topology *topology,
+		      struct hwloc_synthetic_backend_data_s *data,
+		      int level,
+		      hwloc_bitmap_t parent_cpuset)
+{
+  hwloc_obj_t obj;
+  unsigned i;
+  struct hwloc_synthetic_level_data_s *curlevel = &data->level[level];
+  hwloc_obj_type_t type = curlevel->type;
+  hwloc_bitmap_t set;
+  unsigned os_index;
+
+  /* pre-hooks */
+  switch (type) {
+    case HWLOC_OBJ_GROUP:
+      break;
+    case HWLOC_OBJ_MACHINE:
+      break;
+    case HWLOC_OBJ_NUMANODE:
+      break;
+    case HWLOC_OBJ_PACKAGE:
+      break;
+    case HWLOC_OBJ_L1CACHE:
+    case HWLOC_OBJ_L2CACHE:
+    case HWLOC_OBJ_L3CACHE:
+    case HWLOC_OBJ_L4CACHE:
+    case HWLOC_OBJ_L5CACHE:
+    case HWLOC_OBJ_L1ICACHE:
+    case HWLOC_OBJ_L2ICACHE:
+    case HWLOC_OBJ_L3ICACHE:
+      break;
+    case HWLOC_OBJ_CORE:
+      break;
+    case HWLOC_OBJ_PU:
+      break;
+    case HWLOC_OBJ_SYSTEM:
+    case HWLOC_OBJ_BRIDGE:
+    case HWLOC_OBJ_PCI_DEVICE:
+    case HWLOC_OBJ_OS_DEVICE:
+    case HWLOC_OBJ_MISC:
+    case HWLOC_OBJ_TYPE_MAX:
+      /* Should never happen */
+      assert(0);
+      break;
+  }
+
+  os_index = curlevel->next_os_index++;
+  if (curlevel->index_array)
+    os_index = curlevel->index_array[os_index];
+  else if (hwloc_obj_type_is_cache(type) || type == HWLOC_OBJ_GROUP)
+    /* don't enforce useless os_indexes for Caches and Groups */
+    os_index = -1;
+
+  set = hwloc_bitmap_alloc();
+  if (!curlevel->arity) {
+    hwloc_bitmap_set(set, os_index);
+  } else {
+    for (i = 0; i < curlevel->arity; i++)
+      hwloc__look_synthetic(topology, data, level + 1, set);
+  }
+
+  hwloc_bitmap_or(parent_cpuset, parent_cpuset, set);
+
+  if (hwloc_filter_check_keep_object_type(topology, type)) {
+    obj = hwloc_alloc_setup_object(topology, type, os_index);
+    obj->cpuset = set;
+
+    if (type == HWLOC_OBJ_NUMANODE) {
+      obj->nodeset = hwloc_bitmap_alloc();
+      hwloc_bitmap_set(obj->nodeset, os_index);
+    }
+
+    hwloc_synthetic__post_look_hooks(curlevel, obj);
+
+    hwloc_insert_object_by_cpuset(topology, obj);
+  } else
+    hwloc_bitmap_free(set);
+}
+
+static int
+hwloc_look_synthetic(struct hwloc_backend *backend)
+{
+  struct hwloc_topology *topology = backend->topology;
+  struct hwloc_synthetic_backend_data_s *data = backend->private_data;
+  hwloc_bitmap_t cpuset = hwloc_bitmap_alloc();
+  unsigned i;
+
+  assert(!topology->levels[0][0]->cpuset);
+
+  hwloc_alloc_obj_cpusets(topology->levels[0][0]);
+
+  topology->support.discovery->pu = 1;
+
+  /* start with os_index 0 for each level */
+  for (i = 0; data->level[i].arity > 0; i++)
+    data->level[i].next_os_index = 0;
+  /* ... including the last one */
+  data->level[i].next_os_index = 0;
+
+  /* update first level type according to the synthetic type array */
+  topology->levels[0][0]->type = data->level[0].type;
+  hwloc_synthetic__post_look_hooks(&data->level[0], topology->levels[0][0]);
+
+  for (i = 0; i < data->level[0].arity; i++)
+    hwloc__look_synthetic(topology, data, 1, cpuset);
+
+  hwloc_bitmap_free(cpuset);
+
+  hwloc_obj_add_info(topology->levels[0][0], "Backend", "Synthetic");
+  hwloc_obj_add_info(topology->levels[0][0], "SyntheticDescription", data->string);
+  return 0;
+}
+
+static void
+hwloc_synthetic_backend_disable(struct hwloc_backend *backend)
+{
+  struct hwloc_synthetic_backend_data_s *data = backend->private_data;
+  unsigned i;
+  for(i=0; i<HWLOC_SYNTHETIC_MAX_DEPTH; i++) {
+    struct hwloc_synthetic_level_data_s *curlevel = &data->level[i];
+    free(curlevel->index_array);
+    if (!curlevel->arity)
+      break;
+  }
+  free(data->string);
+  free(data);
+}
+
+static struct hwloc_backend *
+hwloc_synthetic_component_instantiate(struct hwloc_disc_component *component,
+				      const void *_data1,
+				      const void *_data2 __hwloc_attribute_unused,
+				      const void *_data3 __hwloc_attribute_unused)
+{
+  struct hwloc_backend *backend;
+  struct hwloc_synthetic_backend_data_s *data;
+  int err;
+
+  if (!_data1) {
+    const char *env = getenv("HWLOC_SYNTHETIC");
+    if (env) {
+      /* 'synthetic' was given in HWLOC_COMPONENTS without a description */
+      _data1 = env;
+    } else {
+      errno = EINVAL;
+      goto out;
+    }
+  }
+
+  backend = hwloc_backend_alloc(component);
+  if (!backend)
+    goto out;
+
+  data = malloc(sizeof(*data));
+  if (!data) {
+    errno = ENOMEM;
+    goto out_with_backend;
+  }
+
+  err = hwloc_backend_synthetic_init(data, (const char *) _data1);
+  if (err < 0)
+    goto out_with_data;
+
+  backend->private_data = data;
+  backend->discover = hwloc_look_synthetic;
+  backend->disable = hwloc_synthetic_backend_disable;
+  backend->is_thissystem = 0;
+
+  return backend;
+
+ out_with_data:
+  free(data);
+ out_with_backend:
+  free(backend);
+ out:
+  return NULL;
+}
+
+static struct hwloc_disc_component hwloc_synthetic_disc_component = {
+  HWLOC_DISC_COMPONENT_TYPE_GLOBAL,
+  "synthetic",
+  ~0,
+  hwloc_synthetic_component_instantiate,
+  30,
+  NULL
+};
+
+const struct hwloc_component hwloc_synthetic_component = {
+  HWLOC_COMPONENT_ABI,
+  NULL, NULL,
+  HWLOC_COMPONENT_TYPE_DISC,
+  0,
+  &hwloc_synthetic_disc_component
+};
+
+static int hwloc_topology_export_synthetic_indexes(struct hwloc_topology * topology,
+						   hwloc_obj_t obj,
+						   char *buffer, size_t buflen)
+{
+  unsigned depth = obj->depth;
+  unsigned total = topology->level_nbobjects[depth];
+  unsigned step = 1;
+  unsigned nr_loops = 0;
+  struct hwloc_synthetic_intlv_loop_s *loops = NULL, *tmploops;
+  hwloc_obj_t cur;
+  unsigned i, j;
+  ssize_t tmplen = buflen;
+  char *tmp = buffer;
+  int res, ret = 0;
+
+  /* must start with 0 */
+  if (obj->os_index)
+    goto exportall;
+
+  while (step != total) {
+    /* must be a divider of the total */
+    if (total % step)
+      goto exportall;
+
+    /* look for os_index == step */
+    for(i=1; i<total; i++)
+      if (topology->levels[depth][i]->os_index == step)
+	break;
+    if (i == total)
+      goto exportall;
+    for(j=2; j<total/i; j++)
+      if (topology->levels[depth][i*j]->os_index != step*j)
+	break;
+
+    nr_loops++;
+    tmploops = realloc(loops, nr_loops*sizeof(*loops));
+    if (!tmploops)
+      goto exportall;
+    loops = tmploops;
+    loops[nr_loops-1].step = i;
+    loops[nr_loops-1].nb = j;
+    step *= j;
+  }
+
+  /* check this interleaving */
+  for(i=0; i<total; i++) {
+    unsigned ind = 0;
+    unsigned mul = 1;
+    for(j=0; j<nr_loops; j++) {
+      ind += (i / loops[j].step) % loops[j].nb * mul;
+      mul *= loops[j].nb;
+    }
+    if (topology->levels[depth][i]->os_index != ind)
+      goto exportall;
+  }
+
+  /* success, print it */
+  for(j=0; j<nr_loops; j++) {
+    res = hwloc_snprintf(tmp, tmplen, "%u*%u%s", loops[j].step, loops[j].nb,
+			 j == nr_loops-1 ? ")" : ":");
+    if (res < 0) {
+      free(loops);
+      return -1;
+    }
+    ret += res;
+    if (res >= tmplen)
+      res = tmplen>0 ? (int)tmplen - 1 : 0;
+    tmp += res;
+    tmplen -= res;
+  }
+
+  free(loops);
+  return ret;
+
+ exportall:
+  free(loops);
+
+  /* dump all indexes */
+  cur = obj;
+  while (cur) {
+    res = snprintf(tmp, tmplen, "%u%s", cur->os_index,
+		   cur->next_cousin ? "," : ")");
+    if (res < 0)
+      return -1;
+    ret += res;
+    if (res >= tmplen)
+      res = tmplen>0 ? (int)tmplen - 1 : 0;
+    tmp += res;
+    tmplen -= res;
+    cur = cur->next_cousin;
+  }
+  return ret;
+}
+
+static int hwloc_topology_export_synthetic_obj_attr(struct hwloc_topology * topology,
+						    hwloc_obj_t obj,
+						    char *buffer, size_t buflen)
+{
+  const char * separator = " ";
+  const char * prefix = "(";
+  char cachesize[64] = "";
+  char memsize[64] = "";
+  int needindexes = 0;
+
+  if (hwloc_obj_type_is_cache(obj->type) && obj->attr->cache.size) {
+    snprintf(cachesize, sizeof(cachesize), "%ssize=%llu",
+	     prefix, (unsigned long long) obj->attr->cache.size);
+    prefix = separator;
+  }
+  if (obj->memory.local_memory) {
+    snprintf(memsize, sizeof(memsize), "%smemory=%llu",
+	     prefix, (unsigned long long) obj->memory.local_memory);
+    prefix = separator;
+  }
+  if (obj->type == HWLOC_OBJ_PU || obj->type == HWLOC_OBJ_NUMANODE) {
+    hwloc_obj_t cur = obj;
+    while (cur) {
+      if (cur->os_index != cur->logical_index) {
+	needindexes = 1;
+	break;
+      }
+      cur = cur->next_cousin;
+    }
+  }
+  if (*cachesize || *memsize || needindexes) {
+    ssize_t tmplen = buflen;
+    char *tmp = buffer;
+    int res, ret = 0;
+
+    res = hwloc_snprintf(tmp, tmplen, "%s%s%s", cachesize, memsize, needindexes ? "" : ")");
+    if (res < 0)
+      return -1;
+    ret += res;
+    if (res >= tmplen)
+      res = tmplen>0 ? (int)tmplen - 1 : 0;
+    tmp += res;
+    tmplen -= res;
+
+    if (needindexes) {
+      res = snprintf(tmp, tmplen, "%sindexes=", prefix);
+      if (res < 0)
+	return -1;
+      ret += res;
+      if (res >= tmplen)
+	res = tmplen>0 ? (int)tmplen - 1 : 0;
+      tmp += res;
+      tmplen -= res;
+
+      res = hwloc_topology_export_synthetic_indexes(topology, obj, tmp, tmplen);
+      if (res < 0)
+	return -1;
+      ret += res;
+      if (res >= tmplen)
+	res = tmplen>0 ? (int)tmplen - 1 : 0;
+      tmp += res;
+      tmplen -= res;
+    }
+    return ret;
+  } else {
+    return 0;
+  }
+}
+
+int
+hwloc_topology_export_synthetic(struct hwloc_topology * topology,
+				char *buffer, size_t buflen,
+				unsigned long flags)
+{
+  hwloc_obj_t obj = hwloc_get_root_obj(topology);
+  ssize_t tmplen = buflen;
+  char *tmp = buffer;
+  int res, ret = 0;
+  unsigned arity;
+  const char * separator = " ";
+  const char * prefix = "";
+
+  if (!topology->is_loaded) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (flags & ~(HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES|HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS)) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  /* TODO: add a flag to ignore symmetric_subtree and I/Os.
+   * just assume things are symmetric with the left branches of the tree.
+   * but the number of objects per level may be wrong, what to do with OS index array in this case?
+   * only allow ignoring symmetric_subtree if the level width remains OK?
+   */
+
+  /* TODO: add a root object by default, with a prefix such as tree=
+   * so that we can backward-compatibly recognize whether there's a root or not.
+   * and add a flag to disable it.
+   */
+
+  /* TODO: flag to force all indexes, not only for PU and NUMA? */
+
+  if (!obj->symmetric_subtree) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (!(flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS)) {
+    /* root attributes */
+    res = hwloc_topology_export_synthetic_obj_attr(topology, obj, tmp, tmplen);
+    if (res < 0)
+      return -1;
+    ret += res;
+    if (ret > 0)
+      prefix = separator;
+    if (res >= tmplen)
+      res = tmplen>0 ? (int)tmplen - 1 : 0;
+    tmp += res;
+    tmplen -= res;
+  }
+
+  arity = obj->arity;
+  while (arity) {
+    /* for each level */
+    obj = obj->first_child;
+    if (flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES) {
+      res = hwloc_snprintf(tmp, tmplen, "%s%s:%u", prefix, hwloc_type_name(obj->type), arity);
+    } else {
+      char types[64];
+      hwloc_obj_type_snprintf(types, sizeof(types), obj, 1);
+      res = hwloc_snprintf(tmp, tmplen, "%s%s:%u", prefix, types, arity);
+    }
+    if (res < 0)
+      return -1;
+    ret += res;
+    if (res >= tmplen)
+      res = tmplen>0 ? (int)tmplen - 1 : 0;
+    tmp += res;
+    tmplen -= res;
+
+    if (!(flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS)) {
+      /* obj attributes */
+      res = hwloc_topology_export_synthetic_obj_attr(topology, obj, tmp, tmplen);
+      if (res < 0)
+	return -1;
+      ret += res;
+      if (res >= tmplen)
+	res = tmplen>0 ? (int)tmplen - 1 : 0;
+      tmp += res;
+      tmplen -= res;
+    }
+
+    /* next level */
+    prefix = separator;
+    arity = obj->arity;
+  }
+
+  return ret;
+}
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-windows.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-windows.c
new file mode 100644
index 0000000000..cd845ffcde
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-windows.c
@@ -0,0 +1,1171 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2017 Inria.  All rights reserved.
+ * Copyright © 2009-2012 Université Bordeaux
+ * Copyright © 2011 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/* To try to get all declarations duplicated below.  */
+#define _WIN32_WINNT 0x0601
+
+#include <private/autogen/config.h>
+#include <hwloc.h>
+#include <private/private.h>
+#include <private/debug.h>
+
+#include <windows.h>
+
+/* NOTE(review): the declarations below are compatibility fallbacks for
+ * toolchains (old Windows SDKs, MinGW) whose headers lack them; each is
+ * guarded by the corresponding configure-time HAVE_* probe so real SDK
+ * declarations win when available. */
+#ifndef HAVE_KAFFINITY
+typedef ULONG_PTR KAFFINITY, *PKAFFINITY;
+#endif
+
+#ifndef HAVE_PROCESSOR_CACHE_TYPE
+typedef enum _PROCESSOR_CACHE_TYPE {
+  CacheUnified,
+  CacheInstruction,
+  CacheData,
+  CacheTrace
+} PROCESSOR_CACHE_TYPE;
+#endif
+
+#ifndef CACHE_FULLY_ASSOCIATIVE
+#define CACHE_FULLY_ASSOCIATIVE 0xFF
+#endif
+
+#ifndef MAXIMUM_PROC_PER_GROUP /* missing in MinGW */
+#define MAXIMUM_PROC_PER_GROUP 64
+#endif
+
+#ifndef HAVE_CACHE_DESCRIPTOR
+typedef struct _CACHE_DESCRIPTOR {
+  BYTE Level;
+  BYTE Associativity;
+  WORD LineSize;
+  DWORD Size; /* in bytes */
+  PROCESSOR_CACHE_TYPE Type;
+} CACHE_DESCRIPTOR, *PCACHE_DESCRIPTOR;
+#endif
+
+#ifndef HAVE_LOGICAL_PROCESSOR_RELATIONSHIP
+typedef enum _LOGICAL_PROCESSOR_RELATIONSHIP {
+  RelationProcessorCore,
+  RelationNumaNode,
+  RelationCache,
+  RelationProcessorPackage,
+  RelationGroup,
+  RelationAll = 0xffff
+} LOGICAL_PROCESSOR_RELATIONSHIP;
+#else /* HAVE_LOGICAL_PROCESSOR_RELATIONSHIP */
+#  ifndef HAVE_RELATIONPROCESSORPACKAGE
+#    define RelationProcessorPackage 3
+#    define RelationGroup 4
+#    define RelationAll 0xffff
+#  endif /* HAVE_RELATIONPROCESSORPACKAGE */
+#endif /* HAVE_LOGICAL_PROCESSOR_RELATIONSHIP */
+
+#ifndef HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION
+typedef struct _SYSTEM_LOGICAL_PROCESSOR_INFORMATION {
+  ULONG_PTR ProcessorMask;
+  LOGICAL_PROCESSOR_RELATIONSHIP Relationship;
+  _ANONYMOUS_UNION
+  union {
+    struct {
+      BYTE flags;
+    } ProcessorCore;
+    struct {
+      DWORD NodeNumber;
+    } NumaNode;
+    CACHE_DESCRIPTOR Cache;
+    ULONGLONG Reserved[2];
+  } DUMMYUNIONNAME;
+} SYSTEM_LOGICAL_PROCESSOR_INFORMATION, *PSYSTEM_LOGICAL_PROCESSOR_INFORMATION;
+#endif
+
+/* Extended interface, for group support */
+
+#ifndef HAVE_GROUP_AFFINITY
+typedef struct _GROUP_AFFINITY {
+  KAFFINITY Mask;
+  WORD Group;
+  WORD Reserved[3];
+} GROUP_AFFINITY, *PGROUP_AFFINITY;
+#endif
+
+#ifndef HAVE_PROCESSOR_RELATIONSHIP
+typedef struct _PROCESSOR_RELATIONSHIP {
+  BYTE Flags;
+  BYTE Reserved[21];
+  WORD GroupCount;
+  GROUP_AFFINITY GroupMask[ANYSIZE_ARRAY];
+} PROCESSOR_RELATIONSHIP, *PPROCESSOR_RELATIONSHIP;
+#endif
+
+#ifndef HAVE_NUMA_NODE_RELATIONSHIP
+typedef struct _NUMA_NODE_RELATIONSHIP {
+  DWORD NodeNumber;
+  BYTE Reserved[20];
+  GROUP_AFFINITY GroupMask;
+} NUMA_NODE_RELATIONSHIP, *PNUMA_NODE_RELATIONSHIP;
+#endif
+
+#ifndef HAVE_CACHE_RELATIONSHIP
+typedef struct _CACHE_RELATIONSHIP {
+  BYTE Level;
+  BYTE Associativity;
+  WORD LineSize;
+  DWORD CacheSize;
+  PROCESSOR_CACHE_TYPE Type;
+  BYTE Reserved[20];
+  GROUP_AFFINITY GroupMask;
+} CACHE_RELATIONSHIP, *PCACHE_RELATIONSHIP;
+#endif
+
+#ifndef HAVE_PROCESSOR_GROUP_INFO
+typedef struct _PROCESSOR_GROUP_INFO {
+  BYTE MaximumProcessorCount;
+  BYTE ActiveProcessorCount;
+  BYTE Reserved[38];
+  KAFFINITY ActiveProcessorMask;
+} PROCESSOR_GROUP_INFO, *PPROCESSOR_GROUP_INFO;
+#endif
+
+#ifndef HAVE_GROUP_RELATIONSHIP
+typedef struct _GROUP_RELATIONSHIP {
+  WORD MaximumGroupCount;
+  WORD ActiveGroupCount;
+  ULONGLONG Reserved[2];
+  PROCESSOR_GROUP_INFO GroupInfo[ANYSIZE_ARRAY];
+} GROUP_RELATIONSHIP, *PGROUP_RELATIONSHIP;
+#endif
+
+#ifndef HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX
+typedef struct _SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX {
+  LOGICAL_PROCESSOR_RELATIONSHIP Relationship;
+  DWORD Size;
+  _ANONYMOUS_UNION
+  union {
+    PROCESSOR_RELATIONSHIP Processor;
+    NUMA_NODE_RELATIONSHIP NumaNode;
+    CACHE_RELATIONSHIP Cache;
+    GROUP_RELATIONSHIP Group;
+    /* Odd: no member to tell the cpu mask of the package... */
+  } DUMMYUNIONNAME;
+} SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, *PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX;
+#endif
+
+#ifndef HAVE_PSAPI_WORKING_SET_EX_BLOCK
+typedef union _PSAPI_WORKING_SET_EX_BLOCK {
+  ULONG_PTR Flags;
+  struct {
+    unsigned Valid  :1;
+    unsigned ShareCount  :3;
+    unsigned Win32Protection  :11;
+    unsigned Shared  :1;
+    unsigned Node  :6;
+    unsigned Locked  :1;
+    unsigned LargePage  :1;
+  };
+} PSAPI_WORKING_SET_EX_BLOCK;
+#endif
+
+#ifndef HAVE_PSAPI_WORKING_SET_EX_INFORMATION
+typedef struct _PSAPI_WORKING_SET_EX_INFORMATION {
+  PVOID VirtualAddress;
+  PSAPI_WORKING_SET_EX_BLOCK VirtualAttributes;
+} PSAPI_WORKING_SET_EX_INFORMATION;
+#endif
+
+#ifndef HAVE_PROCESSOR_NUMBER
+typedef struct _PROCESSOR_NUMBER {
+  WORD Group;
+  BYTE Number;
+  BYTE Reserved;
+} PROCESSOR_NUMBER, *PPROCESSOR_NUMBER;
+#endif
+
+/* Function pointers */
+/* NOTE(review): these entry points may be absent on older Windows releases,
+ * so they are resolved at runtime in hwloc_win_get_function_ptrs() below
+ * and may remain NULL; callers must test them before use. */
+
+typedef WORD (WINAPI *PFN_GETACTIVEPROCESSORGROUPCOUNT)(void);
+static PFN_GETACTIVEPROCESSORGROUPCOUNT GetActiveProcessorGroupCountProc;
+
+/* Number of Windows processor groups; defaults to 1 when the OS cannot report it. */
+static unsigned long nr_processor_groups = 1;
+
+typedef WORD (WINAPI *PFN_GETACTIVEPROCESSORCOUNT)(WORD);
+static PFN_GETACTIVEPROCESSORCOUNT GetActiveProcessorCountProc;
+
+typedef DWORD (WINAPI *PFN_GETCURRENTPROCESSORNUMBER)(void);
+static PFN_GETCURRENTPROCESSORNUMBER GetCurrentProcessorNumberProc;
+
+typedef VOID (WINAPI *PFN_GETCURRENTPROCESSORNUMBEREX)(PPROCESSOR_NUMBER);
+static PFN_GETCURRENTPROCESSORNUMBEREX GetCurrentProcessorNumberExProc;
+
+typedef BOOL (WINAPI *PFN_GETLOGICALPROCESSORINFORMATION)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION Buffer, PDWORD ReturnLength);
+static PFN_GETLOGICALPROCESSORINFORMATION GetLogicalProcessorInformationProc;
+
+typedef BOOL (WINAPI *PFN_GETLOGICALPROCESSORINFORMATIONEX)(LOGICAL_PROCESSOR_RELATIONSHIP relationship, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX Buffer, PDWORD ReturnLength);
+static PFN_GETLOGICALPROCESSORINFORMATIONEX GetLogicalProcessorInformationExProc;
+
+typedef BOOL (WINAPI *PFN_SETTHREADGROUPAFFINITY)(HANDLE hThread, const GROUP_AFFINITY *GroupAffinity, PGROUP_AFFINITY PreviousGroupAffinity);
+static PFN_SETTHREADGROUPAFFINITY SetThreadGroupAffinityProc;
+
+typedef BOOL (WINAPI *PFN_GETTHREADGROUPAFFINITY)(HANDLE hThread, PGROUP_AFFINITY GroupAffinity);
+static PFN_GETTHREADGROUPAFFINITY GetThreadGroupAffinityProc;
+
+typedef BOOL (WINAPI *PFN_GETNUMAAVAILABLEMEMORYNODE)(UCHAR Node, PULONGLONG AvailableBytes);
+static PFN_GETNUMAAVAILABLEMEMORYNODE GetNumaAvailableMemoryNodeProc;
+
+typedef BOOL (WINAPI *PFN_GETNUMAAVAILABLEMEMORYNODEEX)(USHORT Node, PULONGLONG AvailableBytes);
+static PFN_GETNUMAAVAILABLEMEMORYNODEEX GetNumaAvailableMemoryNodeExProc;
+
+typedef LPVOID (WINAPI *PFN_VIRTUALALLOCEXNUMA)(HANDLE hProcess, LPVOID lpAddress, SIZE_T dwSize, DWORD flAllocationType, DWORD flProtect, DWORD nndPreferred);
+static PFN_VIRTUALALLOCEXNUMA VirtualAllocExNumaProc;
+
+typedef BOOL (WINAPI *PFN_VIRTUALFREEEX)(HANDLE hProcess, LPVOID lpAddress, SIZE_T dwSize, DWORD dwFreeType);
+static PFN_VIRTUALFREEEX VirtualFreeExProc;
+
+typedef BOOL (WINAPI *PFN_QUERYWORKINGSETEX)(HANDLE hProcess, PVOID pv, DWORD cb);
+static PFN_QUERYWORKINGSETEX QueryWorkingSetExProc;
+
+/* Resolve optional kernel32/psapi entry points at runtime so the library
+ * still loads on Windows versions that lack them.  Any pointer that cannot
+ * be resolved is left NULL; callers must check before use. */
+static void hwloc_win_get_function_ptrs(void)
+{
+    HMODULE kernel32;
+
+    kernel32 = LoadLibrary("kernel32.dll");
+    if (kernel32) {
+      GetActiveProcessorGroupCountProc =
+	(PFN_GETACTIVEPROCESSORGROUPCOUNT) GetProcAddress(kernel32, "GetActiveProcessorGroupCount");
+      GetActiveProcessorCountProc =
+	(PFN_GETACTIVEPROCESSORCOUNT) GetProcAddress(kernel32, "GetActiveProcessorCount");
+      GetLogicalProcessorInformationProc =
+	(PFN_GETLOGICALPROCESSORINFORMATION) GetProcAddress(kernel32, "GetLogicalProcessorInformation");
+      GetCurrentProcessorNumberProc =
+	(PFN_GETCURRENTPROCESSORNUMBER) GetProcAddress(kernel32, "GetCurrentProcessorNumber");
+      GetCurrentProcessorNumberExProc =
+	(PFN_GETCURRENTPROCESSORNUMBEREX) GetProcAddress(kernel32, "GetCurrentProcessorNumberEx");
+      SetThreadGroupAffinityProc =
+	(PFN_SETTHREADGROUPAFFINITY) GetProcAddress(kernel32, "SetThreadGroupAffinity");
+      GetThreadGroupAffinityProc =
+	(PFN_GETTHREADGROUPAFFINITY) GetProcAddress(kernel32, "GetThreadGroupAffinity");
+      GetNumaAvailableMemoryNodeProc =
+	(PFN_GETNUMAAVAILABLEMEMORYNODE) GetProcAddress(kernel32, "GetNumaAvailableMemoryNode");
+      GetNumaAvailableMemoryNodeExProc =
+	(PFN_GETNUMAAVAILABLEMEMORYNODEEX) GetProcAddress(kernel32, "GetNumaAvailableMemoryNodeEx");
+      GetLogicalProcessorInformationExProc =
+	(PFN_GETLOGICALPROCESSORINFORMATIONEX)GetProcAddress(kernel32, "GetLogicalProcessorInformationEx");
+      /* Fixed: K32QueryWorkingSetEx used to be (mis)assigned into
+       * VirtualAllocExNumaProc (with a stray "=*"), leaving
+       * QueryWorkingSetExProc NULL and crashing hwloc_win_get_area_membind(). */
+      QueryWorkingSetExProc =
+	(PFN_QUERYWORKINGSETEX) GetProcAddress(kernel32, "K32QueryWorkingSetEx");
+      VirtualAllocExNumaProc =
+	(PFN_VIRTUALALLOCEXNUMA) GetProcAddress(kernel32, "VirtualAllocExNuma");
+      VirtualFreeExProc =
+	(PFN_VIRTUALFREEEX) GetProcAddress(kernel32, "VirtualFreeEx");
+    }
+
+    if (GetActiveProcessorGroupCountProc)
+      nr_processor_groups = GetActiveProcessorGroupCountProc();
+
+    /* Before Windows 7, QueryWorkingSetEx is exported by psapi.dll
+     * instead of kernel32.dll. */
+    if (!QueryWorkingSetExProc) {
+      HMODULE psapi = LoadLibrary("psapi.dll");
+      if (psapi)
+        QueryWorkingSetExProc = (PFN_QUERYWORKINGSETEX) GetProcAddress(psapi, "QueryWorkingSetEx");
+    }
+}
+
+/*
+ * ULONG_PTR and DWORD_PTR are 64/32bits depending on the arch
+ * while bitmaps use unsigned long (always 32bits)
+ */
+
+/* Load one ULONG_PTR worth of bits into `set`, clearing it first.
+ * A 64-bit ULONG_PTR spans two 32-bit ulong slots of the bitmap. */
+static void hwloc_bitmap_from_ULONG_PTR(hwloc_bitmap_t set, ULONG_PTR mask)
+{
+#if SIZEOF_VOID_P == 8
+  ULONG_PTR lo = mask & 0xffffffff;
+  ULONG_PTR hi = mask >> 32;
+  hwloc_bitmap_from_ulong(set, lo);       /* resets the bitmap, fills ulong #0 */
+  hwloc_bitmap_set_ith_ulong(set, 1, hi); /* then ulong #1 */
+#else
+  hwloc_bitmap_from_ulong(set, mask);
+#endif
+}
+
+/* Like hwloc_bitmap_from_ULONG_PTR() but targets ULONG_PTR slot `i`,
+ * i.e. ulong slots 2*i and 2*i+1 on 64-bit Windows. */
+static void hwloc_bitmap_from_ith_ULONG_PTR(hwloc_bitmap_t set, unsigned i, ULONG_PTR mask)
+{
+#if SIZEOF_VOID_P == 8
+  ULONG_PTR lo = mask & 0xffffffff;
+  ULONG_PTR hi = mask >> 32;
+  hwloc_bitmap_from_ith_ulong(set, 2*i, lo);   /* resets, fills ulong #2i */
+  hwloc_bitmap_set_ith_ulong(set, 2*i+1, hi);  /* then ulong #2i+1 */
+#else
+  hwloc_bitmap_from_ith_ulong(set, i, mask);
+#endif
+}
+
+/* OR one ULONG_PTR worth of bits into `set` at ULONG_PTR slot `i`
+ * without clearing any previously-set bits. */
+static void hwloc_bitmap_set_ith_ULONG_PTR(hwloc_bitmap_t set, unsigned i, ULONG_PTR mask)
+{
+#if SIZEOF_VOID_P == 8
+  ULONG_PTR lo = mask & 0xffffffff;
+  ULONG_PTR hi = mask >> 32;
+  hwloc_bitmap_set_ith_ulong(set, 2*i, lo);
+  hwloc_bitmap_set_ith_ulong(set, 2*i+1, hi);
+#else
+  hwloc_bitmap_set_ith_ulong(set, i, mask);
+#endif
+}
+
+/* Collapse bitmap ulong slot 0 (and slot 1 on 64-bit Windows)
+ * into a single ULONG_PTR mask. */
+static ULONG_PTR hwloc_bitmap_to_ULONG_PTR(hwloc_const_bitmap_t set)
+{
+#if SIZEOF_VOID_P == 8
+  ULONG_PTR hi = hwloc_bitmap_to_ith_ulong(set, 1);
+  ULONG_PTR lo = hwloc_bitmap_to_ulong(set);
+  return (hi << 32) | lo;
+#else
+  return hwloc_bitmap_to_ulong(set);
+#endif
+}
+
+/* Collapse the bitmap bits of ULONG_PTR slot `i` (ulong slots 2*i/2*i+1
+ * on 64-bit Windows) into a single ULONG_PTR mask. */
+static ULONG_PTR hwloc_bitmap_to_ith_ULONG_PTR(hwloc_const_bitmap_t set, unsigned i)
+{
+#if SIZEOF_VOID_P == 8
+  ULONG_PTR hi = hwloc_bitmap_to_ith_ulong(set, 2*i+1);
+  ULONG_PTR lo = hwloc_bitmap_to_ith_ulong(set, 2*i);
+  return (hi << 32) | lo;
+#else
+  return hwloc_bitmap_to_ith_ulong(set, i);
+#endif
+}
+
+/* Convert `set` into a (ULONG_PTR slot index, mask) pair when all set bits
+ * fall within one ULONG_PTR slot; return -1 otherwise, including for
+ * infinitely-set bitmaps (weight == -1). */
+static int hwloc_bitmap_to_single_ULONG_PTR(hwloc_const_bitmap_t set, unsigned *index, ULONG_PTR *mask)
+{
+  unsigned first_slot, last_slot;
+  if (hwloc_bitmap_weight(set) == -1)
+    return -1; /* infinite set can never fit */
+  first_slot = hwloc_bitmap_first(set) / (sizeof(ULONG_PTR)*8);
+  last_slot = hwloc_bitmap_last(set) / (sizeof(ULONG_PTR)*8);
+  if (first_slot != last_slot)
+    return -1; /* bits span several slots */
+  *mask = hwloc_bitmap_to_ith_ULONG_PTR(set, first_slot);
+  *index = first_slot;
+  return 0;
+}
+
+/**************************************************************
+ * hwloc PU numbering with respect to Windows processor groups
+ *
+ * Everywhere below we reserve 64 physical indexes per processor group because that's
+ * the maximum (MAXIMUM_PROC_PER_GROUP). Windows may actually use fewer bits than that
+ * in some groups (either to avoid splitting NUMA nodes across groups, or because of OS
+ * tweaks such as "bcdedit /set groupsize 8") but we keep some unused indexes for simplicity.
+ * That means PU physical indexes and cpusets may be non-contiguous.
+ * That also means hwloc_fallback_nbprocessors() below must return the last PU index + 1
+ * instead of the actual number of processors.
+ */
+
+/********************
+ * last_cpu_location
+ */
+
+/* Fill `set` with the PU the calling thread last ran on.
+ * Requires GetCurrentProcessorNumberEx on multi-group machines, or
+ * GetCurrentProcessorNumber when there is a single processor group. */
+static int
+hwloc_win_get_thisthread_last_cpu_location(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_cpuset_t set, int flags __hwloc_attribute_unused)
+{
+  assert(GetCurrentProcessorNumberExProc || (GetCurrentProcessorNumberProc && nr_processor_groups == 1));
+
+  if (nr_processor_groups > 1 || !GetCurrentProcessorNumberProc) {
+    PROCESSOR_NUMBER num;
+    GetCurrentProcessorNumberExProc(&num);
+    /* PU physical indexes are reserved 64-per-group, see the comment above. */
+    hwloc_bitmap_from_ith_ULONG_PTR(set, num.Group, ((ULONG_PTR)1) << num.Number);
+    return 0;
+  }
+
+  /* Single group: the plain processor number indexes group 0 directly. */
+  hwloc_bitmap_from_ith_ULONG_PTR(set, 0, ((ULONG_PTR)1) << GetCurrentProcessorNumberProc());
+  return 0;
+}
+
+/* TODO: hwloc_win_get_thisproc_last_cpu_location() using
+ * CreateToolhelp32Snapshot(), Thread32First/Next()
+ * th.th32OwnerProcessID == GetCurrentProcessId() for filtering within process
+ * OpenThread(THREAD_SET_INFORMATION|THREAD_QUERY_INFORMATION, FALSE, te32.th32ThreadID) to get a handle.
+ */
+
+
+/******************************
+ * set cpu/membind for threads
+ */
+
+/* TODO: SetThreadIdealProcessor{,Ex} */
+
+/* Bind thread `thread` to the PUs in `hwloc_set`.
+ * Fails with ENOSYS when the set does not fit in a single processor
+ * group, since Windows can only bind a thread within one group. */
+static int
+hwloc_win_set_thread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_thread_t thread, hwloc_const_bitmap_t hwloc_set, int flags)
+{
+  DWORD_PTR mask;
+  unsigned group;
+
+  if (flags & HWLOC_CPUBIND_NOMEMBIND) {
+    errno = ENOSYS;
+    return -1;
+  }
+
+  /* The whole target set must live in a single processor group. */
+  if (hwloc_bitmap_to_single_ULONG_PTR(hwloc_set, &group, &mask) < 0) {
+    errno = ENOSYS;
+    return -1;
+  }
+
+  assert(nr_processor_groups == 1 || SetThreadGroupAffinityProc);
+
+  if (nr_processor_groups > 1) {
+    GROUP_AFFINITY aff;
+    memset(&aff, 0, sizeof(aff)); /* we get Invalid Parameter error if Reserved field isn't cleared */
+    aff.Group = group;
+    aff.Mask = mask;
+    if (!SetThreadGroupAffinityProc(thread, &aff, NULL))
+      return -1;
+
+  } else {
+    /* SetThreadAffinityMask() only changes the mask inside the current processor group */
+    /* The resulting binding is always strict */
+    if (!SetThreadAffinityMask(thread, mask))
+      return -1;
+  }
+  return 0;
+}
+
+/* Bind the calling thread by delegating to the generic thread binding. */
+static int
+hwloc_win_set_thisthread_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, int flags)
+{
+  hwloc_thread_t self = GetCurrentThread();
+  return hwloc_win_set_thread_cpubind(topology, self, hwloc_set, flags);
+}
+
+/* Emulate memory binding for the calling thread by CPU-binding it to the
+ * PUs of the given NUMA nodes (there is no per-thread memory policy here).
+ * Only DEFAULT and BIND policies are supported. */
+static int
+hwloc_win_set_thisthread_membind(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
+{
+  int ret;
+  hwloc_cpuset_t cpuset;
+
+  if ((policy != HWLOC_MEMBIND_DEFAULT && policy != HWLOC_MEMBIND_BIND)
+      || flags & HWLOC_MEMBIND_NOCPUBIND) {
+    errno = ENOSYS;
+    return -1;
+  }
+
+  cpuset = hwloc_bitmap_alloc();
+  hwloc_cpuset_from_nodeset(topology, cpuset, nodeset);
+  ret = hwloc_win_set_thisthread_cpubind(topology, cpuset,
+					 (flags & HWLOC_MEMBIND_STRICT) ? HWLOC_CPUBIND_STRICT : 0);
+  hwloc_bitmap_free(cpuset);
+  return ret;
+}
+
+
+/******************************
+ * get cpu/membind for threads
+ */
+
+/* Retrieve thread `thread`'s binding as a (group, mask) pair.
+ * Requires GetThreadGroupAffinity (asserted below). */
+static int
+hwloc_win_get_thread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_thread_t thread, hwloc_cpuset_t set, int flags __hwloc_attribute_unused)
+{
+  GROUP_AFFINITY aff;
+
+  assert(GetThreadGroupAffinityProc);
+
+  if (!GetThreadGroupAffinityProc(thread, &aff))
+    return -1;
+  hwloc_bitmap_from_ith_ULONG_PTR(set, aff.Group, aff.Mask);
+  return 0;
+}
+
+/* Query the calling thread's binding. */
+static int
+hwloc_win_get_thisthread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_cpuset_t set, int flags __hwloc_attribute_unused)
+{
+  hwloc_thread_t self = GetCurrentThread();
+  return hwloc_win_get_thread_cpubind(topology, self, set, flags);
+}
+
+/* Windows exposes no per-thread memory policy: report the calling
+ * thread's memory binding as BIND over the nodes of its CPU binding. */
+static int
+hwloc_win_get_thisthread_membind(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags)
+{
+  hwloc_cpuset_t cpus = hwloc_bitmap_alloc();
+  int err = hwloc_win_get_thread_cpubind(topology, GetCurrentThread(), cpus, flags);
+  if (!err) {
+    *policy = HWLOC_MEMBIND_BIND;
+    hwloc_cpuset_to_nodeset(topology, cpus, nodeset);
+  }
+  hwloc_bitmap_free(cpus);
+  return err;
+}
+
+
+/********************************
+ * set cpu/membind for processes
+ */
+
+/* Bind process `proc` to `hwloc_set`.
+ * Only supported on single-group machines (asserted), because
+ * SetProcessAffinityMask() cannot express a multi-group mask. */
+static int
+hwloc_win_set_proc_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_pid_t proc, hwloc_const_bitmap_t hwloc_set, int flags)
+{
+  DWORD_PTR mask;
+
+  assert(nr_processor_groups == 1);
+
+  if (flags & HWLOC_CPUBIND_NOMEMBIND) {
+    errno = ENOSYS;
+    return -1;
+  }
+
+  /* TODO: SetThreadGroupAffinity() for all threads doesn't enforce the whole process affinity,
+   * maybe because of process-specific resource locality */
+  /* TODO: if we are in a single group (check with GetProcessGroupAffinity()),
+   * SetProcessAffinityMask() changes the binding within that same group.
+   */
+  /* TODO: NtSetInformationProcess() works very well for binding to any mask in a single group,
+   * but it's an internal routine.
+   */
+  /* TODO: checks whether hwloc-bind.c needs to pass INHERIT_PARENT_AFFINITY to CreateProcess() instead of execvp(). */
+
+  /* The resulting binding is always strict */
+  mask = hwloc_bitmap_to_ULONG_PTR(hwloc_set);
+  if (!SetProcessAffinityMask(proc, mask))
+    return -1;
+  return 0;
+}
+
+/* Bind the whole current process. */
+static int
+hwloc_win_set_thisproc_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, int flags)
+{
+  hwloc_pid_t self = GetCurrentProcess();
+  return hwloc_win_set_proc_cpubind(topology, self, hwloc_set, flags);
+}
+
+/* Emulate process memory binding by CPU-binding the process to the PUs
+ * of the given NUMA nodes. Only DEFAULT and BIND policies are supported. */
+static int
+hwloc_win_set_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
+{
+  int ret;
+  hwloc_cpuset_t cpuset;
+
+  if ((policy != HWLOC_MEMBIND_DEFAULT && policy != HWLOC_MEMBIND_BIND)
+      || flags & HWLOC_MEMBIND_NOCPUBIND) {
+    errno = ENOSYS;
+    return -1;
+  }
+
+  cpuset = hwloc_bitmap_alloc();
+  hwloc_cpuset_from_nodeset(topology, cpuset, nodeset);
+  ret = hwloc_win_set_proc_cpubind(topology, pid, cpuset,
+				   (flags & HWLOC_MEMBIND_STRICT) ? HWLOC_CPUBIND_STRICT : 0);
+  hwloc_bitmap_free(cpuset);
+  return ret;
+}
+
+/* Set memory binding of the current process. */
+static int
+hwloc_win_set_thisproc_membind(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
+{
+  hwloc_pid_t self = GetCurrentProcess();
+  return hwloc_win_set_proc_membind(topology, self, nodeset, policy, flags);
+}
+
+
+/********************************
+ * get cpu/membind for processes
+ */
+
+/* Retrieve process `proc`'s binding via GetProcessAffinityMask().
+ * Only supported on single-group machines (asserted), since the mask
+ * only describes one processor group. */
+static int
+hwloc_win_get_proc_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_pid_t proc, hwloc_bitmap_t hwloc_set, int flags)
+{
+  DWORD_PTR proc_mask, sys_mask;
+
+  assert(nr_processor_groups == 1);
+
+  if (flags & HWLOC_CPUBIND_NOMEMBIND) {
+    errno = ENOSYS;
+    return -1;
+  }
+
+  /* TODO: if we are in a single group (check with GetProcessGroupAffinity()),
+   * GetProcessAffinityMask() gives the mask within that group.
+   */
+  /* TODO: if we are in multiple groups, GetProcessGroupAffinity() gives their IDs,
+   * but we don't know their masks.
+   */
+  /* TODO: GetThreadGroupAffinity() for all threads can be smaller than the whole process affinity,
+   * maybe because of process-specific resource locality.
+   */
+
+  if (!GetProcessAffinityMask(proc, &proc_mask, &sys_mask))
+    return -1;
+  hwloc_bitmap_from_ULONG_PTR(hwloc_set, proc_mask);
+  return 0;
+}
+
+/* Report a process' memory binding as BIND over the NUMA nodes
+ * covered by its CPU binding (Windows has no separate policy to query). */
+static int
+hwloc_win_get_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags)
+{
+  hwloc_cpuset_t cpus = hwloc_bitmap_alloc();
+  int err = hwloc_win_get_proc_cpubind(topology, pid, cpus,
+				       (flags & HWLOC_MEMBIND_STRICT) ? HWLOC_CPUBIND_STRICT : 0);
+  if (!err) {
+    *policy = HWLOC_MEMBIND_BIND;
+    hwloc_cpuset_to_nodeset(topology, cpus, nodeset);
+  }
+  hwloc_bitmap_free(cpus);
+  return err;
+}
+
+/* Query CPU binding of the current process. */
+static int
+hwloc_win_get_thisproc_cpubind(hwloc_topology_t topology, hwloc_bitmap_t hwloc_cpuset, int flags)
+{
+  hwloc_pid_t self = GetCurrentProcess();
+  return hwloc_win_get_proc_cpubind(topology, self, hwloc_cpuset, flags);
+}
+
+/* Query memory binding of the current process. */
+static int
+hwloc_win_get_thisproc_membind(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags)
+{
+  hwloc_pid_t self = GetCurrentProcess();
+  return hwloc_win_get_proc_membind(topology, self, nodeset, policy, flags);
+}
+
+
+/************************
+ * membind alloc/free
+ */
+
+/* Default allocator: plain VirtualAlloc with no NUMA preference. */
+static void *
+hwloc_win_alloc(hwloc_topology_t topology __hwloc_attribute_unused, size_t len) {
+  DWORD alloc_type = MEM_COMMIT|MEM_RESERVE;
+  return VirtualAlloc(NULL, len, alloc_type, PAGE_EXECUTE_READWRITE);
+}
+
+/* Allocate `len` bytes bound to the single NUMA node in `nodeset` via
+ * VirtualAllocExNuma(). Only DEFAULT/BIND policies and single-node sets
+ * are honored; otherwise fall back to a plain allocation (or fail when
+ * HWLOC_MEMBIND_STRICT is requested).
+ * NOTE(review): assumes VirtualAllocExNumaProc was resolved; presumably
+ * this hook is only installed when it is — confirm at hook setup. */
+static void *
+hwloc_win_alloc_membind(hwloc_topology_t topology __hwloc_attribute_unused, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) {
+  int node;
+
+  switch (policy) {
+    case HWLOC_MEMBIND_DEFAULT:
+    case HWLOC_MEMBIND_BIND:
+      break;
+    default:
+      errno = ENOSYS;
+      return hwloc_alloc_or_fail(topology, len, flags);
+  }
+
+  if (flags & HWLOC_MEMBIND_STRICT) {
+    errno = ENOSYS;
+    return NULL;
+  }
+
+  if (hwloc_bitmap_weight(nodeset) != 1) {
+    /* Not a single node, can't do this */
+    errno = EXDEV;
+    return hwloc_alloc_or_fail(topology, len, flags);
+  }
+
+  node = hwloc_bitmap_first(nodeset);
+  return VirtualAllocExNumaProc(GetCurrentProcess(), NULL, len, MEM_COMMIT|MEM_RESERVE, PAGE_EXECUTE_READWRITE, node);
+}
+
+/* Release memory obtained from hwloc_win_alloc_membind(). */
+static int
+hwloc_win_free_membind(hwloc_topology_t topology __hwloc_attribute_unused, void *addr, size_t len __hwloc_attribute_unused) {
+  if (addr == NULL)
+    return 0;
+  return VirtualFreeExProc(GetCurrentProcess(), addr, 0, MEM_RELEASE) ? 0 : -1;
+}
+
+
+/**********************
+ * membind for areas
+ */
+
+/* Report which NUMA node(s) back the pages of [addr, addr+len) by querying
+ * QueryWorkingSetEx on every page of the range.
+ * With HWLOC_MEMBIND_STRICT, fails with EXDEV unless all pages are on the
+ * same node; otherwise returns the union of the pages' nodes. */
+static int
+hwloc_win_get_area_membind(hwloc_topology_t topology __hwloc_attribute_unused, const void *addr, size_t len, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags)
+{
+  SYSTEM_INFO SystemInfo;
+  DWORD page_size;
+  uintptr_t start;
+  unsigned nb;
+
+  GetSystemInfo(&SystemInfo);
+  page_size = SystemInfo.dwPageSize;
+
+  /* Round the range outwards to whole pages. */
+  start = (((uintptr_t) addr) / page_size) * page_size;
+  nb = (unsigned)((((uintptr_t) addr + len - start) + page_size - 1) / page_size);
+
+  if (!nb)
+    nb = 1;
+
+  {
+    PSAPI_WORKING_SET_EX_INFORMATION *pv;
+    unsigned i;
+
+    pv = calloc(nb, sizeof(*pv));
+    if (!pv) /* fixed: previously dereferenced without checking the allocation */
+      return -1;
+
+    for (i = 0; i < nb; i++)
+      pv[i].VirtualAddress = (void*) (start + i * page_size);
+    if (!QueryWorkingSetExProc(GetCurrentProcess(), pv, nb * sizeof(*pv))) {
+      free(pv);
+      return -1;
+    }
+    *policy = HWLOC_MEMBIND_BIND;
+    if (flags & HWLOC_MEMBIND_STRICT) {
+      /* STRICT: all pages must be on the same node. */
+      unsigned node = pv[0].VirtualAttributes.Node;
+      for (i = 1; i < nb; i++) {
+	if (pv[i].VirtualAttributes.Node != node) {
+	  errno = EXDEV;
+          free(pv);
+	  return -1;
+	}
+      }
+      hwloc_bitmap_only(nodeset, node);
+      free(pv);
+      return 0;
+    }
+    /* Non-strict: accumulate the nodes of every page. */
+    hwloc_bitmap_zero(nodeset);
+    for (i = 0; i < nb; i++)
+      hwloc_bitmap_set(nodeset, pv[i].VirtualAttributes.Node);
+    free(pv);
+    return 0;
+  }
+}
+
+
+/*************************
+ * discovery
+ */
+
+static int
+hwloc_look_windows(struct hwloc_backend *backend)
+{
+  struct hwloc_topology *topology = backend->topology;
+  hwloc_bitmap_t groups_pu_set = NULL;
+  SYSTEM_INFO SystemInfo;
+  DWORD length;
+
+  if (topology->levels[0][0]->cpuset)
+    /* somebody discovered things */
+    return -1;
+
+  hwloc_alloc_obj_cpusets(topology->levels[0][0]);
+
+  GetSystemInfo(&SystemInfo);
+
+  if (!GetLogicalProcessorInformationExProc && GetLogicalProcessorInformationProc) {
+      PSYSTEM_LOGICAL_PROCESSOR_INFORMATION procInfo, tmpprocInfo;
+      unsigned id;
+      unsigned i;
+      struct hwloc_obj *obj;
+      hwloc_obj_type_t type;
+
+      length = 0;
+      procInfo = NULL;
+
+      while (1) {
+	if (GetLogicalProcessorInformationProc(procInfo, &length))
+	  break;
+	if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
+	  return -1;
+	tmpprocInfo = realloc(procInfo, length);
+	if (!tmpprocInfo) {
+	  free(procInfo);
+	  goto out;
+	}
+	procInfo = tmpprocInfo;
+      }
+
+      assert(!length || procInfo);
+
+      for (i = 0; i < length / sizeof(*procInfo); i++) {
+
+        /* Ignore unknown caches */
+	if (procInfo->Relationship == RelationCache
+		&& procInfo->Cache.Type != CacheUnified
+		&& procInfo->Cache.Type != CacheData
+		&& procInfo->Cache.Type != CacheInstruction)
+	  continue;
+
+	id = -1;
+	switch (procInfo[i].Relationship) {
+	  case RelationNumaNode:
+	    type = HWLOC_OBJ_NUMANODE;
+	    id = procInfo[i].NumaNode.NodeNumber;
+	    break;
+	  case RelationProcessorPackage:
+	    type = HWLOC_OBJ_PACKAGE;
+	    break;
+	  case RelationCache:
+	    type = (procInfo[i].Cache.Type == CacheInstruction ? HWLOC_OBJ_L1ICACHE : HWLOC_OBJ_L1CACHE) + procInfo[i].Cache.Level - 1;
+	    break;
+	  case RelationProcessorCore:
+	    type = HWLOC_OBJ_CORE;
+	    break;
+	  case RelationGroup:
+	  default:
+	    type = HWLOC_OBJ_GROUP;
+	    break;
+	}
+
+	if (!hwloc_filter_check_keep_object_type(topology, type))
+	  continue;
+
+	obj = hwloc_alloc_setup_object(topology, type, id);
+        obj->cpuset = hwloc_bitmap_alloc();
+	hwloc_debug("%s#%u mask %lx\n", hwloc_type_name(type), id, procInfo[i].ProcessorMask);
+	/* ProcessorMask is a ULONG_PTR */
+	hwloc_bitmap_set_ith_ULONG_PTR(obj->cpuset, 0, procInfo[i].ProcessorMask);
+	hwloc_debug_2args_bitmap("%s#%u bitmap %s\n", hwloc_type_name(type), id, obj->cpuset);
+
+	switch (type) {
+	  case HWLOC_OBJ_NUMANODE:
+	    {
+	      ULONGLONG avail;
+	      obj->nodeset = hwloc_bitmap_alloc();
+	      hwloc_bitmap_set(obj->nodeset, id);
+	      if ((GetNumaAvailableMemoryNodeExProc && GetNumaAvailableMemoryNodeExProc(id, &avail))
+	       || (GetNumaAvailableMemoryNodeProc && GetNumaAvailableMemoryNodeProc(id, &avail)))
+		obj->memory.local_memory = avail;
+	      obj->memory.page_types_len = 2;
+	      obj->memory.page_types = malloc(2 * sizeof(*obj->memory.page_types));
+	      memset(obj->memory.page_types, 0, 2 * sizeof(*obj->memory.page_types));
+	      obj->memory.page_types_len = 1;
+	      obj->memory.page_types[0].size = SystemInfo.dwPageSize;
+#if HAVE_DECL__SC_LARGE_PAGESIZE
+	      obj->memory.page_types_len++;
+	      obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
+#endif
+	      break;
+	    }
+	  case HWLOC_OBJ_L1CACHE:
+	  case HWLOC_OBJ_L2CACHE:
+	  case HWLOC_OBJ_L3CACHE:
+	  case HWLOC_OBJ_L4CACHE:
+	  case HWLOC_OBJ_L5CACHE:
+	  case HWLOC_OBJ_L1ICACHE:
+	  case HWLOC_OBJ_L2ICACHE:
+	  case HWLOC_OBJ_L3ICACHE:
+	    obj->attr->cache.size = procInfo[i].Cache.Size;
+	    obj->attr->cache.associativity = procInfo[i].Cache.Associativity == CACHE_FULLY_ASSOCIATIVE ? -1 : procInfo[i].Cache.Associativity ;
+	    obj->attr->cache.linesize = procInfo[i].Cache.LineSize;
+	    obj->attr->cache.depth = procInfo[i].Cache.Level;
+	    switch (procInfo->Cache.Type) {
+	      case CacheUnified:
+		obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
+		break;
+	      case CacheData:
+		obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
+		break;
+	      case CacheInstruction:
+		obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
+		break;
+	      default:
+		hwloc_free_unlinked_object(obj);
+		continue;
+	    }
+	    break;
+	  case HWLOC_OBJ_GROUP:
+	    obj->attr->group.kind = procInfo[i].Relationship == RelationGroup ? HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP : HWLOC_GROUP_KIND_WINDOWS_RELATIONSHIP_UNKNOWN;
+	    break;
+	  default:
+	    break;
+	}
+	hwloc_insert_object_by_cpuset(topology, obj);
+      }
+
+      free(procInfo);
+  }
+
+  if (GetLogicalProcessorInformationExProc) {
+      PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX procInfoTotal, tmpprocInfoTotal, procInfo;
+      unsigned id;
+      struct hwloc_obj *obj;
+      hwloc_obj_type_t type;
+
+      length = 0;
+      procInfoTotal = NULL;
+
+      while (1) {
+	if (GetLogicalProcessorInformationExProc(RelationAll, procInfoTotal, &length))
+	  break;
+	if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
+	  return -1;
+        tmpprocInfoTotal = realloc(procInfoTotal, length);
+	if (!tmpprocInfoTotal) {
+	  free(procInfoTotal);
+	  goto out;
+	}
+	procInfoTotal = tmpprocInfoTotal;
+      }
+
+      for (procInfo = procInfoTotal;
+	   (void*) procInfo < (void*) ((uintptr_t) procInfoTotal + length);
+	   procInfo = (void*) ((uintptr_t) procInfo + procInfo->Size)) {
+        unsigned num, i;
+        GROUP_AFFINITY *GroupMask;
+
+        /* Ignore unknown caches */
+	if (procInfo->Relationship == RelationCache
+		&& procInfo->Cache.Type != CacheUnified
+		&& procInfo->Cache.Type != CacheData
+		&& procInfo->Cache.Type != CacheInstruction)
+	  continue;
+
+	id = -1;
+	switch (procInfo->Relationship) {
+	  case RelationNumaNode:
+	    type = HWLOC_OBJ_NUMANODE;
+            num = 1;
+            GroupMask = &procInfo->NumaNode.GroupMask;
+	    id = procInfo->NumaNode.NodeNumber;
+	    break;
+	  case RelationProcessorPackage:
+	    type = HWLOC_OBJ_PACKAGE;
+            num = procInfo->Processor.GroupCount;
+            GroupMask = procInfo->Processor.GroupMask;
+	    break;
+	  case RelationCache:
+	    type = (procInfo->Cache.Type == CacheInstruction ? HWLOC_OBJ_L1ICACHE : HWLOC_OBJ_L1CACHE) + procInfo->Cache.Level - 1;
+            num = 1;
+            GroupMask = &procInfo->Cache.GroupMask;
+	    break;
+	  case RelationProcessorCore:
+	    type = HWLOC_OBJ_CORE;
+            num = procInfo->Processor.GroupCount;
+            GroupMask = procInfo->Processor.GroupMask;
+	    break;
+	  case RelationGroup:
+	    /* So strange an interface... */
+	    for (id = 0; id < procInfo->Group.ActiveGroupCount; id++) {
+              KAFFINITY mask;
+	      hwloc_bitmap_t set;
+
+	      set = hwloc_bitmap_alloc();
+	      mask = procInfo->Group.GroupInfo[id].ActiveProcessorMask;
+	      hwloc_debug("group %u %d cpus mask %lx\n", id,
+			  procInfo->Group.GroupInfo[id].ActiveProcessorCount, mask);
+	      /* KAFFINITY is ULONG_PTR */
+	      hwloc_bitmap_set_ith_ULONG_PTR(set, id, mask);
+	      /* FIXME: what if running 32bits on a 64bits windows with 64-processor groups?
+	       * ULONG_PTR is 32bits, so half the group is invisible?
+	       * maybe scale id to id*8/sizeof(ULONG_PTR) so that groups are 64-PU aligned?
+	       */
+	      hwloc_debug_2args_bitmap("group %u %d bitmap %s\n", id, procInfo->Group.GroupInfo[id].ActiveProcessorCount, set);
+
+	      /* save the set of PUs so that we can create them at the end */
+	      if (!groups_pu_set)
+		groups_pu_set = hwloc_bitmap_alloc();
+	      hwloc_bitmap_or(groups_pu_set, groups_pu_set, set);
+
+	      if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_GROUP)) {
+		obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, id);
+		obj->cpuset = set;
+		obj->attr->group.kind = HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP;
+		hwloc_insert_object_by_cpuset(topology, obj);
+	      } else
+		hwloc_bitmap_free(set);
+	    }
+	    continue;
+	  default:
+	    /* Don't know how to get the mask.  */
+            hwloc_debug("unknown relation %d\n", procInfo->Relationship);
+	    continue;
+	}
+
+	if (!hwloc_filter_check_keep_object_type(topology, type))
+	  continue;
+
+	obj = hwloc_alloc_setup_object(topology, type, id);
+        obj->cpuset = hwloc_bitmap_alloc();
+        for (i = 0; i < num; i++) {
+          hwloc_debug("%s#%u %d: mask %d:%lx\n", hwloc_type_name(type), id, i, GroupMask[i].Group, GroupMask[i].Mask);
+	  /* GROUP_AFFINITY.Mask is KAFFINITY, which is ULONG_PTR */
+	  hwloc_bitmap_set_ith_ULONG_PTR(obj->cpuset, GroupMask[i].Group, GroupMask[i].Mask);
+	  /* FIXME: scale id to id*8/sizeof(ULONG_PTR) as above? */
+        }
+	hwloc_debug_2args_bitmap("%s#%u bitmap %s\n", hwloc_type_name(type), id, obj->cpuset);
+	switch (type) {
+	  case HWLOC_OBJ_NUMANODE:
+	    {
+	      ULONGLONG avail;
+	      obj->nodeset = hwloc_bitmap_alloc();
+	      hwloc_bitmap_set(obj->nodeset, id);
+	      if ((GetNumaAvailableMemoryNodeExProc && GetNumaAvailableMemoryNodeExProc(id, &avail))
+	       || (GetNumaAvailableMemoryNodeProc && GetNumaAvailableMemoryNodeProc(id, &avail)))
+	        obj->memory.local_memory = avail;
+	      obj->memory.page_types = malloc(2 * sizeof(*obj->memory.page_types));
+	      memset(obj->memory.page_types, 0, 2 * sizeof(*obj->memory.page_types));
+	      obj->memory.page_types_len = 1;
+	      obj->memory.page_types[0].size = SystemInfo.dwPageSize;
+#if HAVE_DECL__SC_LARGE_PAGESIZE
+	      obj->memory.page_types_len++;
+	      obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
+#endif
+	      break;
+	    }
+	  case HWLOC_OBJ_L1CACHE:
+	  case HWLOC_OBJ_L2CACHE:
+	  case HWLOC_OBJ_L3CACHE:
+	  case HWLOC_OBJ_L4CACHE:
+	  case HWLOC_OBJ_L5CACHE:
+	  case HWLOC_OBJ_L1ICACHE:
+	  case HWLOC_OBJ_L2ICACHE:
+	  case HWLOC_OBJ_L3ICACHE:
+	    obj->attr->cache.size = procInfo->Cache.CacheSize;
+	    obj->attr->cache.associativity = procInfo->Cache.Associativity == CACHE_FULLY_ASSOCIATIVE ? -1 : procInfo->Cache.Associativity ;
+	    obj->attr->cache.linesize = procInfo->Cache.LineSize;
+	    obj->attr->cache.depth = procInfo->Cache.Level;
+	    switch (procInfo->Cache.Type) {
+	      case CacheUnified:
+		obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
+		break;
+	      case CacheData:
+		obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
+		break;
+	      case CacheInstruction:
+		obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
+		break;
+	      default:
+		hwloc_free_unlinked_object(obj);
+		continue;
+	    }
+	    break;
+	  default:
+	    break;
+	}
+	hwloc_insert_object_by_cpuset(topology, obj);
+      }
+      free(procInfoTotal);
+  }
+
+  if (groups_pu_set) {
+    /* the system supports multiple Groups.
+     * PU indexes may be discontiguous, especially if Groups contain less than 64 procs.
+     */
+    hwloc_obj_t obj;
+    unsigned idx;
+    hwloc_bitmap_foreach_begin(idx, groups_pu_set) {
+      obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PU, idx);
+      obj->cpuset = hwloc_bitmap_alloc();
+      hwloc_bitmap_only(obj->cpuset, idx);
+      hwloc_debug_1arg_bitmap("cpu %u has cpuset %s\n",
+			      idx, obj->cpuset);
+      hwloc_insert_object_by_cpuset(topology, obj);
+    } hwloc_bitmap_foreach_end();
+    hwloc_bitmap_free(groups_pu_set);
+  } else {
+    /* no processor groups */
+    SYSTEM_INFO sysinfo;
+    hwloc_obj_t obj;
+    unsigned idx;
+    GetSystemInfo(&sysinfo);
+    for(idx=0; idx<32; idx++)
+      if (sysinfo.dwActiveProcessorMask & (((DWORD_PTR)1)<<idx)) {
+	obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PU, idx);
+	obj->cpuset = hwloc_bitmap_alloc();
+	hwloc_bitmap_only(obj->cpuset, idx);
+	hwloc_debug_1arg_bitmap("cpu %u has cpuset %s\n",
+				idx, obj->cpuset);
+	hwloc_insert_object_by_cpuset(topology, obj);
+      }
+  }
+
+ out:
+  hwloc_obj_add_info(topology->levels[0][0], "Backend", "Windows");
+  hwloc_add_uname_info(topology, NULL);
+  return 0;
+}
+
+void
+hwloc_set_windows_hooks(struct hwloc_binding_hooks *hooks,
+			struct hwloc_topology_support *support)
+{
+  if (GetCurrentProcessorNumberExProc || (GetCurrentProcessorNumberProc && nr_processor_groups == 1))
+    hooks->get_thisthread_last_cpu_location = hwloc_win_get_thisthread_last_cpu_location;
+
+  if (nr_processor_groups == 1) {
+    hooks->set_proc_cpubind = hwloc_win_set_proc_cpubind;
+    hooks->get_proc_cpubind = hwloc_win_get_proc_cpubind;
+    hooks->set_thisproc_cpubind = hwloc_win_set_thisproc_cpubind;
+    hooks->get_thisproc_cpubind = hwloc_win_get_thisproc_cpubind;
+    hooks->set_proc_membind = hwloc_win_set_proc_membind;
+    hooks->get_proc_membind = hwloc_win_get_proc_membind;
+    hooks->set_thisproc_membind = hwloc_win_set_thisproc_membind;
+    hooks->get_thisproc_membind = hwloc_win_get_thisproc_membind;
+  }
+  if (nr_processor_groups == 1 || SetThreadGroupAffinityProc) {
+    hooks->set_thread_cpubind = hwloc_win_set_thread_cpubind;
+    hooks->set_thisthread_cpubind = hwloc_win_set_thisthread_cpubind;
+    hooks->set_thisthread_membind = hwloc_win_set_thisthread_membind;
+  }
+  if (GetThreadGroupAffinityProc) {
+    hooks->get_thread_cpubind = hwloc_win_get_thread_cpubind;
+    hooks->get_thisthread_cpubind = hwloc_win_get_thisthread_cpubind;
+    hooks->get_thisthread_membind = hwloc_win_get_thisthread_membind;
+  }
+
+  if (VirtualAllocExNumaProc) {
+    hooks->alloc_membind = hwloc_win_alloc_membind;
+    hooks->alloc = hwloc_win_alloc;
+    hooks->free_membind = hwloc_win_free_membind;
+    support->membind->bind_membind = 1;
+  }
+
+  if (QueryWorkingSetExProc)
+    hooks->get_area_membind = hwloc_win_get_area_membind;
+}
+
+static int hwloc_windows_component_init(unsigned long flags __hwloc_attribute_unused)
+{
+  hwloc_win_get_function_ptrs();
+  return 0;
+}
+
+static void hwloc_windows_component_finalize(unsigned long flags __hwloc_attribute_unused)
+{
+}
+
+static struct hwloc_backend *
+hwloc_windows_component_instantiate(struct hwloc_disc_component *component,
+				    const void *_data1 __hwloc_attribute_unused,
+				    const void *_data2 __hwloc_attribute_unused,
+				    const void *_data3 __hwloc_attribute_unused)
+{
+  struct hwloc_backend *backend;
+  backend = hwloc_backend_alloc(component);
+  if (!backend)
+    return NULL;
+  backend->discover = hwloc_look_windows;
+  return backend;
+}
+
+static struct hwloc_disc_component hwloc_windows_disc_component = {
+  HWLOC_DISC_COMPONENT_TYPE_CPU,
+  "windows",
+  HWLOC_DISC_COMPONENT_TYPE_GLOBAL,
+  hwloc_windows_component_instantiate,
+  50,
+  NULL
+};
+
+const struct hwloc_component hwloc_windows_component = {
+  HWLOC_COMPONENT_ABI,
+  hwloc_windows_component_init, hwloc_windows_component_finalize,
+  HWLOC_COMPONENT_TYPE_DISC,
+  0,
+  &hwloc_windows_disc_component
+};
+
+unsigned
+hwloc_fallback_nbprocessors(struct hwloc_topology *topology) {
+  int n;
+  SYSTEM_INFO sysinfo;
+
+  /* by default, ignore groups (return only the number in the current group) */
+  GetSystemInfo(&sysinfo);
+  n = sysinfo.dwNumberOfProcessors; /* FIXME could be non-contigous, rather return a mask from dwActiveProcessorMask? */
+
+  if (nr_processor_groups > 1) {
+    /* assume n-1 groups are complete, since that's how we store things in cpusets */
+    if (GetActiveProcessorCountProc)
+      n = MAXIMUM_PROC_PER_GROUP*(nr_processor_groups-1)
+	+ GetActiveProcessorCountProc((WORD)nr_processor_groups-1);
+    else
+      n = MAXIMUM_PROC_PER_GROUP*nr_processor_groups;
+  }
+
+  if (n >= 1)
+    topology->support.discovery->pu = 1;
+  else
+    n = 1;
+  return n;
+}
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-x86.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-x86.c
new file mode 100644
index 0000000000..f0336587e9
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-x86.c
@@ -0,0 +1,1437 @@
+/*
+ * Copyright © 2010-2017 Inria.  All rights reserved.
+ * Copyright © 2010-2013 Université Bordeaux
+ * Copyright © 2010-2011 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ *
+ *
+ * This backend is only used when the operating system does not export
+ * the necessary hardware topology information to user-space applications.
+ * Currently, only the FreeBSD backend relies on this x86 backend.
+ *
+ * Other backends such as Linux have their own way to retrieve various
+ * pieces of hardware topology information from the operating system
+ * on various architectures, without having to use this x86-specific code.
+ */
+
+#include <private/autogen/config.h>
+#include <hwloc.h>
+#include <private/private.h>
+#include <private/debug.h>
+#include <private/misc.h>
+
+#include <private/cpuid-x86.h>
+
+#include <sys/types.h>
+#ifdef HAVE_DIRENT_H
+#include <dirent.h>
+#endif
+#ifdef HAVE_VALGRIND_VALGRIND_H
+#include <valgrind/valgrind.h>
+#endif
+
+struct hwloc_x86_backend_data_s {
+  unsigned nbprocs;
+  hwloc_bitmap_t apicid_set;
+  int apicid_unique;
+  char *src_cpuiddump_path;
+  int is_knl;
+};
+
+/************************************
+ * Management of cpuid dump as input
+ */
+
+struct cpuiddump {
+  unsigned nr;
+  struct cpuiddump_entry {
+    unsigned inmask; /* which of ine[abcd]x are set on input */
+    unsigned ineax;
+    unsigned inebx;
+    unsigned inecx;
+    unsigned inedx;
+    unsigned outeax;
+    unsigned outebx;
+    unsigned outecx;
+    unsigned outedx;
+  } *entries;
+};
+
+static void
+cpuiddump_free(struct cpuiddump *cpuiddump)
+{
+  if (cpuiddump->nr)
+    free(cpuiddump->entries);
+  free(cpuiddump);
+}
+
+static struct cpuiddump *
+cpuiddump_read(const char *dirpath, unsigned idx)
+{
+  struct cpuiddump *cpuiddump;
+  struct cpuiddump_entry *cur;
+  char *filename;
+  size_t filenamelen = strlen(dirpath) + 15;
+  FILE *file;
+  char line[128];
+  unsigned nr;
+
+  cpuiddump = malloc(sizeof(*cpuiddump));
+  cpuiddump->nr = 0; /* return a cpuiddump that will raise errors because it matches nothing */
+
+  filename = malloc(filenamelen);
+  snprintf(filename, filenamelen, "%s/pu%u", dirpath, idx);
+  file = fopen(filename, "r");
+  if (!file) {
+    fprintf(stderr, "Could not read dumped cpuid file %s\n", filename);
+    free(filename);
+    return cpuiddump;
+  }
+  free(filename);
+
+  nr = 0;
+  while (fgets(line, sizeof(line), file))
+    nr++;
+  cpuiddump->entries = malloc(nr * sizeof(struct cpuiddump_entry));
+
+  fseek(file, 0, SEEK_SET);
+  cur = &cpuiddump->entries[0];
+  nr = 0;
+  while (fgets(line, sizeof(line), file)) {
+    if (*line == '#')
+      continue;
+    if (sscanf(line, "%x %x %x %x %x => %x %x %x %x",
+	      &cur->inmask,
+	      &cur->ineax, &cur->inebx, &cur->inecx, &cur->inedx,
+	      &cur->outeax, &cur->outebx, &cur->outecx, &cur->outedx) == 9) {
+      cur++;
+      nr++;
+    }
+  }
+  cpuiddump->nr = nr;
+  fclose(file);
+  return cpuiddump;
+}
+
+static void
+cpuiddump_find_by_input(unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx, struct cpuiddump *cpuiddump)
+{
+  unsigned i;
+
+  for(i=0; i<cpuiddump->nr; i++) {
+    struct cpuiddump_entry *entry = &cpuiddump->entries[i];
+    if ((entry->inmask & 0x1) && *eax != entry->ineax)
+      continue;
+    if ((entry->inmask & 0x2) && *ebx != entry->inebx)
+      continue;
+    if ((entry->inmask & 0x4) && *ecx != entry->inecx)
+      continue;
+    if ((entry->inmask & 0x8) && *edx != entry->inedx)
+      continue;
+    *eax = entry->outeax;
+    *ebx = entry->outebx;
+    *ecx = entry->outecx;
+    *edx = entry->outedx;
+    return;
+  }
+
+  fprintf(stderr, "Couldn't find %x,%x,%x,%x in dumped cpuid, returning 0s.\n",
+	  *eax, *ebx, *ecx, *edx);
+  *eax = 0;
+  *ebx = 0;
+  *ecx = 0;
+  *edx = 0;
+}
+
+static void cpuid_or_from_dump(unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx, struct cpuiddump *src_cpuiddump)
+{
+  if (src_cpuiddump) {
+    cpuiddump_find_by_input(eax, ebx, ecx, edx, src_cpuiddump);
+  } else {
+    hwloc_x86_cpuid(eax, ebx, ecx, edx);
+  }
+}
+
+/*******************************
+ * Core detection routines and structures
+ */
+
+#define has_topoext(features) ((features)[6] & (1 << 22))
+#define has_x2apic(features) ((features)[4] & (1 << 21))
+
+struct cacheinfo {
+  hwloc_obj_cache_type_t type;
+  unsigned level;
+  unsigned nbthreads_sharing;
+  unsigned cacheid;
+
+  unsigned linesize;
+  unsigned linepart;
+  int inclusive;
+  int ways;
+  unsigned sets;
+  unsigned long size;
+};
+
+struct procinfo {
+  unsigned present;
+  unsigned apicid;
+  unsigned packageid;
+  unsigned nodeid;
+  unsigned unitid;
+  unsigned threadid;
+  unsigned coreid;
+  unsigned *otherids;
+  unsigned levels;
+  unsigned numcaches;
+  struct cacheinfo *cache;
+  char cpuvendor[13];
+  char cpumodel[3*4*4+1];
+  unsigned cpustepping;
+  unsigned cpumodelnumber;
+  unsigned cpufamilynumber;
+};
+
+enum cpuid_type {
+  intel,
+  amd,
+  unknown
+};
+
+static void fill_amd_cache(struct procinfo *infos, unsigned level, hwloc_obj_cache_type_t type, unsigned nbthreads_sharing, unsigned cpuid)
+{
+  struct cacheinfo *cache, *tmpcaches;
+  unsigned cachenum;
+  unsigned long size = 0;
+
+  if (level == 1)
+    size = ((cpuid >> 24)) << 10;
+  else if (level == 2)
+    size = ((cpuid >> 16)) << 10;
+  else if (level == 3)
+    size = ((cpuid >> 18)) << 19;
+  if (!size)
+    return;
+
+  tmpcaches = realloc(infos->cache, (infos->numcaches+1)*sizeof(*infos->cache));
+  if (!tmpcaches)
+    /* failed to allocate, ignore that cache */
+    return;
+  infos->cache = tmpcaches;
+  cachenum = infos->numcaches++;
+
+  cache = &infos->cache[cachenum];
+
+  cache->type = type;
+  cache->level = level;
+  cache->nbthreads_sharing = nbthreads_sharing;
+  cache->linesize = cpuid & 0xff;
+  cache->linepart = 0;
+  cache->inclusive = 0; /* old AMD (K8-K10) supposed to have exclusive caches */
+
+  if (level == 1) {
+    cache->ways = (cpuid >> 16) & 0xff;
+    if (cache->ways == 0xff)
+      /* Fully associative */
+      cache->ways = -1;
+  } else {
+    static const unsigned ways_tab[] = { 0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, -1 };
+    unsigned ways = (cpuid >> 12) & 0xf;
+    cache->ways = ways_tab[ways];
+  }
+  cache->size = size;
+  cache->sets = 0;
+
+  hwloc_debug("cache L%u t%u linesize %u ways %d size %luKB\n", cache->level, cache->nbthreads_sharing, cache->linesize, cache->ways, cache->size >> 10);
+}
+
+/* Fetch information from the processor itself thanks to cpuid and store it in
+ * infos for summarize to analyze them globally */
+static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, unsigned highest_cpuid, unsigned highest_ext_cpuid, unsigned *features, enum cpuid_type cpuid_type, struct cpuiddump *src_cpuiddump)
+{
+  struct hwloc_x86_backend_data_s *data = backend->private_data;
+  unsigned eax, ebx, ecx = 0, edx;
+  unsigned cachenum;
+  struct cacheinfo *cache;
+  unsigned regs[4];
+  unsigned legacy_max_log_proc; /* not valid on Intel processors with > 256 threads, or when cpuid 0x80000008 is supported */
+  unsigned legacy_log_proc_id;
+  unsigned _model, _extendedmodel, _family, _extendedfamily;
+
+  infos->present = 1;
+
+  /* Get apicid, legacy_max_log_proc, packageid, legacy_log_proc_id from cpuid 0x01 */
+  eax = 0x01;
+  cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
+  infos->apicid = ebx >> 24;
+  if (edx & (1 << 28))
+    legacy_max_log_proc = 1 << hwloc_flsl(((ebx >> 16) & 0xff) - 1);
+  else
+    legacy_max_log_proc = 1;
+  hwloc_debug("APIC ID 0x%02x legacy_max_log_proc %u\n", infos->apicid, legacy_max_log_proc);
+  infos->packageid = infos->apicid / legacy_max_log_proc;
+  legacy_log_proc_id = infos->apicid % legacy_max_log_proc;
+  hwloc_debug("phys %u legacy thread %u\n", infos->packageid, legacy_log_proc_id);
+
+  /* Get cpu model/family/stepping numbers from same cpuid */
+  _model          = (eax>>4) & 0xf;
+  _extendedmodel  = (eax>>16) & 0xf;
+  _family         = (eax>>8) & 0xf;
+  _extendedfamily = (eax>>20) & 0xff;
+  if ((cpuid_type == intel || cpuid_type == amd) && _family == 0xf) {
+    infos->cpufamilynumber = _family + _extendedfamily;
+  } else {
+    infos->cpufamilynumber = _family;
+  }
+  if ((cpuid_type == intel && (_family == 0x6 || _family == 0xf))
+      || (cpuid_type == amd && _family == 0xf)) {
+    infos->cpumodelnumber = _model + (_extendedmodel << 4);
+  } else {
+    infos->cpumodelnumber = _model;
+  }
+  infos->cpustepping = eax & 0xf;
+
+  if (cpuid_type == intel && infos->cpufamilynumber == 0x6 &&
+      (infos->cpumodelnumber == 0x57 || infos->cpumodelnumber == 0x85))
+    data->is_knl = 1; /* KNM is the same as KNL */
+
+  /* Get cpu vendor string from cpuid 0x00 */
+  memset(regs, 0, sizeof(regs));
+  regs[0] = 0;
+  cpuid_or_from_dump(&regs[0], &regs[1], &regs[3], &regs[2], src_cpuiddump);
+  memcpy(infos->cpuvendor, regs+1, 4*3);
+  /* infos was calloc'ed, already ends with \0 */
+
+  /* Get cpu model string from cpuid 0x80000002-4 */
+  if (highest_ext_cpuid >= 0x80000004) {
+    memset(regs, 0, sizeof(regs));
+    regs[0] = 0x80000002;
+    cpuid_or_from_dump(&regs[0], &regs[1], &regs[2], &regs[3], src_cpuiddump);
+    memcpy(infos->cpumodel, regs, 4*4);
+    regs[0] = 0x80000003;
+    cpuid_or_from_dump(&regs[0], &regs[1], &regs[2], &regs[3], src_cpuiddump);
+    memcpy(infos->cpumodel + 4*4, regs, 4*4);
+    regs[0] = 0x80000004;
+    cpuid_or_from_dump(&regs[0], &regs[1], &regs[2], &regs[3], src_cpuiddump);
+    memcpy(infos->cpumodel + 4*4*2, regs, 4*4);
+    /* infos was calloc'ed, already ends with \0 */
+  }
+
+  /* Get core/thread information from cpuid 0x80000008
+   * (not supported on Intel)
+   */
+  if (cpuid_type != intel && highest_ext_cpuid >= 0x80000008) {
+    unsigned max_nbcores;
+    unsigned max_nbthreads;
+    unsigned coreidsize;
+    unsigned logprocid;
+    eax = 0x80000008;
+    cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
+    coreidsize = (ecx >> 12) & 0xf;
+    hwloc_debug("core ID size: %u\n", coreidsize);
+    if (!coreidsize) {
+      max_nbcores = (ecx & 0xff) + 1;
+    } else
+      max_nbcores = 1 << coreidsize;
+    hwloc_debug("Thus max # of cores: %u\n", max_nbcores);
+    /* Still no multithreaded AMD */
+    max_nbthreads = 1 ;
+    hwloc_debug("and max # of threads: %u\n", max_nbthreads);
+    /* legacy_max_log_proc is deprecated, it can be smaller than max_nbcores,
+     * which is the maximum number of cores that the processor could theoretically support
+     * (see "Multiple Core Calculation" in the AMD CPUID specification).
+     * Recompute packageid/threadid/coreid accordingly.
+     */
+    infos->packageid = infos->apicid / max_nbcores;
+    logprocid = infos->apicid % max_nbcores;
+    infos->threadid = logprocid % max_nbthreads;
+    infos->coreid = logprocid / max_nbthreads;
+    hwloc_debug("this is thread %u of core %u\n", infos->threadid, infos->coreid);
+  }
+
+  infos->numcaches = 0;
+  infos->cache = NULL;
+
+  /* Get apicid, nodeid, unitid from cpuid 0x8000001e
+   * and cache information from cpuid 0x8000001d
+   * (AMD topology extension)
+   */
+  if (cpuid_type != intel && has_topoext(features)) {
+    unsigned apic_id, node_id, nodes_per_proc;
+
+    eax = 0x8000001e;
+    cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
+    infos->apicid = apic_id = eax;
+
+    if (infos->cpufamilynumber == 0x16) {
+      /* ecx is reserved */
+      node_id = 0;
+      nodes_per_proc = 1;
+    } else {
+      node_id = ecx & 0xff;
+      nodes_per_proc = ((ecx >> 8) & 7) + 1;
+    }
+    infos->nodeid = node_id;
+    if ((infos->cpufamilynumber == 0x15 && nodes_per_proc > 2)
+	|| (infos->cpufamilynumber == 0x17 && nodes_per_proc > 4)) {
+      hwloc_debug("warning: undefined nodes_per_proc value %u, assuming it means %u\n", nodes_per_proc, nodes_per_proc);
+    }
+
+    if (infos->cpufamilynumber <= 0x16) { /* topoext appeared in 0x15 and compute-units were only used in 0x15 and 0x16 */
+      unsigned unit_id, cores_per_unit;
+      infos->unitid = unit_id = ebx & 0xff;
+      cores_per_unit = ((ebx >> 8) & 0xff) + 1;
+      hwloc_debug("topoext %08x, %u nodes, node %u, %u cores in unit %u\n", apic_id, nodes_per_proc, node_id, cores_per_unit, unit_id);
+      /* coreid and unitid are package-wide (core 0-15 and unit 0-7 on 16-core 2-NUMAnode processor).
+       * The Linux kernel reduces these to NUMA-node-wide (by applying %core_per_node and %unit_per node respectively).
+       * It's not clear if we should do this as well.
+       */
+    } else {
+      unsigned core_id, threads_per_core;
+      infos->coreid = core_id = ebx & 0xff;
+      threads_per_core = ((ebx >> 8) & 0xff) + 1;
+      hwloc_debug("topoext %08x, %u nodes, node %u, %u threads in core %u\n", apic_id, nodes_per_proc, node_id, threads_per_core, core_id);
+    }
+
+    for (cachenum = 0; ; cachenum++) {
+      eax = 0x8000001d;
+      ecx = cachenum;
+      cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
+      if ((eax & 0x1f) == 0)
+	break;
+      infos->numcaches++;
+    }
+
+    cache = infos->cache = malloc(infos->numcaches * sizeof(*infos->cache));
+
+    for (cachenum = 0; ; cachenum++) {
+      unsigned long linesize, linepart, ways, sets;
+      eax = 0x8000001d;
+      ecx = cachenum;
+      cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
+
+      if ((eax & 0x1f) == 0)
+	break;
+      switch (eax & 0x1f) {
+      case 1: cache->type = HWLOC_OBJ_CACHE_DATA; break;
+      case 2: cache->type = HWLOC_OBJ_CACHE_INSTRUCTION; break;
+      default: cache->type = HWLOC_OBJ_CACHE_UNIFIED; break;
+      }
+
+      cache->level = (eax >> 5) & 0x7;
+      /* Note: actually number of cores */
+      cache->nbthreads_sharing = ((eax >> 14) &  0xfff) + 1;
+
+      cache->linesize = linesize = (ebx & 0xfff) + 1;
+      cache->linepart = linepart = ((ebx >> 12) & 0x3ff) + 1;
+      ways = ((ebx >> 22) & 0x3ff) + 1;
+
+      if (eax & (1 << 9))
+	/* Fully associative */
+	cache->ways = -1;
+      else
+	cache->ways = ways;
+      cache->sets = sets = ecx + 1;
+      cache->size = linesize * linepart * ways * sets;
+      cache->inclusive = edx & 0x2;
+
+      hwloc_debug("cache %u L%u%c t%u linesize %lu linepart %lu ways %lu sets %lu, size %luKB\n",
+		  cachenum, cache->level,
+		  cache->type == HWLOC_OBJ_CACHE_DATA ? 'd' : cache->type == HWLOC_OBJ_CACHE_INSTRUCTION ? 'i' : 'u',
+		  cache->nbthreads_sharing, linesize, linepart, ways, sets, cache->size >> 10);
+
+      cache++;
+    }
+  } else {
+    /* If there's no topoext,
+     * get cache information from cpuid 0x80000005 and 0x80000006
+     * (not supported on Intel)
+     */
+    if (cpuid_type != intel && highest_ext_cpuid >= 0x80000005) {
+      eax = 0x80000005;
+      cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
+      fill_amd_cache(infos, 1, HWLOC_OBJ_CACHE_DATA, 1, ecx); /* private L1d */
+      fill_amd_cache(infos, 1, HWLOC_OBJ_CACHE_INSTRUCTION, 1, edx); /* private L1i */
+    }
+    if (cpuid_type != intel && highest_ext_cpuid >= 0x80000006) {
+      eax = 0x80000006;
+      cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
+      if (ecx & 0xf000)
+	/* This is actually supported on Intel but LinePerTag isn't returned in bits 8-11.
+	 * Could be useful if some Intels (at least before Core micro-architecture)
+	 * support this leaf without leaf 0x4.
+	 */
+	fill_amd_cache(infos, 2, HWLOC_OBJ_CACHE_UNIFIED, 1, ecx); /* private L2u */
+      if (edx & 0xf000)
+	fill_amd_cache(infos, 3, HWLOC_OBJ_CACHE_UNIFIED, legacy_max_log_proc, edx); /* package-wide L3u */
+    }
+  }
+
+  /* Get thread/core + cache information from cpuid 0x04
+   * (not supported on AMD)
+   */
+  if (cpuid_type != amd && highest_cpuid >= 0x04) {
+    unsigned max_nbcores;
+    unsigned max_nbthreads;
+    unsigned level;
+
+    for (cachenum = 0; ; cachenum++) {
+      eax = 0x04;
+      ecx = cachenum;
+      cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
+
+      hwloc_debug("cache %u type %u\n", cachenum, eax & 0x1f);
+      if ((eax & 0x1f) == 0)
+	break;
+      level = (eax >> 5) & 0x7;
+      if (data->is_knl && level == 3)
+	/* KNL reports wrong L3 information (size always 0, cpuset always the entire machine), ignore it */
+	break;
+      infos->numcaches++;
+
+      if (!cachenum) {
+	/* by the way, get thread/core information from the first cache */
+	max_nbcores = ((eax >> 26) & 0x3f) + 1;
+	max_nbthreads = legacy_max_log_proc / max_nbcores;
+	hwloc_debug("thus %u threads\n", max_nbthreads);
+	infos->threadid = legacy_log_proc_id % max_nbthreads;
+	infos->coreid = legacy_log_proc_id / max_nbthreads;
+	hwloc_debug("this is thread %u of core %u\n", infos->threadid, infos->coreid);
+      }
+    }
+
+    cache = infos->cache = malloc(infos->numcaches * sizeof(*infos->cache));
+
+    for (cachenum = 0; ; cachenum++) {
+      unsigned long linesize, linepart, ways, sets;
+      eax = 0x04;
+      ecx = cachenum;
+      cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
+
+      if ((eax & 0x1f) == 0)
+	break;
+      level = (eax >> 5) & 0x7;
+      if (data->is_knl && level == 3)
+	/* KNL reports wrong L3 information (size always 0, cpuset always the entire machine), ignore it */
+	break;
+      switch (eax & 0x1f) {
+      case 1: cache->type = HWLOC_OBJ_CACHE_DATA; break;
+      case 2: cache->type = HWLOC_OBJ_CACHE_INSTRUCTION; break;
+      default: cache->type = HWLOC_OBJ_CACHE_UNIFIED; break;
+      }
+
+      cache->level = level;
+      cache->nbthreads_sharing = ((eax >> 14) & 0xfff) + 1;
+
+      cache->linesize = linesize = (ebx & 0xfff) + 1;
+      cache->linepart = linepart = ((ebx >> 12) & 0x3ff) + 1;
+      ways = ((ebx >> 22) & 0x3ff) + 1;
+      if (eax & (1 << 9))
+        /* Fully associative */
+        cache->ways = -1;
+      else
+        cache->ways = ways;
+      cache->sets = sets = ecx + 1;
+      cache->size = linesize * linepart * ways * sets;
+      cache->inclusive = edx & 0x2;
+
+      hwloc_debug("cache %u L%u%c t%u linesize %lu linepart %lu ways %lu sets %lu, size %luKB\n",
+		  cachenum, cache->level,
+		  cache->type == HWLOC_OBJ_CACHE_DATA ? 'd' : cache->type == HWLOC_OBJ_CACHE_INSTRUCTION ? 'i' : 'u',
+		  cache->nbthreads_sharing, linesize, linepart, ways, sets, cache->size >> 10);
+      cache++;
+    }
+  }
+
+  /* Get package/core/thread information from cpuid 0x0b
+   * (Intel x2APIC)
+   */
+  if (cpuid_type == intel && highest_cpuid >= 0x0b && has_x2apic(features)) {
+    unsigned level, apic_nextshift, apic_number, apic_type, apic_id = 0, apic_shift = 0, id;
+    for (level = 0; ; level++) {
+      ecx = level;
+      eax = 0x0b;
+      cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
+      if (!eax && !ebx)
+        break;
+    }
+    if (level) {
+      infos->levels = level;
+      infos->otherids = malloc(level * sizeof(*infos->otherids));
+      for (level = 0; ; level++) {
+	ecx = level;
+	eax = 0x0b;
+	cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
+	if (!eax && !ebx)
+	  break;
+	apic_nextshift = eax & 0x1f;
+	apic_number = ebx & 0xffff;
+	apic_type = (ecx & 0xff00) >> 8;
+	apic_id = edx;
+	id = (apic_id >> apic_shift) & ((1 << (apic_nextshift - apic_shift)) - 1);
+	hwloc_debug("x2APIC %08x %u: nextshift %u num %2u type %u id %2u\n", apic_id, level, apic_nextshift, apic_number, apic_type, id);
+	infos->apicid = apic_id;
+	infos->otherids[level] = UINT_MAX;
+	switch (apic_type) {
+	case 1:
+	  infos->threadid = id;
+	  /* apic_number is the actual number of threads per core */
+	  break;
+	case 2:
+	  infos->coreid = id;
+	  /* apic_number is the actual number of threads per package */
+	  break;
+	default:
+	  hwloc_debug("x2APIC %u: unknown type %u\n", level, apic_type);
+	  infos->otherids[level] = apic_id >> apic_shift;
+	  break;
+	}
+	apic_shift = apic_nextshift;
+      }
+      infos->apicid = apic_id;
+      infos->packageid = apic_id >> apic_shift;
+      hwloc_debug("x2APIC remainder: %u\n", infos->packageid);
+      hwloc_debug("this is thread %u of core %u\n", infos->threadid, infos->coreid);
+    }
+  }
+
+  /* Now that we have all info, compute cacheids and apply quirks */
+  for (cachenum = 0; cachenum < infos->numcaches; cachenum++) {
+    cache = &infos->cache[cachenum];
+
+    /* default cacheid value */
+    cache->cacheid = infos->apicid / cache->nbthreads_sharing;
+
+    /* AMD quirk */
+    if (cpuid_type == amd
+	&& infos->cpufamilynumber== 0x10 && infos->cpumodelnumber == 0x9
+	&& cache->level == 3
+	&& (cache->ways == -1 || (cache->ways % 2 == 0)) && cache->nbthreads_sharing >= 8) {
+      /* Fix AMD family 0x10 model 0x9 (Magny-Cours) with 8 or 12 cores.
+       * The L3 (and its associativity) is actually split into two halves).
+       */
+      if (cache->nbthreads_sharing == 16)
+	cache->nbthreads_sharing = 12; /* nbthreads_sharing is a power of 2 but the processor actually has 8 or 12 cores */
+      cache->nbthreads_sharing /= 2;
+      cache->size /= 2;
+      if (cache->ways != -1)
+	cache->ways /= 2;
+      /* AMD Magny-Cours 12-cores processor reserve APIC ids as AAAAAABBBBBB....
+       * among first L3 (A), second L3 (B), and unexisting cores (.).
+       * On multi-socket servers, L3 in non-first sockets may have APIC id ranges
+       * such as [16-21] that are not aligned on multiple of nbthreads_sharing (6).
+       * That means, we can't just compare apicid/nbthreads_sharing to identify siblings.
+       */
+      cache->cacheid = (infos->apicid % legacy_max_log_proc) / cache->nbthreads_sharing /* cacheid within the package */
+	+ 2 * (infos->apicid / legacy_max_log_proc); /* add 2 caches per previous package */
+
+    } else if (cpuid_type == amd
+	       && infos->cpufamilynumber == 0x15
+	       && (infos->cpumodelnumber == 0x1 /* Bulldozer */ || infos->cpumodelnumber == 0x2 /* Piledriver */)
+	       && cache->level == 3 && cache->nbthreads_sharing == 6) {
+      /* AMD Bulldozer and Piledriver 12-core processors have same APIC ids as Magny-Cours above,
+       * but we can't merge the checks because the original nbthreads_sharing must be exactly 6 here.
+       */
+      cache->cacheid = (infos->apicid % legacy_max_log_proc) / cache->nbthreads_sharing /* cacheid within the package */
+	+ 2 * (infos->apicid / legacy_max_log_proc); /* add 2 cache per previous package */
+    }
+  }
+
+  if (hwloc_bitmap_isset(data->apicid_set, infos->apicid))
+    data->apicid_unique = 0;
+  else
+    hwloc_bitmap_set(data->apicid_set, infos->apicid);
+}
+
+static void
+hwloc_x86_add_cpuinfos(hwloc_obj_t obj, struct procinfo *info, int nodup)
+{
+  char number[8];
+  hwloc_obj_add_info_nodup(obj, "CPUVendor", info->cpuvendor, nodup);
+  snprintf(number, sizeof(number), "%u", info->cpufamilynumber);
+  hwloc_obj_add_info_nodup(obj, "CPUFamilyNumber", number, nodup);
+  snprintf(number, sizeof(number), "%u", info->cpumodelnumber);
+  hwloc_obj_add_info_nodup(obj, "CPUModelNumber", number, nodup);
+  if (info->cpumodel[0]) {
+    const char *c = info->cpumodel;
+    while (*c == ' ')
+      c++;
+    hwloc_obj_add_info_nodup(obj, "CPUModel", c, nodup);
+  }
+  snprintf(number, sizeof(number), "%u", info->cpustepping);
+  hwloc_obj_add_info_nodup(obj, "CPUStepping", number, nodup);
+}
+
+/* Analyse information stored in infos, and build/annotate topology levels accordingly */
+static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int fulldiscovery)
+{
+  struct hwloc_topology *topology = backend->topology;
+  struct hwloc_x86_backend_data_s *data = backend->private_data;
+  unsigned nbprocs = data->nbprocs;
+  hwloc_bitmap_t complete_cpuset = hwloc_bitmap_alloc();
+  unsigned i, j, l, level;
+  int one = -1;
+  hwloc_bitmap_t remaining_cpuset;
+
+  for (i = 0; i < nbprocs; i++)
+    if (infos[i].present) {
+      hwloc_bitmap_set(complete_cpuset, i);
+      one = i;
+    }
+
+  if (one == -1) {
+    hwloc_bitmap_free(complete_cpuset);
+    return;
+  }
+
+  remaining_cpuset = hwloc_bitmap_alloc();
+
+  /* Ideally, when fulldiscovery=0, we could add any object that doesn't exist yet.
+   * But what if the x86 and the native backends disagree because one is buggy? Which one to trust?
+   * We only add missing caches, and annotate other existing objects for now.
+   */
+
+  if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_PACKAGE)) {
+    /* Look for packages */
+    hwloc_obj_t package;
+
+    hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
+    while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
+      if (fulldiscovery) {
+	unsigned packageid = infos[i].packageid;
+	hwloc_bitmap_t package_cpuset = hwloc_bitmap_alloc();
+
+	for (j = i; j < nbprocs; j++) {
+	  if (infos[j].packageid == packageid) {
+	    hwloc_bitmap_set(package_cpuset, j);
+	    hwloc_bitmap_clr(remaining_cpuset, j);
+	  }
+	}
+	package = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PACKAGE, packageid);
+	package->cpuset = package_cpuset;
+
+	hwloc_x86_add_cpuinfos(package, &infos[i], 0);
+
+	hwloc_debug_1arg_bitmap("os package %u has cpuset %s\n",
+				packageid, package_cpuset);
+	hwloc_insert_object_by_cpuset(topology, package);
+
+      } else {
	/* Annotate previously-existing packages */
+	hwloc_bitmap_t set = hwloc_bitmap_alloc();
+	hwloc_bitmap_set(set, i);
+	package = hwloc_get_next_obj_covering_cpuset_by_type(topology, set, HWLOC_OBJ_PACKAGE, NULL);
+	hwloc_bitmap_free(set);
+	if (package) {
+	  /* Found package above that PU, annotate if no such attribute yet */
+	  hwloc_x86_add_cpuinfos(package, &infos[i], 1);
+	  hwloc_bitmap_andnot(remaining_cpuset, remaining_cpuset, package->cpuset);
+	} else {
+	  /* No package, annotate the root object */
+	  hwloc_x86_add_cpuinfos(hwloc_get_root_obj(topology), &infos[i], 1);
+	  break;
+	}
+      }
+    }
+  }
+
+  /* Look for Numa nodes inside packages (cannot be filtered-out) */
+  if (fulldiscovery) {
+    hwloc_bitmap_t node_cpuset;
+    hwloc_obj_t node;
+
+    /* FIXME: if there's memory inside the root object, divide it into NUMA nodes? */
+
+    hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
+    while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
+      unsigned packageid = infos[i].packageid;
+      unsigned nodeid = infos[i].nodeid;
+
+      if (nodeid == (unsigned)-1) {
+        hwloc_bitmap_clr(remaining_cpuset, i);
+	continue;
+      }
+
+      node_cpuset = hwloc_bitmap_alloc();
+      for (j = i; j < nbprocs; j++) {
+	if (infos[j].nodeid == (unsigned) -1) {
+	  hwloc_bitmap_clr(remaining_cpuset, j);
+	  continue;
+	}
+
+        if (infos[j].packageid == packageid && infos[j].nodeid == nodeid) {
+          hwloc_bitmap_set(node_cpuset, j);
+          hwloc_bitmap_clr(remaining_cpuset, j);
+        }
+      }
+      node = hwloc_alloc_setup_object(topology, HWLOC_OBJ_NUMANODE, nodeid);
+      node->cpuset = node_cpuset;
+      node->nodeset = hwloc_bitmap_alloc();
+      hwloc_bitmap_set(node->nodeset, nodeid);
+      hwloc_debug_1arg_bitmap("os node %u has cpuset %s\n",
+          nodeid, node_cpuset);
+      hwloc_insert_object_by_cpuset(topology, node);
+    }
+  }
+
+  if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_GROUP)) {
+    /* Look for Compute units inside packages */
+    if (fulldiscovery) {
+      hwloc_bitmap_t unit_cpuset;
+      hwloc_obj_t unit;
+
+      hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
+      while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
+	unsigned packageid = infos[i].packageid;
+	unsigned unitid = infos[i].unitid;
+
+	if (unitid == (unsigned)-1) {
+	  hwloc_bitmap_clr(remaining_cpuset, i);
+	  continue;
+	}
+
+	unit_cpuset = hwloc_bitmap_alloc();
+	for (j = i; j < nbprocs; j++) {
+	  if (infos[j].unitid == (unsigned) -1) {
+	    hwloc_bitmap_clr(remaining_cpuset, j);
+	    continue;
+	  }
+
+	  if (infos[j].packageid == packageid && infos[j].unitid == unitid) {
+	    hwloc_bitmap_set(unit_cpuset, j);
+	    hwloc_bitmap_clr(remaining_cpuset, j);
+	  }
+	}
+	unit = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, unitid);
+	unit->cpuset = unit_cpuset;
+	unit->subtype = strdup("ComputeUnit");
+	unit->attr->group.kind = HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT;
+	hwloc_debug_1arg_bitmap("os unit %u has cpuset %s\n",
+				unitid, unit_cpuset);
+	hwloc_insert_object_by_cpuset(topology, unit);
+      }
+    }
+
+    /* Look for unknown objects */
+    if (infos[one].otherids) {
+      for (level = infos[one].levels-1; level <= infos[one].levels-1; level--) {
+	if (infos[one].otherids[level] != UINT_MAX) {
+	  hwloc_bitmap_t unknown_cpuset;
+	  hwloc_obj_t unknown_obj;
+
+	  hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
+	  while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
+	    unsigned unknownid = infos[i].otherids[level];
+
+	    unknown_cpuset = hwloc_bitmap_alloc();
+	    for (j = i; j < nbprocs; j++) {
+	      if (infos[j].otherids[level] == unknownid) {
+		hwloc_bitmap_set(unknown_cpuset, j);
+		hwloc_bitmap_clr(remaining_cpuset, j);
+	      }
+	    }
+	    unknown_obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, unknownid);
+	    unknown_obj->cpuset = unknown_cpuset;
+	    unknown_obj->attr->group.kind = HWLOC_GROUP_KIND_INTEL_X2APIC_UNKNOWN;
+	    unknown_obj->attr->group.subkind = level;
+	    hwloc_debug_2args_bitmap("os unknown%u %u has cpuset %s\n",
+				     level, unknownid, unknown_cpuset);
+	    hwloc_insert_object_by_cpuset(topology, unknown_obj);
+	  }
+	}
+      }
+    }
+  }
+
+  if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) {
+    /* Look for cores */
+    if (fulldiscovery) {
+      hwloc_bitmap_t core_cpuset;
+      hwloc_obj_t core;
+
+      hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
+      while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
+	unsigned packageid = infos[i].packageid;
+	unsigned nodeid = infos[i].nodeid;
+	unsigned coreid = infos[i].coreid;
+
+	if (coreid == (unsigned) -1) {
+	  hwloc_bitmap_clr(remaining_cpuset, i);
+	  continue;
+	}
+
+	core_cpuset = hwloc_bitmap_alloc();
+	for (j = i; j < nbprocs; j++) {
+	  if (infos[j].coreid == (unsigned) -1) {
+	    hwloc_bitmap_clr(remaining_cpuset, j);
+	    continue;
+	  }
+
+	  if (infos[j].packageid == packageid && infos[j].nodeid == nodeid && infos[j].coreid == coreid) {
+	    hwloc_bitmap_set(core_cpuset, j);
+	    hwloc_bitmap_clr(remaining_cpuset, j);
+	  }
+	}
+	core = hwloc_alloc_setup_object(topology, HWLOC_OBJ_CORE, coreid);
+	core->cpuset = core_cpuset;
+	hwloc_debug_1arg_bitmap("os core %u has cpuset %s\n",
+				coreid, core_cpuset);
+	hwloc_insert_object_by_cpuset(topology, core);
+      }
+    }
+  }
+
+  /* Look for PUs (cannot be filtered-out) */
+  if (fulldiscovery) {
+    hwloc_debug("%s", "\n\n * CPU cpusets *\n\n");
+    for (i=0; i<nbprocs; i++)
+      if(infos[i].present) { /* Only add present PU. We don't know if others actually exist */
+       struct hwloc_obj *obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PU, i);
+       obj->cpuset = hwloc_bitmap_alloc();
+       hwloc_bitmap_only(obj->cpuset, i);
+       hwloc_debug_1arg_bitmap("PU %u has cpuset %s\n", i, obj->cpuset);
+       hwloc_insert_object_by_cpuset(topology, obj);
+     }
+  }
+
+  /* Look for caches */
+  /* First find max level */
+  level = 0;
+  for (i = 0; i < nbprocs; i++)
+    for (j = 0; j < infos[i].numcaches; j++)
+      if (infos[i].cache[j].level > level)
+        level = infos[i].cache[j].level;
+  while (level > 0) {
+    hwloc_obj_cache_type_t type;
+    HWLOC_BUILD_ASSERT(HWLOC_OBJ_CACHE_DATA == HWLOC_OBJ_CACHE_UNIFIED+1);
+    HWLOC_BUILD_ASSERT(HWLOC_OBJ_CACHE_INSTRUCTION == HWLOC_OBJ_CACHE_DATA+1);
+    for (type = HWLOC_OBJ_CACHE_UNIFIED; type <= HWLOC_OBJ_CACHE_INSTRUCTION; type++) {
+      /* Look for caches of that type at level level */
+      hwloc_obj_type_t otype;
+      hwloc_obj_t cache;
+
+      otype = hwloc_cache_type_by_depth_type(level, type);
+      if (otype == HWLOC_OBJ_TYPE_NONE)
+	continue;
+      if (!hwloc_filter_check_keep_object_type(topology, otype))
+	continue;
+
+      hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
+      while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
+	hwloc_bitmap_t puset;
+
+	for (l = 0; l < infos[i].numcaches; l++) {
+	  if (infos[i].cache[l].level == level && infos[i].cache[l].type == type)
+	    break;
+	}
+	if (l == infos[i].numcaches) {
+	  /* no cache Llevel of that type in i */
+	  hwloc_bitmap_clr(remaining_cpuset, i);
+	  continue;
+	}
+
+	puset = hwloc_bitmap_alloc();
+	hwloc_bitmap_set(puset, i);
+	cache = hwloc_get_next_obj_covering_cpuset_by_type(topology, puset, otype, NULL);
+	hwloc_bitmap_free(puset);
+
+	if (cache) {
+	  /* Found cache above that PU, annotate if no such attribute yet */
+	  if (!hwloc_obj_get_info_by_name(cache, "Inclusive"))
+	    hwloc_obj_add_info(cache, "Inclusive", infos[i].cache[l].inclusive ? "1" : "0");
+	  hwloc_bitmap_andnot(remaining_cpuset, remaining_cpuset, cache->cpuset);
+	} else {
+	  /* Add the missing cache */
+	  hwloc_bitmap_t cache_cpuset;
+	  unsigned packageid = infos[i].packageid;
+	  unsigned cacheid = infos[i].cache[l].cacheid;
+	  /* Now look for others sharing it */
+	  cache_cpuset = hwloc_bitmap_alloc();
+	  for (j = i; j < nbprocs; j++) {
+	    unsigned l2;
+	    for (l2 = 0; l2 < infos[j].numcaches; l2++) {
+	      if (infos[j].cache[l2].level == level && infos[j].cache[l2].type == type)
+		break;
+	    }
+	    if (l2 == infos[j].numcaches) {
+	      /* no cache Llevel of that type in j */
+	      hwloc_bitmap_clr(remaining_cpuset, j);
+	      continue;
+	    }
+	    if (infos[j].packageid == packageid && infos[j].cache[l2].cacheid == cacheid) {
+	      hwloc_bitmap_set(cache_cpuset, j);
+	      hwloc_bitmap_clr(remaining_cpuset, j);
+	    }
+	  }
+	  cache = hwloc_alloc_setup_object(topology, otype, -1);
+	  cache->attr->cache.depth = level;
+	  cache->attr->cache.size = infos[i].cache[l].size;
+	  cache->attr->cache.linesize = infos[i].cache[l].linesize;
+	  cache->attr->cache.associativity = infos[i].cache[l].ways;
+	  cache->attr->cache.type = infos[i].cache[l].type;
+	  cache->cpuset = cache_cpuset;
+	  hwloc_obj_add_info(cache, "Inclusive", infos[i].cache[l].inclusive ? "1" : "0");
+	  hwloc_debug_2args_bitmap("os L%u cache %u has cpuset %s\n",
+				   level, cacheid, cache_cpuset);
+	  hwloc_insert_object_by_cpuset(topology, cache);
+	}
+      }
+    }
+    level--;
+  }
+
+  /* FIXME: if KNL and L2 disabled, add tiles instead of L2 */
+
+  hwloc_bitmap_free(remaining_cpuset);
+  hwloc_bitmap_free(complete_cpuset);
+}
+
+static int
+look_procs(struct hwloc_backend *backend, struct procinfo *infos, int fulldiscovery,
+	   unsigned highest_cpuid, unsigned highest_ext_cpuid, unsigned *features, enum cpuid_type cpuid_type,
+	   int (*get_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags),
+	   int (*set_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags))
+{
+  struct hwloc_x86_backend_data_s *data = backend->private_data;
+  struct hwloc_topology *topology = backend->topology;
+  unsigned nbprocs = data->nbprocs;
+  hwloc_bitmap_t orig_cpuset = NULL;
+  hwloc_bitmap_t set = NULL;
+  unsigned i;
+
+  if (!data->src_cpuiddump_path) {
+    orig_cpuset = hwloc_bitmap_alloc();
+    if (get_cpubind(topology, orig_cpuset, HWLOC_CPUBIND_STRICT)) {
+      hwloc_bitmap_free(orig_cpuset);
+      return -1;
+    }
+    set = hwloc_bitmap_alloc();
+  }
+
+  for (i = 0; i < nbprocs; i++) {
+    struct cpuiddump *src_cpuiddump = NULL;
+    if (data->src_cpuiddump_path) {
+      src_cpuiddump = cpuiddump_read(data->src_cpuiddump_path, i);
+    } else {
+      hwloc_bitmap_only(set, i);
+      hwloc_debug("binding to CPU%u\n", i);
+      if (set_cpubind(topology, set, HWLOC_CPUBIND_STRICT)) {
+	hwloc_debug("could not bind to CPU%u: %s\n", i, strerror(errno));
+	continue;
+      }
+    }
+
+    look_proc(backend, &infos[i], highest_cpuid, highest_ext_cpuid, features, cpuid_type, src_cpuiddump);
+
+    if (data->src_cpuiddump_path) {
+      cpuiddump_free(src_cpuiddump);
+    }
+  }
+
+  if (!data->src_cpuiddump_path) {
+    set_cpubind(topology, orig_cpuset, 0);
+    hwloc_bitmap_free(set);
+    hwloc_bitmap_free(orig_cpuset);
+  }
+
+  if (!data->apicid_unique)
+    fulldiscovery = 0;
+  else
+    summarize(backend, infos, fulldiscovery);
+  return 0;
+}
+
+#if defined HWLOC_FREEBSD_SYS && defined HAVE_CPUSET_SETID
+#include <sys/param.h>
+#include <sys/cpuset.h>
+typedef cpusetid_t hwloc_x86_os_state_t;
+static void hwloc_x86_os_state_save(hwloc_x86_os_state_t *state, struct cpuiddump *src_cpuiddump)
+{
+  if (!src_cpuiddump) {
    /* temporarily make all cpus available during discovery */
+    cpuset_getid(CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, state);
+    cpuset_setid(CPU_WHICH_PID, -1, 0);
+  }
+}
+static void hwloc_x86_os_state_restore(hwloc_x86_os_state_t *state, struct cpuiddump *src_cpuiddump)
+{
+  if (!src_cpuiddump) {
+    /* restore initial cpuset */
+    cpuset_setid(CPU_WHICH_PID, -1, *state);
+  }
+}
+#else /* !defined HWLOC_FREEBSD_SYS || !defined HAVE_CPUSET_SETID */
+typedef void * hwloc_x86_os_state_t;
+static void hwloc_x86_os_state_save(hwloc_x86_os_state_t *state __hwloc_attribute_unused, struct cpuiddump *src_cpuiddump __hwloc_attribute_unused) { }
+static void hwloc_x86_os_state_restore(hwloc_x86_os_state_t *state __hwloc_attribute_unused, struct cpuiddump *src_cpuiddump __hwloc_attribute_unused) { }
+#endif /* !defined HWLOC_FREEBSD_SYS || !defined HAVE_CPUSET_SETID */
+
+
+#define INTEL_EBX ('G' | ('e'<<8) | ('n'<<16) | ('u'<<24))
+#define INTEL_EDX ('i' | ('n'<<8) | ('e'<<16) | ('I'<<24))
+#define INTEL_ECX ('n' | ('t'<<8) | ('e'<<16) | ('l'<<24))
+
+#define AMD_EBX ('A' | ('u'<<8) | ('t'<<16) | ('h'<<24))
+#define AMD_EDX ('e' | ('n'<<8) | ('t'<<16) | ('i'<<24))
+#define AMD_ECX ('c' | ('A'<<8) | ('M'<<16) | ('D'<<24))
+
+/* fake cpubind for when nbprocs=1 and no binding support */
+static int fake_get_cpubind(hwloc_topology_t topology __hwloc_attribute_unused,
+			    hwloc_cpuset_t set __hwloc_attribute_unused,
+			    int flags __hwloc_attribute_unused)
+{
+  return 0;
+}
+static int fake_set_cpubind(hwloc_topology_t topology __hwloc_attribute_unused,
+			    hwloc_const_cpuset_t set __hwloc_attribute_unused,
+			    int flags __hwloc_attribute_unused)
+{
+  return 0;
+}
+
+static
+int hwloc_look_x86(struct hwloc_backend *backend, int fulldiscovery)
+{
+  struct hwloc_x86_backend_data_s *data = backend->private_data;
+  unsigned nbprocs = data->nbprocs;
+  unsigned eax, ebx, ecx = 0, edx;
+  unsigned i;
+  unsigned highest_cpuid;
+  unsigned highest_ext_cpuid;
+  /* This stores cpuid features with the same indexing as Linux */
+  unsigned features[10] = { 0 };
+  struct procinfo *infos = NULL;
+  enum cpuid_type cpuid_type = unknown;
+  hwloc_x86_os_state_t os_state;
+  struct hwloc_binding_hooks hooks;
+  struct hwloc_topology_support support;
+  struct hwloc_topology_membind_support memsupport __hwloc_attribute_unused;
+  int (*get_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags) = NULL;
+  int (*set_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags) = NULL;
+  struct cpuiddump *src_cpuiddump = NULL;
+  int ret = -1;
+
+  if (data->src_cpuiddump_path) {
+    /* just read cpuid from the dump */
+    src_cpuiddump = cpuiddump_read(data->src_cpuiddump_path, 0);
+  } else {
+    /* otherwise check if binding works */
+    memset(&hooks, 0, sizeof(hooks));
+    support.membind = &memsupport;
+    hwloc_set_native_binding_hooks(&hooks, &support);
+    if (hooks.get_thisthread_cpubind && hooks.set_thisthread_cpubind) {
+      get_cpubind = hooks.get_thisthread_cpubind;
+      set_cpubind = hooks.set_thisthread_cpubind;
+    } else if (hooks.get_thisproc_cpubind && hooks.set_thisproc_cpubind) {
+      /* FIXME: if called by a multithreaded program, we will restore the original process binding
+       * for each thread instead of their own original thread binding.
+       * See issue #158.
+       */
+      get_cpubind = hooks.get_thisproc_cpubind;
+      set_cpubind = hooks.set_thisproc_cpubind;
+    } else {
+      /* we need binding support if there are multiple PUs */
+      if (nbprocs > 1)
+	goto out;
+      get_cpubind = fake_get_cpubind;
+      set_cpubind = fake_set_cpubind;
+    }
+  }
+
+  if (!src_cpuiddump && !hwloc_have_x86_cpuid())
+    goto out;
+
+  infos = calloc(nbprocs, sizeof(struct procinfo));
+  if (NULL == infos)
+    goto out;
+  for (i = 0; i < nbprocs; i++) {
+    infos[i].nodeid = (unsigned) -1;
+    infos[i].packageid = (unsigned) -1;
+    infos[i].unitid = (unsigned) -1;
+    infos[i].coreid = (unsigned) -1;
+    infos[i].threadid = (unsigned) -1;
+  }
+
+  eax = 0x00;
+  cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
+  highest_cpuid = eax;
+  if (ebx == INTEL_EBX && ecx == INTEL_ECX && edx == INTEL_EDX)
+    cpuid_type = intel;
+  if (ebx == AMD_EBX && ecx == AMD_ECX && edx == AMD_EDX)
+    cpuid_type = amd;
+
+  hwloc_debug("highest cpuid %x, cpuid type %u\n", highest_cpuid, cpuid_type);
+  if (highest_cpuid < 0x01) {
+      goto out_with_infos;
+  }
+
+  eax = 0x01;
+  cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
+  features[0] = edx;
+  features[4] = ecx;
+
+  eax = 0x80000000;
+  cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
+  highest_ext_cpuid = eax;
+
+  hwloc_debug("highest extended cpuid %x\n", highest_ext_cpuid);
+
+  if (highest_cpuid >= 0x7) {
+    eax = 0x7;
+    ecx = 0;
+    cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
+    features[9] = ebx;
+  }
+
+  if (cpuid_type != intel && highest_ext_cpuid >= 0x80000001) {
+    eax = 0x80000001;
+    cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
+    features[1] = edx;
+    features[6] = ecx;
+  }
+
+  hwloc_x86_os_state_save(&os_state, src_cpuiddump);
+
+  ret = look_procs(backend, infos, fulldiscovery,
+		   highest_cpuid, highest_ext_cpuid, features, cpuid_type,
+		   get_cpubind, set_cpubind);
+  if (!ret)
+    /* success, we're done */
+    goto out_with_os_state;
+
+  if (nbprocs == 1) {
+    /* only one processor, no need to bind */
+    look_proc(backend, &infos[0], highest_cpuid, highest_ext_cpuid, features, cpuid_type, src_cpuiddump);
+    summarize(backend, infos, fulldiscovery);
+    ret = 0;
+  }
+
+out_with_os_state:
+  hwloc_x86_os_state_restore(&os_state, src_cpuiddump);
+
+out_with_infos:
+  if (NULL != infos) {
+    for (i = 0; i < nbprocs; i++) {
+      free(infos[i].cache);
+      free(infos[i].otherids);
+    }
+    free(infos);
+  }
+
+out:
+  if (src_cpuiddump)
+    cpuiddump_free(src_cpuiddump);
+  return ret;
+}
+
+static int
+hwloc_x86_discover(struct hwloc_backend *backend)
+{
+  struct hwloc_x86_backend_data_s *data = backend->private_data;
+  struct hwloc_topology *topology = backend->topology;
+  int alreadypus = 0;
+  int ret;
+
+#if HAVE_DECL_RUNNING_ON_VALGRIND
+  if (RUNNING_ON_VALGRIND && !data->src_cpuiddump_path) {
+    fprintf(stderr, "hwloc x86 backend cannot work under Valgrind, disabling.\n"
+	    "May be reenabled by dumping CPUIDs with hwloc-gather-cpuid\n"
+	    "and reloading them under Valgrind with HWLOC_CPUID_PATH.\n");
+    return 0;
+  }
+#endif
+
+  if (!data->src_cpuiddump_path)
+    data->nbprocs = hwloc_fallback_nbprocessors(topology);
+
+  if (topology->levels[0][0]->cpuset) {
+    /* somebody else discovered things */
+    if (topology->nb_levels == 2 && topology->level_nbobjects[1] == data->nbprocs) {
+      /* only PUs were discovered, as much as we would, complete the topology with everything else */
+      alreadypus = 1;
+      goto fulldiscovery;
+    }
+
+    /* several object types were added, we can't easily complete, just do partial discovery */
+    hwloc_topology_reconnect(topology, 0);
+    ret = hwloc_look_x86(backend, 0);
+    if (ret)
+      hwloc_obj_add_info(topology->levels[0][0], "Backend", "x86");
+    return 0;
+  } else {
+    /* topology is empty, initialize it */
+    hwloc_alloc_obj_cpusets(topology->levels[0][0]);
+  }
+
+fulldiscovery:
+  if (hwloc_look_x86(backend, 1) < 0) {
+    /* if failed, create PUs */
+    if (!alreadypus)
+      hwloc_setup_pu_level(topology, data->nbprocs);
+  }
+
+  hwloc_obj_add_info(topology->levels[0][0], "Backend", "x86");
+
+  if (!data->src_cpuiddump_path) { /* CPUID dump works for both x86 and x86_64 */
+#ifdef HAVE_UNAME
+    hwloc_add_uname_info(topology, NULL); /* we already know is_thissystem() is true */
+#else
+    /* uname isn't available, manually setup the "Architecture" info */
+#ifdef HWLOC_X86_64_ARCH
+    hwloc_obj_add_info(topology->levels[0][0], "Architecture", "x86_64");
+#else
+    hwloc_obj_add_info(topology->levels[0][0], "Architecture", "x86");
+#endif
+#endif
+  }
+
+  return 1;
+}
+
+static int
+hwloc_x86_check_cpuiddump_input(const char *src_cpuiddump_path, hwloc_bitmap_t set)
+{
+#if !(defined HWLOC_WIN_SYS && !defined __MINGW32__) /* needs a lot of work */
+  struct dirent *dirent;
+  DIR *dir;
+  char *path;
+  FILE *file;
+  char line [32];
+
+  dir = opendir(src_cpuiddump_path);
+  if (!dir)
+    return -1;
+
+  path = malloc(strlen(src_cpuiddump_path) + strlen("/hwloc-cpuid-info") + 1);
+  if (!path)
+    goto out_with_dir;
+
+  sprintf(path, "%s/hwloc-cpuid-info", src_cpuiddump_path);
+  file = fopen(path, "r");
+  if (!file) {
+    fprintf(stderr, "Couldn't open dumped cpuid summary %s\n", path);
+    free(path);
+    goto out_with_dir;
+  }
+  if (!fgets(line, sizeof(line), file)) {
+    fprintf(stderr, "Found read dumped cpuid summary in %s\n", path);
+    fclose(file);
+    free(path);
+    goto out_with_dir;
+  }
+  fclose(file);
+  if (strcmp(line, "Architecture: x86\n")) {
+    fprintf(stderr, "Found non-x86 dumped cpuid summary in %s: %s\n", path, line);
+    free(path);
+    goto out_with_dir;
+  }
+  free(path);
+
+  while ((dirent = readdir(dir)) != NULL) {
+    if (!strncmp(dirent->d_name, "pu", 2)) {
+      char *end;
+      unsigned long idx = strtoul(dirent->d_name+2, &end, 10);
+      if (!*end)
+	hwloc_bitmap_set(set, idx);
+      else
+	fprintf(stderr, "Ignoring invalid dirent `%s' in dumped cpuid directory `%s'\n",
+		dirent->d_name, src_cpuiddump_path);
+    }
+  }
+  closedir(dir);
+
+  if (hwloc_bitmap_iszero(set)) {
+    fprintf(stderr, "Did not find any valid pu%%u entry in dumped cpuid directory `%s'\n",
+	    src_cpuiddump_path);
+    return -1;
+  } else if (hwloc_bitmap_last(set) != hwloc_bitmap_weight(set) - 1) {
    /* The x86 backend enforces a contiguous set of PUs starting at 0 so far */
+    fprintf(stderr, "Found non-contigous pu%%u range in dumped cpuid directory `%s'\n",
+	    src_cpuiddump_path);
+    return -1;
+  }
+
+  return 0;
+
+out_with_dir:
+  closedir(dir);
+#endif /* HWLOC_WIN_SYS & !__MINGW32__ needs a lot of work */
+  return -1;
+}
+
+static void
+hwloc_x86_backend_disable(struct hwloc_backend *backend)
+{
+  struct hwloc_x86_backend_data_s *data = backend->private_data;
+  hwloc_bitmap_free(data->apicid_set);
+  free(data->src_cpuiddump_path);
+  free(data);
+}
+
+static struct hwloc_backend *
+hwloc_x86_component_instantiate(struct hwloc_disc_component *component,
+				const void *_data1 __hwloc_attribute_unused,
+				const void *_data2 __hwloc_attribute_unused,
+				const void *_data3 __hwloc_attribute_unused)
+{
+  struct hwloc_backend *backend;
+  struct hwloc_x86_backend_data_s *data;
+  const char *src_cpuiddump_path;
+
+  backend = hwloc_backend_alloc(component);
+  if (!backend)
+    goto out;
+
+  data = malloc(sizeof(*data));
+  if (!data) {
+    errno = ENOMEM;
+    goto out_with_backend;
+  }
+
+  backend->private_data = data;
+  backend->discover = hwloc_x86_discover;
+  backend->disable = hwloc_x86_backend_disable;
+
+  /* default values */
+  data->is_knl = 0;
+  data->apicid_set = hwloc_bitmap_alloc();
+  data->apicid_unique = 1;
+  data->src_cpuiddump_path = NULL;
+
+  src_cpuiddump_path = getenv("HWLOC_CPUID_PATH");
+  if (src_cpuiddump_path) {
+    hwloc_bitmap_t set = hwloc_bitmap_alloc();
+    if (!hwloc_x86_check_cpuiddump_input(src_cpuiddump_path, set)) {
+      backend->is_thissystem = 0;
+      data->src_cpuiddump_path = strdup(src_cpuiddump_path);
+      data->nbprocs = hwloc_bitmap_weight(set);
+    } else {
+      fprintf(stderr, "Ignoring dumped cpuid directory.\n");
+    }
+    hwloc_bitmap_free(set);
+  }
+
+  return backend;
+
+ out_with_backend:
+  free(backend);
+ out:
+  return NULL;
+}
+
+static struct hwloc_disc_component hwloc_x86_disc_component = {
+  HWLOC_DISC_COMPONENT_TYPE_CPU,
+  "x86",
+  HWLOC_DISC_COMPONENT_TYPE_GLOBAL,
+  hwloc_x86_component_instantiate,
+  45, /* between native and no_os */
+  NULL
+};
+
+const struct hwloc_component hwloc_x86_component = {
+  HWLOC_COMPONENT_ABI,
+  NULL, NULL,
+  HWLOC_COMPONENT_TYPE_DISC,
+  0,
+  &hwloc_x86_disc_component
+};
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-xml-libxml.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-xml-libxml.c
new file mode 100644
index 0000000000..c51289399a
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-xml-libxml.c
@@ -0,0 +1,569 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2016 Inria.  All rights reserved.
+ * Copyright © 2009-2011 Université Bordeaux
+ * Copyright © 2009-2011 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+#include <private/autogen/config.h>
+#include <hwloc.h>
+#include <hwloc/plugins.h>
+
+/* private headers allowed because this plugin is built within hwloc */
+#include <private/xml.h>
+#include <private/debug.h>
+#include <private/misc.h>
+
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+
+/*******************
+ * Common routines *
+ *******************/
+
+static void hwloc_libxml2_error_callback(void * ctx __hwloc_attribute_unused, const char * msg __hwloc_attribute_unused, ...) { /* do nothing */ }
+
+/* by default, do not cleanup to avoid issues with concurrent libxml users */
+static int hwloc_libxml2_needs_cleanup = 0;
+
+static void
+hwloc_libxml2_init_once(void)
+{
+  static int checked = 0;
+  if (!checked) {
+    /* disable stderr warnings */
+    xmlSetGenericErrorFunc(NULL, hwloc__xml_verbose() ? xmlGenericError : hwloc_libxml2_error_callback);
+    /* enforce libxml2 cleanup ? */
+    if (getenv("HWLOC_LIBXML_CLEANUP"))
+      hwloc_libxml2_needs_cleanup = 1;
+    checked = 1;
+  }
+}
+
+static void
+hwloc_libxml2_cleanup(void)
+{
+  if (hwloc_libxml2_needs_cleanup) {
+    xmlCleanupParser();
+  }
+}
+
+/*******************
+ * Import routines *
+ *******************/
+
+typedef struct hwloc__libxml_import_state_data_s {
+  xmlNode *node; /* current libxml node, always valid */
+  xmlNode *child; /* last processed child, or NULL if none yet */
+  xmlAttr *attr; /* last processed attribute, or NULL if none yet */
+} __hwloc_attribute_may_alias * hwloc__libxml_import_state_data_t;
+
+static int
+hwloc__libxml_import_next_attr(hwloc__xml_import_state_t state, char **namep, char **valuep)
+{
+  hwloc__libxml_import_state_data_t lstate = (void*) state->data;
+
+  xmlAttr *attr;
+  if (lstate->attr)
+    attr = lstate->attr->next;
+  else
+    attr = lstate->node->properties;
+  for (; attr; attr = attr->next)
+    if (attr->type == XML_ATTRIBUTE_NODE) {
+      /* use the first valid attribute content */
+      xmlNode *subnode;
+      for (subnode = attr->children; subnode; subnode = subnode->next) {
+	if (subnode->type == XML_TEXT_NODE) {
+	  if (subnode->content && subnode->content[0] != '\0' && subnode->content[0] != '\n') {
+	    *namep = (char *) attr->name;
+	    *valuep = (char *) subnode->content;
+	    lstate->attr = attr;
+	    return 0;
+	  }
+	} else {
+	  if (hwloc__xml_verbose())
+	    fprintf(stderr, "ignoring unexpected xml attr node type %u\n", subnode->type);
+	}
+      }
+    } else {
+      if (hwloc__xml_verbose())
+	fprintf(stderr, "ignoring unexpected xml attr type %u\n", attr->type);
+    }
+  return -1;
+}
+
+static int
+hwloc__libxml_import_find_child(hwloc__xml_import_state_t state,
+				hwloc__xml_import_state_t childstate,
+				char **tagp)
+{
+  hwloc__libxml_import_state_data_t lstate = (void*) state->data;
+  hwloc__libxml_import_state_data_t lchildstate = (void*) childstate->data;
+  xmlNode *child;
+  childstate->parent = state;
+  childstate->global = state->global;
+  if (!lstate->child)
+    return 0;
+  child = lstate->child->next;
+  for (; child; child = child->next)
+    if (child->type == XML_ELEMENT_NODE) {
+      lstate->child = lchildstate->node = child;
+      lchildstate->child = child->children;
+      lchildstate->attr = NULL;
+      *tagp = (char*) child->name;
+      return 1;
+    } else if (child->type == XML_TEXT_NODE) {
+      if (child->content && child->content[0] != '\0' && child->content[0] != '\n')
+	if (hwloc__xml_verbose())
+	  fprintf(stderr, "ignoring object text content %s\n", (const char*) child->content);
+    } else if (child->type != XML_COMMENT_NODE) {
+      if (hwloc__xml_verbose())
+	fprintf(stderr, "ignoring unexpected xml node type %u\n", child->type);
+    }
+
+  return 0;
+}
+
+static int
+hwloc__libxml_import_close_tag(hwloc__xml_import_state_t state __hwloc_attribute_unused)
+{
+  return 0;
+}
+
+static void
+hwloc__libxml_import_close_child(hwloc__xml_import_state_t state __hwloc_attribute_unused)
+{
+  /* nothing to do */
+}
+
+static int
+hwloc__libxml_import_get_content(hwloc__xml_import_state_t state,
+				 char **beginp, size_t expected_length)
+{
+  hwloc__libxml_import_state_data_t lstate = (void*) state->data;
+  xmlNode *child;
+  size_t length;
+
+  child = lstate->node->children;
+  if (!child || child->type != XML_TEXT_NODE) {
+    if (expected_length)
+      return -1;
+    *beginp = "";
+    return 0;
+  }
+
+  length = strlen((char *) child->content);
+  if (length != expected_length)
+    return -1;
+  *beginp = (char *) child->content;
+  return 1;
+}
+
+static void
+hwloc__libxml_import_close_content(hwloc__xml_import_state_t state __hwloc_attribute_unused)
+{
+  /* nothing to do */
+}
+
+static int
+hwloc_libxml_look_init(struct hwloc_xml_backend_data_s *bdata,
+		       struct hwloc__xml_import_state_s *state)
+{
+  hwloc__libxml_import_state_data_t lstate = (void*) state->data;
+  xmlNode* root_node;
+  xmlDtd *dtd;
+
+  assert(sizeof(*lstate) <= sizeof(state->data));
+
+  dtd = xmlGetIntSubset((xmlDoc*) bdata->data);
+  if (!dtd) {
+    if (hwloc__xml_verbose())
+      fprintf(stderr, "Loading XML topology without DTD\n");
+  } else if (strcmp((char *) dtd->SystemID, "hwloc.dtd")) {
+    if (hwloc__xml_verbose())
+      fprintf(stderr, "Loading XML topology with wrong DTD SystemID (%s instead of %s)\n",
+	      (char *) dtd->SystemID, "hwloc.dtd");
+  }
+
+  root_node = xmlDocGetRootElement((xmlDoc*) bdata->data);
+
+  if (strcmp((const char *) root_node->name, "topology") && strcmp((const char *) root_node->name, "root")) {
+    /* root node should be in "topology" class (or "root" if importing from < 1.0) */
+    if (hwloc__xml_verbose())
+      fprintf(stderr, "ignoring object of class `%s' not at the top the xml hierarchy\n", (const char *) root_node->name);
+    goto failed;
+  }
+
+  state->global->next_attr = hwloc__libxml_import_next_attr;
+  state->global->find_child = hwloc__libxml_import_find_child;
+  state->global->close_tag = hwloc__libxml_import_close_tag;
+  state->global->close_child = hwloc__libxml_import_close_child;
+  state->global->get_content = hwloc__libxml_import_get_content;
+  state->global->close_content = hwloc__libxml_import_close_content;
+  state->parent = NULL;
+  lstate->node = root_node;
+  lstate->child = root_node->children;
+  lstate->attr = NULL;
+  return 0; /* success */
+
+ failed:
+  return -1; /* failed */
+}
+
+static int
+hwloc_libxml_import_diff(struct hwloc__xml_import_state_s *state, const char *xmlpath, const char *xmlbuffer, int xmlbuflen, hwloc_topology_diff_t *firstdiffp, char **refnamep)
+{
+  hwloc__libxml_import_state_data_t lstate = (void*) state->data;
+  char *refname = NULL;
+  xmlDoc *doc = NULL;
+  xmlNode* root_node;
+  xmlDtd *dtd;
+  int ret;
+
+  assert(sizeof(*lstate) <= sizeof(state->data));
+
+  LIBXML_TEST_VERSION;
+  hwloc_libxml2_init_once();
+
+  errno = 0; /* set to 0 so that we know if libxml2 changed it */
+
+  if (xmlpath)
+    doc = xmlReadFile(xmlpath, NULL, 0);
+  else if (xmlbuffer)
+    doc = xmlReadMemory(xmlbuffer, xmlbuflen, "", NULL, 0);
+
+  if (!doc) {
+    if (!errno)
+      /* libxml2 read the file fine, but it got an error during parsing */
+    errno = EINVAL;
+    goto out;
+  }
+
+  dtd = xmlGetIntSubset(doc);
+  if (!dtd) {
+    if (hwloc__xml_verbose())
+      fprintf(stderr, "Loading XML topologydiff without DTD\n");
+  } else if (strcmp((char *) dtd->SystemID, "hwloc.dtd")) {
+    if (hwloc__xml_verbose())
+      fprintf(stderr, "Loading XML topologydiff with wrong DTD SystemID (%s instead of %s)\n",
+	      (char *) dtd->SystemID, "hwloc.dtd");
+  }
+
+  root_node = xmlDocGetRootElement(doc);
+
+  if (strcmp((const char *) root_node->name, "topologydiff")) {
+    /* root node should be in "topologydiff" class */
+    if (hwloc__xml_verbose())
+      fprintf(stderr, "ignoring object of class `%s' not at the top the xml hierarchy\n", (const char *) root_node->name);
+    goto out_with_doc;
+  }
+
+  state->global->next_attr = hwloc__libxml_import_next_attr;
+  state->global->find_child = hwloc__libxml_import_find_child;
+  state->global->close_tag = hwloc__libxml_import_close_tag;
+  state->global->close_child = hwloc__libxml_import_close_child;
+  state->global->get_content = hwloc__libxml_import_get_content;
+  state->global->close_content = hwloc__libxml_import_close_content;
+  state->parent = NULL;
+  lstate->node = root_node;
+  lstate->child = root_node->children;
+  lstate->attr = NULL;
+
+  while (1) {
+    char *attrname, *attrvalue;
+    if (state->global->next_attr(state, &attrname, &attrvalue) < 0)
+      break;
+    if (!strcmp(attrname, "refname")) {
+      free(refname);
+      refname = strdup(attrvalue);
+    } else
+      goto out_with_doc;
+  }
+
+  ret = hwloc__xml_import_diff(state, firstdiffp);
+  if (refnamep && !ret)
+    *refnamep = refname;
+  else
+    free(refname);
+
+  xmlFreeDoc(doc);
+  hwloc_libxml2_cleanup();
+  return ret;
+
+out_with_doc:
+  free(refname);
+  xmlFreeDoc(doc);
+  hwloc_libxml2_cleanup();
+out:
+  return -1; /* failed */
+}
+
+/********************
+ * Backend routines *
+ ********************/
+
+static void
+hwloc_libxml_backend_exit(struct hwloc_xml_backend_data_s *bdata)
+{
+  xmlFreeDoc((xmlDoc*)bdata->data);
+  hwloc_libxml2_cleanup();
+}
+
+static int
+hwloc_libxml_backend_init(struct hwloc_xml_backend_data_s *bdata,
+			  const char *xmlpath, const char *xmlbuffer, int xmlbuflen)
+{
+  xmlDoc *doc = NULL;
+
+  LIBXML_TEST_VERSION;
+  hwloc_libxml2_init_once();
+
+  errno = 0; /* set to 0 so that we know if libxml2 changed it */
+
+  if (xmlpath)
+    doc = xmlReadFile(xmlpath, NULL, 0);
+  else if (xmlbuffer)
+    doc = xmlReadMemory(xmlbuffer, xmlbuflen, "", NULL, 0);
+
+  if (!doc) {
+    if (!errno)
+      /* libxml2 read the file fine, but it got an error during parsing */
+    errno = EINVAL;
+    return -1;
+  }
+
+  bdata->look_init = hwloc_libxml_look_init;
+  bdata->look_failed = NULL;
+  bdata->backend_exit = hwloc_libxml_backend_exit;
+  bdata->data = doc;
+  return 0;
+}
+
+/*******************
+ * Export routines *
+ *******************/
+
+typedef struct hwloc__libxml_export_state_data_s {
+  xmlNodePtr current_node; /* current node to output */
+} __hwloc_attribute_may_alias * hwloc__libxml_export_state_data_t;
+
+static void
+hwloc__libxml_export_new_child(hwloc__xml_export_state_t parentstate,
+			       hwloc__xml_export_state_t state,
+			       const char *name)
+{
+  hwloc__libxml_export_state_data_t lpdata = (void *) parentstate->data;
+  hwloc__libxml_export_state_data_t ldata = (void *) state->data;
+
+  state->parent = parentstate;
+  state->new_child = parentstate->new_child;
+  state->new_prop = parentstate->new_prop;
+  state->add_content = parentstate->add_content;
+  state->end_object = parentstate->end_object;
+
+  ldata->current_node = xmlNewChild(lpdata->current_node, NULL, BAD_CAST name, NULL);
+}
+
+static void
+hwloc__libxml_export_new_prop(hwloc__xml_export_state_t state, const char *name, const char *value)
+{
+  hwloc__libxml_export_state_data_t ldata = (void *) state->data;
+  xmlNewProp(ldata->current_node, BAD_CAST name, BAD_CAST value);
+}
+
+static void
+hwloc__libxml_export_end_object(hwloc__xml_export_state_t state __hwloc_attribute_unused, const char *name __hwloc_attribute_unused)
+{
+  /* nothing to do */
+}
+
+static void
+hwloc__libxml_export_add_content(hwloc__xml_export_state_t state, const char *buffer, size_t length)
+{
+  hwloc__libxml_export_state_data_t ldata = (void *) state->data;
+  xmlNodeAddContentLen(ldata->current_node, BAD_CAST buffer, length);
+}
+
+static xmlDocPtr
+hwloc__libxml2_prepare_export(hwloc_topology_t topology, unsigned long flags)
+{
+  struct hwloc__xml_export_state_s state;
+  hwloc__libxml_export_state_data_t data = (void *) state.data;
+  xmlDocPtr doc = NULL;       /* document pointer */
+  xmlNodePtr root_node = NULL; /* root pointer */
+
+  assert(sizeof(*data) <= sizeof(state.data));
+
+  LIBXML_TEST_VERSION;
+  hwloc_libxml2_init_once();
+
+  /* Creates a new document, a node and set it as a root node. */
+  doc = xmlNewDoc(BAD_CAST "1.0");
+  root_node = xmlNewNode(NULL, BAD_CAST "topology");
+  xmlDocSetRootElement(doc, root_node);
+
+  /* Creates a DTD declaration. Isn't mandatory. */
+  (void) xmlCreateIntSubset(doc, BAD_CAST "topology", NULL, BAD_CAST "hwloc.dtd");
+
+  state.new_child = hwloc__libxml_export_new_child;
+  state.new_prop = hwloc__libxml_export_new_prop;
+  state.add_content = hwloc__libxml_export_add_content;
+  state.end_object = hwloc__libxml_export_end_object;
+
+  data->current_node = root_node;
+
+  hwloc__xml_export_topology (&state, topology, flags);
+
+  return doc;
+}
+
+static int
+hwloc_libxml_export_file(hwloc_topology_t topology, const char *filename, unsigned long flags)
+{
+  xmlDocPtr doc;
+  int ret;
+
+  errno = 0; /* set to 0 so that we know if libxml2 changed it */
+
+  doc = hwloc__libxml2_prepare_export(topology, flags);
+  ret = xmlSaveFormatFileEnc(filename, doc, "UTF-8", 1);
+  xmlFreeDoc(doc);
+  hwloc_libxml2_cleanup();
+
+  if (ret < 0) {
+    if (!errno)
+      /* libxml2 likely got an error before doing I/O */
+      errno = EINVAL;
+    return ret;
+  }
+  return 0;
+}
+
+static int
+hwloc_libxml_export_buffer(hwloc_topology_t topology, char **xmlbuffer, int *buflen, unsigned long flags)
+{
+  xmlDocPtr doc;
+
+  doc = hwloc__libxml2_prepare_export(topology, flags);
+  xmlDocDumpFormatMemoryEnc(doc, (xmlChar **)xmlbuffer, buflen, "UTF-8", 1);
+  xmlFreeDoc(doc);
+  hwloc_libxml2_cleanup();
+  return 0;
+}
+
+static xmlDocPtr
+hwloc__libxml2_prepare_export_diff(hwloc_topology_diff_t diff, const char *refname)
+{
+  struct hwloc__xml_export_state_s state;
+  hwloc__libxml_export_state_data_t data = (void *) state.data;
+  xmlDocPtr doc = NULL;       /* document pointer */
+  xmlNodePtr root_node = NULL; /* root pointer */
+
+  assert(sizeof(*data) <= sizeof(state.data));
+
+  LIBXML_TEST_VERSION;
+  hwloc_libxml2_init_once();
+
+  /* Creates a new document, a node and set it as a root node. */
+  doc = xmlNewDoc(BAD_CAST "1.0");
+  root_node = xmlNewNode(NULL, BAD_CAST "topologydiff");
+  if (refname)
+    xmlNewProp(root_node, BAD_CAST "refname", BAD_CAST refname);
+  xmlDocSetRootElement(doc, root_node);
+
+  /* Creates a DTD declaration. Isn't mandatory. */
+  (void) xmlCreateIntSubset(doc, BAD_CAST "topologydiff", NULL, BAD_CAST "hwloc.dtd");
+
+  state.new_child = hwloc__libxml_export_new_child;
+  state.new_prop = hwloc__libxml_export_new_prop;
+  state.add_content = hwloc__libxml_export_add_content;
+  state.end_object = hwloc__libxml_export_end_object;
+
+  data->current_node = root_node;
+
+  hwloc__xml_export_diff (&state, diff);
+
+  return doc;
+}
+
+static int
+hwloc_libxml_export_diff_file(hwloc_topology_diff_t diff, const char *refname, const char *filename)
+{
+  xmlDocPtr doc;
+  int ret;
+
+  errno = 0; /* set to 0 so that we know if libxml2 changed it */
+
+  doc = hwloc__libxml2_prepare_export_diff(diff, refname);
+  ret = xmlSaveFormatFileEnc(filename, doc, "UTF-8", 1);
+  xmlFreeDoc(doc);
+  hwloc_libxml2_cleanup();
+
+  if (ret < 0) {
+    if (!errno)
+      /* libxml2 likely got an error before doing I/O */
+      errno = EINVAL;
+    return ret;
+  }
+  return 0;
+}
+
+static int
+hwloc_libxml_export_diff_buffer(hwloc_topology_diff_t diff, const char *refname, char **xmlbuffer, int *buflen)
+{
+  xmlDocPtr doc;
+
+  doc = hwloc__libxml2_prepare_export_diff(diff, refname);
+  xmlDocDumpFormatMemoryEnc(doc, (xmlChar **)xmlbuffer, buflen, "UTF-8", 1);
+  xmlFreeDoc(doc);
+  hwloc_libxml2_cleanup();
+  return 0;
+}
+
+static void
+hwloc_libxml_free_buffer(void *xmlbuffer)
+{
+  xmlFree(BAD_CAST xmlbuffer);
+}
+
+/*************
+ * Callbacks *
+ *************/
+
+static struct hwloc_xml_callbacks hwloc_xml_libxml_callbacks = {
+  hwloc_libxml_backend_init,
+  hwloc_libxml_export_file,
+  hwloc_libxml_export_buffer,
+  hwloc_libxml_free_buffer,
+  hwloc_libxml_import_diff,
+  hwloc_libxml_export_diff_file,
+  hwloc_libxml_export_diff_buffer
+};
+
+static struct hwloc_xml_component hwloc_libxml_xml_component = {
+  NULL,
+  &hwloc_xml_libxml_callbacks
+};
+
+static int
+hwloc_xml_libxml_component_init(unsigned long flags)
+{
+  if (flags)
+    return -1;
+  if (hwloc_plugin_check_namespace("xml_libxml", "hwloc__xml_verbose") < 0)
+    return -1;
+  return 0;
+}
+
+#ifdef HWLOC_INSIDE_PLUGIN
+HWLOC_DECLSPEC extern const struct hwloc_component hwloc_xml_libxml_component;
+#endif
+
+const struct hwloc_component hwloc_xml_libxml_component = {
+  HWLOC_COMPONENT_ABI,
+  hwloc_xml_libxml_component_init, NULL,
+  HWLOC_COMPONENT_TYPE_XML,
+  0,
+  &hwloc_libxml_xml_component
+};
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-xml-nolibxml.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-xml-nolibxml.c
new file mode 100644
index 0000000000..42635cf893
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-xml-nolibxml.c
@@ -0,0 +1,873 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2017 Inria.  All rights reserved.
+ * Copyright © 2009-2011 Université Bordeaux
+ * Copyright © 2009-2011 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+#include <private/autogen/config.h>
+#include <hwloc.h>
+#include <hwloc/plugins.h>
+#include <private/private.h>
+#include <private/misc.h>
+#include <private/xml.h>
+#include <private/debug.h>
+
+#include <string.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+/*******************
+ * Import routines *
+ *******************/
+
+struct hwloc__nolibxml_backend_data_s {
+  size_t buflen; /* size of both buffer and copy buffers, set during backend_init() */
+  char *buffer; /* allocated and filled during backend_init() */
+  char *copy; /* allocated during backend_init(), used later during actual parsing */
+};
+
+typedef struct hwloc__nolibxml_import_state_data_s {
+  char *tagbuffer; /* buffer containing the next tag */
+  char *attrbuffer; /* buffer containing the next attribute of the current node */
+  char *tagname; /* tag name of the current node */
+  int closed; /* set if the current node is auto-closing */
+} __hwloc_attribute_may_alias * hwloc__nolibxml_import_state_data_t;
+
+static char *
+hwloc__nolibxml_import_ignore_spaces(char *buffer)
+{
+  return buffer + strspn(buffer, " \t\n");
+}
+
+static int
+hwloc__nolibxml_import_next_attr(hwloc__xml_import_state_t state, char **namep, char **valuep)
+{
+  hwloc__nolibxml_import_state_data_t nstate = (void*) state->data;
+  size_t namelen;
+  size_t len, escaped;
+  char *buffer, *value, *end;
+
+  if (!nstate->attrbuffer)
+    return -1;
+
+  /* find the beginning of an attribute */
+  buffer = hwloc__nolibxml_import_ignore_spaces(nstate->attrbuffer);
+  namelen = strspn(buffer, "abcdefghijklmnopqrstuvwxyz_");
+  if (buffer[namelen] != '=' || buffer[namelen+1] != '\"')
+    return -1;
+  buffer[namelen] = '\0';
+  *namep = buffer;
+
+  /* find the beginning of its value, and unescape it */
+  *valuep = value = buffer+namelen+2;
+  len = 0; escaped = 0;
+  while (value[len+escaped] != '\"') {
+    if (value[len+escaped] == '&') {
+      if (!strncmp(&value[1+len+escaped], "#10;", 4)) {
+	escaped += 4;
+	value[len] = '\n';
+      } else if (!strncmp(&value[1+len+escaped], "#13;", 4)) {
+	escaped += 4;
+	value[len] = '\r';
+      } else if (!strncmp(&value[1+len+escaped], "#9;", 3)) {
+	escaped += 3;
+	value[len] = '\t';
+      } else if (!strncmp(&value[1+len+escaped], "quot;", 5)) {
+	escaped += 5;
+	value[len] = '\"';
+      } else if (!strncmp(&value[1+len+escaped], "lt;", 3)) {
+	escaped += 3;
+	value[len] = '<';
+      } else if (!strncmp(&value[1+len+escaped], "gt;", 3)) {
+	escaped += 3;
+	value[len] = '>';
+      } else if (!strncmp(&value[1+len+escaped], "amp;", 4)) {
+	escaped += 4;
+	value[len] = '&';
+      } else {
+	return -1;
+      }
+    } else {
+      value[len] = value[len+escaped];
+    }
+    len++;
+    if (value[len+escaped] == '\0')
+      return -1;
+  }
+  value[len] = '\0';
+
+  /* find next attribute */
+  end = &value[len+escaped+1]; /* skip the ending " */
+  nstate->attrbuffer = hwloc__nolibxml_import_ignore_spaces(end);
+  return 0;
+}
+
+static int
+hwloc__nolibxml_import_find_child(hwloc__xml_import_state_t state,
+				  hwloc__xml_import_state_t childstate,
+				  char **tagp)
+{
+  hwloc__nolibxml_import_state_data_t nstate = (void*) state->data;
+  hwloc__nolibxml_import_state_data_t nchildstate = (void*) childstate->data;
+  char *buffer = nstate->tagbuffer;
+  char *end;
+  size_t namelen;
+
+  childstate->parent = state;
+  childstate->global = state->global;
+
+  /* auto-closed tags have no children */
+  if (nstate->closed)
+    return 0;
+
+  /* find the beginning of the tag */
+  buffer = hwloc__nolibxml_import_ignore_spaces(buffer);
+  if (buffer[0] != '<')
+    return -1;
+  buffer++;
+
+  /* if closing tag, return nothing and do not advance */
+  if (buffer[0] == '/')
+    return 0;
+
+  /* normal tag */
+  *tagp = nchildstate->tagname = buffer;
+
+  /* find the end, mark it and return it */
+  end = strchr(buffer, '>');
+  if (!end)
+    return -1;
+  end[0] = '\0';
+  nchildstate->tagbuffer = end+1;
+
+  /* handle auto-closing tags */
+  if (end[-1] == '/') {
+    nchildstate->closed = 1;
+    end[-1] = '\0';
+  } else
+    nchildstate->closed = 0;
+
+  /* find attributes */
+  namelen = strspn(buffer, "abcdefghijklmnopqrstuvwxyz1234567890_");
+
+  if (buffer[namelen] == '\0') {
+    /* no attributes */
+    nchildstate->attrbuffer = NULL;
+    return 1;
+  }
+
+  if (buffer[namelen] != ' ')
+    return -1;
+
+  /* found a space, likely starting attributes */
+  buffer[namelen] = '\0';
+  nchildstate->attrbuffer = buffer+namelen+1;
+  return 1;
+}
+
+static int
+hwloc__nolibxml_import_close_tag(hwloc__xml_import_state_t state)
+{
+  hwloc__nolibxml_import_state_data_t nstate = (void*) state->data;
+  char *buffer = nstate->tagbuffer;
+  char *end;
+
+  /* auto-closed tags need nothing */
+  if (nstate->closed)
+    return 0;
+
+  /* find the beginning of the tag */
+  buffer = hwloc__nolibxml_import_ignore_spaces(buffer);
+  if (buffer[0] != '<')
+    return -1;
+  buffer++;
+
+  /* find the end, mark it and return it to the parent */
+  end = strchr(buffer, '>');
+  if (!end)
+    return -1;
+  end[0] = '\0';
+  nstate->tagbuffer = end+1;
+
+  /* if closing tag, return nothing */
+  if (buffer[0] != '/' || strcmp(buffer+1, nstate->tagname) )
+    return -1;
+  return 0;
+}
+
+static void
+hwloc__nolibxml_import_close_child(hwloc__xml_import_state_t state)
+{
+  hwloc__nolibxml_import_state_data_t nstate = (void*) state->data;
+  hwloc__nolibxml_import_state_data_t nparent = (void*) state->parent->data;
+  nparent->tagbuffer = nstate->tagbuffer;
+}
+
+static int
+hwloc__nolibxml_import_get_content(hwloc__xml_import_state_t state,
+				   char **beginp, size_t expected_length)
+{
+  hwloc__nolibxml_import_state_data_t nstate = (void*) state->data;
+  char *buffer = nstate->tagbuffer;
+  size_t length;
+  char *end;
+
+  /* auto-closed tags have no content */
+  if (nstate->closed) {
+    if (expected_length)
+      return -1;
+    *beginp = "";
+    return 0;
+  }
+
+  /* find the next tag, where the content ends */
+  end = strchr(buffer, '<');
+  if (!end)
+    return -1;
+
+  length = (size_t) (end-buffer);
+  if (length != expected_length)
+    return -1;
+  nstate->tagbuffer = end;
+  *end = '\0'; /* mark as 0-terminated for now */
+  *beginp = buffer;
+  return 1;
+}
+
+static void
+hwloc__nolibxml_import_close_content(hwloc__xml_import_state_t state)
+{
+  /* put back the '<' that we overwrote to 0-terminate the content */
+  hwloc__nolibxml_import_state_data_t nstate = (void*) state->data;
+  if (!nstate->closed)
+    *nstate->tagbuffer = '<';
+}
+
+static int
+hwloc_nolibxml_look_init(struct hwloc_xml_backend_data_s *bdata,
+			 struct hwloc__xml_import_state_s *state)
+{
+  hwloc__nolibxml_import_state_data_t nstate = (void*) state->data;
+  struct hwloc__nolibxml_backend_data_s *nbdata = bdata->data;
+  char *buffer;
+
+  assert(sizeof(*nstate) <= sizeof(state->data));
+
+  /* use a copy in the temporary buffer, we may modify during parsing */
+  buffer = nbdata->copy;
+  memcpy(buffer, nbdata->buffer, nbdata->buflen);
+
+  /* skip headers */
+  while (!strncmp(buffer, "<?xml ", 6) || !strncmp(buffer, "<!DOCTYPE ", 10)) {
+    buffer = strchr(buffer, '\n');
+    if (!buffer)
+      goto failed;
+    buffer++;
+  }
+
+  /* find topology tag */
+  if (strncmp(buffer, "<topology>", 10))
+    goto failed;
+
+  state->global->next_attr = hwloc__nolibxml_import_next_attr;
+  state->global->find_child = hwloc__nolibxml_import_find_child;
+  state->global->close_tag = hwloc__nolibxml_import_close_tag;
+  state->global->close_child = hwloc__nolibxml_import_close_child;
+  state->global->get_content = hwloc__nolibxml_import_get_content;
+  state->global->close_content = hwloc__nolibxml_import_close_content;
+  state->parent = NULL;
+  nstate->closed = 0;
+  nstate->tagbuffer = buffer+10;
+  nstate->tagname = (char *) "topology";
+  nstate->attrbuffer = NULL;
+  return 0; /* success */
+
+ failed:
+  return -1; /* failed */
+}
+
+static void
+hwloc_nolibxml_look_failed(struct hwloc_xml_backend_data_s *bdata __hwloc_attribute_unused)
+{
+  /* not only when verbose */
+  fprintf(stderr, "Failed to parse XML input with the minimalistic parser. If it was not\n"
+	  "generated by hwloc, try enabling full XML support with libxml2.\n");
+}
+
+/********************
+ * Backend routines *
+ ********************/
+
+static void
+hwloc_nolibxml_backend_exit(struct hwloc_xml_backend_data_s *bdata)
+{
+  struct hwloc__nolibxml_backend_data_s *nbdata = bdata->data;
+  free(nbdata->buffer);
+  free(nbdata->copy);
+  free(nbdata);
+}
+
+static int
+hwloc_nolibxml_read_file(const char *xmlpath, char **bufferp, size_t *buflenp)
+{
+  FILE * file;
+  size_t buflen, offset, readlen;
+  struct stat statbuf;
+  char *buffer, *tmp;
+  size_t ret;
+
+  if (!strcmp(xmlpath, "-"))
+    xmlpath = "/dev/stdin";
+
+  file = fopen(xmlpath, "r");
+  if (!file)
+    goto out;
+
+  /* find the required buffer size for regular files, or use 4k when unknown, we'll realloc later if needed */
+  buflen = 4096;
+  if (!stat(xmlpath, &statbuf))
+    if (S_ISREG(statbuf.st_mode))
+      buflen = statbuf.st_size+1; /* one additional byte so that the first fread() gets EOF too */
+
+  buffer = malloc(buflen+1); /* one more byte for the ending \0 */
+  if (!buffer)
+    goto out_with_file;
+
+  offset = 0; readlen = buflen;
+  while (1) {
+    ret = fread(buffer+offset, 1, readlen, file);
+
+    offset += ret;
+    buffer[offset] = 0;
+
+    if (ret != readlen)
+      break;
+
+    buflen *= 2;
+    tmp = realloc(buffer, buflen+1);
+    if (!tmp)
+      goto out_with_buffer;
+    buffer = tmp;
+    readlen = buflen/2;
+  }
+
+  fclose(file);
+  *bufferp = buffer;
+  *buflenp = offset+1;
+  return 0;
+
+ out_with_buffer:
+  free(buffer);
+ out_with_file:
+  fclose(file);
+ out:
+  return -1;
+}
+
+static int
+hwloc_nolibxml_backend_init(struct hwloc_xml_backend_data_s *bdata,
+			    const char *xmlpath, const char *xmlbuffer, int xmlbuflen)
+{
+  struct hwloc__nolibxml_backend_data_s *nbdata = malloc(sizeof(*nbdata));
+
+  if (!nbdata)
+    goto out;
+  bdata->data = nbdata;
+
+  if (xmlbuffer) {
+    nbdata->buffer = malloc(xmlbuflen);
+    if (!nbdata->buffer)
+      goto out_with_nbdata;
+    nbdata->buflen = xmlbuflen;
+    memcpy(nbdata->buffer, xmlbuffer, xmlbuflen);
+
+  } else {
+    int err = hwloc_nolibxml_read_file(xmlpath, &nbdata->buffer, &nbdata->buflen);
+    if (err < 0)
+      goto out_with_nbdata;
+  }
+
+  /* allocate a temporary copy buffer that we may modify during parsing */
+  nbdata->copy = malloc(nbdata->buflen);
+  if (!nbdata->copy)
+    goto out_with_buffer;
+
+  bdata->look_init = hwloc_nolibxml_look_init;
+  bdata->look_failed = hwloc_nolibxml_look_failed;
+  bdata->backend_exit = hwloc_nolibxml_backend_exit;
+  return 0;
+
+out_with_buffer:
+  free(nbdata->buffer);
+out_with_nbdata:
+  free(nbdata);
+out:
+  return -1;
+}
+
+static int
+hwloc_nolibxml_import_diff(struct hwloc__xml_import_state_s *state,
+			   const char *xmlpath, const char *xmlbuffer, int xmlbuflen,
+			   hwloc_topology_diff_t *firstdiffp, char **refnamep)
+{
+  hwloc__nolibxml_import_state_data_t nstate = (void*) state->data;
+  struct hwloc__xml_import_state_s childstate;
+  char *refname = NULL;
+  char *buffer, *tmp, *tag;
+  size_t buflen;
+  int ret;
+
+  assert(sizeof(*nstate) <= sizeof(state->data));
+
+  if (xmlbuffer) {
+    buffer = malloc(xmlbuflen);
+    if (!buffer)
+      goto out;
+    memcpy(buffer, xmlbuffer, xmlbuflen);
+    buflen = xmlbuflen;
+
+  } else {
+    ret = hwloc_nolibxml_read_file(xmlpath, &buffer, &buflen);
+    if (ret < 0)
+      goto out;
+  }
+
+  /* skip headers */
+  tmp = buffer;
+  while (!strncmp(tmp, "<?xml ", 6) || !strncmp(tmp, "<!DOCTYPE ", 10)) {
+    tmp = strchr(tmp, '\n');
+    if (!tmp)
+      goto out_with_buffer;
+    tmp++;
+  }
+
+  state->global->next_attr = hwloc__nolibxml_import_next_attr;
+  state->global->find_child = hwloc__nolibxml_import_find_child;
+  state->global->close_tag = hwloc__nolibxml_import_close_tag;
+  state->global->close_child = hwloc__nolibxml_import_close_child;
+  state->global->get_content = hwloc__nolibxml_import_get_content;
+  state->global->close_content = hwloc__nolibxml_import_close_content;
+  state->parent = NULL;
+  nstate->closed = 0;
+  nstate->tagbuffer = tmp;
+  nstate->tagname = NULL;
+  nstate->attrbuffer = NULL;
+
+  /* find root */
+  ret = hwloc__nolibxml_import_find_child(state, &childstate, &tag);
+  if (ret < 0)
+    goto out_with_buffer;
+  if (strcmp(tag, "topologydiff"))
+    goto out_with_buffer;
+
+  while (1) {
+    char *attrname, *attrvalue;
+    if (hwloc__nolibxml_import_next_attr(&childstate, &attrname, &attrvalue) < 0)
+      break;
+    if (!strcmp(attrname, "refname")) {
+      free(refname);
+      refname = strdup(attrvalue);
+    } else
+      goto out_with_buffer;
+  }
+
+  ret = hwloc__xml_import_diff(&childstate, firstdiffp);
+  if (refnamep && !ret)
+    *refnamep = refname;
+  else
+    free(refname);
+
+  free(buffer);
+  return ret;
+
+out_with_buffer:
+  free(buffer);
+out:
+  return -1;
+}
+
+/*******************
+ * Export routines *
+ *******************/
+
+typedef struct hwloc__nolibxml_export_state_data_s {
+  char *buffer; /* (moving) buffer where to write */
+  size_t written; /* how many bytes were written (or would have be written if not truncated) */
+  size_t remaining; /* how many bytes are still available in the buffer */
+  unsigned indent; /* indentation level for the next line */
+  unsigned nr_children;
+  unsigned has_content;
+} __hwloc_attribute_may_alias * hwloc__nolibxml_export_state_data_t;
+
+static void
+hwloc__nolibxml_export_update_buffer(hwloc__nolibxml_export_state_data_t ndata, int res)
+{
+  if (res >= 0) {
+    ndata->written += res;
+    if (res >= (int) ndata->remaining)
+      res = ndata->remaining>0 ? (int)ndata->remaining-1 : 0;
+    ndata->buffer += res;
+    ndata->remaining -= res;
+  }
+}
+
+static char *
+hwloc__nolibxml_export_escape_string(const char *src)
+{
+  size_t fulllen, sublen;
+  char *escaped, *dst;
+
+  fulllen = strlen(src);
+
+  sublen = strcspn(src, "\n\r\t\"<>&");
+  if (sublen == fulllen)
+    return NULL; /* nothing to escape */
+
+  escaped = malloc(fulllen*6+1); /* escaped chars are replaced by at most 6 char */
+  dst = escaped;
+
+  memcpy(dst, src, sublen);
+  src += sublen;
+  dst += sublen;
+
+  while (*src) {
+    int replen;
+    switch (*src) {
+    case '\n': strcpy(dst, "&#10;");  replen=5; break;
+    case '\r': strcpy(dst, "&#13;");  replen=5; break;
+    case '\t': strcpy(dst, "&#9;");   replen=4; break;
+    case '\"': strcpy(dst, "&quot;"); replen=6; break;
+    case '<':  strcpy(dst, "&lt;");   replen=4; break;
+    case '>':  strcpy(dst, "&gt;");   replen=4; break;
+    case '&':  strcpy(dst, "&amp;");  replen=5; break;
+    default: replen=0; break;
+    }
+    dst+=replen; src++;
+
+    sublen = strcspn(src, "\n\r\t\"<>&");
+    memcpy(dst, src, sublen);
+    src += sublen;
+    dst += sublen;
+  }
+
+  *dst = 0;
+  return escaped;
+}
+
+static void
+hwloc__nolibxml_export_new_child(hwloc__xml_export_state_t parentstate,
+				 hwloc__xml_export_state_t state,
+				 const char *name)
+{
+  hwloc__nolibxml_export_state_data_t npdata = (void *) parentstate->data;
+  hwloc__nolibxml_export_state_data_t ndata = (void *) state->data;
+  int res;
+
+  assert(!npdata->has_content);
+  if (!npdata->nr_children) {
+    res = hwloc_snprintf(npdata->buffer, npdata->remaining, ">\n");
+    hwloc__nolibxml_export_update_buffer(npdata, res);
+  }
+  npdata->nr_children++;
+
+  state->parent = parentstate;
+  state->new_child = parentstate->new_child;
+  state->new_prop = parentstate->new_prop;
+  state->add_content = parentstate->add_content;
+  state->end_object = parentstate->end_object;
+
+  ndata->buffer = npdata->buffer;
+  ndata->written = npdata->written;
+  ndata->remaining = npdata->remaining;
+  ndata->indent = npdata->indent + 2;
+
+  ndata->nr_children = 0;
+  ndata->has_content = 0;
+
+  res = hwloc_snprintf(ndata->buffer, ndata->remaining, "%*s<%s", (int) npdata->indent, "", name);
+  hwloc__nolibxml_export_update_buffer(ndata, res);
+}
+
+static void
+hwloc__nolibxml_export_new_prop(hwloc__xml_export_state_t state, const char *name, const char *value)
+{
+  hwloc__nolibxml_export_state_data_t ndata = (void *) state->data;
+  char *escaped = hwloc__nolibxml_export_escape_string(value);
+  int res = hwloc_snprintf(ndata->buffer, ndata->remaining, " %s=\"%s\"", name, escaped ? (const char *) escaped : value);
+  hwloc__nolibxml_export_update_buffer(ndata, res);
+  free(escaped);
+}
+
+static void
+hwloc__nolibxml_export_end_object(hwloc__xml_export_state_t state, const char *name)
+{
+  hwloc__nolibxml_export_state_data_t ndata = (void *) state->data;
+  hwloc__nolibxml_export_state_data_t npdata = (void *) state->parent->data;
+  int res;
+
+  assert (!(ndata->has_content && ndata->nr_children));
+  if (ndata->has_content) {
+    res = hwloc_snprintf(ndata->buffer, ndata->remaining, "</%s>\n", name);
+  } else if (ndata->nr_children) {
+    res = hwloc_snprintf(ndata->buffer, ndata->remaining, "%*s</%s>\n", (int) npdata->indent, "", name);
+  } else {
+    res = hwloc_snprintf(ndata->buffer, ndata->remaining, "/>\n");
+  }
+  hwloc__nolibxml_export_update_buffer(ndata, res);
+
+  npdata->buffer = ndata->buffer;
+  npdata->written = ndata->written;
+  npdata->remaining = ndata->remaining;
+}
+
+static void
+hwloc__nolibxml_export_add_content(hwloc__xml_export_state_t state, const char *buffer, size_t length)
+{
+  hwloc__nolibxml_export_state_data_t ndata = (void *) state->data;
+  int res;
+
+  assert(!ndata->nr_children);
+  if (!ndata->has_content) {
+    res = hwloc_snprintf(ndata->buffer, ndata->remaining, ">");
+    hwloc__nolibxml_export_update_buffer(ndata, res);
+  }
+  ndata->has_content = 1;
+
+  res = hwloc_snprintf(ndata->buffer, ndata->remaining, buffer, length);
+  hwloc__nolibxml_export_update_buffer(ndata, res);
+}
+
+static size_t
+hwloc___nolibxml_prepare_export(hwloc_topology_t topology, char *xmlbuffer, int buflen, unsigned long flags)
+{
+  struct hwloc__xml_export_state_s state, childstate;
+  hwloc__nolibxml_export_state_data_t ndata = (void *) &state.data;
+  int res;
+
+  assert(sizeof(*ndata) <= sizeof(state.data));
+
+  state.new_child = hwloc__nolibxml_export_new_child;
+  state.new_prop = hwloc__nolibxml_export_new_prop;
+  state.add_content = hwloc__nolibxml_export_add_content;
+  state.end_object = hwloc__nolibxml_export_end_object;
+
+  ndata->indent = 0;
+  ndata->written = 0;
+  ndata->buffer = xmlbuffer;
+  ndata->remaining = buflen;
+
+  ndata->nr_children = 1; /* don't close a non-existing previous tag when opening the topology tag */
+  ndata->has_content = 0;
+
+  res = hwloc_snprintf(ndata->buffer, ndata->remaining,
+		 "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
+		 "<!DOCTYPE topology SYSTEM \"hwloc.dtd\">\n");
+  hwloc__nolibxml_export_update_buffer(ndata, res);
+  hwloc__nolibxml_export_new_child(&state, &childstate, "topology");
+  hwloc__xml_export_topology (&childstate, topology, flags);
+  hwloc__nolibxml_export_end_object(&childstate, "topology");
+
+  return ndata->written+1;
+}
+
+static int
+hwloc_nolibxml_export_buffer(hwloc_topology_t topology, char **bufferp, int *buflenp, unsigned long flags)
+{
+  char *buffer;
+  size_t bufferlen, res;
+
+  bufferlen = 16384; /* random guess for large enough default */
+  buffer = malloc(bufferlen);
+  if (!buffer)
+    return -1;
+  res = hwloc___nolibxml_prepare_export(topology, buffer, (int)bufferlen, flags);
+
+  if (res > bufferlen) {
+    char *tmp = realloc(buffer, res);
+    if (!tmp) {
+      free(buffer);
+      return -1;
+    }
+    buffer = tmp;
+    hwloc___nolibxml_prepare_export(topology, buffer, (int)res, flags);
+  }
+
+  *bufferp = buffer;
+  *buflenp = (int)res;
+  return 0;
+}
+
+static int
+hwloc_nolibxml_export_file(hwloc_topology_t topology, const char *filename, unsigned long flags)
+{
+  FILE *file;
+  char *buffer;
+  int bufferlen;
+  int ret;
+
+  ret = hwloc_nolibxml_export_buffer(topology, &buffer, &bufferlen, flags);
+  if (ret < 0)
+    return -1;
+
+  if (!strcmp(filename, "-")) {
+    file = stdout;
+  } else {
+    file = fopen(filename, "w");
+    if (!file) {
+      free(buffer);
+      return -1;
+    }
+  }
+
+  ret = (int)fwrite(buffer, 1, bufferlen-1 /* don't write the ending \0 */, file);
+  if (ret == bufferlen-1) {
+    ret = 0;
+  } else {
+    errno = ferror(file);
+    ret = -1;
+  }
+
+  free(buffer);
+
+  if (file != stdout)
+    fclose(file);
+  return ret;
+}
+
+static size_t
+hwloc___nolibxml_prepare_export_diff(hwloc_topology_diff_t diff, const char *refname, char *xmlbuffer, int buflen)
+{
+  struct hwloc__xml_export_state_s state, childstate;
+  hwloc__nolibxml_export_state_data_t ndata = (void *) &state.data;
+  int res;
+
+  assert(sizeof(*ndata) <= sizeof(state.data));
+
+  state.new_child = hwloc__nolibxml_export_new_child;
+  state.new_prop = hwloc__nolibxml_export_new_prop;
+  state.add_content = hwloc__nolibxml_export_add_content;
+  state.end_object = hwloc__nolibxml_export_end_object;
+
+  ndata->indent = 0;
+  ndata->written = 0;
+  ndata->buffer = xmlbuffer;
+  ndata->remaining = buflen;
+
+  ndata->nr_children = 1; /* don't close a non-existing previous tag when opening the topology tag */
+  ndata->has_content = 0;
+
+  res = hwloc_snprintf(ndata->buffer, ndata->remaining,
+		 "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
+		 "<!DOCTYPE topologydiff SYSTEM \"hwloc.dtd\">\n");
+  hwloc__nolibxml_export_update_buffer(ndata, res);
+  hwloc__nolibxml_export_new_child(&state, &childstate, "topologydiff");
+  if (refname)
+    hwloc__nolibxml_export_new_prop(&childstate, "refname", refname);
+  hwloc__xml_export_diff (&childstate, diff);
+  hwloc__nolibxml_export_end_object(&childstate, "topologydiff");
+
+  return ndata->written+1;
+}
+
+static int
+hwloc_nolibxml_export_diff_buffer(hwloc_topology_diff_t diff, const char *refname, char **bufferp, int *buflenp)
+{
+  char *buffer;
+  size_t bufferlen, res;
+
+  bufferlen = 16384; /* random guess for large enough default */
+  buffer = malloc(bufferlen);
+  if (!buffer)
+    return -1;
+  res = hwloc___nolibxml_prepare_export_diff(diff, refname, buffer, (int)bufferlen);
+
+  if (res > bufferlen) {
+    char *tmp = realloc(buffer, res);
+    if (!tmp) {
+      free(buffer);
+      return -1;
+    }
+    buffer = tmp;
+    hwloc___nolibxml_prepare_export_diff(diff, refname, buffer, (int)res);
+  }
+
+  *bufferp = buffer;
+  *buflenp = (int)res;
+  return 0;
+}
+
+static int
+hwloc_nolibxml_export_diff_file(hwloc_topology_diff_t diff, const char *refname, const char *filename)
+{
+  FILE *file;
+  char *buffer;
+  int bufferlen;
+  int ret;
+
+  ret = hwloc_nolibxml_export_diff_buffer(diff, refname, &buffer, &bufferlen);
+  if (ret < 0)
+    return -1;
+
+  if (!strcmp(filename, "-")) {
+    file = stdout;
+  } else {
+    file = fopen(filename, "w");
+    if (!file) {
+      free(buffer);
+      return -1;
+    }
+  }
+
+  ret = (int)fwrite(buffer, 1, bufferlen-1 /* don't write the ending \0 */, file);
+  if (ret == bufferlen-1) {
+    ret = 0;
+  } else {
+    errno = ferror(file);
+    ret = -1;
+  }
+
+  free(buffer);
+
+  if (file != stdout)
+    fclose(file);
+  return ret;
+}
+
+static void
+hwloc_nolibxml_free_buffer(void *xmlbuffer)
+{
+  free(xmlbuffer);
+}
+
+/*************
+ * Callbacks *
+ *************/
+
+static struct hwloc_xml_callbacks hwloc_xml_nolibxml_callbacks = {
+  hwloc_nolibxml_backend_init,
+  hwloc_nolibxml_export_file,
+  hwloc_nolibxml_export_buffer,
+  hwloc_nolibxml_free_buffer,
+  hwloc_nolibxml_import_diff,
+  hwloc_nolibxml_export_diff_file,
+  hwloc_nolibxml_export_diff_buffer
+};
+
+static struct hwloc_xml_component hwloc_nolibxml_xml_component = {
+  &hwloc_xml_nolibxml_callbacks,
+  NULL
+};
+
+const struct hwloc_component hwloc_xml_nolibxml_component = {
+  HWLOC_COMPONENT_ABI,
+  NULL, NULL,
+  HWLOC_COMPONENT_TYPE_XML,
+  0,
+  &hwloc_nolibxml_xml_component
+};
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-xml.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-xml.c
new file mode 100644
index 0000000000..73a9021751
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology-xml.c
@@ -0,0 +1,2398 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2017 Inria.  All rights reserved.
+ * Copyright © 2009-2011 Université Bordeaux
+ * Copyright © 2009-2011 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+#include <private/autogen/config.h>
+#include <hwloc.h>
+#include <private/xml.h>
+#include <private/private.h>
+#include <private/misc.h>
+#include <private/debug.h>
+
+#include <math.h>
+
+/* Return non-zero when XML warnings should be printed, as requested by the
+ * HWLOC_XML_VERBOSE environment variable. The variable is read only once
+ * per process and the result is cached in static state.
+ */
+int
+hwloc__xml_verbose(void)
+{
+  static int checked = 0;
+  static int verbose = 0;
+  if (!checked) {
+    const char *env = getenv("HWLOC_XML_VERBOSE");
+    if (env)
+      verbose = atoi(env);
+    checked = 1;
+  }
+  return verbose;
+}
+
+/* Return non-zero when the user forces the nolibxml importer via the
+ * HWLOC_NO_LIBXML_IMPORT environment variable (read once, then cached).
+ */
+static int
+hwloc_nolibxml_import(void)
+{
+  static int checked = 0;
+  static int nolibxml = 0;
+  if (!checked) {
+    const char *env = getenv("HWLOC_NO_LIBXML_IMPORT");
+    if (env)
+      nolibxml = atoi(env);
+    checked = 1;
+  }
+  return nolibxml;
+}
+
+/* Return non-zero when the user forces the nolibxml exporter via the
+ * HWLOC_NO_LIBXML_EXPORT environment variable (read once, then cached).
+ */
+static int
+hwloc_nolibxml_export(void)
+{
+  static int checked = 0;
+  static int nolibxml = 0;
+  if (!checked) {
+    const char *env = getenv("HWLOC_NO_LIBXML_EXPORT");
+    if (env)
+      nolibxml = atoi(env);
+    checked = 1;
+  }
+  return nolibxml;
+}
+
+#define BASE64_ENCODED_LENGTH(length) (4*(((length)+2)/3))
+
+/*********************************
+ ********* XML callbacks *********
+ *********************************/
+
+/* set when registering nolibxml and libxml components.
+ * modifications protected by the components mutex.
+ * read by the common XML code in topology-xml.c to jump to the right XML backend.
+ */
+static struct hwloc_xml_callbacks *hwloc_nolibxml_callbacks = NULL, *hwloc_libxml_callbacks = NULL;
+
+/* Record an XML component's callback tables, first registration wins.
+ * Called while registering the nolibxml and libxml components; per the
+ * comment above, modifications are protected by the components mutex.
+ */
+void
+hwloc_xml_callbacks_register(struct hwloc_xml_component *comp)
+{
+  if (!hwloc_nolibxml_callbacks)
+    hwloc_nolibxml_callbacks = comp->nolibxml_callbacks;
+  if (!hwloc_libxml_callbacks)
+    hwloc_libxml_callbacks = comp->libxml_callbacks;
+}
+
+/* Forget both registered XML callback tables (used on component teardown). */
+void
+hwloc_xml_callbacks_reset(void)
+{
+  hwloc_nolibxml_callbacks = NULL;
+  hwloc_libxml_callbacks = NULL;
+}
+
+/************************************************
+ ********* XML import (common routines) *********
+ ************************************************/
+
+#define _HWLOC_OBJ_CACHE_OLD HWLOC_OBJ_L5CACHE /* temporarily used when importing pre-v2.0 attribute-less cache types */
+
+/* Parse one XML attribute (name/value pair) of an <object> element and store
+ * it into `obj`. Unknown attributes, and attributes that do not apply to the
+ * object's type, are ignored (with a message when HWLOC_XML_VERBOSE is set).
+ * Handles both current attributes and deprecated ones from hwloc 0.9/1.x.
+ */
+static void
+hwloc__xml_import_object_attr(struct hwloc_topology *topology, struct hwloc_obj *obj,
+			      const char *name, const char *value,
+			      hwloc__xml_import_state_t state)
+{
+  if (!strcmp(name, "type")) {
+    /* already handled */
+    return;
+  }
+
+  else if (!strcmp(name, "os_level"))
+    { /* ignored since v2.0 but still allowed for backward compat with v1.10 */ }
+  else if (!strcmp(name, "os_index"))
+    obj->os_index = strtoul(value, NULL, 10);
+  else if (!strcmp(name, "gp_index")) {
+    obj->gp_index = strtoull(value, NULL, 10);
+    if (!obj->gp_index && hwloc__xml_verbose())
+      fprintf(stderr, "%s: unexpected zero gp_index, topology may be invalid\n", state->global->msgprefix);
+    if (obj->gp_index >= topology->next_gp_index)
+      topology->next_gp_index = obj->gp_index + 1;
+  } else if (!strcmp(name, "cpuset")) {
+    obj->cpuset = hwloc_bitmap_alloc();
+    hwloc_bitmap_sscanf(obj->cpuset, value);
+  } else if (!strcmp(name, "complete_cpuset")) {
+    obj->complete_cpuset = hwloc_bitmap_alloc();
+    hwloc_bitmap_sscanf(obj->complete_cpuset,value);
+  } else if (!strcmp(name, "online_cpuset")) {
+    { /* ignored since v2.0 but still allowed for backward compat with v1.10 */ }
+  } else if (!strcmp(name, "allowed_cpuset")) {
+    obj->allowed_cpuset = hwloc_bitmap_alloc();
+    hwloc_bitmap_sscanf(obj->allowed_cpuset, value);
+  } else if (!strcmp(name, "nodeset")) {
+    obj->nodeset = hwloc_bitmap_alloc();
+    hwloc_bitmap_sscanf(obj->nodeset, value);
+  } else if (!strcmp(name, "complete_nodeset")) {
+    obj->complete_nodeset = hwloc_bitmap_alloc();
+    hwloc_bitmap_sscanf(obj->complete_nodeset, value);
+  } else if (!strcmp(name, "allowed_nodeset")) {
+    obj->allowed_nodeset = hwloc_bitmap_alloc();
+    hwloc_bitmap_sscanf(obj->allowed_nodeset, value);
+  } else if (!strcmp(name, "name"))
+    obj->name = strdup(value);
+  else if (!strcmp(name, "subtype"))
+    obj->subtype = strdup(value);
+
+  else if (!strcmp(name, "cache_size")) {
+    unsigned long long lvalue = strtoull(value, NULL, 10);
+    if (hwloc_obj_type_is_cache(obj->type))
+      obj->attr->cache.size = lvalue;
+    else if (hwloc__xml_verbose())
+      fprintf(stderr, "%s: ignoring cache_size attribute for non-cache object type\n",
+	      state->global->msgprefix);
+  }
+
+  else if (!strcmp(name, "cache_linesize")) {
+    unsigned long lvalue = strtoul(value, NULL, 10);
+    if (hwloc_obj_type_is_cache(obj->type))
+      obj->attr->cache.linesize = lvalue;
+    else if (hwloc__xml_verbose())
+      fprintf(stderr, "%s: ignoring cache_linesize attribute for non-cache object type\n",
+	      state->global->msgprefix);
+  }
+
+  else if (!strcmp(name, "cache_associativity")) {
+    int lvalue = atoi(value);
+    if (hwloc_obj_type_is_cache(obj->type))
+      obj->attr->cache.associativity = lvalue;
+    else if (hwloc__xml_verbose())
+      fprintf(stderr, "%s: ignoring cache_associativity attribute for non-cache object type\n",
+	      state->global->msgprefix);
+  }
+
+  else if (!strcmp(name, "cache_type")) {
+    unsigned long lvalue = strtoul(value, NULL, 10);
+    if (hwloc_obj_type_is_cache(obj->type)) {
+      if (lvalue == HWLOC_OBJ_CACHE_UNIFIED
+	  || lvalue == HWLOC_OBJ_CACHE_DATA
+	  || lvalue == HWLOC_OBJ_CACHE_INSTRUCTION)
+	obj->attr->cache.type = (hwloc_obj_cache_type_t) lvalue;
+      else
+	fprintf(stderr, "%s: ignoring invalid cache_type attribute %lu\n",
+		state->global->msgprefix, lvalue);
+    } else if (hwloc__xml_verbose())
+      fprintf(stderr, "%s: ignoring cache_type attribute for non-cache object type\n",
+	      state->global->msgprefix);
+  }
+
+  else if (!strcmp(name, "local_memory"))
+    obj->memory.local_memory = strtoull(value, NULL, 10);
+
+  else if (!strcmp(name, "depth")) {
+    unsigned long lvalue = strtoul(value, NULL, 10);
+    switch (obj->type) {
+      case HWLOC_OBJ_L1CACHE:
+      case HWLOC_OBJ_L2CACHE:
+      case HWLOC_OBJ_L3CACHE:
+      case HWLOC_OBJ_L4CACHE:
+      case HWLOC_OBJ_L5CACHE:
+      case HWLOC_OBJ_L1ICACHE:
+      case HWLOC_OBJ_L2ICACHE:
+      case HWLOC_OBJ_L3ICACHE:
+	obj->attr->cache.depth = lvalue;
+	break;
+      case HWLOC_OBJ_GROUP:
+	/* will be overwritten by the core */
+	break;
+      case HWLOC_OBJ_BRIDGE:
+	/* will be overwritten by the core */
+	break;
+      default:
+	if (hwloc__xml_verbose())
+	  fprintf(stderr, "%s: ignoring depth attribute for object type without depth\n",
+		  state->global->msgprefix);
+	break;
+    }
+  }
+
+  else if (!strcmp(name, "kind")) {
+    unsigned long lvalue = strtoul(value, NULL, 10);
+    if (obj->type == HWLOC_OBJ_GROUP)
+      obj->attr->group.kind = lvalue;
+    else if (hwloc__xml_verbose())
+      fprintf(stderr, "%s: ignoring kind attribute for non-group object type\n",
+	      state->global->msgprefix);
+  }
+
+  else if (!strcmp(name, "subkind")) {
+    unsigned long lvalue = strtoul(value, NULL, 10);
+    if (obj->type == HWLOC_OBJ_GROUP)
+      obj->attr->group.subkind = lvalue;
+    else if (hwloc__xml_verbose())
+      fprintf(stderr, "%s: ignoring subkind attribute for non-group object type\n",
+	      state->global->msgprefix);
+  }
+
+  else if (!strcmp(name, "pci_busid")) {
+    switch (obj->type) {
+    case HWLOC_OBJ_PCI_DEVICE:
+    case HWLOC_OBJ_BRIDGE: {
+      unsigned domain, bus, dev, func;
+      if (sscanf(value, "%04x:%02x:%02x.%01x",
+		 &domain, &bus, &dev, &func) != 4) {
+	if (hwloc__xml_verbose())
+	  fprintf(stderr, "%s: ignoring invalid pci_busid format string %s\n",
+		  state->global->msgprefix, value);
+      } else {
+	obj->attr->pcidev.domain = domain;
+	obj->attr->pcidev.bus = bus;
+	obj->attr->pcidev.dev = dev;
+	obj->attr->pcidev.func = func;
+      }
+      break;
+    }
+    default:
+      if (hwloc__xml_verbose())
+	fprintf(stderr, "%s: ignoring pci_busid attribute for non-PCI object\n",
+		state->global->msgprefix);
+      break;
+    }
+  }
+
+  else if (!strcmp(name, "pci_type")) {
+    switch (obj->type) {
+    case HWLOC_OBJ_PCI_DEVICE:
+    case HWLOC_OBJ_BRIDGE: {
+      unsigned classid, vendor, device, subvendor, subdevice, revision;
+      if (sscanf(value, "%04x [%04x:%04x] [%04x:%04x] %02x",
+		 &classid, &vendor, &device, &subvendor, &subdevice, &revision) != 6) {
+	if (hwloc__xml_verbose())
+	  fprintf(stderr, "%s: ignoring invalid pci_type format string %s\n",
+		  state->global->msgprefix, value);
+      } else {
+	obj->attr->pcidev.class_id = classid;
+	obj->attr->pcidev.vendor_id = vendor;
+	obj->attr->pcidev.device_id = device;
+	obj->attr->pcidev.subvendor_id = subvendor;
+	obj->attr->pcidev.subdevice_id = subdevice;
+	obj->attr->pcidev.revision = revision;
+      }
+      break;
+    }
+    default:
+      if (hwloc__xml_verbose())
+	fprintf(stderr, "%s: ignoring pci_type attribute for non-PCI object\n",
+		state->global->msgprefix);
+      break;
+    }
+  }
+
+  else if (!strcmp(name, "pci_link_speed")) {
+    switch (obj->type) {
+    case HWLOC_OBJ_PCI_DEVICE:
+    case HWLOC_OBJ_BRIDGE: {
+      obj->attr->pcidev.linkspeed = (float) atof(value);
+      break;
+    }
+    default:
+      if (hwloc__xml_verbose())
+	fprintf(stderr, "%s: ignoring pci_link_speed attribute for non-PCI object\n",
+		state->global->msgprefix);
+      break;
+    }
+  }
+
+  else if (!strcmp(name, "bridge_type")) {
+    switch (obj->type) {
+    case HWLOC_OBJ_BRIDGE: {
+      unsigned upstream_type, downstream_type;
+      if (sscanf(value, "%u-%u", &upstream_type, &downstream_type) != 2) {
+	if (hwloc__xml_verbose())
+	  fprintf(stderr, "%s: ignoring invalid bridge_type format string %s\n",
+		  state->global->msgprefix, value);
+      } else {
+	obj->attr->bridge.upstream_type = (hwloc_obj_bridge_type_t) upstream_type;
+	obj->attr->bridge.downstream_type = (hwloc_obj_bridge_type_t) downstream_type;
+      };
+      break;
+    }
+    default:
+      if (hwloc__xml_verbose())
+	fprintf(stderr, "%s: ignoring bridge_type attribute for non-bridge object\n",
+		state->global->msgprefix);
+      break;
+    }
+  }
+
+  else if (!strcmp(name, "bridge_pci")) {
+    switch (obj->type) {
+    case HWLOC_OBJ_BRIDGE: {
+      unsigned domain, secbus, subbus;
+      if (sscanf(value, "%04x:[%02x-%02x]",
+		 &domain, &secbus, &subbus) != 3) {
+	if (hwloc__xml_verbose())
+	  fprintf(stderr, "%s: ignoring invalid bridge_pci format string %s\n",
+		  state->global->msgprefix, value);
+      } else {
+	obj->attr->bridge.downstream.pci.domain = domain;
+	obj->attr->bridge.downstream.pci.secondary_bus = secbus;
+	obj->attr->bridge.downstream.pci.subordinate_bus = subbus;
+      }
+      break;
+    }
+    default:
+      if (hwloc__xml_verbose())
+	fprintf(stderr, "%s: ignoring bridge_pci attribute for non-bridge object\n",
+		state->global->msgprefix);
+      break;
+    }
+  }
+
+  else if (!strcmp(name, "osdev_type")) {
+    switch (obj->type) {
+    case HWLOC_OBJ_OS_DEVICE: {
+      unsigned osdev_type;
+      if (sscanf(value, "%u", &osdev_type) != 1) {
+	if (hwloc__xml_verbose())
+	  fprintf(stderr, "%s: ignoring invalid osdev_type format string %s\n",
+		  state->global->msgprefix, value);
+      } else
+	obj->attr->osdev.type = (hwloc_obj_osdev_type_t) osdev_type;
+      break;
+    }
+    default:
+      if (hwloc__xml_verbose())
+	fprintf(stderr, "%s: ignoring osdev_type attribute for non-osdev object\n",
+		state->global->msgprefix);
+      break;
+    }
+  }
+
+
+
+
+  /*************************
+   * deprecated (from 1.0)
+   */
+  else if (!strcmp(name, "dmi_board_vendor")) {
+    hwloc_obj_add_info(obj, "DMIBoardVendor", value);
+  }
+  else if (!strcmp(name, "dmi_board_name")) {
+    hwloc_obj_add_info(obj, "DMIBoardName", value);
+  }
+
+  /*************************
+   * deprecated (from 0.9)
+   */
+  else if (!strcmp(name, "memory_kB")) {
+    unsigned long long lvalue = strtoull(value, NULL, 10);
+    switch (obj->type) {
+      case _HWLOC_OBJ_CACHE_OLD:
+	obj->attr->cache.size = lvalue << 10;
+	break;
+      case HWLOC_OBJ_NUMANODE:
+      case HWLOC_OBJ_MACHINE:
+      case HWLOC_OBJ_SYSTEM:
+	obj->memory.local_memory = lvalue << 10;
+	break;
+      default:
+	if (hwloc__xml_verbose())
+	  fprintf(stderr, "%s: ignoring memory_kB attribute for object type without memory\n",
+		  state->global->msgprefix);
+	break;
+    }
+  }
+  else if (!strcmp(name, "huge_page_size_kB")) {
+    unsigned long lvalue = strtoul(value, NULL, 10);
+    switch (obj->type) {
+      case HWLOC_OBJ_NUMANODE:
+      case HWLOC_OBJ_MACHINE:
+      case HWLOC_OBJ_SYSTEM:
+	if (!obj->memory.page_types) {
+	  /* calloc: zero-initialize so the sibling count field isn't left
+	   * uninitialized, and check the result before using it below. */
+	  obj->memory.page_types = calloc(1, sizeof(*obj->memory.page_types));
+	  if (obj->memory.page_types)
+	    obj->memory.page_types_len = 1;
+	}
+	if (obj->memory.page_types) /* silently drop the attribute on OOM */
+	  obj->memory.page_types[0].size = lvalue << 10;
+	break;
+      default:
+	if (hwloc__xml_verbose())
+	  fprintf(stderr, "%s: ignoring huge_page_size_kB attribute for object type without huge pages\n",
+		  state->global->msgprefix);
+	break;
+    }
+  }
+  else if (!strcmp(name, "huge_page_free")) {
+    unsigned long lvalue = strtoul(value, NULL, 10);
+    switch (obj->type) {
+      case HWLOC_OBJ_NUMANODE:
+      case HWLOC_OBJ_MACHINE:
+      case HWLOC_OBJ_SYSTEM:
+	if (!obj->memory.page_types) {
+	  /* calloc: zero-initialize so the sibling size field isn't left
+	   * uninitialized, and check the result before using it below. */
+	  obj->memory.page_types = calloc(1, sizeof(*obj->memory.page_types));
+	  if (obj->memory.page_types)
+	    obj->memory.page_types_len = 1;
+	}
+	if (obj->memory.page_types) /* silently drop the attribute on OOM */
+	  obj->memory.page_types[0].count = lvalue;
+	break;
+      default:
+	if (hwloc__xml_verbose())
+	  fprintf(stderr, "%s: ignoring huge_page_free attribute for object type without huge pages\n",
+		  state->global->msgprefix);
+	break;
+    }
+  }
+  /*
+   * end of deprecated (from 0.9)
+   *******************************/
+
+
+
+  else if (hwloc__xml_verbose())
+    fprintf(stderr, "%s: ignoring unknown object attribute %s\n",
+	    state->global->msgprefix, name);
+}
+
+
+/* Import an <info name=... value=.../> child of an object.
+ * "Type"/"CoProcType" infos become the object subtype; everything else is
+ * appended to the object's info list (missing value imported as "").
+ * Returns the close_tag() result, or -1 on an unknown attribute.
+ * NOTE(review): the early return -1 on an unknown attribute skips
+ * close_tag() — presumably the caller aborts the whole import then; confirm.
+ */
+static int
+hwloc__xml_import_info(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj,
+		       hwloc__xml_import_state_t state)
+{
+  char *infoname = NULL;
+  char *infovalue = NULL;
+
+  while (1) {
+    char *attrname, *attrvalue;
+    if (state->global->next_attr(state, &attrname, &attrvalue) < 0)
+      break;
+    if (!strcmp(attrname, "name"))
+      infoname = attrvalue;
+    else if (!strcmp(attrname, "value"))
+      infovalue = attrvalue;
+    else
+      return -1;
+  }
+
+  if (infoname) {
+    /* empty strings are ignored by libxml */
+    if (!strcmp(infoname, "Type") || !strcmp(infoname, "CoProcType")) {
+      if (infovalue)
+	obj->subtype = strdup(infovalue);
+    } else {
+      hwloc_obj_add_info(obj, infoname, infovalue ? infovalue : "");
+    }
+  }
+
+  return state->global->close_tag(state);
+}
+
+/* Import a <page_type size=... count=.../> child of an object and append it
+ * to obj->memory.page_types. Entries with size==0 are skipped.
+ * Returns the close_tag() result, or -1 on an unknown attribute.
+ */
+static int
+hwloc__xml_import_pagetype(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj,
+			   hwloc__xml_import_state_t state)
+{
+  uint64_t size = 0, count = 0;
+
+  while (1) {
+    char *attrname, *attrvalue;
+    if (state->global->next_attr(state, &attrname, &attrvalue) < 0)
+      break;
+    if (!strcmp(attrname, "size"))
+      size = strtoull(attrvalue, NULL, 10);
+    else if (!strcmp(attrname, "count"))
+      count = strtoull(attrvalue, NULL, 10);
+    else
+      return -1;
+  }
+
+  if (size) {
+    int idx = obj->memory.page_types_len;
+    struct hwloc_obj_memory_page_type_s *tmp;
+    /* grow via a temporary so the old array survives a failed realloc */
+    tmp = realloc(obj->memory.page_types, (idx+1)*sizeof(*obj->memory.page_types));
+    if (tmp) { /* if failed to allocate, ignore this page_type entry */
+      obj->memory.page_types = tmp;
+      obj->memory.page_types_len = idx+1;
+      obj->memory.page_types[idx].size = size;
+      obj->memory.page_types[idx].count = count;
+    }
+  }
+
+  return state->global->close_tag(state);
+}
+
+/* Import a v1 (hwloc 1.x) <distances> child of an object: an nbobjs*nbobjs
+ * latency matrix scaled by latency_base. Only matrices attached to the root
+ * object and covering at least 2 objects are kept; they are queued on the
+ * backend data (first_v1dist/last_v1dist) for later conversion.
+ * Returns the close_tag() result, or -1 on malformed XML / allocation failure.
+ */
+static int
+hwloc__xml_import_v1distances(struct hwloc_xml_backend_data_s *data,
+			      hwloc_obj_t obj,
+			      hwloc__xml_import_state_t state)
+{
+  unsigned long reldepth = 0, nbobjs = 0;
+  float latbase = 0;
+  char *tag;
+  int ret;
+
+  while (1) {
+    char *attrname, *attrvalue;
+    if (state->global->next_attr(state, &attrname, &attrvalue) < 0)
+      break;
+    if (!strcmp(attrname, "nbobjs"))
+      nbobjs = strtoul(attrvalue, NULL, 10);
+    else if (!strcmp(attrname, "relative_depth"))
+      reldepth = strtoul(attrvalue, NULL, 10);
+    else if (!strcmp(attrname, "latency_base"))
+      latbase = (float) atof(attrvalue);
+    else
+      return -1;
+  }
+
+  if (nbobjs && reldepth && latbase) {
+    unsigned i;
+    float *matrix;
+    struct hwloc__xml_imported_v1distances_s *v1dist;
+
+    matrix = malloc(nbobjs*nbobjs*sizeof(float));
+    v1dist = malloc(sizeof(*v1dist));
+    if (!matrix || !v1dist) {
+      if (hwloc__xml_verbose())
+	fprintf(stderr, "%s: failed to allocate v1distance matrix for %lu objects\n",
+		state->global->msgprefix, nbobjs);
+      free(v1dist);
+      free(matrix);
+      return -1;
+    }
+
+    v1dist->kind = HWLOC_DISTANCES_KIND_FROM_OS|HWLOC_DISTANCES_KIND_MEANS_LATENCY;
+    /* TODO: we can't know for sure if it comes from the OS.
+     * On Linux/x86, it would be 10 on the diagonal.
+     * On Solaris/T5, 15 on the diagonal.
+     * Just check whether all values are integers, and that all values on the diagonal are minimal and identical?
+     */
+
+    v1dist->nbobjs = nbobjs;
+    v1dist->floats = matrix;
+
+    for(i=0; i<nbobjs*nbobjs; i++) {
+      struct hwloc__xml_import_state_s childstate;
+      char *attrname, *attrvalue;
+      float val;
+
+      ret = state->global->find_child(state, &childstate, &tag);
+      if (ret <= 0 || strcmp(tag, "latency")) {
+	/* a latency child is needed */
+	free(matrix);
+	free(v1dist);
+	return -1;
+      }
+
+      ret = state->global->next_attr(&childstate, &attrname, &attrvalue);
+      if (ret < 0 || strcmp(attrname, "value")) {
+	free(matrix);
+	free(v1dist);
+	return -1;
+      }
+
+      val = (float) atof((char *) attrvalue);
+      matrix[i] = val * latbase;
+
+      ret = state->global->close_tag(&childstate);
+      if (ret < 0) {
+	/* was leaking matrix and v1dist on this error path */
+	free(matrix);
+	free(v1dist);
+	return -1;
+      }
+
+      state->global->close_child(&childstate);
+    }
+
+    if (nbobjs < 2) {
+      /* distances with a single object are useless, even if the XML isn't invalid */
+      assert(nbobjs == 1);
+      if (hwloc__xml_verbose())
+	fprintf(stderr, "%s: ignoring invalid distance matrix with only 1 object\n",
+		state->global->msgprefix);
+      free(matrix);
+      free(v1dist);
+
+    } else if (obj->parent) {
+      /* we currently only import distances attached to root.
+       * we can't save obj in v1dist because obj could be dropped during insert if ignored.
+       * we could save its complete_cpu/nodeset instead to find it back later.
+       * but it doesn't matter much since only NUMA distances attached to root matter.
+       */
+      free(matrix);
+      free(v1dist);
+
+    } else {
+      /* queue the distance for real */
+      v1dist->prev = data->last_v1dist;
+      v1dist->next = NULL;
+      if (data->last_v1dist)
+	data->last_v1dist->next = v1dist;
+      else
+	data->first_v1dist = v1dist;
+      data->last_v1dist = v1dist;
+    }
+  }
+
+  return state->global->close_tag(state);
+}
+
+/* Import a <userdata> child of an object.
+ * Four cases: no import callback registered (content is read and discarded),
+ * raw pass-through (userdata_not_decoded: content forwarded with a fake
+ * "base64:"/"normal:" name prefix), base64-encoded payload (decoded before
+ * the callback), or plain content (including the length==0 case).
+ * Returns the close_tag() result, or -1 on error.
+ */
+static int
+hwloc__xml_import_userdata(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj,
+			   hwloc__xml_import_state_t state)
+{
+  size_t length = 0;
+  int encoded = 0;
+  char *name = NULL; /* optional */
+  int ret;
+
+  while (1) {
+    char *attrname, *attrvalue;
+    if (state->global->next_attr(state, &attrname, &attrvalue) < 0)
+      break;
+    if (!strcmp(attrname, "length"))
+      length = strtoul(attrvalue, NULL, 10);
+    else if (!strcmp(attrname, "encoding"))
+      encoded = !strcmp(attrvalue, "base64");
+    else if (!strcmp(attrname, "name"))
+      name = attrvalue;
+    else
+      return -1;
+  }
+
+  if (!topology->userdata_import_cb) {
+    /* no consumer: still read the content so parsing stays in sync */
+    char *buffer;
+    size_t reallength = encoded ? BASE64_ENCODED_LENGTH(length) : length;
+    ret = state->global->get_content(state, &buffer, reallength);
+    if (ret < 0)
+      return -1;
+
+  } else if (topology->userdata_not_decoded) {
+      /* forward the raw (possibly encoded) bytes; encode the original
+       * encoding into a fake name so the callback can tell them apart */
+      char *buffer, *fakename;
+      size_t reallength = encoded ? BASE64_ENCODED_LENGTH(length) : length;
+      ret = state->global->get_content(state, &buffer, reallength);
+      if (ret < 0)
+        return -1;
+      /* 6 for "base64"/"normal", 1 separator, name or "anon", NUL */
+      fakename = malloc(6 + 1 + (name ? strlen(name) : 4) + 1);
+      if (!fakename)
+	return -1;
+      sprintf(fakename, encoded ? "base64%c%s" : "normal%c%s", name ? ':' : '-', name ? name : "anon");
+      topology->userdata_import_cb(topology, obj, fakename, buffer, length);
+      free(fakename);
+
+  } else if (encoded && length) {
+      char *encoded_buffer;
+      size_t encoded_length = BASE64_ENCODED_LENGTH(length);
+      ret = state->global->get_content(state, &encoded_buffer, encoded_length);
+      if (ret < 0)
+        return -1;
+      if (ret) {
+	char *decoded_buffer = malloc(length+1);
+	if (!decoded_buffer)
+	  return -1;
+	assert(encoded_buffer[encoded_length] == 0);
+	ret = hwloc_decode_from_base64(encoded_buffer, decoded_buffer, length+1);
+	if (ret != (int) length) {
+	  free(decoded_buffer);
+	  return -1;
+	}
+	topology->userdata_import_cb(topology, obj, name, decoded_buffer, length);
+	free(decoded_buffer);
+      }
+
+  } else { /* always handle length==0 in the non-encoded case */
+      char *buffer = "";
+      if (length) {
+	ret = state->global->get_content(state, &buffer, length);
+	if (ret < 0)
+	  return -1;
+      }
+      topology->userdata_import_cb(topology, obj, name, buffer, length);
+  }
+
+  state->global->close_content(state);
+  return state->global->close_tag(state);
+}
+
+/* Print a one-time diagnostic when XML children are found out of cpuset
+ * order during import, including the hwloc versions/process names that
+ * generated and are loading the XML. Diagnostic output only; the actual
+ * reordering is done by the caller.
+ */
+static void hwloc__xml_import_report_outoforder(hwloc_topology_t topology, hwloc_obj_t new, hwloc_obj_t old)
+{
+  char *progname = hwloc_progname(topology);
+  const char *origversion = hwloc_obj_get_info_by_name(topology->levels[0][0], "hwlocVersion");
+  const char *origprogname = hwloc_obj_get_info_by_name(topology->levels[0][0], "ProcessName");
+  char *c1, *cc1, t1[64];
+  char *c2 = NULL, *cc2 = NULL, t2[64];
+
+  hwloc_bitmap_asprintf(&c1, new->cpuset);
+  hwloc_bitmap_asprintf(&cc1, new->complete_cpuset);
+  hwloc_obj_type_snprintf(t1, sizeof(t1), new, 0);
+
+  /* old's sets may legitimately be missing, hence the NULL guards */
+  if (old->cpuset)
+    hwloc_bitmap_asprintf(&c2, old->cpuset);
+  if (old->complete_cpuset)
+    hwloc_bitmap_asprintf(&cc2, old->complete_cpuset);
+  hwloc_obj_type_snprintf(t2, sizeof(t2), old, 0);
+
+  fprintf(stderr, "****************************************************************************\n");
+  fprintf(stderr, "* hwloc has encountered an out-of-order XML topology load.\n");
+  fprintf(stderr, "* Object %s cpuset %s complete %s\n",
+	  t1, c1, cc1);
+  fprintf(stderr, "* was inserted after object %s with %s and %s.\n",
+	  t2, c2 ? c2 : "none", cc2 ? cc2 : "none");
+  /* fixed typo: "occured" -> "occurred" */
+  fprintf(stderr, "* The error occurred in hwloc %s inside process `%s', while\n",
+	  HWLOC_VERSION,
+	  progname ? progname : "<unknown>");
+  if (origversion || origprogname)
+    fprintf(stderr, "* the input XML was generated by hwloc %s inside process `%s'.\n",
+	    origversion ? origversion : "(unknown version)",
+	    origprogname ? origprogname : "<unknown>");
+  else
+    fprintf(stderr, "* the input XML was generated by an unspecified ancient hwloc release.\n");
+  fprintf(stderr, "* Please check that your input topology XML file is valid.\n");
+  fprintf(stderr, "* Set HWLOC_DEBUG_CHECK=1 in the environment to detect further issues.\n");
+  fprintf(stderr, "****************************************************************************\n");
+
+  free(c1);
+  free(cc1);
+  free(c2);
+  free(cc2);
+  free(progname);
+}
+
+/* Recursively import one <object> element (attributes, then children),
+ * insert it under `parent`, and validate it against v2.0 invariants while
+ * keeping backward compatibility with pre-2.0 XMLs. Sets *gotignored when a
+ * child object was dropped. Returns close_tag()'s result, or -1 on error.
+ * Ownership: `obj` is either inserted into the topology or freed here;
+ * the root object (parent==NULL) is owned by the caller on error.
+ */
+static int
+hwloc__xml_import_object(hwloc_topology_t topology,
+			 struct hwloc_xml_backend_data_s *data,
+			 hwloc_obj_t parent, hwloc_obj_t obj, int *gotignored,
+			 hwloc__xml_import_state_t state)
+{
+  int ignored = 0;
+  int childrengotignored = 0;
+  int attribute_less_cache = 0;
+
+  /* process attributes */
+  while (1) {
+    char *attrname, *attrvalue;
+    if (state->global->next_attr(state, &attrname, &attrvalue) < 0)
+      break;
+    if (!strcmp(attrname, "type")) {
+      if (hwloc_type_sscanf(attrvalue, &obj->type, NULL, 0) < 0) {
+	if (!strcasecmp(attrvalue, "Cache")) {
+	  obj->type = _HWLOC_OBJ_CACHE_OLD; /* will be fixed below */
+	  attribute_less_cache = 1;
+	} else
+	  goto error_with_object;
+      }
+    } else {
+      /* type needed first */
+      if (obj->type == HWLOC_OBJ_TYPE_NONE)
+	goto error_with_object;
+      hwloc__xml_import_object_attr(topology, obj, attrname, attrvalue, state);
+    }
+  }
+
+  /* fixup attribute-less caches imported from pre-v2.0 XMLs */
+  if (attribute_less_cache) {
+    assert(obj->type == _HWLOC_OBJ_CACHE_OLD);
+    obj->type = hwloc_cache_type_by_depth_type(obj->attr->cache.depth, obj->attr->cache.type);
+  }
+
+  /* check that cache attributes are coherent with the actual type */
+  if (hwloc_obj_type_is_cache(obj->type)
+      && obj->type != hwloc_cache_type_by_depth_type(obj->attr->cache.depth, obj->attr->cache.type)) {
+    if (hwloc__xml_verbose())
+      fprintf(stderr, "invalid cache type %s with attribute depth %u and type %d\n",
+	      hwloc_type_name(obj->type), obj->attr->cache.depth, (int) obj->attr->cache.type);
+    goto error_with_object;
+  }
+
+  /* fixup Misc objects inserted by cpusets in pre-v2.0 XMLs */
+  if (obj->type == HWLOC_OBJ_MISC && obj->cpuset)
+    obj->type = HWLOC_OBJ_GROUP;
+
+  /* check special types vs cpuset */
+  if (!obj->cpuset && !hwloc_obj_type_is_special(obj->type)) {
+    if (hwloc__xml_verbose())
+      fprintf(stderr, "invalid normal object %s P#%u without cpuset\n",
+	      hwloc_type_name(obj->type), obj->os_index);
+    goto error_with_object;
+  }
+  if (obj->cpuset && hwloc_obj_type_is_special(obj->type)) {
+    if (hwloc__xml_verbose())
+      fprintf(stderr, "invalid special object %s with cpuset\n",
+	      hwloc_type_name(obj->type));
+    goto error_with_object;
+  }
+
+  /* check parent vs child sets */
+  if (obj->cpuset && parent && !parent->cpuset) {
+    if (hwloc__xml_verbose())
+      fprintf(stderr, "invalid object %s P#%u with cpuset while parent has none\n",
+	      hwloc_type_name(obj->type), obj->os_index);
+    goto error_with_object;
+  }
+  if (obj->nodeset && parent && !parent->nodeset) {
+    if (hwloc__xml_verbose())
+      fprintf(stderr, "invalid object %s P#%u with nodeset while parent has none\n",
+	      hwloc_type_name(obj->type), obj->os_index);
+    goto error_with_object;
+  }
+
+  /* check set consistency.
+   * 1.7.2 and earlier reported I/O Groups with only a cpuset, we don't want to reject those XMLs yet.
+   * Ignore those Groups since fixing the missing sets is hard (would need to look at children sets which are not available yet).
+   * Just abort the XML for non-Groups.
+   */
+  if (!obj->cpuset != !obj->allowed_cpuset
+      || !obj->cpuset != !obj->complete_cpuset) {
+    /* has some cpuset without others */
+    if (obj->type == HWLOC_OBJ_GROUP)
+      ignored = 1;
+    else
+      goto error_with_object;
+  } else if (!obj->nodeset != !obj->allowed_nodeset
+	     || !obj->nodeset != !obj->complete_nodeset) {
+    /* has some nodeset withot others */
+    if (obj->type == HWLOC_OBJ_GROUP)
+      ignored = 1;
+    else
+      goto error_with_object;
+  } else if (obj->nodeset && !obj->cpuset) {
+    /* has nodesets without cpusets (the contrary is allowed in pre-2.0) */
+    if (obj->type == HWLOC_OBJ_GROUP)
+      ignored = 1;
+    else
+      goto error_with_object;
+  }
+
+  /* check NUMA nodes */
+  if (obj->type == HWLOC_OBJ_NUMANODE) {
+    if (!obj->nodeset) {
+      if (hwloc__xml_verbose())
+	fprintf(stderr, "invalid NUMA node object P#%u without nodeset\n",
+		obj->os_index);
+      goto error_with_object;
+    }
+    /* chain NUMA nodes on the backend data's cousin list as we go */
+    data->nbnumanodes++;
+    obj->prev_cousin = data->last_numanode;
+    obj->next_cousin = NULL;
+    if (data->last_numanode)
+      data->last_numanode->next_cousin = obj;
+    else
+      data->first_numanode = obj;
+    data->last_numanode = obj;
+  }
+
+  if (!hwloc_filter_check_keep_object(topology, obj)) {
+    /* Ignore this object instead of inserting it.
+     *
+     * Well, let the core ignore the root object later
+     * because we don't know yet if root has more than one child.
+     */
+    if (parent)
+      ignored = 1;
+  }
+
+  if (parent && !ignored) {
+    /* root->parent is NULL, and root is already inserted */
+    hwloc_insert_object_by_parent(topology, parent, obj);
+    /* insert_object_by_parent() doesn't merge during insert, so obj is still valid */
+  }
+
+  /* process subnodes */
+  while (1) {
+    struct hwloc__xml_import_state_s childstate;
+    char *tag;
+    int ret;
+
+    ret = state->global->find_child(state, &childstate, &tag);
+    if (ret < 0)
+      goto error;
+    if (!ret)
+      break;
+
+    if (!strcmp(tag, "object")) {
+      hwloc_obj_t childobj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_TYPE_MAX, -1);
+      /* if this object is ignored, graft its children onto our parent */
+      ret = hwloc__xml_import_object(topology, data, ignored ? parent : obj, childobj,
+				     &childrengotignored,
+				     &childstate);
+    } else if (!strcmp(tag, "page_type")) {
+      ret = hwloc__xml_import_pagetype(topology, obj, &childstate);
+    } else if (!strcmp(tag, "info")) {
+      ret = hwloc__xml_import_info(topology, obj, &childstate);
+    } else if (!strcmp(tag, "distances")) {
+      ret = hwloc__xml_import_v1distances(data, obj, &childstate);
+    } else if (!strcmp(tag, "userdata")) {
+      ret = hwloc__xml_import_userdata(topology, obj, &childstate);
+    } else
+      ret = -1;
+
+    if (ret < 0)
+      goto error;
+
+    state->global->close_child(&childstate);
+  }
+
+  if (ignored) {
+    /* drop that object, and tell the parent that one child got ignored */
+    hwloc_free_unlinked_object(obj);
+    *gotignored = 1;
+
+  } else if (obj->first_child) {
+    /* now that all children are inserted, make sure they are in-order,
+     * so that the core doesn't have to deal with crappy children list.
+     */
+    hwloc_obj_t cur, next;
+    for(cur = obj->first_child, next = cur->next_sibling;
+	next;
+	cur = next, next = next->next_sibling) {
+      /* If reordering is needed, at least one pair of consecutive children will be out-of-order.
+       * So just check pairs of consecutive children.
+       *
+       * We checked above that complete_cpuset is always set.
+       */
+      if (hwloc_bitmap_compare_first(next->complete_cpuset, cur->complete_cpuset) < 0) {
+	/* next should be before cur */
+	if (!childrengotignored) {
+	  static int reported = 0;
+	  if (!reported && !hwloc_hide_errors()) {
+	    hwloc__xml_import_report_outoforder(topology, next, cur);
+	    reported = 1;
+	  }
+	}
+	hwloc__reorder_children(obj);
+	break;
+      }
+    }
+  }
+
+  return state->global->close_tag(state);
+
+ error_with_object:
+  if (parent)
+    /* root->parent is NULL, and root is already inserted. the caller will cleanup that root. */
+    hwloc_free_unlinked_object(obj);
+ error:
+  return -1;
+}
+
+/* Import one v2 "distances2" XML element and register the matrix with
+ * the topology through hwloc_internal_distances_add_by_index().
+ *
+ * Expected attributes: nbobjs, type, indexing ("os" or "gp"), kind.
+ * Expected children: "indexes" and "u64values", each with a "length"
+ * attribute and space-separated numeric content.
+ *
+ * Returns 0 on success (including when the matrix is deliberately
+ * ignored), -1 on malformed input or allocation failure.
+ */
+static int
+hwloc__xml_import_v2distances(hwloc_topology_t topology,
+			      hwloc__xml_import_state_t state)
+{
+  hwloc_obj_type_t type = HWLOC_OBJ_TYPE_NONE;
+  unsigned nbobjs = 0;
+  int indexing = 0;
+  int os_indexing = 0;
+  int gp_indexing = 0;
+  unsigned long kind = 0;
+  unsigned nr_indexes, nr_u64values;
+  uint64_t *indexes;
+  uint64_t *u64values;
+  int ret;
+
+  /* process attributes */
+  while (1) {
+    char *attrname, *attrvalue;
+    if (state->global->next_attr(state, &attrname, &attrvalue) < 0)
+      break;
+    if (!strcmp(attrname, "nbobjs"))
+      nbobjs = strtoul(attrvalue, NULL, 10);
+    else if (!strcmp(attrname, "type")) {
+      if (hwloc_type_sscanf(attrvalue, &type, NULL, 0) < 0)
+	goto out;
+    }
+    else if (!strcmp(attrname, "indexing")) {
+      indexing = 1;
+      if (!strcmp(attrvalue, "os"))
+	os_indexing = 1;
+      else if (!strcmp(attrvalue, "gp"))
+	gp_indexing = 1;
+    }
+    else if (!strcmp(attrname, "kind")) {
+      kind = strtoul(attrvalue, NULL, 10);
+    }
+    else {
+      if (hwloc__xml_verbose())
+	fprintf(stderr, "%s: ignoring unknown distance attribute %s\n",
+		state->global->msgprefix, attrname);
+    }
+  }
+
+  /* abort if missing attribute */
+  if (!nbobjs || type == HWLOC_OBJ_TYPE_NONE || !indexing || !kind) {
+    if (hwloc__xml_verbose())
+      fprintf(stderr, "%s: distance2 missing some attributes\n",
+	      state->global->msgprefix);
+    goto out;
+  }
+
+  indexes = malloc(nbobjs*sizeof(*indexes));
+  u64values = malloc(nbobjs*nbobjs*sizeof(*u64values));
+  if (!indexes || !u64values) {
+    /* free(NULL) is a no-op, so out_with_arrays is safe even if only one malloc failed */
+    if (hwloc__xml_verbose())
+      fprintf(stderr, "%s: failed to allocate distances arrays for %u objects\n",
+	      state->global->msgprefix, nbobjs);
+    goto out_with_arrays;
+  }
+
+  /* process children */
+  nr_indexes = 0;
+  nr_u64values = 0;
+  while (1) {
+    struct hwloc__xml_import_state_s childstate;
+    char *attrname, *attrvalue, *tag, *buffer;
+    int length;
+    int is_index = 0;
+    int is_u64values = 0;
+
+    ret = state->global->find_child(state, &childstate, &tag);
+    if (ret <= 0)
+      break;
+
+    if (!strcmp(tag, "indexes"))
+      is_index = 1;
+    else if (!strcmp(tag, "u64values"))
+      is_u64values = 1;
+    if (!is_index && !is_u64values) {
+      if (hwloc__xml_verbose())
+	fprintf(stderr, "%s: distance2 with unrecognized child %s\n",
+		state->global->msgprefix, tag);
+      goto out_with_arrays;
+    }
+
+    if (state->global->next_attr(&childstate, &attrname, &attrvalue) < 0
+	|| strcmp(attrname, "length")) {
+      if (hwloc__xml_verbose())
+	fprintf(stderr, "%s: distance2 child must have length attribute\n",
+		state->global->msgprefix);
+      goto out_with_arrays;
+    }
+    length = atoi(attrvalue);
+    /* reject bogus negative lengths before passing them to the parser */
+    if (length < 0) {
+      if (hwloc__xml_verbose())
+	fprintf(stderr, "%s: distance2 child with invalid length %d\n",
+		state->global->msgprefix, length);
+      goto out_with_arrays;
+    }
+
+    ret = state->global->get_content(&childstate, &buffer, length);
+    if (ret < 0) {
+      if (hwloc__xml_verbose())
+	fprintf(stderr, "%s: distance2 child needs content of length %d\n",
+		state->global->msgprefix, length);
+      goto out_with_arrays;
+    }
+
+    if (is_index) {
+      /* get indexes */
+      char *tmp;
+      if (nr_indexes >= nbobjs) {
+	if (hwloc__xml_verbose())
+	  fprintf(stderr, "%s: distance2 with more than %u indexes\n",
+		  state->global->msgprefix, nbobjs);
+	goto out_with_arrays;
+      }
+      tmp = buffer;
+      /* parse space-separated numbers; strtoull returning next==tmp means no digits left */
+      while (1) {
+	char *next;
+	unsigned long long u = strtoull(tmp, &next, 0);
+	if (next == tmp)
+	  break;
+	indexes[nr_indexes++] = u;
+	if (*next != ' ')
+	  break;
+	if (nr_indexes == nbobjs)
+	  break;
+	tmp = next+1;
+      }
+
+    } else if (is_u64values) {
+      /* get uint64_t values */
+      char *tmp;
+      if (nr_u64values >= nbobjs*nbobjs) {
+	if (hwloc__xml_verbose())
+	  fprintf(stderr, "%s: distance2 with more than %u u64values\n",
+		  state->global->msgprefix, nbobjs*nbobjs);
+	goto out_with_arrays;
+      }
+      tmp = buffer;
+      while (1) {
+	char *next;
+	unsigned long long u = strtoull(tmp, &next, 0);
+	if (next == tmp)
+	  break;
+	u64values[nr_u64values++] = u;
+	if (*next != ' ')
+	  break;
+	if (nr_u64values == nbobjs*nbobjs)
+	  break;
+	tmp = next+1;
+      }
+    }
+
+    state->global->close_content(&childstate);
+
+    ret = state->global->close_tag(&childstate);
+    if (ret < 0) {
+      /* report the actual failure (the previous message was a copy-paste
+       * of the "more than %u indexes" diagnostic, which was misleading here) */
+      if (hwloc__xml_verbose())
+	fprintf(stderr, "%s: distance2 failed to close child %s\n",
+		state->global->msgprefix, tag);
+      goto out_with_arrays;
+    }
+
+    state->global->close_child(&childstate);
+  }
+
+  if (nr_indexes != nbobjs) {
+    if (hwloc__xml_verbose())
+      fprintf(stderr, "%s: distance2 with less than %u indexes\n",
+	      state->global->msgprefix, nbobjs);
+    goto out_with_arrays;
+  }
+  if (nr_u64values != nbobjs*nbobjs) {
+    if (hwloc__xml_verbose())
+      fprintf(stderr, "%s: distance2 with less than %u u64values\n",
+	      state->global->msgprefix, nbobjs*nbobjs);
+    goto out_with_arrays;
+  }
+
+  if (nbobjs < 2) {
+    /* distances with a single object are useless, even if the XML isn't invalid */
+    if (hwloc__xml_verbose())
+      fprintf(stderr, "%s: ignoring distances2 with only %u objects\n",
+	      state->global->msgprefix, nbobjs);
+    goto out_ignore;
+  }
+  if (type == HWLOC_OBJ_PU || type == HWLOC_OBJ_NUMANODE) {
+    if (!os_indexing) {
+      if (hwloc__xml_verbose())
+	fprintf(stderr, "%s: ignoring PU or NUMA distances2 without os_indexing\n",
+		state->global->msgprefix);
+      goto out_ignore;
+    }
+  } else {
+    if (!gp_indexing) {
+      if (hwloc__xml_verbose())
+	fprintf(stderr, "%s: ignoring !PU or !NUMA distances2 without gp_indexing\n",
+		state->global->msgprefix);
+      goto out_ignore;
+    }
+  }
+
+  hwloc_internal_distances_add_by_index(topology, type, nbobjs, indexes, u64values, kind, 0);
+
+  /* prevent freeing below; ownership was transferred to the topology */
+  indexes = NULL;
+  u64values = NULL;
+
+ out_ignore:
+  free(indexes);
+  free(u64values);
+  return state->global->close_tag(state);
+
+ out_with_arrays:
+  free(indexes);
+  free(u64values);
+ out:
+  return -1;
+}
+
+/* Import a single "diff" XML element and append it to the caller's
+ * diff list (*firstdiffp / *lastdiffp).
+ * Unknown attributes abort the import (return -1); unknown or incomplete
+ * diff types are silently skipped. Returns the result of closing the tag.
+ */
+static int
+hwloc__xml_import_diff_one(hwloc__xml_import_state_t state,
+			   hwloc_topology_diff_t *firstdiffp,
+			   hwloc_topology_diff_t *lastdiffp)
+{
+  char *type_s = NULL;
+  char *obj_depth_s = NULL;
+  char *obj_index_s = NULL;
+  char *obj_attr_type_s = NULL;
+/* char *obj_attr_index_s = NULL; unused for now */
+  char *obj_attr_name_s = NULL;
+  char *obj_attr_oldvalue_s = NULL;
+  char *obj_attr_newvalue_s = NULL;
+
+  /* collect all attributes first; they point into the parser's buffers
+   * and remain valid until the tag is closed */
+  while (1) {
+    char *attrname, *attrvalue;
+    if (state->global->next_attr(state, &attrname, &attrvalue) < 0)
+      break;
+    if (!strcmp(attrname, "type"))
+      type_s = attrvalue;
+    else if (!strcmp(attrname, "obj_depth"))
+      obj_depth_s = attrvalue;
+    else if (!strcmp(attrname, "obj_index"))
+      obj_index_s = attrvalue;
+    else if (!strcmp(attrname, "obj_attr_type"))
+      obj_attr_type_s = attrvalue;
+    else if (!strcmp(attrname, "obj_attr_index"))
+      { /* obj_attr_index_s = attrvalue; unused for now */ }
+    else if (!strcmp(attrname, "obj_attr_name"))
+      obj_attr_name_s = attrvalue;
+    else if (!strcmp(attrname, "obj_attr_oldvalue"))
+      obj_attr_oldvalue_s = attrvalue;
+    else if (!strcmp(attrname, "obj_attr_newvalue"))
+      obj_attr_newvalue_s = attrvalue;
+    else {
+      /* unlike other importers, an unknown diff attribute is a hard error */
+      if (hwloc__xml_verbose())
+	fprintf(stderr, "%s: ignoring unknown diff attribute %s\n",
+		state->global->msgprefix, attrname);
+      return -1;
+    }
+  }
+
+  if (type_s) {
+    switch (atoi(type_s)) {
+    default:
+      break;
+    case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR: {
+      /* object attribute diff */
+      hwloc_topology_diff_obj_attr_type_t obj_attr_type;
+      hwloc_topology_diff_t diff;
+
+      /* obj_attr mandatory generic attributes */
+      if (!obj_depth_s || !obj_index_s || !obj_attr_type_s) {
+	if (hwloc__xml_verbose())
+	  fprintf(stderr, "%s: missing mandatory obj attr generic attributes\n",
+		  state->global->msgprefix);
+	break;
+      }
+
+      /* obj_attr mandatory attributes common to all subtypes */
+      if (!obj_attr_oldvalue_s || !obj_attr_newvalue_s) {
+	if (hwloc__xml_verbose())
+	  fprintf(stderr, "%s: missing mandatory obj attr value attributes\n",
+		  state->global->msgprefix);
+	break;
+      }
+
+      /* mandatory attributes for obj_attr_info subtype */
+      obj_attr_type = atoi(obj_attr_type_s);
+      if (obj_attr_type == HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO && !obj_attr_name_s) {
+	if (hwloc__xml_verbose())
+	  fprintf(stderr, "%s: missing mandatory obj attr info name attribute\n",
+		  state->global->msgprefix);
+	break;
+      }
+
+      /* now we know we have everything we need */
+      diff = malloc(sizeof(*diff));
+      if (!diff)
+	return -1;
+      diff->obj_attr.type = HWLOC_TOPOLOGY_DIFF_OBJ_ATTR;
+      diff->obj_attr.obj_depth = atoi(obj_depth_s);
+      diff->obj_attr.obj_index = atoi(obj_index_s);
+      /* zero the union so that string.name stays NULL for the NAME subtype */
+      memset(&diff->obj_attr.diff, 0, sizeof(diff->obj_attr.diff));
+      diff->obj_attr.diff.generic.type = obj_attr_type;
+
+      switch (atoi(obj_attr_type_s)) {
+      case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_SIZE:
+	diff->obj_attr.diff.uint64.oldvalue = strtoull(obj_attr_oldvalue_s, NULL, 0);
+	diff->obj_attr.diff.uint64.newvalue = strtoull(obj_attr_newvalue_s, NULL, 0);
+	break;
+      case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO:
+	diff->obj_attr.diff.string.name = strdup(obj_attr_name_s);
+	/* FALLTHRU */
+      case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_NAME:
+	/* NOTE(review): strdup results are not checked; on OOM these fields
+	 * may silently be NULL — verify consumers tolerate that */
+	diff->obj_attr.diff.string.oldvalue = strdup(obj_attr_oldvalue_s);
+	diff->obj_attr.diff.string.newvalue = strdup(obj_attr_newvalue_s);
+	break;
+      }
+
+      /* append to the caller's singly-linked list */
+      if (*firstdiffp)
+	(*lastdiffp)->generic.next = diff;
+      else
+        *firstdiffp = diff;
+      *lastdiffp = diff;
+      diff->generic.next = NULL;
+    }
+    }
+  }
+
+  return state->global->close_tag(state);
+}
+
+/* Import a sequence of "diff" XML children into a newly-built diff list.
+ * On success *firstdiffp points to the list head (possibly NULL); on any
+ * parsing error the function returns -1 with *firstdiffp already NULL.
+ */
+int
+hwloc__xml_import_diff(hwloc__xml_import_state_t state,
+		       hwloc_topology_diff_t *firstdiffp)
+{
+  hwloc_topology_diff_t head = NULL, tail = NULL;
+
+  *firstdiffp = NULL;
+
+  for (;;) {
+    struct hwloc__xml_import_state_s childstate;
+    char *tag;
+    int ret;
+
+    ret = state->global->find_child(state, &childstate, &tag);
+    if (ret < 0)
+      return -1;
+    if (ret == 0)
+      break;
+
+    /* only "diff" children are accepted here */
+    if (strcmp(tag, "diff") != 0)
+      return -1;
+
+    ret = hwloc__xml_import_diff_one(&childstate, &head, &tail);
+    if (ret < 0)
+      return ret;
+
+    state->global->close_child(&childstate);
+  }
+
+  *firstdiffp = head;
+  return 0;
+}
+
+/***********************************
+ ********* main XML import *********
+ ***********************************/
+
+/* Convert a v1 XML float distance matrix into the uint64_t values used by
+ * the v2 internal distances code.
+ *
+ * If every float is a non-negative (near-)integer, values are stored as-is.
+ * Otherwise they are multiplied by a scale (1000 by default, overridable
+ * through HWLOC_XML_V1DIST_SCALE) and the scale is recorded in the root
+ * object infos so a later v1 re-export may undo it.
+ */
+static void
+hwloc_convert_from_v1dist_floats(hwloc_topology_t topology, unsigned nbobjs, float *floats, uint64_t *u64s)
+{
+  unsigned i;
+  int is_uint;
+  char *env;
+  float scale = 1000.f;
+  char scalestring[20];
+
+  env = getenv("HWLOC_XML_V1DIST_SCALE");
+  if (env) {
+    scale = (float) atof(env);
+    goto scale;
+  }
+
+  is_uint = 1;
+  /* find out if all values are integers */
+  for(i=0; i<nbobjs*nbobjs; i++) {
+    float f, iptr, fptr;
+    f = floats[i];
+    if (f < 0.f) {
+      is_uint = 0;
+      break;
+    }
+    fptr = modff(f, &iptr);
+    if (fptr > .001f && fptr < .999f) {
+      is_uint = 0;
+      break;
+    }
+    /* round to nearest; cast straight to uint64_t (the previous (int) cast
+     * truncated integer distances larger than INT_MAX) */
+    u64s[i] = (uint64_t)(f+.5f);
+  }
+  if (is_uint)
+    return;
+
+ scale:
+  /* TODO heuristic to find a good scale */
+  for(i=0; i<nbobjs*nbobjs; i++)
+    u64s[i] = (uint64_t)(scale * floats[i]);
+
+  /* save the scale in root info attrs.
+   * Not perfect since we may have multiple of them,
+   * and some distances might disappear in case of restrict, etc.
+   */
+  /* bounded snprintf: a huge HWLOC_XML_V1DIST_SCALE value could otherwise
+   * overflow this 20-byte buffer with plain sprintf("%f") */
+  snprintf(scalestring, sizeof(scalestring), "%f", scale);
+  hwloc_obj_add_info(hwloc_get_root_obj(topology), "xmlv1DistancesScale", scalestring);
+}
+
+/* this canNOT be the first XML call */
+/* Main XML discovery entry point for the backend.
+ * Imports the root object tree, then the v2 "distances2" elements,
+ * then post-processes: converts queued v1 distances, inserts a synthetic
+ * NUMA node for pre-2.0 XMLs without one, and allocates missing sets.
+ * Returns 0 on success; on failure, frees all imported children of the
+ * root and returns -1.
+ */
+static int
+hwloc_look_xml(struct hwloc_backend *backend)
+{
+  struct hwloc_topology *topology = backend->topology;
+  struct hwloc_xml_backend_data_s *data = backend->private_data;
+  struct hwloc__xml_import_state_s state, childstate;
+  struct hwloc_obj *root = topology->levels[0][0];
+  char *tag;
+  int gotignored = 0;
+  hwloc_localeswitch_declare;
+  int ret;
+
+  state.global = data;
+
+  /* the root must not have been populated yet */
+  assert(!root->cpuset);
+
+  hwloc_localeswitch_init();
+
+  data->nbnumanodes = 0;
+  data->first_numanode = data->last_numanode = NULL;
+  data->first_v1dist = data->last_v1dist = NULL;
+
+  ret = data->look_init(data, &state);
+  if (ret < 0)
+    goto failed;
+
+  /* find root object tag and import it */
+  ret = state.global->find_child(&state, &childstate, &tag);
+  if (ret < 0 || !ret || strcmp(tag, "object"))
+    goto failed;
+  ret = hwloc__xml_import_object(topology, data, NULL /*  no parent */, root,
+				 &gotignored,
+				 &childstate);
+  if (ret < 0)
+    goto failed;
+  state.global->close_child(&childstate);
+  /* the root object itself can never be ignored during import */
+  assert(!gotignored);
+
+  /* find v2 distances; only "distances2" elements may follow the root object */
+  while (1) {
+    ret = state.global->find_child(&state, &childstate, &tag);
+    if (ret < 0)
+      goto failed;
+    if (!ret)
+      break;
+    if (strcmp(tag, "distances2"))
+      goto failed;
+    ret = hwloc__xml_import_v2distances(topology, &childstate);
+    if (ret < 0)
+      goto failed;
+    state.global->close_child(&childstate);
+  }
+
+  /* find end of topology tag */
+  state.global->close_tag(&state);
+
+  if (!root->cpuset) {
+    if (hwloc__xml_verbose())
+      fprintf(stderr, "invalid root object without cpuset\n");
+    goto err;
+  }
+
+  /* handle v1 distances queued by the object importer */
+  if (data->first_v1dist) {
+    struct hwloc__xml_imported_v1distances_s *v1dist, *v1next = data->first_v1dist;
+    while ((v1dist = v1next) != NULL) {
+      unsigned nbobjs = v1dist->nbobjs;
+      v1next = v1dist->next;
+      /* Handle distances as NUMA node distances if nbobjs matches.
+       * Otherwise drop, only NUMA distances really matter.
+       *
+       * We could also attach to a random level with the right nbobjs,
+       * but it would require to have those objects in the original XML order (like the first_numanode cousin-list).
+       * because the topology order can be different if some parents are ignored during load.
+       */
+      if (nbobjs == data->nbnumanodes) {
+	hwloc_obj_t *objs = malloc(nbobjs*sizeof(hwloc_obj_t));
+	uint64_t *values = malloc(nbobjs*nbobjs*sizeof(*values));
+	if (objs && values) {
+	  hwloc_obj_t node;
+	  unsigned i;
+	  for(i=0, node = data->first_numanode;
+	      i<nbobjs;
+	      i++, node = node->next_cousin)
+	    objs[i] = node;
+hwloc_convert_from_v1dist_floats(topology, nbobjs, v1dist->floats, values);
+	  hwloc_internal_distances_add(topology, nbobjs, objs, values, v1dist->kind, 0);
+	} else {
+	  /* allocation failed: silently drop this matrix (best-effort) */
+	  free(objs);
+	  free(values);
+	}
+      }
+      free(v1dist->floats);
+      free(v1dist);
+    }
+    data->first_v1dist = data->last_v1dist = NULL;
+  }
+
+  /* FIXME:
+   * We should check that the existing object sets are consistent:
+   * no intersection between objects of a same level,
+   * object sets included in parent sets.
+   * hwloc never generated such buggy XML, but users could create one.
+   *
+   * We want to add these checks to the existing core code that
+   * adds missing sets and propagates parent/children sets
+   * (in case another backend ever generates buggy object sets as well).
+   */
+
+  if (!data->nbnumanodes) {
+    /* before 2.0, XML could have no NUMA node objects and no nodesets */
+    hwloc_obj_t numa;
+    /* create missing root nodesets and make sure they are consistent with the upcoming NUMA node */
+    if (!root->nodeset)
+      root->nodeset = hwloc_bitmap_alloc();
+    if (!root->allowed_nodeset)
+      root->allowed_nodeset = hwloc_bitmap_alloc();
+    if (!root->complete_nodeset)
+      root->complete_nodeset = hwloc_bitmap_alloc();
+    hwloc_bitmap_only(root->nodeset, 0);
+    hwloc_bitmap_only(root->allowed_nodeset, 0);
+    hwloc_bitmap_only(root->complete_nodeset, 0);
+    /* add a NUMA node and move the root memory there */
+    numa = hwloc_alloc_setup_object(topology, HWLOC_OBJ_NUMANODE, 0);
+    numa->cpuset = hwloc_bitmap_dup(root->cpuset);
+    numa->nodeset = hwloc_bitmap_alloc();
+    hwloc_bitmap_set(numa->nodeset, 0);
+    memcpy(&numa->memory, &topology->levels[0][0]->memory, sizeof(numa->memory));
+    memset(&topology->levels[0][0]->memory, 0, sizeof(numa->memory));
+    /* insert by cpuset so that it goes between root and its existing children */
+    hwloc_insert_object_by_cpuset(topology, numa);
+  }
+
+  /* make sure we have a nodeset now. if we got NUMA nodes without nodeset, something bad happened */
+  if (!root->nodeset) {
+    if (hwloc__xml_verbose())
+      fprintf(stderr, "invalid root object without nodeset\n");
+    goto err;
+  }
+
+  /* allocate default cpusets and nodesets if missing, the core will restrict them */
+  hwloc_alloc_obj_cpusets(root);
+
+  /* keep the "Backend" information intact */
+  /* we could add "BackendSource=XML" to notify that XML was used between the actual backend and here */
+
+  topology->support.discovery->pu = 1;
+
+  hwloc_localeswitch_fini();
+  return 0;
+
+ failed:
+  if (data->look_failed)
+    data->look_failed(data);
+  if (hwloc__xml_verbose())
+    fprintf(stderr, "%s: XML component discovery failed.\n",
+	    data->msgprefix);
+ err:
+  /* free everything imported so far; the root itself is owned by the core */
+  hwloc_free_object_siblings_and_children(root->first_child);
+  root->first_child = NULL;
+  hwloc_free_object_siblings_and_children(root->io_first_child);
+  root->io_first_child = NULL;
+  hwloc_free_object_siblings_and_children(root->misc_first_child);
+  root->misc_first_child = NULL;
+
+  hwloc_localeswitch_fini();
+  return -1;
+}
+
+/* this can be the first XML call */
+/* Load a topology diff list from the XML file at XMLPATH.
+ * Uses the file's basename as the diagnostic message prefix. */
+int
+hwloc_topology_diff_load_xml(const char *xmlpath,
+			     hwloc_topology_diff_t *firstdiffp, char **refnamep)
+{
+  struct hwloc__xml_import_state_s state;
+  struct hwloc_xml_backend_data_s fakedata; /* only for storing global info during parsing */
+  hwloc_localeswitch_declare;
+  const char *slash;
+  int use_libxml;
+  int ret;
+
+  state.global = &fakedata;
+
+  slash = strrchr(xmlpath, '/');
+  fakedata.msgprefix = strdup(slash ? slash + 1 : xmlpath);
+
+  hwloc_components_init();
+  assert(hwloc_nolibxml_callbacks);
+
+  hwloc_localeswitch_init();
+
+  *firstdiffp = NULL;
+
+  /* pick the libxml backend unless it is unavailable or the built-in
+   * parser is explicitly forced */
+  use_libxml = hwloc_libxml_callbacks
+    && !(hwloc_nolibxml_callbacks && hwloc_nolibxml_import());
+  for (;;) {
+    if (use_libxml) {
+      ret = hwloc_libxml_callbacks->import_diff(&state, xmlpath, NULL, 0, firstdiffp, refnamep);
+      if (ret < 0 && errno == ENOSYS) {
+	/* libxml backend unusable: disable it globally and fall back */
+	hwloc_libxml_callbacks = NULL;
+	use_libxml = 0;
+	continue;
+      }
+    } else {
+      ret = hwloc_nolibxml_callbacks->import_diff(&state, xmlpath, NULL, 0, firstdiffp, refnamep);
+    }
+    break;
+  }
+
+  hwloc_localeswitch_fini();
+  hwloc_components_fini();
+  free(fakedata.msgprefix);
+  return ret;
+}
+
+/* this can be the first XML call */
+/* Load a topology diff list from an in-memory XML buffer of BUFLEN bytes. */
+int
+hwloc_topology_diff_load_xmlbuffer(const char *xmlbuffer, int buflen,
+				   hwloc_topology_diff_t *firstdiffp, char **refnamep)
+{
+  struct hwloc__xml_import_state_s state;
+  struct hwloc_xml_backend_data_s fakedata; /* only for storing global info during parsing */
+  hwloc_localeswitch_declare;
+  int use_libxml;
+  int ret;
+
+  state.global = &fakedata;
+  fakedata.msgprefix = strdup("xmldiffbuffer");
+
+  hwloc_components_init();
+  assert(hwloc_nolibxml_callbacks);
+
+  hwloc_localeswitch_init();
+
+  *firstdiffp = NULL;
+
+  /* pick the libxml backend unless it is unavailable or the built-in
+   * parser is explicitly forced */
+  use_libxml = hwloc_libxml_callbacks
+    && !(hwloc_nolibxml_callbacks && hwloc_nolibxml_import());
+  for (;;) {
+    if (use_libxml) {
+      ret = hwloc_libxml_callbacks->import_diff(&state, NULL, xmlbuffer, buflen, firstdiffp, refnamep);
+      if (ret < 0 && errno == ENOSYS) {
+	/* libxml backend unusable: disable it globally and fall back */
+	hwloc_libxml_callbacks = NULL;
+	use_libxml = 0;
+	continue;
+      }
+    } else {
+      ret = hwloc_nolibxml_callbacks->import_diff(&state, NULL, xmlbuffer, buflen, firstdiffp, refnamep);
+    }
+    break;
+  }
+
+  hwloc_localeswitch_fini();
+  hwloc_components_fini();
+  free(fakedata.msgprefix);
+  return ret;
+}
+
+/************************************************
+ ********* XML export (common routines) *********
+ ************************************************/
+
+#define HWLOC_XML_CHAR_VALID(c) (((c) >= 32 && (c) <= 126) || (c) == '\t' || (c) == '\n' || (c) == '\r')
+
+/* Return 0 if all LENGTH bytes of BUF are XML-exportable characters,
+ * -1 otherwise. */
+static int
+hwloc__xml_export_check_buffer(const char *buf, size_t length)
+{
+  size_t i; /* size_t, not unsigned: lengths beyond UINT_MAX would otherwise loop forever */
+  for(i=0; i<length; i++)
+    if (!HWLOC_XML_CHAR_VALID(buf[i]))
+      return -1;
+  return 0;
+}
+
+/* strdup and remove ugly chars from random string.
+ * Returns a malloc'ed copy of OLD with non-exportable characters stripped,
+ * or NULL on allocation failure. */
+static char*
+hwloc__xml_export_safestrdup(const char *old)
+{
+  char *new = malloc(strlen(old)+1);
+  char *dst = new;
+  const char *src = old;
+  if (!new)
+    return NULL; /* previously dereferenced unconditionally: NULL-deref on OOM */
+  while (*src) {
+    if (HWLOC_XML_CHAR_VALID(*src))
+      *(dst++) = *src;
+    src++;
+  }
+  *dst = '\0';
+  return new;
+}
+
+/* Export one object (and recursively its normal/IO/Misc children) as an
+ * "object" XML element under PARENTSTATE.
+ * FLAGS may contain HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1, in which case
+ * v1-compatible names/attributes are emitted (Socket/Cache type names,
+ * online_cpuset, embedded latency matrices, no gp_index/subtype). */
+static void
+hwloc__xml_export_object (hwloc__xml_export_state_t parentstate, hwloc_topology_t topology, hwloc_obj_t obj, unsigned long flags)
+{
+  struct hwloc__xml_export_state_s state;
+  hwloc_obj_t child;
+  char *cpuset = NULL;
+  char tmp[255];
+  int v1export = flags & HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1;
+  unsigned i;
+
+  parentstate->new_child(parentstate, &state, "object");
+
+  /* v1 used "Socket" for packages and a single "Cache" type */
+  if (v1export && obj->type == HWLOC_OBJ_PACKAGE)
+    state.new_prop(&state, "type", "Socket");
+  else if (v1export && hwloc_obj_type_is_cache(obj->type))
+    state.new_prop(&state, "type", "Cache");
+  else
+    state.new_prop(&state, "type", hwloc_type_name(obj->type));
+
+  if (obj->os_index != (unsigned) -1) {
+    sprintf(tmp, "%u", obj->os_index);
+    state.new_prop(&state, "os_index", tmp);
+  }
+  if (obj->cpuset) {
+    hwloc_bitmap_asprintf(&cpuset, obj->cpuset);
+    state.new_prop(&state, "cpuset", cpuset);
+    free(cpuset);
+  }
+  if (obj->complete_cpuset) {
+    hwloc_bitmap_asprintf(&cpuset, obj->complete_cpuset);
+    state.new_prop(&state, "complete_cpuset", cpuset);
+    free(cpuset);
+  }
+  if (v1export && obj->cpuset) {
+    /* v1 expected an online_cpuset; duplicate the cpuset there */
+    hwloc_bitmap_asprintf(&cpuset, obj->cpuset);
+    state.new_prop(&state, "online_cpuset", cpuset);
+    free(cpuset);
+  }
+  if (obj->allowed_cpuset) {
+    hwloc_bitmap_asprintf(&cpuset, obj->allowed_cpuset);
+    state.new_prop(&state, "allowed_cpuset", cpuset);
+    free(cpuset);
+  }
+  if (obj->nodeset && !hwloc_bitmap_isfull(obj->nodeset)) {
+    hwloc_bitmap_asprintf(&cpuset, obj->nodeset);
+    state.new_prop(&state, "nodeset", cpuset);
+    free(cpuset);
+  }
+  if (obj->complete_nodeset && !hwloc_bitmap_isfull(obj->complete_nodeset)) {
+    hwloc_bitmap_asprintf(&cpuset, obj->complete_nodeset);
+    state.new_prop(&state, "complete_nodeset", cpuset);
+    free(cpuset);
+  }
+  if (obj->allowed_nodeset && !hwloc_bitmap_isfull(obj->allowed_nodeset)) {
+    hwloc_bitmap_asprintf(&cpuset, obj->allowed_nodeset);
+    state.new_prop(&state, "allowed_nodeset", cpuset);
+    free(cpuset);
+  }
+
+  if (!v1export) {
+    sprintf(tmp, "%llu", (unsigned long long) obj->gp_index);
+    state.new_prop(&state, "gp_index", tmp);
+  }
+
+  if (obj->name) {
+    char *name = hwloc__xml_export_safestrdup(obj->name);
+    state.new_prop(&state, "name", name);
+    free(name);
+  }
+  if (!v1export && obj->subtype) {
+    char *subtype = hwloc__xml_export_safestrdup(obj->subtype);
+    state.new_prop(&state, "subtype", subtype);
+    free(subtype);
+  }
+
+  /* type-specific attributes */
+  switch (obj->type) {
+  case HWLOC_OBJ_L1CACHE:
+  case HWLOC_OBJ_L2CACHE:
+  case HWLOC_OBJ_L3CACHE:
+  case HWLOC_OBJ_L4CACHE:
+  case HWLOC_OBJ_L5CACHE:
+  case HWLOC_OBJ_L1ICACHE:
+  case HWLOC_OBJ_L2ICACHE:
+  case HWLOC_OBJ_L3ICACHE:
+    sprintf(tmp, "%llu", (unsigned long long) obj->attr->cache.size);
+    state.new_prop(&state, "cache_size", tmp);
+    sprintf(tmp, "%u", obj->attr->cache.depth);
+    state.new_prop(&state, "depth", tmp);
+    sprintf(tmp, "%u", (unsigned) obj->attr->cache.linesize);
+    state.new_prop(&state, "cache_linesize", tmp);
+    sprintf(tmp, "%d", obj->attr->cache.associativity);
+    state.new_prop(&state, "cache_associativity", tmp);
+    sprintf(tmp, "%d", (int) obj->attr->cache.type);
+    state.new_prop(&state, "cache_type", tmp);
+    break;
+  case HWLOC_OBJ_GROUP:
+    sprintf(tmp, "%u", obj->attr->group.depth);
+    state.new_prop(&state, "depth", tmp);
+    if (!v1export) {
+      sprintf(tmp, "%u", obj->attr->group.kind);
+      state.new_prop(&state, "kind", tmp);
+      sprintf(tmp, "%u", obj->attr->group.subkind);
+      state.new_prop(&state, "subkind", tmp);
+    }
+    break;
+  case HWLOC_OBJ_BRIDGE:
+    sprintf(tmp, "%d-%d", (int) obj->attr->bridge.upstream_type, (int) obj->attr->bridge.downstream_type);
+    state.new_prop(&state, "bridge_type", tmp);
+    sprintf(tmp, "%u", obj->attr->bridge.depth);
+    state.new_prop(&state, "depth", tmp);
+    if (obj->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI) {
+      sprintf(tmp, "%04x:[%02x-%02x]",
+	      (unsigned) obj->attr->bridge.downstream.pci.domain,
+	      (unsigned) obj->attr->bridge.downstream.pci.secondary_bus,
+	      (unsigned) obj->attr->bridge.downstream.pci.subordinate_bus);
+      state.new_prop(&state, "bridge_pci", tmp);
+    }
+    if (obj->attr->bridge.upstream_type != HWLOC_OBJ_BRIDGE_PCI)
+      break;
+    /* FALLTHRU */
+  case HWLOC_OBJ_PCI_DEVICE:
+    sprintf(tmp, "%04x:%02x:%02x.%01x",
+	    (unsigned) obj->attr->pcidev.domain,
+	    (unsigned) obj->attr->pcidev.bus,
+	    (unsigned) obj->attr->pcidev.dev,
+	    (unsigned) obj->attr->pcidev.func);
+    state.new_prop(&state, "pci_busid", tmp);
+    sprintf(tmp, "%04x [%04x:%04x] [%04x:%04x] %02x",
+	    (unsigned) obj->attr->pcidev.class_id,
+	    (unsigned) obj->attr->pcidev.vendor_id, (unsigned) obj->attr->pcidev.device_id,
+	    (unsigned) obj->attr->pcidev.subvendor_id, (unsigned) obj->attr->pcidev.subdevice_id,
+	    (unsigned) obj->attr->pcidev.revision);
+    state.new_prop(&state, "pci_type", tmp);
+    sprintf(tmp, "%f", obj->attr->pcidev.linkspeed);
+    state.new_prop(&state, "pci_link_speed", tmp);
+    break;
+  case HWLOC_OBJ_OS_DEVICE:
+    sprintf(tmp, "%d", (int) obj->attr->osdev.type);
+    state.new_prop(&state, "osdev_type", tmp);
+    break;
+  default:
+    break;
+  }
+
+  if (obj->memory.local_memory) {
+    sprintf(tmp, "%llu", (unsigned long long) obj->memory.local_memory);
+    state.new_prop(&state, "local_memory", tmp);
+  }
+
+  for(i=0; i<obj->memory.page_types_len; i++) {
+    struct hwloc__xml_export_state_s childstate;
+    state.new_child(&state, &childstate, "page_type");
+    sprintf(tmp, "%llu", (unsigned long long) obj->memory.page_types[i].size);
+    childstate.new_prop(&childstate, "size", tmp);
+    sprintf(tmp, "%llu", (unsigned long long) obj->memory.page_types[i].count);
+    childstate.new_prop(&childstate, "count", tmp);
+    childstate.end_object(&childstate, "page_type");
+  }
+
+  for(i=0; i<obj->infos_count; i++) {
+    char *name = hwloc__xml_export_safestrdup(obj->infos[i].name);
+    char *value = hwloc__xml_export_safestrdup(obj->infos[i].value);
+    struct hwloc__xml_export_state_s childstate;
+    state.new_child(&state, &childstate, "info");
+    childstate.new_prop(&childstate, "name", name);
+    childstate.new_prop(&childstate, "value", value);
+    childstate.end_object(&childstate, "info");
+    free(name);
+    free(value);
+  }
+  if (v1export && obj->subtype) {
+    /* v1 had no subtype attribute: export it as an info pair instead */
+    char *subtype = hwloc__xml_export_safestrdup(obj->subtype);
+    struct hwloc__xml_export_state_s childstate;
+    int is_coproctype = (obj->type == HWLOC_OBJ_OS_DEVICE && obj->attr->osdev.type == HWLOC_OBJ_OSDEV_COPROC);
+    state.new_child(&state, &childstate, "info");
+    childstate.new_prop(&childstate, "name", is_coproctype ? "CoProcType" : "Type");
+    childstate.new_prop(&childstate, "value", subtype);
+    childstate.end_object(&childstate, "info");
+    free(subtype);
+  }
+
+  if (v1export && !obj->parent) {
+    /* only latency matrices covering the entire machine can be exported to v1 */
+    struct hwloc_internal_distances_s *dist;
+    /* refresh distances since we need objects below */
+    hwloc_internal_distances_refresh(topology);
+    for(dist = topology->first_dist; dist; dist = dist->next) {
+      struct hwloc__xml_export_state_s childstate;
+      unsigned nbobjs = dist->nbobjs;
+      unsigned *logical_to_v2array;
+
+      if (nbobjs != (unsigned) hwloc_get_nbobjs_by_type(topology, dist->type))
+	continue;
+      if (!(dist->kind & HWLOC_DISTANCES_KIND_MEANS_LATENCY))
+	continue;
+
+      logical_to_v2array = malloc(nbobjs*sizeof(*logical_to_v2array));
+      if (!logical_to_v2array)
+	continue;
+      /* map logical index -> position in dist->objs[]/values[] */
+      for(i=0; i<nbobjs; i++)
+	logical_to_v2array[dist->objs[i]->logical_index] = i;
+
+      state.new_child(&state, &childstate, "distances");
+      sprintf(tmp, "%u", nbobjs);
+      childstate.new_prop(&childstate, "nbobjs", tmp);
+      sprintf(tmp, "%d", hwloc_get_type_depth(topology, dist->type));
+      childstate.new_prop(&childstate, "relative_depth", tmp);
+      sprintf(tmp, "%f", 1.f);
+      childstate.new_prop(&childstate, "latency_base", tmp);
+      for(i=0; i<nbobjs*nbobjs; i++) {
+	struct hwloc__xml_export_state_s greatchildstate;
+	childstate.new_child(&childstate, &greatchildstate, "latency");
+	/* values[] is ordered by dist->objs[]; v1 expects logical order,
+	 * so remap both row and column through logical_to_v2array
+	 * (the array was previously computed but never used, exporting
+	 * the matrix in the wrong order) */
+	sprintf(tmp, "%f", (float) dist->values[logical_to_v2array[i/nbobjs]*nbobjs+logical_to_v2array[i%nbobjs]]);
+	greatchildstate.new_prop(&greatchildstate, "value", tmp);
+	greatchildstate.end_object(&greatchildstate, "latency");
+      }
+      childstate.end_object(&childstate, "distances");
+
+      free(logical_to_v2array);
+    }
+  }
+
+  if (obj->userdata && topology->userdata_export_cb)
+    topology->userdata_export_cb((void*) &state, topology, obj);
+
+  for(child = obj->first_child; child; child = child->next_sibling)
+    hwloc__xml_export_object (&state, topology, child, flags);
+  for(child = obj->io_first_child; child; child = child->next_sibling)
+    hwloc__xml_export_object (&state, topology, child, flags);
+  for(child = obj->misc_first_child; child; child = child->next_sibling)
+    hwloc__xml_export_object (&state, topology, child, flags);
+
+  state.end_object(&state, "object");
+}
+
+/* Export a numeric array as a sequence of <tagname> children, each holding
+ * up to MAXPERLINE space-separated values formatted with FORMAT and carrying
+ * a "length" attribute equal to the content size in characters. */
+#define EXPORT_ARRAY(state, type, nr, values, tagname, format, maxperline) do { \
+  unsigned _i = 0; \
+  while (_i<(nr)) { \
+    char _tmp[255]; /* enough for (snprintf(format)+space) x maxperline */ \
+    char _tmp2[16]; \
+    size_t _len = 0; \
+    unsigned _j; \
+    struct hwloc__xml_export_state_s _childstate; \
+    (state)->new_child(state, &_childstate, tagname); \
+    for(_j=0; \
+	_i+_j<(nr) && _j<maxperline; \
+	_j++) \
+      _len += sprintf(_tmp+_len, format " ", (type) (values)[_i+_j]); \
+    _i += _j; \
+    sprintf(_tmp2, "%lu", (unsigned long) _len); \
+    _childstate.new_prop(&_childstate, "length", _tmp2); \
+    _childstate.add_content(&_childstate, _tmp, _len); \
+    _childstate.end_object(&_childstate, tagname); \
+  } \
+} while (0)
+
+/* Export every internal distances structure as a v2 "distances2" element. */
+static void
+hwloc__xml_export_v2distances(hwloc__xml_export_state_t parentstate, hwloc_topology_t topology)
+{
+  struct hwloc_internal_distances_s *d = topology->first_dist;
+
+  while (d) {
+    struct hwloc__xml_export_state_s cstate;
+    char buf[255];
+    unsigned n = d->nbobjs;
+
+    parentstate->new_child(parentstate, &cstate, "distances2");
+
+    cstate.new_prop(&cstate, "type", hwloc_type_name(d->type));
+    sprintf(buf, "%u", n);
+    cstate.new_prop(&cstate, "nbobjs", buf);
+    sprintf(buf, "%lu", d->kind);
+    cstate.new_prop(&cstate, "kind", buf);
+
+    /* PU and NUMA node matrices are indexed by OS index, others by gp_index */
+    cstate.new_prop(&cstate, "indexing",
+		   (d->type == HWLOC_OBJ_NUMANODE || d->type == HWLOC_OBJ_PU) ? "os" : "gp");
+    /* TODO don't hardwire 10 below. either snprintf the max to guess it, or just append until the end of the buffer */
+    EXPORT_ARRAY(&cstate, unsigned long long, n, d->indexes, "indexes", "%llu", 10);
+    EXPORT_ARRAY(&cstate, unsigned long long, n*n, d->values, "u64values", "%llu", 10);
+    cstate.end_object(&cstate, "distances2");
+
+    d = d->next;
+  }
+}
+
+/* Export the whole topology: the object tree first, then (v2 only) the
+ * distances2 elements. v1 distances are emitted inside the root object
+ * by hwloc__xml_export_object() instead. */
+void
+hwloc__xml_export_topology(hwloc__xml_export_state_t state, hwloc_topology_t topology, unsigned long flags)
+{
+  int v1export = (flags & HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1) != 0;
+
+  hwloc__xml_export_object(state, topology, hwloc_get_root_obj(topology), flags);
+  if (!v1export)
+    hwloc__xml_export_v2distances(state, topology);
+}
+
+/* Export a list of topology diffs as a sequence of "diff" XML elements.
+ * Only HWLOC_TOPOLOGY_DIFF_OBJ_ATTR entries are expected here; any other
+ * type asserts. The attribute layout mirrors hwloc__xml_import_diff_one(). */
+void
+hwloc__xml_export_diff(hwloc__xml_export_state_t parentstate, hwloc_topology_diff_t diff)
+{
+  while (diff) {
+    struct hwloc__xml_export_state_s state;
+    char tmp[255];
+
+    parentstate->new_child(parentstate, &state, "diff");
+
+    sprintf(tmp, "%d", (int) diff->generic.type);
+    state.new_prop(&state, "type", tmp);
+
+    switch (diff->generic.type) {
+    case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR:
+      sprintf(tmp, "%d", (int) diff->obj_attr.obj_depth);
+      state.new_prop(&state, "obj_depth", tmp);
+      sprintf(tmp, "%u", diff->obj_attr.obj_index);
+      state.new_prop(&state, "obj_index", tmp);
+
+      sprintf(tmp, "%d", (int) diff->obj_attr.diff.generic.type);
+      state.new_prop(&state, "obj_attr_type", tmp);
+
+      switch (diff->obj_attr.diff.generic.type) {
+      case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_SIZE:
+	sprintf(tmp, "%llu", (unsigned long long) diff->obj_attr.diff.uint64.index);
+	state.new_prop(&state, "obj_attr_index", tmp);
+	sprintf(tmp, "%llu", (unsigned long long) diff->obj_attr.diff.uint64.oldvalue);
+	state.new_prop(&state, "obj_attr_oldvalue", tmp);
+	sprintf(tmp, "%llu", (unsigned long long) diff->obj_attr.diff.uint64.newvalue);
+	state.new_prop(&state, "obj_attr_newvalue", tmp);
+	break;
+      case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_NAME:
+      case HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO:
+	/* name is only set for INFO diffs (NAME diffs leave it NULL) */
+	if (diff->obj_attr.diff.string.name)
+	  state.new_prop(&state, "obj_attr_name", diff->obj_attr.diff.string.name);
+	state.new_prop(&state, "obj_attr_oldvalue", diff->obj_attr.diff.string.oldvalue);
+	state.new_prop(&state, "obj_attr_newvalue", diff->obj_attr.diff.string.newvalue);
+	break;
+      }
+
+      break;
+    default:
+      assert(0);
+    }
+    state.end_object(&state, "diff");
+
+    diff = diff->generic.next;
+  }
+}
+
+/**********************************
+ ********* main XML export ********
+ **********************************/
+
+/* this can be the first XML call */
+/* Export the topology to an XML file.
+ * Uses the libxml2 writer when available unless the built-in minimalistic
+ * writer is forced; falls back to the built-in writer when the libxml
+ * callbacks report ENOSYS.  Only the V1 export flag is accepted.
+ * Returns 0 on success, -1 with errno set on error. */
+int hwloc_topology_export_xml(hwloc_topology_t topology, const char *filename, unsigned long flags)
+{
+  hwloc_localeswitch_declare;
+  int force_nolibxml;
+  int ret;
+
+  if (!topology->is_loaded) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  assert(hwloc_nolibxml_callbacks); /* the core called components_init() for the topology */
+
+  if (flags & ~HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  /* presumably forces the C locale so numbers are formatted portably -- confirm */
+  hwloc_localeswitch_init();
+
+  force_nolibxml = hwloc_nolibxml_export();
+retry:
+  if (!hwloc_libxml_callbacks || (hwloc_nolibxml_callbacks && force_nolibxml))
+    ret = hwloc_nolibxml_callbacks->export_file(topology, filename, flags);
+  else {
+    ret = hwloc_libxml_callbacks->export_file(topology, filename, flags);
+    if (ret < 0 && errno == ENOSYS) {
+      /* libxml support compiled out: retry with the built-in writer */
+      hwloc_libxml_callbacks = NULL;
+      goto retry;
+    }
+  }
+
+  hwloc_localeswitch_fini();
+  return ret;
+}
+
+/* this can be the first XML call */
+/* Export the topology into a newly allocated XML memory buffer
+ * (to be released with hwloc_free_xmlbuffer()).  Same backend selection
+ * and ENOSYS fallback as hwloc_topology_export_xml().
+ * Returns 0 on success, -1 with errno set on error. */
+int hwloc_topology_export_xmlbuffer(hwloc_topology_t topology, char **xmlbuffer, int *buflen, unsigned long flags)
+{
+  hwloc_localeswitch_declare;
+  int force_nolibxml;
+  int ret;
+
+  if (!topology->is_loaded) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  assert(hwloc_nolibxml_callbacks); /* the core called components_init() for the topology */
+
+  if (flags & ~HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  hwloc_localeswitch_init();
+
+  force_nolibxml = hwloc_nolibxml_export();
+retry:
+  if (!hwloc_libxml_callbacks || (hwloc_nolibxml_callbacks && force_nolibxml))
+    ret = hwloc_nolibxml_callbacks->export_buffer(topology, xmlbuffer, buflen, flags);
+  else {
+    ret = hwloc_libxml_callbacks->export_buffer(topology, xmlbuffer, buflen, flags);
+    if (ret < 0 && errno == ENOSYS) {
+      /* libxml support compiled out: retry with the built-in writer */
+      hwloc_libxml_callbacks = NULL;
+      goto retry;
+    }
+  }
+
+  hwloc_localeswitch_fini();
+  return ret;
+}
+
+/* this can be the first XML call */
+/* Export a topology diff to an XML file.  refname is recorded as the
+ * reference topology the diff applies to.  TOO_COMPLEX diffs cannot be
+ * serialized and are rejected upfront with EINVAL.
+ * This may run before any topology exists, hence the explicit
+ * components init/fini pair.  Returns 0 on success, -1 on error. */
+int
+hwloc_topology_diff_export_xml(hwloc_topology_diff_t diff, const char *refname,
+			       const char *filename)
+{
+  hwloc_localeswitch_declare;
+  hwloc_topology_diff_t tmpdiff;
+  int force_nolibxml;
+  int ret;
+
+  /* refuse lists containing any non-serializable entry */
+  tmpdiff = diff;
+  while (tmpdiff) {
+    if (tmpdiff->generic.type == HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX) {
+      errno = EINVAL;
+      return -1;
+    }
+    tmpdiff = tmpdiff->generic.next;
+  }
+
+  hwloc_components_init();
+  assert(hwloc_nolibxml_callbacks);
+
+  hwloc_localeswitch_init();
+
+  force_nolibxml = hwloc_nolibxml_export();
+retry:
+  if (!hwloc_libxml_callbacks || (hwloc_nolibxml_callbacks && force_nolibxml))
+    ret = hwloc_nolibxml_callbacks->export_diff_file(diff, refname, filename);
+  else {
+    ret = hwloc_libxml_callbacks->export_diff_file(diff, refname, filename);
+    if (ret < 0 && errno == ENOSYS) {
+      /* libxml support compiled out: retry with the built-in writer */
+      hwloc_libxml_callbacks = NULL;
+      goto retry;
+    }
+  }
+
+  hwloc_localeswitch_fini();
+  hwloc_components_fini();
+  return ret;
+}
+
+/* this can be the first XML call */
+/* Export a topology diff into a newly allocated XML memory buffer
+ * (to be released with hwloc_free_xmlbuffer()).  Same TOO_COMPLEX
+ * rejection and ENOSYS fallback as hwloc_topology_diff_export_xml().
+ * Returns 0 on success, -1 with errno set on error. */
+int
+hwloc_topology_diff_export_xmlbuffer(hwloc_topology_diff_t diff, const char *refname,
+				     char **xmlbuffer, int *buflen)
+{
+  hwloc_localeswitch_declare;
+  hwloc_topology_diff_t tmpdiff;
+  int force_nolibxml;
+  int ret;
+
+  /* refuse lists containing any non-serializable entry */
+  tmpdiff = diff;
+  while (tmpdiff) {
+    if (tmpdiff->generic.type == HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX) {
+      errno = EINVAL;
+      return -1;
+    }
+    tmpdiff = tmpdiff->generic.next;
+  }
+
+  hwloc_components_init();
+  assert(hwloc_nolibxml_callbacks);
+
+  hwloc_localeswitch_init();
+
+  force_nolibxml = hwloc_nolibxml_export();
+retry:
+  if (!hwloc_libxml_callbacks || (hwloc_nolibxml_callbacks && force_nolibxml))
+    ret = hwloc_nolibxml_callbacks->export_diff_buffer(diff, refname, xmlbuffer, buflen);
+  else {
+    ret = hwloc_libxml_callbacks->export_diff_buffer(diff, refname, xmlbuffer, buflen);
+    if (ret < 0 && errno == ENOSYS) {
+      /* libxml support compiled out: retry with the built-in writer */
+      hwloc_libxml_callbacks = NULL;
+      goto retry;
+    }
+  }
+
+  hwloc_localeswitch_fini();
+  hwloc_components_fini();
+  return ret;
+}
+
+/* Free an XML buffer returned by one of the xmlbuffer export calls,
+ * dispatching to whichever XML implementation would have allocated it
+ * (the selection mirrors the export paths). */
+void hwloc_free_xmlbuffer(hwloc_topology_t topology __hwloc_attribute_unused, char *xmlbuffer)
+{
+  int force_nolibxml;
+
+  assert(hwloc_nolibxml_callbacks); /* the core called components_init() for the topology */
+
+  force_nolibxml = hwloc_nolibxml_export();
+  if (hwloc_libxml_callbacks && !(hwloc_nolibxml_callbacks && force_nolibxml))
+    hwloc_libxml_callbacks->free_buffer(xmlbuffer);
+  else
+    hwloc_nolibxml_callbacks->free_buffer(xmlbuffer);
+}
+
+/* Register the callback invoked for each object during XML export
+ * to serialize its application userdata. */
+void
+hwloc_topology_set_userdata_export_callback(hwloc_topology_t topology,
+					    void (*export)(void *reserved, struct hwloc_topology *topology, struct hwloc_obj *obj))
+{
+  topology->userdata_export_cb = export;
+}
+
+/* Emit one <userdata> element: optional name, decoded length in bytes,
+ * an "encoding" marker when base64-encoded, then the (possibly encoded)
+ * content.  Note: the "length" property is always the decoded length,
+ * while the emitted content uses encoded_length bytes when encoded. */
+static void
+hwloc__export_obj_userdata(hwloc__xml_export_state_t parentstate, int encoded,
+			   const char *name, size_t length, const void *buffer, size_t encoded_length)
+{
+  struct hwloc__xml_export_state_s state;
+  char tmp[255];
+  parentstate->new_child(parentstate, &state, "userdata");
+  if (name)
+    state.new_prop(&state, "name", name);
+  sprintf(tmp, "%lu", (unsigned long) length);
+  state.new_prop(&state, "length", tmp);
+  if (encoded)
+    state.new_prop(&state, "encoding", "base64");
+  if (encoded_length)
+    state.add_content(&state, buffer, encoded ? encoded_length : length);
+  state.end_object(&state, "userdata");
+}
+
+/* Public helper for userdata export callbacks: write one raw userdata blob
+ * for the current object (reserved is the export state handed to the callback).
+ * When the topology was imported without decoding userdata, names carry a
+ * "normal"/"base64" prefix plus either ":realname" or "-anon"; that prefix is
+ * stripped back off here before re-exporting.
+ * Returns 0 on success, -1 with errno set on invalid input. */
+int
+hwloc_export_obj_userdata(void *reserved,
+			  struct hwloc_topology *topology, struct hwloc_obj *obj __hwloc_attribute_unused,
+			  const char *name, const void *buffer, size_t length)
+{
+  hwloc__xml_export_state_t state = reserved;
+
+  if (!buffer) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  /* reject names/buffers containing characters unsafe for XML */
+  if ((name && hwloc__xml_export_check_buffer(name, strlen(name)) < 0)
+      || hwloc__xml_export_check_buffer(buffer, length) < 0) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (topology->userdata_not_decoded) {
+    int encoded;
+    size_t encoded_length;
+    const char *realname;
+    /* NOTE(review): name is dereferenced unguarded here; undecoded imported
+     * userdata presumably always has a prefixed non-NULL name -- confirm. */
+    if (!strncmp(name, "normal", 6)) {
+      encoded = 0;
+      encoded_length = length;
+    } else if (!strncmp(name, "base64", 6)) {
+      encoded = 1;
+      encoded_length = BASE64_ENCODED_LENGTH(length);
+    } else
+      assert(0);
+    if (name[6] == ':')
+      realname = name+7;
+    else if (!strcmp(name+6, "-anon"))
+      realname = NULL;
+    else
+      assert(0);
+    hwloc__export_obj_userdata(state, encoded, realname, length, buffer, encoded_length);
+
+  } else
+    hwloc__export_obj_userdata(state, 0, name, length, buffer, length);
+
+  return 0;
+}
+
+/* Public helper for userdata export callbacks: base64-encode buffer and
+ * write it as one <userdata> element with encoding="base64".  The element's
+ * length property records the decoded length.  Not usable on topologies
+ * imported with undecoded userdata (see the assert).
+ * Returns 0 on success, -1 with errno set on error. */
+int
+hwloc_export_obj_userdata_base64(void *reserved,
+				 struct hwloc_topology *topology __hwloc_attribute_unused, struct hwloc_obj *obj __hwloc_attribute_unused,
+				 const char *name, const void *buffer, size_t length)
+{
+  hwloc__xml_export_state_t state = reserved;
+  size_t encoded_length;
+  char *encoded_buffer;
+  int ret __hwloc_attribute_unused;
+
+  if (!buffer) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  assert(!topology->userdata_not_decoded);
+
+  /* only the name needs checking; base64 output is always XML-safe */
+  if (name && hwloc__xml_export_check_buffer(name, strlen(name)) < 0) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  encoded_length = BASE64_ENCODED_LENGTH(length);
+  encoded_buffer = malloc(encoded_length+1);
+  if (!encoded_buffer) {
+    errno = ENOMEM;
+    return -1;
+  }
+
+  ret = hwloc_encode_to_base64(buffer, length, encoded_buffer, encoded_length+1);
+  assert(ret == (int) encoded_length);
+
+  hwloc__export_obj_userdata(state, 1, name, length, encoded_buffer, encoded_length);
+
+  free(encoded_buffer);
+  return 0;
+}
+
+/* Register the callback invoked for each <userdata> element found
+ * while importing a topology from XML. */
+void
+hwloc_topology_set_userdata_import_callback(hwloc_topology_t topology,
+					    void (*import)(struct hwloc_topology *topology, struct hwloc_obj *obj, const char *name, const void *buffer, size_t length))
+{
+  topology->userdata_import_cb = import;
+}
+
+/***************************************
+ ************ XML component ************
+ ***************************************/
+
+/* Backend destructor: let the XML implementation release its private
+ * state first, then free our own bookkeeping. */
+static void
+hwloc_xml_backend_disable(struct hwloc_backend *backend)
+{
+  struct hwloc_xml_backend_data_s *bdata = backend->private_data;
+
+  bdata->backend_exit(bdata);
+  free(bdata->msgprefix);
+  free(bdata);
+}
+
+/* Instantiate the XML discovery backend.  The XML source is either a file
+ * path (_data1), an in-memory buffer (_data2/_data3), or the HWLOC_XMLFILE
+ * environment variable when neither was given.  Tries the libxml2
+ * implementation first unless forced to the built-in parser, falling back
+ * on ENOSYS.  Returns the backend, or NULL with errno set on failure. */
+static struct hwloc_backend *
+hwloc_xml_component_instantiate(struct hwloc_disc_component *component,
+				const void *_data1,
+				const void *_data2,
+				const void *_data3)
+{
+  struct hwloc_xml_backend_data_s *data;
+  struct hwloc_backend *backend;
+  const char *env;
+  int force_nolibxml;
+  const char * xmlpath = (const char *) _data1;
+  const char * xmlbuffer = (const char *) _data2;
+  int xmlbuflen = (int)(uintptr_t) _data3;
+  const char *basename;
+  int err;
+
+  assert(hwloc_nolibxml_callbacks); /* the core called components_init() for the component's topology */
+
+  if (!xmlpath && !xmlbuffer) {
+    env = getenv("HWLOC_XMLFILE");
+    if (env) {
+      /* 'xml' was given in HWLOC_COMPONENTS without a filename */
+      xmlpath = env;
+    } else {
+      errno = EINVAL;
+      goto out;
+    }
+  }
+
+  backend = hwloc_backend_alloc(component);
+  if (!backend)
+    goto out;
+
+  data = malloc(sizeof(*data));
+  if (!data) {
+    errno = ENOMEM;
+    goto out_with_backend;
+  }
+
+  backend->private_data = data;
+  backend->discover = hwloc_look_xml;
+  backend->disable = hwloc_xml_backend_disable;
+  /* an imported topology never describes the running system */
+  backend->is_thissystem = 0;
+
+  /* use the file's basename (or "xmlbuffer") to prefix diagnostics */
+  if (xmlpath) {
+    basename = strrchr(xmlpath, '/');
+    if (basename)
+      basename++;
+    else
+      basename = xmlpath;
+  } else {
+    basename = "xmlbuffer";
+  }
+  /* NOTE(review): strdup() result is unchecked; msgprefix may be NULL on OOM */
+  data->msgprefix = strdup(basename);
+
+  force_nolibxml = hwloc_nolibxml_import();
+retry:
+  if (!hwloc_libxml_callbacks || (hwloc_nolibxml_callbacks && force_nolibxml))
+    err = hwloc_nolibxml_callbacks->backend_init(data, xmlpath, xmlbuffer, xmlbuflen);
+  else {
+    err = hwloc_libxml_callbacks->backend_init(data, xmlpath, xmlbuffer, xmlbuflen);
+    if (err < 0 && errno == ENOSYS) {
+      /* libxml support compiled out: retry with the built-in parser */
+      hwloc_libxml_callbacks = NULL;
+      goto retry;
+    }
+  }
+  if (err < 0)
+    goto out_with_data;
+
+  return backend;
+
+ out_with_data:
+  free(data->msgprefix);
+  free(data);
+ out_with_backend:
+  free(backend);
+ out:
+  return NULL;
+}
+
+/* Registration of the "xml" global discovery component.
+ * Positional initializers: the order must match the
+ * struct hwloc_disc_component / struct hwloc_component definitions. */
+static struct hwloc_disc_component hwloc_xml_disc_component = {
+  HWLOC_DISC_COMPONENT_TYPE_GLOBAL,
+  "xml",
+  ~0, /* presumably excludes all other discovery components -- verify against the struct definition */
+  hwloc_xml_component_instantiate,
+  30,
+  NULL
+};
+
+const struct hwloc_component hwloc_xml_component = {
+  HWLOC_COMPONENT_ABI,
+  NULL, NULL,
+  HWLOC_COMPONENT_TYPE_DISC,
+  0,
+  &hwloc_xml_disc_component
+};
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology.c
new file mode 100644
index 0000000000..be7eec7778
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/topology.c
@@ -0,0 +1,3684 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2017 Inria.  All rights reserved.
+ * Copyright © 2009-2012 Université Bordeaux
+ * Copyright © 2009-2011 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+#include <private/autogen/config.h>
+
+#define _ATFILE_SOURCE
+#include <assert.h>
+#include <sys/types.h>
+#ifdef HAVE_DIRENT_H
+#include <dirent.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <float.h>
+
+#include <hwloc.h>
+#include <private/private.h>
+#include <private/debug.h>
+#include <private/misc.h>
+
+#ifdef HAVE_MACH_MACH_INIT_H
+#include <mach/mach_init.h>
+#endif
+#ifdef HAVE_MACH_MACH_HOST_H
+#include <mach/mach_host.h>
+#endif
+
+#ifdef HAVE_SYS_PARAM_H
+#include <sys/param.h>
+#endif
+
+#ifdef HAVE_SYS_SYSCTL_H
+#include <sys/sysctl.h>
+#endif
+
+#ifdef HWLOC_WIN_SYS
+#include <windows.h>
+#endif
+
+/* Return the hwloc API version this library was compiled against. */
+unsigned hwloc_get_api_version(void)
+{
+  return HWLOC_API_VERSION;
+}
+
+int hwloc_hide_errors(void)
+{
+  static int hide = 0;
+  static int checked = 0;
+  if (!checked) {
+    const char *envvar = getenv("HWLOC_HIDE_ERRORS");
+    if (envvar)
+      hide = atoi(envvar);
+    checked = 1;
+  }
+  return hide;
+}
+
+/* Print a one-time warning banner about an inconsistency reported by the
+ * operating system (at most once per process, and never when
+ * HWLOC_HIDE_ERRORS is set). */
+void hwloc_report_os_error(const char *msg, int line)
+{
+    static int reported = 0;
+
+    if (!reported && !hwloc_hide_errors()) {
+        fprintf(stderr, "****************************************************************************\n");
+        fprintf(stderr, "* hwloc %s has encountered what looks like an error from the operating system.\n", HWLOC_VERSION);
+        fprintf(stderr, "*\n");
+        fprintf(stderr, "* %s\n", msg);
+        fprintf(stderr, "* Error occurred in topology.c line %d\n", line);
+        fprintf(stderr, "*\n");
+        fprintf(stderr, "* The following FAQ entry in the hwloc documentation may help:\n");
+        fprintf(stderr, "*   What should I do when hwloc reports \"operating system\" warnings?\n");
+        fprintf(stderr, "* Otherwise please report this error message to the hwloc user's mailing list,\n");
+#ifdef HWLOC_LINUX_SYS
+        fprintf(stderr, "* along with the output+tarball generated by the hwloc-gather-topology script.\n");
+#else
+	fprintf(stderr, "* along with any relevant topology information from your platform.\n");
+#endif
+        fprintf(stderr, "****************************************************************************\n");
+        reported = 1;
+    }
+}
+
+#if defined(HAVE_SYSCTLBYNAME)
+/* Read an integer sysctl by name, accepting either a 32-bit or 64-bit
+ * kernel result and widening it into *ret.
+ * Returns 0 on success, -1 on failure or unexpected result size. */
+int hwloc_get_sysctlbyname(const char *name, int64_t *ret)
+{
+  union {
+    int32_t i32;
+    int64_t i64;
+  } n;
+  size_t size = sizeof(n);
+  if (sysctlbyname(name, &n, &size, NULL, 0))
+    return -1;
+  switch (size) {
+    case sizeof(n.i32):
+      *ret = n.i32;
+      break;
+    case sizeof(n.i64):
+      *ret = n.i64;
+      break;
+    default:
+      return -1;
+  }
+  return 0;
+}
+#endif
+
+#if defined(HAVE_SYSCTL)
+/* Read an integer sysctl given its MIB name vector.
+ * Returns 0 on success (value stored in *ret), -1 on failure or when the
+ * kernel returns a result of unexpected size. */
+int hwloc_get_sysctl(int name[], unsigned namelen, int *ret)
+{
+  int value;
+  size_t len = sizeof(value);
+
+  if (sysctl(name, namelen, &value, &len, NULL, 0) != 0)
+    return -1;
+  if (len != sizeof(value))
+    return -1;
+  *ret = value;
+  return 0;
+}
+#endif
+
+/* Return the OS-provided number of processors.  Unlike other methods such as
+   reading sysfs on Linux, this method is not virtualizable; thus it's only
+   used as a fall-back method, allowing virtual backends (FSROOT, etc) to
+   have the desired effect.  */
+#ifndef HWLOC_WIN_SYS /* The windows implementation is in topology-windows.c */
+unsigned
+hwloc_fallback_nbprocessors(struct hwloc_topology *topology) {
+  int n;
+#if HAVE_DECL__SC_NPROCESSORS_ONLN
+  n = sysconf(_SC_NPROCESSORS_ONLN);
+#elif HAVE_DECL__SC_NPROC_ONLN
+  n = sysconf(_SC_NPROC_ONLN);
+#elif HAVE_DECL__SC_NPROCESSORS_CONF
+  n = sysconf(_SC_NPROCESSORS_CONF);
+#elif HAVE_DECL__SC_NPROC_CONF
+  n = sysconf(_SC_NPROC_CONF);
+#elif defined(HAVE_HOST_INFO) && HAVE_HOST_INFO
+  struct host_basic_info info;
+  mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
+  host_info(mach_host_self(), HOST_BASIC_INFO, (integer_t*) &info, &count);
+  n = info.avail_cpus;
+#elif defined(HAVE_SYSCTLBYNAME)
+  int64_t nn;
+  if (hwloc_get_sysctlbyname("hw.ncpu", &nn))
+    nn = -1;
+  n = nn;
+#elif defined(HAVE_SYSCTL) && HAVE_DECL_CTL_HW && HAVE_DECL_HW_NCPU
+  /* BUGFIX: use HW_NCPU (the guard above already tests HAVE_DECL_HW_NCPU;
+   * HW_NPCU was a typo) and pass &n as the third argument of
+   * hwloc_get_sysctl().  The previous misplaced parenthesis called the
+   * 3-argument function with only 2 arguments and made the if-condition
+   * the comma expression `call(name, len), &n`, which is always true. */
+  static int name[2] = {CTL_HW, HW_NCPU};
+  if (hwloc_get_sysctl(name, sizeof(name)/sizeof(*name), &n))
+    n = -1;
+#else
+#ifdef __GNUC__
+#warning No known way to discover number of available processors on this system
+#warning hwloc_fallback_nbprocessors will default to 1
+#endif
+  n = -1;
+#endif
+  /* record PU discovery support only when the OS reported a sane count;
+   * otherwise pretend there is a single processor */
+  if (n >= 1)
+    topology->support.discovery->pu = 1;
+  else
+    n = 1;
+  return n;
+}
+#endif /* !HWLOC_WIN_SYS */
+
+/*
+ * Use the given number of processors to set a PU level.
+ */
+void
+hwloc_setup_pu_level(struct hwloc_topology *topology,
+		     unsigned nb_pus)
+{
+  struct hwloc_obj *obj;
+  unsigned oscpu,cpu;
+
+  hwloc_debug("%s", "\n\n * CPU cpusets *\n\n");
+  /* cpu and oscpu are both incremented each iteration, so the OS index
+   * currently mirrors the logical CPU number one-to-one */
+  for (cpu=0,oscpu=0; cpu<nb_pus; oscpu++)
+    {
+      obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PU, oscpu);
+      obj->cpuset = hwloc_bitmap_alloc();
+      /* each PU's cpuset contains exactly its own OS index */
+      hwloc_bitmap_only(obj->cpuset, oscpu);
+
+      hwloc_debug_2args_bitmap("cpu %u (os %u) has cpuset %s\n",
+		 cpu, oscpu, obj->cpuset);
+      hwloc_insert_object_by_cpuset(topology, obj);
+
+      cpu++;
+    }
+}
+
+#ifdef HWLOC_DEBUG
+/* Just for debugging.  */
+/* Dump one object: type, OS index, attributes, name/subtype,
+ * and every cpuset/nodeset it carries. */
+static void
+hwloc_debug_print_object(int indent __hwloc_attribute_unused, hwloc_obj_t obj)
+{
+  char type[64], idx[10], attr[1024], *cpuset = NULL;
+  hwloc_debug("%*s", 2*indent, "");
+  hwloc_obj_type_snprintf(type, sizeof(type), obj, 1);
+  if (obj->os_index != (unsigned) -1)
+    snprintf(idx, sizeof(idx), "#%u", obj->os_index);
+  else
+    *idx = '\0';
+  hwloc_obj_attr_snprintf(attr, sizeof(attr), obj, " ", 1);
+  if (obj->name)
+    hwloc_debug(" name %s", obj->name);
+  hwloc_debug("%s%s%s%s%s", type, idx, *attr ? "(" : "", attr, *attr ? ")" : "");
+  if (obj->subtype)
+    hwloc_debug(" subtype %s", obj->subtype);
+  if (obj->cpuset) {
+    hwloc_bitmap_asprintf(&cpuset, obj->cpuset);
+    hwloc_debug(" cpuset %s", cpuset);
+    free(cpuset);
+  }
+  if (obj->complete_cpuset) {
+    hwloc_bitmap_asprintf(&cpuset, obj->complete_cpuset);
+    hwloc_debug(" complete %s", cpuset);
+    free(cpuset);
+  }
+  if (obj->allowed_cpuset) {
+    hwloc_bitmap_asprintf(&cpuset, obj->allowed_cpuset);
+    hwloc_debug(" allowed %s", cpuset);
+    free(cpuset);
+  }
+  if (obj->nodeset) {
+    hwloc_bitmap_asprintf(&cpuset, obj->nodeset);
+    hwloc_debug(" nodeset %s", cpuset);
+    free(cpuset);
+  }
+  if (obj->complete_nodeset) {
+    hwloc_bitmap_asprintf(&cpuset, obj->complete_nodeset);
+    hwloc_debug(" completeN %s", cpuset);
+    free(cpuset);
+  }
+  if (obj->allowed_nodeset) {
+    hwloc_bitmap_asprintf(&cpuset, obj->allowed_nodeset);
+    hwloc_debug(" allowedN %s", cpuset);
+    free(cpuset);
+  }
+  if (obj->arity)
+    hwloc_debug(" arity %u", obj->arity);
+  hwloc_debug("%s", "\n");
+}
+
+/* Recursively dump obj and all its normal, I/O, and Misc children. */
+static void
+hwloc_debug_print_objects(int indent __hwloc_attribute_unused, hwloc_obj_t obj)
+{
+  hwloc_obj_t child;
+  hwloc_debug_print_object(indent, obj);
+  for (child = obj->first_child; child; child = child->next_sibling)
+    hwloc_debug_print_objects(indent + 1, child);
+  for (child = obj->io_first_child; child; child = child->next_sibling)
+    hwloc_debug_print_objects(indent + 1, child);
+  for (child = obj->misc_first_child; child; child = child->next_sibling)
+    hwloc_debug_print_objects(indent + 1, child);
+}
+#else /* !HWLOC_DEBUG */
+#define hwloc_debug_print_object(indent, obj) do { /* nothing */ } while (0)
+#define hwloc_debug_print_objects(indent, obj) do { /* nothing */ } while (0)
+#endif /* !HWLOC_DEBUG */
+
+/* Release an info array together with every name/value string it owns. */
+void hwloc__free_infos(struct hwloc_obj_info_s *infos, unsigned count)
+{
+  unsigned idx;
+
+  for (idx = 0; idx < count; idx++) {
+    free(infos[idx].name);
+    free(infos[idx].value);
+  }
+  free(infos);
+}
+
+/* Append a (name,value) info pair to the array, growing it in chunks of 8.
+ * Both strings are duplicated; value may be NULL.  A failed reallocation
+ * silently drops the new pair.
+ * NOTE(review): strdup() results are unchecked; a name/value may end up
+ * NULL on OOM. */
+void hwloc__add_info(struct hwloc_obj_info_s **infosp, unsigned *countp, const char *name, const char *value)
+{
+  unsigned count = *countp;
+  struct hwloc_obj_info_s *infos = *infosp;
+#define OBJECT_INFO_ALLOC 8
+  /* nothing allocated initially, (re-)allocate by multiple of 8 */
+  unsigned alloccount = (count + 1 + (OBJECT_INFO_ALLOC-1)) & ~(OBJECT_INFO_ALLOC-1);
+  if (count != alloccount) {
+    struct hwloc_obj_info_s *tmpinfos = realloc(infos, alloccount*sizeof(*infos));
+    if (!tmpinfos)
+      /* failed to allocate, ignore this info */
+      return;
+    infos = tmpinfos;
+  }
+  infos[count].name = strdup(name);
+  infos[count].value = value ? strdup(value) : NULL;
+  *infosp = infos;
+  *countp = count+1;
+}
+
+/* Return a pointer to the value slot for name, appending a new NULL-valued
+ * entry when none exists yet.
+ * NOTE(review): if hwloc__add_info() fails to grow the array, the returned
+ * slot points at the previous last entry -- presumably tolerated by callers,
+ * but worth confirming. */
+char ** hwloc__find_info_slot(struct hwloc_obj_info_s **infosp, unsigned *countp, const char *name)
+{
+  unsigned i;
+  for(i=0; i<*countp; i++) {
+    if (!strcmp((*infosp)[i].name, name))
+      return &(*infosp)[i].value;
+  }
+  hwloc__add_info(infosp, countp, name, NULL);
+  return &(*infosp)[*countp-1].value;
+}
+
+/* Move every info from src to dst: the string pointers are stolen, not
+ * copied.  On reallocation failure the appended infos are freed and
+ * dropped.  In both cases src is left empty (NULL array, zero count). */
+void hwloc__move_infos(struct hwloc_obj_info_s **dst_infosp, unsigned *dst_countp,
+		       struct hwloc_obj_info_s **src_infosp, unsigned *src_countp)
+{
+  unsigned dst_count = *dst_countp;
+  struct hwloc_obj_info_s *dst_infos = *dst_infosp;
+  unsigned src_count = *src_countp;
+  struct hwloc_obj_info_s *src_infos = *src_infosp;
+  unsigned i;
+#define OBJECT_INFO_ALLOC 8
+  /* nothing allocated initially, (re-)allocate by multiple of 8 */
+  unsigned alloccount = (dst_count + src_count + (OBJECT_INFO_ALLOC-1)) & ~(OBJECT_INFO_ALLOC-1);
+  if (dst_count != alloccount) {
+    struct hwloc_obj_info_s *tmp_infos = realloc(dst_infos, alloccount*sizeof(*dst_infos));
+    if (!tmp_infos)
+      /* Failed to realloc, ignore the appended infos */
+      goto drop;
+    dst_infos = tmp_infos;
+  }
+  /* steal the string pointers */
+  for(i=0; i<src_count; i++, dst_count++) {
+    dst_infos[dst_count].name = src_infos[i].name;
+    dst_infos[dst_count].value = src_infos[i].value;
+  }
+  *dst_infosp = dst_infos;
+  *dst_countp = dst_count;
+  free(src_infos);
+  *src_infosp = NULL;
+  *src_countp = 0;
+  return;
+
+ drop:
+  /* free the source strings since they were never attached to dst */
+  for(i=0; i<src_count; i++, dst_count++) {
+    free(src_infos[i].name);
+    free(src_infos[i].value);
+  }
+  free(src_infos);
+  *src_infosp = NULL;
+  *src_countp = 0;
+  /* dst_infos not modified */
+}
+
+/* Public wrapper: attach a (name,value) info pair to an object. */
+void hwloc_obj_add_info(hwloc_obj_t obj, const char *name, const char *value)
+{
+  hwloc__add_info(&obj->infos, &obj->infos_count, name, value);
+}
+
+/* Same, but when nodup is set, keep a pre-existing info with that name
+ * instead of adding a duplicate. */
+void hwloc_obj_add_info_nodup(hwloc_obj_t obj, const char *name, const char *value, int nodup)
+{
+  if (nodup && hwloc_obj_get_info_by_name(obj, name))
+    return;
+  hwloc__add_info(&obj->infos, &obj->infos_count, name, value);
+}
+
+/* Traverse children of a parent in a safe way: reread the next pointer as
+ * appropriate to prevent crash on child deletion:  */
+#define for_each_child_safe(child, parent, pchild) \
+  for (pchild = &(parent)->first_child, child = *pchild; \
+       child; \
+       /* Check whether the current child was not dropped.  */ \
+       (*pchild == child ? pchild = &(child->next_sibling) : NULL), \
+       /* Get pointer to next child.  */ \
+        child = *pchild)
+/* Same deletion-safe traversal, over the I/O children list. */
+#define for_each_io_child_safe(child, parent, pchild) \
+  for (pchild = &(parent)->io_first_child, child = *pchild; \
+       child; \
+       /* Check whether the current child was not dropped.  */ \
+       (*pchild == child ? pchild = &(child->next_sibling) : NULL), \
+       /* Get pointer to next child.  */ \
+        child = *pchild)
+/* Same deletion-safe traversal, over the Misc children list. */
+#define for_each_misc_child_safe(child, parent, pchild) \
+  for (pchild = &(parent)->misc_first_child, child = *pchild; \
+       child; \
+       /* Check whether the current child was not dropped.  */ \
+       (*pchild == child ? pchild = &(child->next_sibling) : NULL), \
+       /* Get pointer to next child.  */ \
+        child = *pchild)
+
+/* Free everything an object owns (infos, memory page types, attributes,
+ * children array, strings, all bitmaps) without freeing the object itself
+ * or touching its children objects. */
+static void
+hwloc__free_object_contents(hwloc_obj_t obj)
+{
+  switch (obj->type) {
+  default:
+    /* no type-specific contents to release at the moment */
+    break;
+  }
+  hwloc__free_infos(obj->infos, obj->infos_count);
+  free(obj->memory.page_types);
+  free(obj->attr);
+  free(obj->children);
+  free(obj->subtype);
+  free(obj->name);
+  hwloc_bitmap_free(obj->cpuset);
+  hwloc_bitmap_free(obj->complete_cpuset);
+  hwloc_bitmap_free(obj->allowed_cpuset);
+  hwloc_bitmap_free(obj->nodeset);
+  hwloc_bitmap_free(obj->complete_nodeset);
+  hwloc_bitmap_free(obj->allowed_nodeset);
+}
+
+/* Free an object and all its content.  */
+void
+hwloc_free_unlinked_object(hwloc_obj_t obj)
+{
+  hwloc__free_object_contents(obj);
+  free(obj);
+}
+
+/* Replace old with contents of new object, and make new freeable by the caller.
+ * Only updates next_sibling/first_child pointers,
+ * so may only be used during early discovery.
+ */
+static void
+hwloc_replace_linked_object(hwloc_obj_t old, hwloc_obj_t new)
+{
+  /* drop old fields */
+  hwloc__free_object_contents(old);
+  /* copy old tree pointers to new */
+  new->parent = old->parent;
+  new->next_sibling = old->next_sibling;
+  new->first_child = old->first_child;
+  new->io_first_child = old->io_first_child;
+  new->misc_first_child = old->misc_first_child;
+  /* copy new contents to old now that tree pointers are OK */
+  memcpy(old, new, sizeof(*old));
+  /* clear new so that we may free it without releasing what old now owns */
+  memset(new, 0,sizeof(*new));
+}
+
+/* Remove an object and its children from its parent and free them.
+ * Only updates next_sibling/first_child pointers,
+ * so may only be used during early discovery or during destroy.
+ * pobj is the link that points to the object (e.g. &parent->first_child
+ * or &prev->next_sibling); it is redirected to the next sibling.
+ */
+static void
+unlink_and_free_object_and_children(hwloc_obj_t *pobj)
+{
+  hwloc_obj_t obj = *pobj, child, *pchild;
+
+  /* recurse over all three child lists before freeing the object itself */
+  for_each_child_safe(child, obj, pchild)
+    unlink_and_free_object_and_children(pchild);
+  for_each_io_child_safe(child, obj, pchild)
+    unlink_and_free_object_and_children(pchild);
+  for_each_misc_child_safe(child, obj, pchild)
+    unlink_and_free_object_and_children(pchild);
+
+  *pobj = obj->next_sibling;
+  hwloc_free_unlinked_object(obj);
+}
+
+/* Free an object and its children without unlinking from parent.
+ */
+void
+hwloc_free_object_and_children(hwloc_obj_t obj)
+{
+  unlink_and_free_object_and_children(&obj);
+}
+
+/* Free an object, its next siblings and their children without unlinking from parent.
+ */
+void
+hwloc_free_object_siblings_and_children(hwloc_obj_t obj)
+{
+  /* each call advances obj to the next sibling via the local pointer */
+  while (obj)
+    unlink_and_free_object_and_children(&obj);
+}
+
+/* insert the (non-empty) list of sibling starting at firstnew as new children of newparent,
+ * and return the address of the pointer to the next one
+ * (i.e. the last inserted child's next_sibling slot, where the caller can
+ * re-attach the rest of the original sibling list).
+ */
+static hwloc_obj_t *
+insert_siblings_list(hwloc_obj_t *firstp, hwloc_obj_t firstnew, hwloc_obj_t newparent)
+{
+  hwloc_obj_t tmp;
+  assert(firstnew);
+  *firstp = tmp = firstnew;
+  tmp->parent = newparent;
+  while (tmp->next_sibling) {
+    tmp = tmp->next_sibling;
+    tmp->parent = newparent;
+  }
+  return &tmp->next_sibling;
+}
+
+/* Take the new list starting at firstnew and prepend it to the old list starting at *firstp,
+ * and mark the new children as children of newparent.
+ * May be used during early or late discovery (updates prev_sibling and sibling_rank).
+ * List firstnew must be non-NULL.
+ */
+static void
+prepend_siblings_list(hwloc_obj_t *firstp, hwloc_obj_t firstnew, hwloc_obj_t newparent)
+{
+  hwloc_obj_t *tmpp, tmp, last;
+  unsigned length;
+
+  /* update parent pointers and find the length and end of the new list */
+  for(length = 0, tmpp = &firstnew, last = NULL ; *tmpp; length++, last = *tmpp, tmpp = &((*tmpp)->next_sibling))
+    (*tmpp)->parent = newparent;
+
+  /* update sibling_rank */
+  for(tmp = *firstp; tmp; tmp = tmp->next_sibling)
+    tmp->sibling_rank += length; /* if it wasn't initialized yet, it'll be overwritten later */
+
+  /* place the existing list at the end of the new one */
+  *tmpp = *firstp;
+  if (*firstp)
+    (*firstp)->prev_sibling = last;
+
+  /* use the beginning of the new list now */
+  *firstp = firstnew;
+}
+
+/* Take the new list starting at firstnew and append it to the old list starting at *firstp,
+ * and mark the new children as children of newparent.
+ * May be used during early or late discovery (updates prev_sibling and sibling_rank).
+ */
+static void
+append_siblings_list(hwloc_obj_t *firstp, hwloc_obj_t firstnew, hwloc_obj_t newparent)
+{
+  hwloc_obj_t *tmpp, tmp, last;
+  unsigned length;
+
+  /* find the length and end of the existing list */
+  for(length = 0, tmpp = firstp, last = NULL ; *tmpp; length++, last = *tmpp, tmpp = &((*tmpp)->next_sibling));
+
+  /* update parent pointers and sibling_rank */
+  for(tmp = firstnew; tmp; tmp = tmp->next_sibling) {
+    tmp->parent = newparent;
+    tmp->sibling_rank += length; /* if it wasn't set yet, it'll be overwritten later */
+  }
+
+  /* place new list at the end of the old one */
+  *tmpp = firstnew;
+  if (firstnew)
+    firstnew->prev_sibling = last;
+}
+
+/* Remove an object from its parent and free it.
+ * Only updates next_sibling/first_child pointers,
+ * so may only be used during early discovery.
+ *
+ * pparent is the link pointing to the object to remove (e.g.
+ * &parent->first_child or &prev->next_sibling).
+ *
+ * Children are inserted in the parent.
+ * If children should be inserted somewhere else (e.g. when merging with a child),
+ * the caller should move them before calling this function.
+ */
+static void
+unlink_and_free_single_object(hwloc_obj_t *pparent)
+{
+  hwloc_obj_t old = *pparent;
+  hwloc_obj_t *lastp;
+
+  if (old->type == HWLOC_OBJ_MISC) {
+    /* Misc object */
+
+    /* no normal children */
+    assert(!old->first_child);
+
+    /* no I/O children */
+    assert(!old->io_first_child);
+
+    if (old->misc_first_child)
+      /* insert old misc object children as new siblings below parent instead of old */
+      lastp = insert_siblings_list(pparent, old->misc_first_child, old->parent);
+    else
+      lastp = pparent;
+    /* append old siblings back */
+    *lastp = old->next_sibling;
+
+  } else if (hwloc_obj_type_is_io(old->type)) {
+    /* I/O object */
+
+    /* no normal children */
+    assert(!old->first_child);
+
+    if (old->io_first_child)
+      /* insert old I/O object children as new siblings below parent instead of old */
+      lastp = insert_siblings_list(pparent, old->io_first_child, old->parent);
+    else
+      lastp = pparent;
+    /* append old siblings back */
+    *lastp = old->next_sibling;
+
+    /* append old Misc children to parent */
+    if (old->misc_first_child)
+      append_siblings_list(&old->parent->misc_first_child, old->misc_first_child, old->parent);
+
+  } else {
+    /* Normal object */
+
+    if (old->first_child)
+      /* insert old object children as new siblings below parent instead of old */
+      lastp = insert_siblings_list(pparent, old->first_child, old->parent);
+    else
+      lastp = pparent;
+    /* append old siblings back */
+    *lastp = old->next_sibling;
+
+    /* append old I/O and Misc children to parent
+     * old->parent cannot be NULL (removing root), misc children should have been moved by the caller earlier.
+     */
+    if (old->io_first_child)
+      append_siblings_list(&old->parent->io_first_child, old->io_first_child, old->parent);
+    if (old->misc_first_child)
+      append_siblings_list(&old->parent->misc_first_child, old->misc_first_child, old->parent);
+  }
+
+  hwloc_free_unlinked_object(old);
+}
+
+/* Recursively duplicate SRC (and its children) into NEWTOPOLOGY.
+ * Either NEWOBJ is the already-allocated new root (and NEWPARENT is NULL),
+ * or a new object is allocated here and inserted below NEWPARENT.
+ * Returns 0 on success, -1 on allocation failure; on failure the partially
+ * duplicated object is still inserted so that the caller can destroy the
+ * entire new topology in one pass.
+ */
+static int
+hwloc__duplicate_object(struct hwloc_topology *newtopology,
+			struct hwloc_obj *newparent,
+			struct hwloc_obj *newobj,
+			struct hwloc_obj *src)
+{
+  size_t len;
+  unsigned i;
+  hwloc_obj_t child;
+  int err = 0;
+
+  /* either we're duplicating to an already allocated new root, which has no newparent,
+   * or we're duplicating to a non-yet allocated new non-root, which will have a newparent.
+   */
+  assert(!newparent == !!newobj);
+
+  if (!newobj) {
+    newobj = hwloc_alloc_setup_object(newtopology, src->type, src->os_index);
+    if (!newobj)
+      return -1;
+  }
+
+  newobj->type = src->type;
+  newobj->os_index = src->os_index;
+  newobj->gp_index = src->gp_index;
+  newobj->symmetric_subtree = src->symmetric_subtree;
+
+  if (src->name)
+    newobj->name = strdup(src->name);
+  if (src->subtype)
+    newobj->subtype = strdup(src->subtype);
+  /* userdata is shared (shallow copy), matching hwloc_topology_dup() semantics */
+  newobj->userdata = src->userdata;
+
+  memcpy(&newobj->memory, &src->memory, sizeof(struct hwloc_obj_memory_s));
+  if (src->memory.page_types_len) {
+    len = src->memory.page_types_len * sizeof(struct hwloc_obj_memory_page_type_s);
+    newobj->memory.page_types = malloc(len);
+    if (!newobj->memory.page_types) {
+      /* Don't memcpy() into NULL on allocation failure (was unchecked before).
+       * Clear the stale pointer copied from src by the memcpy above, then
+       * fail like a child-duplication failure: insert what we have and
+       * report the error so the caller destroys the whole new topology.
+       */
+      newobj->memory.page_types_len = 0;
+      err = -1;
+      goto out_with_children;
+    }
+    memcpy(newobj->memory.page_types, src->memory.page_types, len);
+  }
+
+  memcpy(newobj->attr, src->attr, sizeof(*newobj->attr));
+
+  /* duplicate all cpusets/nodesets of the source object */
+  newobj->cpuset = hwloc_bitmap_dup(src->cpuset);
+  newobj->complete_cpuset = hwloc_bitmap_dup(src->complete_cpuset);
+  newobj->allowed_cpuset = hwloc_bitmap_dup(src->allowed_cpuset);
+  newobj->nodeset = hwloc_bitmap_dup(src->nodeset);
+  newobj->complete_nodeset = hwloc_bitmap_dup(src->complete_nodeset);
+  newobj->allowed_nodeset = hwloc_bitmap_dup(src->allowed_nodeset);
+
+  for(i=0; i<src->infos_count; i++)
+    hwloc__add_info(&newobj->infos, &newobj->infos_count, src->infos[i].name, src->infos[i].value);
+
+  /* recurse into the three child lists (normal, I/O, Misc) */
+  for(child = src->first_child; child; child = child->next_sibling) {
+    err = hwloc__duplicate_object(newtopology, newobj, NULL, child);
+    if (err < 0)
+      goto out_with_children;
+  }
+  for(child = src->io_first_child; child; child = child->next_sibling) {
+    err = hwloc__duplicate_object(newtopology, newobj, NULL, child);
+    if (err < 0)
+      goto out_with_children;
+  }
+  for(child = src->misc_first_child; child; child = child->next_sibling) {
+    err = hwloc__duplicate_object(newtopology, newobj, NULL, child);
+    if (err < 0)
+      goto out_with_children;
+  }
+
+ out_with_children:
+
+  /* some children insertion may have failed, but some children may have been inserted below us already.
+   * keep inserting ourself and let the caller clean the entire tree if we return an error.
+   */
+
+  if (newparent) {
+    /* no need to check the children insert order here, the source topology
+     * is supposed to be OK already, and we have debug asserts.
+     */
+    hwloc_insert_object_by_parent(newtopology, newparent, newobj);
+  }
+
+  return err;
+}
+
+/* Duplicate the already-loaded topology OLD into a freshly initialized one
+ * returned via NEWP.
+ * Copies scalar configuration (flags, filters, hooks, support arrays,
+ * userdata callbacks), then recursively duplicates the object tree and the
+ * internal distances, and finally rebuilds levels via reconnect.
+ * Returns 0 on success, -1 on error (errno is EINVAL if OLD is not loaded).
+ */
+int
+hwloc_topology_dup(hwloc_topology_t *newp,
+		   hwloc_topology_t old)
+{
+  hwloc_topology_t new;
+  hwloc_obj_t newroot;
+  hwloc_obj_t oldroot = hwloc_get_root_obj(old);
+  int err;
+
+  /* only loaded topologies can be duplicated */
+  if (!old->is_loaded) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  err = hwloc_topology_init(&new);
+  if (err < 0)
+    goto out;
+
+  /* copy scalar configuration before duplicating the tree */
+  new->flags = old->flags;
+  memcpy(new->type_filter, old->type_filter, sizeof(old->type_filter));
+  new->is_thissystem = old->is_thissystem;
+  new->is_loaded = 1;
+  new->pid = old->pid;
+  new->next_gp_index = old->next_gp_index;
+
+  memcpy(&new->binding_hooks, &old->binding_hooks, sizeof(old->binding_hooks));
+
+  memcpy(new->support.discovery, old->support.discovery, sizeof(*old->support.discovery));
+  memcpy(new->support.cpubind, old->support.cpubind, sizeof(*old->support.cpubind));
+  memcpy(new->support.membind, old->support.membind, sizeof(*old->support.membind));
+
+  new->userdata_export_cb = old->userdata_export_cb;
+  new->userdata_import_cb = old->userdata_import_cb;
+  new->userdata_not_decoded = old->userdata_not_decoded;
+
+  /* recursively duplicate the whole object tree into the preallocated new root */
+  newroot = hwloc_get_root_obj(new);
+  err = hwloc__duplicate_object(new, NULL, newroot, oldroot);
+  if (err < 0)
+    goto out_with_topology;
+
+  err = hwloc_internal_distances_dup(new, old);
+  if (err < 0)
+    goto out_with_topology;
+
+  /* no need to duplicate backends, topology is already loaded */
+  new->backends = NULL;
+  new->get_pci_busid_cpuset_backend = NULL;
+
+  /* rebuild levels and cousin/sibling pointers for the duplicated tree */
+  if (hwloc_topology_reconnect(new, 0) < 0)
+    goto out_with_topology;
+
+#ifndef HWLOC_DEBUG
+  if (getenv("HWLOC_DEBUG_CHECK"))
+#endif
+    hwloc_topology_check(new);
+
+  *newp = new;
+  return 0;
+
+ out_with_topology:
+  hwloc_topology_destroy(new);
+ out:
+  return -1;
+}
+
+/* WARNING: The indexes of this array MUST match the ordering that of
+   the obj_order_type[] array, below.  Specifically, the values must
+   be laid out such that:
+
+       obj_order_type[obj_type_order[N]] = N
+
+   for all HWLOC_OBJ_* values of N.  Put differently:
+
+       obj_type_order[A] = B
+
+   where the A values are in order of the hwloc_obj_type_t enum, and
+   the B values are the corresponding indexes of obj_order_type.
+
+   We can't use C99 syntax to initialize this in a little safer manner
+   -- bummer.  :-(
+
+   Correctness is asserted in hwloc_topology_init() when debug is enabled.
+   */
+/***** Make sure you update obj_type_priority[] below as well. *****/
+/* Maps each hwloc_obj_type_t value to its ordering rank (smaller = higher in
+ * the tree); inverse of obj_order_type[] below (see the comment above). */
+static const unsigned obj_type_order[] = {
+    /* first entry is HWLOC_OBJ_SYSTEM */  0,
+    /* next entry is HWLOC_OBJ_MACHINE */  1,
+    /* next entry is HWLOC_OBJ_NUMANODE */ 3,
+    /* next entry is HWLOC_OBJ_PACKAGE */  4,
+    /* next entry is HWLOC_OBJ_CORE */     13,
+    /* next entry is HWLOC_OBJ_PU */       17,
+    /* next entry is HWLOC_OBJ_L1CACHE */  11,
+    /* next entry is HWLOC_OBJ_L2CACHE */  9,
+    /* next entry is HWLOC_OBJ_L3CACHE */  7,
+    /* next entry is HWLOC_OBJ_L4CACHE */  6,
+    /* next entry is HWLOC_OBJ_L5CACHE */  5,
+    /* next entry is HWLOC_OBJ_L1ICACHE */ 12,
+    /* next entry is HWLOC_OBJ_L2ICACHE */ 10,
+    /* next entry is HWLOC_OBJ_L3ICACHE */ 8,
+    /* next entry is HWLOC_OBJ_GROUP */    2,
+    /* next entry is HWLOC_OBJ_MISC */     18,
+    /* next entry is HWLOC_OBJ_BRIDGE */   14,
+    /* next entry is HWLOC_OBJ_PCI_DEVICE */  15,
+    /* next entry is HWLOC_OBJ_OS_DEVICE */   16
+};
+
+/* Inverse of obj_type_order[]: object types listed from topmost (root-like)
+ * to deepest; see the consistency note above. */
+static const hwloc_obj_type_t obj_order_type[] = {
+  HWLOC_OBJ_SYSTEM,
+  HWLOC_OBJ_MACHINE,
+  HWLOC_OBJ_GROUP,
+  HWLOC_OBJ_NUMANODE,
+  HWLOC_OBJ_PACKAGE,
+  HWLOC_OBJ_L5CACHE,
+  HWLOC_OBJ_L4CACHE,
+  HWLOC_OBJ_L3CACHE,
+  HWLOC_OBJ_L3ICACHE,
+  HWLOC_OBJ_L2CACHE,
+  HWLOC_OBJ_L2ICACHE,
+  HWLOC_OBJ_L1CACHE,
+  HWLOC_OBJ_L1ICACHE,
+  HWLOC_OBJ_CORE,
+  HWLOC_OBJ_BRIDGE,
+  HWLOC_OBJ_PCI_DEVICE,
+  HWLOC_OBJ_OS_DEVICE,
+  HWLOC_OBJ_PU,
+  HWLOC_OBJ_MISC, /* Misc is always a leaf */
+};
+/***** Make sure you update obj_type_priority[] below as well. *****/
+
+/* priority to be used when merging identical parent/children object
+ * (in merge_useless_child), keep the highest priority one.
+ *
+ * Always keep Machine/NUMANode/PU/PCIDev/OSDev
+ * then System
+ * then Core
+ * then Package
+ * then Cache,
+ * then Instruction Caches
+ * then always drop Group/Misc/Bridge.
+ *
+ * Some type won't actually ever be involved in such merging.
+ */
+/***** Make sure you update this array when changing the list of types. *****/
+/* Priority of each hwloc_obj_type_t when merging an identical parent/child
+ * pair (higher wins and is kept); indexed by hwloc_obj_type_t, see the
+ * comment above. */
+static const int obj_type_priority[] = {
+  /* first entry is HWLOC_OBJ_SYSTEM */     80,
+  /* next entry is HWLOC_OBJ_MACHINE */     90,
+  /* next entry is HWLOC_OBJ_NUMANODE */    100,
+  /* next entry is HWLOC_OBJ_PACKAGE */     40,
+  /* next entry is HWLOC_OBJ_CORE */        60,
+  /* next entry is HWLOC_OBJ_PU */          100,
+  /* next entry is HWLOC_OBJ_L1CACHE */     20,
+  /* next entry is HWLOC_OBJ_L2CACHE */     20,
+  /* next entry is HWLOC_OBJ_L3CACHE */     20,
+  /* next entry is HWLOC_OBJ_L4CACHE */     20,
+  /* next entry is HWLOC_OBJ_L5CACHE */     20,
+  /* next entry is HWLOC_OBJ_L1ICACHE */    19,
+  /* next entry is HWLOC_OBJ_L2ICACHE */    19,
+  /* next entry is HWLOC_OBJ_L3ICACHE */    19,
+  /* next entry is HWLOC_OBJ_GROUP */       0,
+  /* next entry is HWLOC_OBJ_MISC */        0,
+  /* next entry is HWLOC_OBJ_BRIDGE */      0,
+  /* next entry is HWLOC_OBJ_PCI_DEVICE */  100,
+  /* next entry is HWLOC_OBJ_OS_DEVICE */   100
+};
+
+/* Compare the depth ordering of two object types.
+ * Returns <0 if type1 is usually above type2, >0 if below, 0 if equal,
+ * or HWLOC_TYPE_UNORDERED when the types cannot be compared.
+ */
+int hwloc_compare_types (hwloc_obj_type_t type1, hwloc_obj_type_t type2)
+{
+  unsigned order1 = obj_type_order[type1];
+  unsigned order2 = obj_type_order[type2];
+
+  /* I/O are only comparable with each others and with machine and system */
+  if (hwloc_obj_type_is_io(type1)
+      && !hwloc_obj_type_is_io(type2) && type2 != HWLOC_OBJ_SYSTEM && type2 != HWLOC_OBJ_MACHINE)
+    return HWLOC_TYPE_UNORDERED;
+  if (hwloc_obj_type_is_io(type2)
+      && !hwloc_obj_type_is_io(type1) && type1 != HWLOC_OBJ_SYSTEM && type1 != HWLOC_OBJ_MACHINE)
+    return HWLOC_TYPE_UNORDERED;
+
+  /* Subtract as signed ints: the previous unsigned "order1 - order2" wrapped
+   * around when order1 < order2 and relied on implementation-defined
+   * conversion of the huge unsigned result back to int.
+   * The table values are tiny, so no signed overflow is possible here. */
+  return (int)order1 - (int)order2;
+}
+
+/* Relationship between two objects' sets. Values deliberately alias the
+ * hwloc_bitmap_compare_inclusion() results so they can be passed through
+ * without translation. */
+enum hwloc_obj_cmp_e {
+  HWLOC_OBJ_EQUAL = HWLOC_BITMAP_EQUAL,			/**< \brief Equal */
+  HWLOC_OBJ_INCLUDED = HWLOC_BITMAP_INCLUDED,		/**< \brief Strictly included into */
+  HWLOC_OBJ_CONTAINS = HWLOC_BITMAP_CONTAINS,		/**< \brief Strictly contains */
+  HWLOC_OBJ_INTERSECTS = HWLOC_BITMAP_INTERSECTS,	/**< \brief Intersects, but no inclusion! */
+  HWLOC_OBJ_DIFFERENT = HWLOC_BITMAP_DIFFERENT		/**< \brief No intersection */
+};
+
+/* Decide the containment relationship of two objects from their types alone.
+ * Used when their sets compare equal: the deeper type is considered included
+ * in the shallower one. Groups of different kind/subkind never match.
+ */
+static enum hwloc_obj_cmp_e
+hwloc_type_cmp(hwloc_obj_t obj1, hwloc_obj_t obj2)
+{
+  int order = hwloc_compare_types(obj1->type, obj2->type);
+
+  if (order == HWLOC_TYPE_UNORDERED)
+    return HWLOC_OBJ_DIFFERENT; /* we cannot do better */
+  if (order != 0)
+    return order > 0 ? HWLOC_OBJ_INCLUDED : HWLOC_OBJ_CONTAINS;
+
+  /* same type; Groups additionally require matching kind and subkind */
+  if (obj1->type == HWLOC_OBJ_GROUP
+      && (obj1->attr->group.kind != obj2->attr->group.kind
+	  || obj1->attr->group.subkind != obj2->attr->group.subkind))
+    return HWLOC_OBJ_DIFFERENT; /* we cannot do better */
+
+  return HWLOC_OBJ_EQUAL;
+}
+
+/*
+ * How to compare objects based on cpusets.
+ */
+
+/* Compare two (non-special) objects by their sets: first by cpusets, then
+ * refine with nodesets. Returns a hwloc_obj_cmp_e value; any contradiction
+ * between the cpuset and nodeset orderings is reported as INTERSECTS.
+ */
+static int
+hwloc_obj_cmp_sets(hwloc_obj_t obj1, hwloc_obj_t obj2)
+{
+  hwloc_bitmap_t set1, set2;
+  int res = HWLOC_OBJ_DIFFERENT;
+
+  assert(!hwloc_obj_type_is_special(obj1->type));
+  assert(!hwloc_obj_type_is_special(obj2->type));
+
+  /* prefer complete cpusets when both objects carry one */
+  if (obj1->complete_cpuset && obj2->complete_cpuset) {
+    set1 = obj1->complete_cpuset;
+    set2 = obj2->complete_cpuset;
+  } else {
+    set1 = obj1->cpuset;
+    set2 = obj2->cpuset;
+  }
+  if (set1 && set2 && !hwloc_bitmap_iszero(set1) && !hwloc_bitmap_iszero(set2)) {
+    res = hwloc_bitmap_compare_inclusion(set1, set2);
+    if (res == HWLOC_OBJ_INTERSECTS)
+      return HWLOC_OBJ_INTERSECTS;
+  }
+
+  /* then refine with nodesets, combining with the cpuset verdict */
+  if (obj1->complete_nodeset && obj2->complete_nodeset) {
+    set1 = obj1->complete_nodeset;
+    set2 = obj2->complete_nodeset;
+  } else {
+    set1 = obj1->nodeset;
+    set2 = obj2->nodeset;
+  }
+  if (set1 && set2 && !hwloc_bitmap_iszero(set1) && !hwloc_bitmap_iszero(set2)) {
+    int noderes = hwloc_bitmap_compare_inclusion(set1, set2);
+    switch (noderes) {
+    case HWLOC_OBJ_INCLUDED:
+      if (res == HWLOC_OBJ_CONTAINS)
+	/* contradicting order for cpusets and nodesets */
+	return HWLOC_OBJ_INTERSECTS;
+      res = HWLOC_OBJ_INCLUDED;
+      break;
+    case HWLOC_OBJ_CONTAINS:
+      if (res == HWLOC_OBJ_INCLUDED)
+	/* contradicting order for cpusets and nodesets */
+	return HWLOC_OBJ_INTERSECTS;
+      res = HWLOC_OBJ_CONTAINS;
+      break;
+    case HWLOC_OBJ_INTERSECTS:
+      return HWLOC_OBJ_INTERSECTS;
+    default:
+      /* nodesets are different, keep the cpuset order */
+      /* FIXME: with upcoming multiple levels of NUMA, we may have to report INCLUDED or CONTAINED here */
+      break;
+    }
+  }
+
+  return res;
+}
+
+/* Compare object cpusets based on complete_cpuset if defined (always correctly ordered),
+ * or fallback to the main cpusets (only correctly ordered during early insert before disallowed bits are cleared).
+ *
+ * This is the sane way to compare object among a horizontal level.
+ */
+/* Order two objects within a horizontal level by their cpusets.
+ * Prefers complete_cpuset (always correctly ordered); falls back to the main
+ * cpusets, which are only reliably ordered during early insertion before
+ * disallowed bits are cleared.
+ */
+int
+hwloc__object_cpusets_compare_first(hwloc_obj_t obj1, hwloc_obj_t obj2)
+{
+  int have_complete = (obj1->complete_cpuset && obj2->complete_cpuset);
+  return have_complete
+    ? hwloc_bitmap_compare_first(obj1->complete_cpuset, obj2->complete_cpuset)
+    : hwloc_bitmap_compare_first(obj1->cpuset, obj2->cpuset);
+}
+
+/* format the obj info to print in error messages */
+static void
+hwloc__report_error_format_obj(char *buf, size_t buflen, hwloc_obj_t obj)
+{
+	char typestr[64];
+	char *cpusetstr;
+	char *nodesetstr = NULL;
+	hwloc_obj_type_snprintf(typestr, sizeof(typestr), obj, 0);
+	hwloc_bitmap_asprintf(&cpusetstr, obj->cpuset);
+	if (obj->nodeset) /* may be missing during insert */
+	  hwloc_bitmap_asprintf(&nodesetstr, obj->nodeset);
+	if (obj->os_index != (unsigned) -1)
+	  snprintf(buf, buflen, "%s (P#%u cpuset %s%s%s)",
+		   typestr, obj->os_index, cpusetstr,
+		   nodesetstr ? " nodeset " : "",
+		   nodesetstr ? nodesetstr : "");
+	else
+	  snprintf(buf, buflen, "%s (cpuset %s%s%s)",
+		   typestr, cpusetstr,
+		   nodesetstr ? " nodeset " : "",
+		   nodesetstr ? nodesetstr : "");
+	free(cpusetstr);
+	free(nodesetstr);
+}
+
+/*
+ * How to insert objects into the topology.
+ *
+ * Note: during detection, only the first_child and next_sibling pointers are
+ * kept up to date.  Others are computed only once topology detection is
+ * complete.
+ */
+
+/* Take NEW's os_index only when OLD's is still unset ((type)-1). */
+#define merge_index(new, old, field, type) \
+  if ((old)->field == (type) -1) \
+    (old)->field = (new)->field;
+/* Take NEW's size field only when OLD's is still zero/unset. */
+#define merge_sizes(new, old, field) \
+  if (!(old)->field) \
+    (old)->field = (new)->field;
+#ifdef HWLOC_DEBUG
+/* Debug builds: verify that both objects agree on non-zero sizes. */
+#define check_sizes(new, old, field) \
+  if ((new)->field) \
+    assert((old)->field == (new)->field)
+#else
+#define check_sizes(new, old, field)
+#endif
+
+/* Merge attributes of NEW into OLD when insertion found them identical.
+ * OLD stays in the tree; the caller frees NEW afterwards, so this function
+ * steals (rather than copies) NEW's infos/name/subtype/page_types pointers.
+ */
+static void
+merge_insert_equal(hwloc_obj_t new, hwloc_obj_t old)
+{
+  merge_index(new, old, os_index, unsigned);
+
+  if (new->infos_count) {
+    /* move (not copy) infos from NEW into OLD */
+    hwloc__move_infos(&old->infos, &old->infos_count,
+		      &new->infos, &new->infos_count);
+  }
+
+  if (new->name && !old->name) {
+    /* steal the name; NEW is about to be freed */
+    old->name = new->name;
+    new->name = NULL;
+  }
+  if (new->subtype && !old->subtype) {
+    old->subtype = new->subtype;
+    new->subtype = NULL;
+  }
+
+  /* Ignore userdata. It will be NULL before load().
+   * It may be non-NULL if alloc+insert_group() after load().
+   */
+
+  switch(new->type) {
+  case HWLOC_OBJ_NUMANODE:
+    if (new->memory.local_memory && !old->memory.local_memory) {
+      /* no memory in old, use new memory */
+      old->memory.local_memory = new->memory.local_memory;
+      /* steal NEW's page_types array, dropping OLD's (empty-memory) one */
+      free(old->memory.page_types);
+      old->memory.page_types_len = new->memory.page_types_len;
+      old->memory.page_types = new->memory.page_types;
+      new->memory.page_types = NULL;
+      new->memory.page_types_len = 0;
+    }
+    /* old->memory.total_memory will be updated by propagate_total_memory() */
+    break;
+  case HWLOC_OBJ_L1CACHE:
+  case HWLOC_OBJ_L2CACHE:
+  case HWLOC_OBJ_L3CACHE:
+  case HWLOC_OBJ_L4CACHE:
+  case HWLOC_OBJ_L5CACHE:
+  case HWLOC_OBJ_L1ICACHE:
+  case HWLOC_OBJ_L2ICACHE:
+  case HWLOC_OBJ_L3ICACHE:
+    /* fill in missing cache attributes from NEW; debug-check consistency */
+    merge_sizes(new, old, attr->cache.size);
+    check_sizes(new, old, attr->cache.size);
+    merge_sizes(new, old, attr->cache.linesize);
+    check_sizes(new, old, attr->cache.linesize);
+    break;
+  default:
+    break;
+  }
+}
+
+/* Try to insert OBJ in CUR, recurse if needed.
+ * Returns the object if it was inserted,
+ * the remaining object it was merged,
+ * NULL if failed to insert.
+ */
+static struct hwloc_obj *
+hwloc___insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t cur, hwloc_obj_t obj,
+			        hwloc_report_error_t report_error)
+{
+  hwloc_obj_t child, next_child = NULL;
+  /* These will always point to the pointer to their next last child. */
+  hwloc_obj_t *cur_children = &cur->first_child;
+  hwloc_obj_t *obj_children = &obj->first_child;
+  /* Pointer where OBJ should be put */
+  hwloc_obj_t *putp = NULL; /* OBJ position isn't found yet */
+
+  /* Iteration with prefetching to be completely safe against CHILD removal.
+   * The list is already sorted by cpuset, and there's no intersection between siblings.
+   */
+  for (child = cur->first_child, child ? next_child = child->next_sibling : NULL;
+       child;
+       child = next_child, child ? next_child = child->next_sibling : NULL) {
+
+    int res = hwloc_obj_cmp_sets(obj, child);
+
+    if (res == HWLOC_OBJ_EQUAL) {
+      if (obj->type == HWLOC_OBJ_GROUP) {
+	/* Groups are ignored keep_structure or always. Non-ignored Groups isn't possible. */
+	assert(topology->type_filter[HWLOC_OBJ_GROUP] != HWLOC_TYPE_FILTER_KEEP_ALL);
+        /* Remove the Group now. The normal ignore code path wouldn't tell us whether the Group was removed or not,
+	 * while some callers need to know (at least hwloc_topology_insert_group()).
+	 */
+
+	/* If merging two groups, keep the highest kind.
+	 * Replace the existing Group with the new Group contents
+	 * and let the caller free the new Group.
+	 */
+	if (child->type == HWLOC_OBJ_GROUP
+	    && obj->attr->group.kind > child->attr->group.kind)
+	  hwloc_replace_linked_object(child, obj);
+
+	return child;
+
+      } else if (child->type == HWLOC_OBJ_GROUP) {
+
+	/* Replace the Group with the new object contents
+	 * and let the caller free the new object
+	 */
+	hwloc_replace_linked_object(child, obj);
+	return child;
+
+      } else {
+	/* otherwise compare actual types to decide of the inclusion */
+	res = hwloc_type_cmp(obj, child);
+      }
+    }
+
+    switch (res) {
+      case HWLOC_OBJ_EQUAL:
+	/* Two objects with same type.
+	 * Groups are handled above.
+	 */
+	if (obj->type == child->type
+	    && (obj->type == HWLOC_OBJ_PU || obj->type == HWLOC_OBJ_NUMANODE)
+	    && obj->os_index != child->os_index) {
+	  static int reported = 0;
+	  if (!reported && !hwloc_hide_errors()) {
+	    fprintf(stderr, "Cannot merge similar %s objects with different OS indexes %u and %u\n",
+		    hwloc_type_name(obj->type), child->os_index, obj->os_index);
+	    reported = 1;
+	  }
+          return NULL;
+	}
+	merge_insert_equal(obj, child);
+	/* Already present, no need to insert.  */
+	return child;
+
+      case HWLOC_OBJ_INCLUDED:
+	/* OBJ is strictly contained is some child of CUR, go deeper.  */
+	return hwloc___insert_object_by_cpuset(topology, child, obj, report_error);
+
+      case HWLOC_OBJ_INTERSECTS:
+        if (report_error) {
+	  char childstr[512];
+	  char objstr[512];
+	  char msg[1024];
+	  hwloc__report_error_format_obj(objstr, sizeof(objstr), obj);
+	  hwloc__report_error_format_obj(childstr, sizeof(childstr), child);
+	  snprintf(msg, sizeof(msg), "%s intersects with %s without inclusion!", objstr, childstr);
+	  report_error(msg, __LINE__);
+	}
+	goto putback;
+
+      case HWLOC_OBJ_DIFFERENT:
+        /* OBJ should be a child of CUR before CHILD, mark its position if not found yet. */
+	if (!putp && hwloc__object_cpusets_compare_first(obj, child) < 0)
+	  /* Don't insert yet, there could be intersect errors later */
+	  putp = cur_children;
+	/* Advance cur_children.  */
+	cur_children = &child->next_sibling;
+	break;
+
+      case HWLOC_OBJ_CONTAINS:
+	/* OBJ contains CHILD, remove CHILD from CUR */
+	*cur_children = child->next_sibling;
+	child->next_sibling = NULL;
+	/* Put CHILD in OBJ */
+	*obj_children = child;
+	obj_children = &child->next_sibling;
+	child->parent = obj;
+	break;
+    }
+  }
+  /* cur/obj_children points to last CUR/OBJ child next_sibling pointer, which must be NULL. */
+  assert(!*obj_children);
+  assert(!*cur_children);
+
+  /* Put OBJ where it belongs, or in last in CUR's children.  */
+  if (!putp)
+    putp = cur_children;
+  obj->next_sibling = *putp;
+  *putp = obj;
+  obj->parent = cur;
+
+  topology->modified = 1;
+  return obj;
+
+ putback:
+  /* Put-back OBJ children in CUR and return an error. */
+  if (putp)
+    cur_children = putp; /* No need to try to insert before where OBJ was supposed to go */
+  else
+    cur_children = &cur->first_child; /* Start from the beginning */
+  /* We can insert in order, but there can be holes in the middle. */
+  while ((child = obj->first_child) != NULL) {
+    /* Remove from OBJ */
+    obj->first_child = child->next_sibling;
+    /* Give CHILD back to CUR. The previous code wrongly updated obj->parent
+     * here, leaving each put-back CHILD's parent pointer dangling on OBJ
+     * (which the caller is about to free). */
+    child->parent = cur;
+    /* Find child position in CUR, and insert. */
+    while (*cur_children && hwloc__object_cpusets_compare_first(*cur_children, child) < 0)
+      cur_children = &(*cur_children)->next_sibling;
+    child->next_sibling = *cur_children;
+    *cur_children = child;
+  }
+  return NULL;
+}
+
+/* insertion routine that lets you change the error reporting callback */
+struct hwloc_obj *
+hwloc__insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t obj,
+			       hwloc_report_error_t report_error)
+{
+  struct hwloc_obj *result;
+
+#ifdef HWLOC_DEBUG
+  assert(!hwloc_obj_type_is_special(obj->type));
+
+  /* we need at least one non-empty set (normal or complete, cpuset or nodeset) */
+  assert((obj->cpuset && !hwloc_bitmap_iszero(obj->cpuset))
+	 || (obj->complete_cpuset && !hwloc_bitmap_iszero(obj->complete_cpuset))
+	 || (obj->nodeset && !hwloc_bitmap_iszero(obj->nodeset))
+	 || (obj->complete_nodeset && !hwloc_bitmap_iszero(obj->complete_nodeset)));
+#endif
+
+  /* Start at the top.  */
+  result = hwloc___insert_object_by_cpuset(topology, topology->levels[0][0], obj, report_error);
+  if (result != obj) {
+    /* either failed to insert, or got merged, free the original object */
+    hwloc_free_unlinked_object(obj);
+  } else {
+    /* Add the cpuset to the top */
+    hwloc_bitmap_or(topology->levels[0][0]->complete_cpuset, topology->levels[0][0]->complete_cpuset, obj->cpuset);
+    if (obj->nodeset)
+      hwloc_bitmap_or(topology->levels[0][0]->complete_nodeset, topology->levels[0][0]->complete_nodeset, obj->nodeset);
+  }
+  return result;
+}
+
+/* the default insertion routine warns in case of error.
+ * it's used by most backends.
+ * Thin wrapper over hwloc__insert_object_by_cpuset() with the standard
+ * OS-error reporting callback. */
+struct hwloc_obj *
+hwloc_insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t obj)
+{
+  return hwloc__insert_object_by_cpuset(topology, obj, hwloc_report_os_error);
+}
+
+/* Append OBJ at the end of PARENT's child list matching its class:
+ * Misc objects go to misc_first_child, I/O objects to io_first_child,
+ * everything else to the normal first_child list.
+ */
+void
+hwloc_insert_object_by_parent(struct hwloc_topology *topology, hwloc_obj_t parent, hwloc_obj_t obj)
+{
+  hwloc_obj_t *tail;
+
+  /* select the sibling list this object belongs to */
+  if (obj->type == HWLOC_OBJ_MISC)
+    tail = &parent->misc_first_child;
+  else if (hwloc_obj_type_is_io(obj->type))
+    tail = &parent->io_first_child;
+  else
+    tail = &parent->first_child;
+
+  /* Walk to the end of the selected list and append there.
+   * The caller takes care of inserting children in the right cpuset order, without intersection between them.
+   * Duplicating doesn't need to check the order since the source topology is supposed to be OK already.
+   * XML reorders if needed, and fails on intersecting siblings.
+   * Other callers just insert random objects such as I/O or Misc, no cpuset issue there.
+   */
+  while (*tail)
+    tail = &(*tail)->next_sibling;
+
+  *tail = obj;
+  obj->parent = parent;
+  obj->next_sibling = NULL;
+  topology->modified = 1;
+}
+
+/* Allocate and zero-initialize a new object of the given type/os_index.
+ * The cpusets/nodesets are NOT allocated here; the caller does it.
+ * Returns NULL on allocation failure (the previous code dereferenced the
+ * unchecked malloc() results, crashing on OOM; callers such as
+ * hwloc__duplicate_object() already handle a NULL return).
+ */
+hwloc_obj_t
+hwloc_alloc_setup_object(hwloc_topology_t topology,
+			 hwloc_obj_type_t type, signed os_index)
+{
+  /* calloc replaces the former malloc+memset pair and checks for overflow */
+  struct hwloc_obj *obj = calloc(1, sizeof(*obj));
+  if (!obj)
+    return NULL;
+  obj->type = type;
+  obj->os_index = os_index;
+  obj->gp_index = topology->next_gp_index++;
+  obj->attr = calloc(1, sizeof(*obj->attr));
+  if (!obj->attr) {
+    free(obj);
+    return NULL;
+  }
+  /* do not allocate the cpuset here, let the caller do it */
+  return obj;
+}
+
+/* Allocate an empty Group object for later insertion with
+ * hwloc_topology_insert_group_object(). Only valid on a loaded topology;
+ * otherwise returns NULL with errno set to EINVAL.
+ */
+hwloc_obj_t
+hwloc_topology_alloc_group_object(struct hwloc_topology *topology)
+{
+  if (topology->is_loaded)
+    return hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, -1);
+
+  /* this could actually work, see insert() below */
+  errno = EINVAL;
+  return NULL;
+}
+
+static void hwloc_propagate_symmetric_subtree(hwloc_topology_t topology, hwloc_obj_t root);
+static void propagate_total_memory(hwloc_obj_t obj);
+static void hwloc_set_group_depth(hwloc_topology_t topology);
+
+/* Insert a caller-allocated Group (from hwloc_topology_alloc_group_object())
+ * into a loaded topology by its sets. The Group's sets are first clamped to
+ * the root's sets. Returns the inserted object, the existing object it was
+ * merged into, or NULL on error; OBJ is freed on every non-inserted path.
+ */
+hwloc_obj_t
+hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t obj)
+{
+  hwloc_obj_t res, root;
+  /* remember before insertion whether a memory propagation pass is needed */
+  int has_memory = (obj->memory.local_memory != 0);
+
+  if (!topology->is_loaded) {
+    /* this could actually work, we would just need to disable connect_children/levels below */
+    hwloc_free_unlinked_object(obj);
+    errno = EINVAL;
+    return NULL;
+  }
+
+  if (topology->type_filter[HWLOC_OBJ_GROUP] == HWLOC_TYPE_FILTER_KEEP_NONE) {
+    hwloc_free_unlinked_object(obj);
+    errno = EINVAL;
+    return NULL;
+  }
+
+  /* clamp the Group's sets to what actually exists in this topology */
+  root = hwloc_get_root_obj(topology);
+  if (obj->cpuset)
+    hwloc_bitmap_and(obj->cpuset, obj->cpuset, root->cpuset);
+  if (obj->complete_cpuset)
+    hwloc_bitmap_and(obj->complete_cpuset, obj->complete_cpuset, root->complete_cpuset);
+  if (obj->nodeset)
+    hwloc_bitmap_and(obj->nodeset, obj->nodeset, root->nodeset);
+  if (obj->complete_nodeset)
+    hwloc_bitmap_and(obj->complete_nodeset, obj->complete_nodeset, root->complete_nodeset);
+
+  /* refuse a Group whose sets are all unset or empty after clamping */
+  if ((!obj->cpuset || hwloc_bitmap_iszero(obj->cpuset))
+      && (!obj->complete_cpuset || hwloc_bitmap_iszero(obj->complete_cpuset))
+      && (!obj->nodeset || hwloc_bitmap_iszero(obj->nodeset))
+      && (!obj->complete_nodeset || hwloc_bitmap_iszero(obj->complete_nodeset))) {
+    hwloc_free_unlinked_object(obj);
+    errno = EINVAL;
+    return NULL;
+  }
+
+  res = hwloc__insert_object_by_cpuset(topology, obj, NULL /* do not show errors on stdout */);
+  if (!res)
+    return NULL;
+  if (res != obj)
+    /* merged */
+    return res;
+
+  /* properly inserted */
+  hwloc_obj_add_children_sets(obj);
+  if (hwloc_topology_reconnect(topology, 0) < 0)
+    return NULL;
+
+  /* recompute per-subtree invariants broken by the new intermediate object */
+  hwloc_propagate_symmetric_subtree(topology, topology->levels[0][0]);
+  hwloc_set_group_depth(topology);
+
+  if (has_memory)
+    propagate_total_memory(topology->levels[0][0]);
+
+  return obj;
+}
+
+/* Insert a new Misc object with the given NAME (may be NULL) as a child of
+ * PARENT in a loaded topology. Returns the new object, or NULL with errno
+ * set (EINVAL if Misc objects are filtered out or the topology isn't loaded,
+ * ENOMEM on allocation failure).
+ */
+hwloc_obj_t
+hwloc_topology_insert_misc_object(struct hwloc_topology *topology, hwloc_obj_t parent, const char *name)
+{
+  hwloc_obj_t obj;
+
+  if (topology->type_filter[HWLOC_OBJ_MISC] == HWLOC_TYPE_FILTER_KEEP_NONE) {
+    errno = EINVAL;
+    return NULL;
+  }
+
+  if (!topology->is_loaded) {
+    errno = EINVAL;
+    return NULL;
+  }
+
+  obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_MISC, -1);
+  if (!obj) {
+    /* allocation failed: report instead of dereferencing NULL below */
+    errno = ENOMEM;
+    return NULL;
+  }
+  if (name)
+    obj->name = strdup(name);
+
+  hwloc_insert_object_by_parent(topology, parent, obj);
+
+  /* FIXME: only connect misc parent children and misc level,
+   * but this API is likely not performance critical anyway
+   */
+  hwloc_topology_reconnect(topology, 0);
+
+  return obj;
+}
+
+/* assuming set is included in the topology complete_cpuset
+ * and all objects have a proper complete_cpuset,
+ * return the best one containing set.
+ * if some object are equivalent (same complete_cpuset), return the highest one.
+ */
+static hwloc_obj_t
+hwloc_get_highest_obj_covering_complete_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set)
+{
+  hwloc_obj_t current = hwloc_get_root_obj(topology);
+
+  if (hwloc_bitmap_isequal(set, current->complete_cpuset))
+    /* root cpuset is exactly what we want, no need to look at children, we want the highest */
+    return current;
+
+  /* walk down as long as exactly one child strictly covers the set */
+  while (1) {
+    hwloc_obj_t child;
+
+    for (child = current->first_child; child; child = child->next_sibling) {
+      if (hwloc_bitmap_isequal(set, child->complete_cpuset))
+	/* child cpuset is exactly what we want, no need to look deeper, we want the highest */
+	return child;
+      if (!hwloc_bitmap_iszero(child->complete_cpuset) && hwloc_bitmap_isincluded(set, child->complete_cpuset))
+	break;
+    }
+
+    if (!child)
+      /* no better child, current is the best covering object */
+      return current;
+
+    current = child;
+  }
+}
+
+/* Find (or create via an intermediate Group) the right parent for attaching
+ * an I/O object whose locality is CPUSET. CPUSET is modified in place
+ * (restricted to the topology's complete cpuset). Returns NULL only when the
+ * restricted cpuset is empty; otherwise returns some valid parent, falling
+ * back to the smallest existing object covering CPUSET whenever the exact
+ * Group cannot be allocated or inserted.
+ */
+hwloc_obj_t
+hwloc_find_insert_io_parent_by_complete_cpuset(struct hwloc_topology *topology, hwloc_cpuset_t cpuset)
+{
+  hwloc_obj_t group_obj, largeparent, parent;
+
+  /* restrict to the existing complete cpuset to avoid errors later */
+  hwloc_bitmap_and(cpuset, cpuset, hwloc_topology_get_complete_cpuset(topology));
+  if (hwloc_bitmap_iszero(cpuset))
+    /* remaining cpuset is empty, invalid */
+    return NULL;
+
+  largeparent = hwloc_get_highest_obj_covering_complete_cpuset(topology, cpuset);
+  if (hwloc_bitmap_isequal(largeparent->complete_cpuset, cpuset))
+    /* Found a valid object (normal case) */
+    return largeparent;
+
+  /* we need to insert an intermediate group */
+  group_obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, -1);
+  if (!group_obj)
+    /* Failed to insert the exact Group, fallback to largeparent */
+    return largeparent;
+
+  group_obj->complete_cpuset = hwloc_bitmap_dup(cpuset);
+  /* the main cpuset only keeps the topology-visible (allowed) bits */
+  hwloc_bitmap_and(cpuset, cpuset, hwloc_topology_get_topology_cpuset(topology));
+  group_obj->cpuset = hwloc_bitmap_dup(cpuset);
+  group_obj->attr->group.kind = HWLOC_GROUP_KIND_IO;
+  /* on insertion failure, group_obj is freed by the insert routine itself */
+  parent = hwloc__insert_object_by_cpuset(topology, group_obj, hwloc_report_os_error);
+  if (!parent)
+    /* Failed to insert the Group, maybe a conflicting cpuset */
+    return largeparent;
+
+  /* Group couldn't get merged or we would have gotten the right largeparent earlier */
+  assert(parent == group_obj);
+
+  /* Group inserted without being merged, everything OK, setup its sets */
+  hwloc_obj_add_children_sets(group_obj);
+
+  return parent;
+}
+
+/* qsort comparator for page_type entries: ascending by size, except that
+ * 0-size (unset) entries are considered larger so they sort to the end.
+ * The previous code returned -1 when a->size==0 and b->size!=0 (zero-size
+ * entries sorted FIRST, contradicting the intent and the trimming loop in
+ * propagate_total_memory()), and was asymmetric when both sizes were zero,
+ * violating qsort's consistent-ordering requirement.
+ */
+static int hwloc_memory_page_type_compare(const void *_a, const void *_b)
+{
+  const struct hwloc_obj_memory_page_type_s *a = _a;
+  const struct hwloc_obj_memory_page_type_s *b = _b;
+  /* consider 0 as larger so that 0-size page_type go to the end */
+  if (!a->size && !b->size)
+    return 0;
+  if (!a->size)
+    return 1;
+  if (!b->size)
+    return -1;
+  /* don't cast a-b in int since those are ullongs */
+  if (b->size == a->size)
+    return 0;
+  return a->size < b->size ? -1 : 1;
+}
+
+/* Propagate memory counts: recompute total_memory bottom-up as the sum of
+ * local_memory plus all normal children's totals, and tidy the page_types
+ * array (sort by size, drop trailing 0-size entries). */
+static void
+propagate_total_memory(hwloc_obj_t obj)
+{
+  hwloc_obj_t *temp, child;
+  unsigned i;
+
+  /* reset total before counting local and children memory */
+  obj->memory.total_memory = 0;
+
+  /* Propagate memory up. */
+  for_each_child_safe(child, obj, temp) {
+    propagate_total_memory(child);
+    obj->memory.total_memory += child->memory.total_memory;
+  }
+  /* No memory under I/O or Misc */
+
+  obj->memory.total_memory += obj->memory.local_memory;
+
+  /* By the way, sort the page_type array.
+   * Cannot do it on insert since some backends (e.g. XML) add page_types after inserting the object.
+   */
+  qsort(obj->memory.page_types, obj->memory.page_types_len, sizeof(*obj->memory.page_types), hwloc_memory_page_type_compare);
+  /* Ignore 0-size page_types, they are at the end.
+   * (i is unsigned, hence the i>=1 / [i-1] indexing; i ends at 0 when all
+   * entries are 0-size, truncating the array entirely.) */
+  for(i=obj->memory.page_types_len; i>=1; i--)
+    if (obj->memory.page_types[i-1].size)
+      break;
+  obj->memory.page_types_len = i;
+}
+
+/* Collect the cpuset of all the PU objects.
+ * The first object encountered with a cpuset becomes the accumulator (SYS):
+ * its cpuset is reset to empty and then OR-ed with every PU's cpuset found
+ * in its subtree.
+ */
+static void
+collect_proc_cpuset(hwloc_obj_t obj, hwloc_obj_t sys)
+{
+  hwloc_obj_t child, *temp;
+
+  if (!sys && obj->cpuset) {
+    /* This object is the root of a machine: start from an empty cpuset
+     * and let the PU descendants below fill it in. */
+    sys = obj;
+    hwloc_bitmap_zero(obj->cpuset);
+  } else if (sys && obj->type == HWLOC_OBJ_PU) {
+    /* accumulate this PU into the machine cpuset */
+    hwloc_bitmap_or(sys->cpuset, sys->cpuset, obj->cpuset);
+  }
+
+  for_each_child_safe(child, obj, temp)
+    collect_proc_cpuset(child, sys);
+  /* No PU under I/O or Misc */
+}
+
+/* While traversing down and up, propagate the disallowed cpus by
+ * and'ing them to and from the first object that has a cpuset.
+ * sys is NULL until the first object with a cpuset (the machine root)
+ * is found; that object becomes sys for the rest of the traversal.
+ */
+static void
+propagate_unused_cpuset(hwloc_obj_t obj, hwloc_obj_t sys)
+{
+  hwloc_obj_t child, *temp;
+
+  if (obj->cpuset) {
+    if (sys) {
+      /* We are already given a pointer to a system object, update it and update ourselves */
+      hwloc_bitmap_t mask = hwloc_bitmap_alloc();
+
+      /* Apply the topology cpuset */
+      hwloc_bitmap_and(obj->cpuset, obj->cpuset, sys->cpuset);
+
+      /* Update complete cpuset down */
+      if (obj->complete_cpuset) {
+	hwloc_bitmap_and(obj->complete_cpuset, obj->complete_cpuset, sys->complete_cpuset);
+      } else {
+	obj->complete_cpuset = hwloc_bitmap_dup(sys->complete_cpuset);
+	hwloc_bitmap_and(obj->complete_cpuset, obj->complete_cpuset, obj->cpuset);
+      }
+
+      /* Update allowed cpusets */
+      if (obj->allowed_cpuset) {
+	/* Update ours */
+	hwloc_bitmap_and(obj->allowed_cpuset, obj->allowed_cpuset, sys->allowed_cpuset);
+
+	/* Update the given cpuset, but only what we know:
+	 * mask = ~obj->cpuset | obj->allowed_cpuset, so bits outside
+	 * our own cpuset are left untouched in sys->allowed_cpuset. */
+	hwloc_bitmap_copy(mask, obj->cpuset);
+	hwloc_bitmap_not(mask, mask);
+	hwloc_bitmap_or(mask, mask, obj->allowed_cpuset);
+	hwloc_bitmap_and(sys->allowed_cpuset, sys->allowed_cpuset, mask);
+      } else {
+	/* Just take it as such */
+	obj->allowed_cpuset = hwloc_bitmap_dup(sys->allowed_cpuset);
+	hwloc_bitmap_and(obj->allowed_cpuset, obj->allowed_cpuset, obj->cpuset);
+      }
+
+      hwloc_bitmap_free(mask);
+    } else {
+      /* This object is the root of a machine */
+      sys = obj;
+      /* Apply complete_cpuset to cpuset and allowed_cpuset, it
+       * will automatically be applied below */
+      if (obj->complete_cpuset)
+        hwloc_bitmap_and(obj->cpuset, obj->cpuset, obj->complete_cpuset);
+      else
+        obj->complete_cpuset = hwloc_bitmap_dup(obj->cpuset);
+      if (obj->allowed_cpuset)
+        hwloc_bitmap_and(obj->allowed_cpuset, obj->allowed_cpuset, obj->complete_cpuset);
+      else
+        obj->allowed_cpuset = hwloc_bitmap_dup(obj->cpuset);
+    }
+  }
+
+  for_each_child_safe(child, obj, temp)
+    propagate_unused_cpuset(child, sys);
+  /* No PU under I/O or Misc */
+}
+
+/* OR src's cpusets/nodesets into dst's, allocating a dst set whenever
+ * src has one and dst doesn't. Used to set up an object's sets from its
+ * children. Always returns 0.
+ */
+int
+hwloc_obj_add_other_obj_sets(hwloc_obj_t dst, hwloc_obj_t src)
+{
+  /* Wrapped in do { } while (0) so each use expands to exactly one
+   * statement and stays safe in any control-flow context (e.g. an
+   * unbraced if/else around a call site). */
+#define ADD_OTHER_OBJ_SET(_dst, _src, _set)			\
+  do {								\
+    if ((_src)->_set) {						\
+      if (!(_dst)->_set)					\
+	(_dst)->_set = hwloc_bitmap_alloc();			\
+      hwloc_bitmap_or((_dst)->_set, (_dst)->_set, (_src)->_set); \
+    }								\
+  } while (0)
+  ADD_OTHER_OBJ_SET(dst, src, cpuset);
+  ADD_OTHER_OBJ_SET(dst, src, complete_cpuset);
+  ADD_OTHER_OBJ_SET(dst, src, allowed_cpuset);
+  ADD_OTHER_OBJ_SET(dst, src, nodeset);
+  ADD_OTHER_OBJ_SET(dst, src, complete_nodeset);
+  ADD_OTHER_OBJ_SET(dst, src, allowed_nodeset);
+  /* don't leak the helper macro past this function */
+#undef ADD_OTHER_OBJ_SET
+  return 0;
+}
+
+/* Setup obj's cpusets/nodesets by OR'ing those of all its normal children.
+ * Requires obj and every child to already have a cpuset. Always returns 0.
+ */
+int
+hwloc_obj_add_children_sets(hwloc_obj_t obj)
+{
+  hwloc_obj_t child;
+  assert(obj->cpuset != NULL);
+  child = obj->first_child;
+  while (child) {
+    assert(child->cpuset != NULL);
+    hwloc_obj_add_other_obj_sets(obj, child);
+    child = child->next_sibling;
+  }
+  /* No need to look at Misc children, they contain no PU. */
+  return 0;
+}
+
+/* Propagate nodesets up and down.
+ * sys becomes the first object found with a nodeset (the root of the
+ * nodeset-aware subtree). Singleton parent nodesets are pushed down to
+ * children; children nodesets are OR'ed back up into obj.
+ */
+static void
+propagate_nodeset(hwloc_obj_t obj, hwloc_obj_t sys)
+{
+  hwloc_obj_t child, *temp;
+  hwloc_bitmap_t parent_nodeset = NULL;
+  int parent_weight = 0;
+
+  if (!sys && obj->nodeset) {
+    sys = obj;
+    if (!obj->complete_nodeset)
+      obj->complete_nodeset = hwloc_bitmap_dup(obj->nodeset);
+    if (!obj->allowed_nodeset)
+      obj->allowed_nodeset = hwloc_bitmap_dup(obj->nodeset);
+  }
+
+  if (sys) {
+    if (obj->nodeset) {
+      /* Some existing nodeset coming from above, to possibly propagate down */
+      parent_nodeset = obj->nodeset;
+      parent_weight = hwloc_bitmap_weight(parent_nodeset);
+    } else
+      obj->nodeset = hwloc_bitmap_alloc();
+  }
+
+  for_each_child_safe(child, obj, temp) {
+    /* Propagate singleton nodesets down: a one-bit parent nodeset
+     * unambiguously locates the children too. */
+    if (parent_weight == 1) {
+      if (!child->nodeset)
+        child->nodeset = hwloc_bitmap_dup(obj->nodeset);
+      else if (!hwloc_bitmap_isequal(child->nodeset, parent_nodeset)) {
+        hwloc_debug_bitmap("Oops, parent nodeset %s", parent_nodeset);
+        hwloc_debug_bitmap(" is different from child nodeset %s, ignoring the child one\n", child->nodeset);
+        hwloc_bitmap_copy(child->nodeset, parent_nodeset);
+      }
+    }
+
+    /* Recurse */
+    propagate_nodeset(child, sys);
+
+    /* Propagate children nodesets up */
+    if (sys && child->nodeset)
+      hwloc_bitmap_or(obj->nodeset, obj->nodeset, child->nodeset);
+  }
+  /* No nodeset under I/O or Misc */
+}
+
+/* Propagate allowed and complete nodesets: push complete/allowed
+ * nodesets down to children, pull disallowed nodes back up, then
+ * restrict obj's own nodeset/allowed_nodeset by its complete_nodeset.
+ */
+static void
+propagate_nodesets(hwloc_obj_t obj)
+{
+  /* NOTE(review): allocation result is not checked; assumes
+   * hwloc_bitmap_alloc cannot fail here — confirm OOM policy. */
+  hwloc_bitmap_t mask = hwloc_bitmap_alloc();
+  hwloc_obj_t child, *temp;
+
+  for_each_child_safe(child, obj, temp) {
+    if (obj->nodeset) {
+      /* Update complete nodesets down */
+      if (child->complete_nodeset) {
+        hwloc_bitmap_and(child->complete_nodeset, child->complete_nodeset, obj->complete_nodeset);
+      } else if (child->nodeset) {
+        child->complete_nodeset = hwloc_bitmap_dup(obj->complete_nodeset);
+        hwloc_bitmap_and(child->complete_nodeset, child->complete_nodeset, child->nodeset);
+      } /* else the child doesn't have nodeset information, we can not provide a complete nodeset */
+
+      /* Update allowed nodesets down */
+      if (child->allowed_nodeset) {
+        hwloc_bitmap_and(child->allowed_nodeset, child->allowed_nodeset, obj->allowed_nodeset);
+      } else if (child->nodeset) {
+        child->allowed_nodeset = hwloc_bitmap_dup(obj->allowed_nodeset);
+        hwloc_bitmap_and(child->allowed_nodeset, child->allowed_nodeset, child->nodeset);
+      }
+    }
+
+    propagate_nodesets(child);
+
+    if (obj->nodeset) {
+      /* Update allowed nodesets up: clear in obj the nodes the child
+       * covers but does not allow (mask = child->nodeset & ~child->allowed_nodeset). */
+      if (child->nodeset && child->allowed_nodeset) {
+        hwloc_bitmap_copy(mask, child->nodeset);
+        hwloc_bitmap_andnot(mask, mask, child->allowed_nodeset);
+        hwloc_bitmap_andnot(obj->allowed_nodeset, obj->allowed_nodeset, mask);
+      }
+    }
+  }
+  hwloc_bitmap_free(mask);
+  /* No nodeset under I/O or Misc */
+
+  if (obj->nodeset) {
+    /* Apply complete nodeset to nodeset and allowed_nodeset */
+    if (obj->complete_nodeset)
+      hwloc_bitmap_and(obj->nodeset, obj->nodeset, obj->complete_nodeset);
+    else
+      obj->complete_nodeset = hwloc_bitmap_dup(obj->nodeset);
+    if (obj->allowed_nodeset)
+      hwloc_bitmap_and(obj->allowed_nodeset, obj->allowed_nodeset, obj->complete_nodeset);
+    else
+      obj->allowed_nodeset = hwloc_bitmap_dup(obj->nodeset);
+  }
+}
+
+/* Restrict cpusets/nodesets to the allowed ones, and zero out the
+ * memory statistics of NUMA nodes that are not in the allowed nodeset.
+ */
+static void
+remove_unused_sets(hwloc_obj_t obj)
+{
+  hwloc_obj_t child, *temp;
+
+  if (obj->cpuset) {
+    hwloc_bitmap_and(obj->cpuset, obj->cpuset, obj->allowed_cpuset);
+  }
+  if (obj->nodeset) {
+    hwloc_bitmap_and(obj->nodeset, obj->nodeset, obj->allowed_nodeset);
+  }
+  /* os_index == (unsigned)-1 means "unknown index", skip those */
+  if (obj->type == HWLOC_OBJ_NUMANODE && obj->os_index != (unsigned) -1 &&
+      !hwloc_bitmap_isset(obj->allowed_nodeset, obj->os_index)) {
+    unsigned i;
+    hwloc_debug("Dropping memory from disallowed node %u\n", obj->os_index);
+    obj->memory.local_memory = 0;
+    obj->memory.total_memory = 0;
+    for(i=0; i<obj->memory.page_types_len; i++)
+      obj->memory.page_types[i].count = 0;
+  }
+
+  for_each_child_safe(child, obj, temp)
+    remove_unused_sets(child);
+  /* No cpuset under I/O or Misc */
+}
+
+/* Set the bridge depth attribute of root's I/O children (depth below the
+ * first non-I/O ancestor), and remove childless bridges when their type
+ * filter is HWLOC_TYPE_FILTER_KEEP_IMPORTANT. Recurses depth-first so
+ * bridges emptied by the recursion are removed too.
+ */
+static void
+hwloc__filter_bridges(hwloc_topology_t topology, hwloc_obj_t root, int depth)
+{
+  hwloc_obj_t child, *pchild;
+
+  /* filter I/O children and recurse */
+  for_each_io_child_safe(child, root, pchild) {
+    enum hwloc_type_filter_e filter = topology->type_filter[child->type];
+
+    /* recurse into grand-children */
+    hwloc__filter_bridges(topology, child, depth+1);
+
+    child->attr->bridge.depth = depth;
+
+    if (child->type == HWLOC_OBJ_BRIDGE
+	&& filter == HWLOC_TYPE_FILTER_KEEP_IMPORTANT
+	&& !child->io_first_child) {
+      unlink_and_free_single_object(pchild);
+      topology->modified = 1;
+    }
+  }
+}
+
+/* Apply bridge filtering below every normal object of the tree:
+ * recurse over normal children first, then filter parent's own
+ * I/O children with hwloc__filter_bridges().
+ */
+static void
+hwloc_filter_bridges(hwloc_topology_t topology, hwloc_obj_t parent)
+{
+  hwloc_obj_t child = parent->first_child;
+  while (child) {
+    hwloc_filter_bridges(topology, child);
+    child = child->next_sibling;
+  }
+
+  hwloc__filter_bridges(topology, parent, 0);
+}
+
+/* Re-sort parent's normal children list by cpuset order
+ * (insertion sort using hwloc__object_cpusets_compare_first).
+ * Only first_child/next_sibling links are rebuilt; the other sibling
+ * pointers and the children array are fixed later by hwloc_connect_children().
+ */
+void
+hwloc__reorder_children(hwloc_obj_t parent)
+{
+  /* move the children list on the side */
+  hwloc_obj_t *prev, child, children = parent->first_child;
+  parent->first_child = NULL;
+  while (children) {
+    /* dequeue child */
+    child = children;
+    children = child->next_sibling;
+    /* find where to enqueue it */
+    prev = &parent->first_child;
+    while (*prev && hwloc__object_cpusets_compare_first(child, *prev) > 0)
+      prev = &((*prev)->next_sibling);
+    /* enqueue */
+    child->next_sibling = *prev;
+    *prev = child;
+  }
+  /* No ordering to enforce for Misc or I/O children. */
+}
+
+/* Remove all children whose cpuset is empty, except NUMA nodes
+ * since we want to keep memory information, and except PCI bridges and devices.
+ * Bottom-up: children are processed first so a parent is removed only
+ * once all of its removable descendants are gone.
+ */
+static void
+remove_empty(hwloc_topology_t topology, hwloc_obj_t *pobj)
+{
+  hwloc_obj_t obj = *pobj, child, *pchild;
+
+  for_each_child_safe(child, obj, pchild)
+    remove_empty(topology, pchild);
+  /* No cpuset under I/O or Misc */
+
+  if (obj->type != HWLOC_OBJ_NUMANODE
+      && !obj->first_child /* only remove if all children were removed above, so that we don't remove parents of NUMAnode */
+      && !obj->io_first_child /* only remove if no I/O is attached there */
+      && hwloc_bitmap_iszero(obj->cpuset)) {
+    /* Remove empty children (even if it has Misc children) */
+    hwloc_debug("%s", "\nRemoving empty object ");
+    hwloc_debug_print_object(0, obj);
+    unlink_and_free_single_object(pobj);
+    topology->modified = 1;
+  }
+}
+
+/* Compare the structure of levels i-1 and i.
+ * Returns 0 when they are structurally identical: same number of
+ * objects, and every object of level i-1 has exactly one child.
+ * Returns -1 otherwise.
+ */
+static int
+hwloc_compare_levels_structure(hwloc_topology_t topology, unsigned i)
+{
+  unsigned j;
+  if (topology->level_nbobjects[i-1] != topology->level_nbobjects[i])
+    return -1;
+  for(j=0; j<topology->level_nbobjects[i]; j++)
+    /* BUGFIX: check the j-th parent, not levels[i-1][0] — the old code
+     * only ever inspected the first object's arity, so a level whose
+     * first parent had arity 1 looked mergeable regardless of the rest. */
+    if (topology->levels[i-1][j]->arity != 1)
+      return -1;
+  /* same number of objects with arity 1 above, no problem */
+  return 0;
+}
+
+/* Merge adjacent identical levels when type filters request
+ * HWLOC_TYPE_FILTER_KEEP_STRUCTURE: when levels i-1 and i have the same
+ * number of objects and all parents have arity 1, either the parent or
+ * the child objects are removed (per obj_type_priority). The local
+ * counter res tracks whether any level was removed so that object depths
+ * and type_depth[] are refreshed at the end. (The function itself
+ * returns void; callers rely on the updated topology only.)
+ */
+static void
+hwloc_filter_levels_keep_structure(hwloc_topology_t topology)
+{
+  unsigned i, j;
+  int res = 0;
+
+  /* start from the bottom since we'll remove intermediate levels */
+  for(i=topology->nb_levels-1; i>0; i--) {
+    int replacechild = 0, replaceparent = 0;
+    hwloc_obj_type_t type1 = topology->levels[i-1][0]->type;
+    hwloc_obj_type_t type2 = topology->levels[i][0]->type;
+
+    /* Check whether parents and/or children can be replaced */
+    if (topology->type_filter[type1] == HWLOC_TYPE_FILTER_KEEP_STRUCTURE)
+      /* Parents can be ignored in favor of children.  */
+      replaceparent = 1;
+    if (topology->type_filter[type2] == HWLOC_TYPE_FILTER_KEEP_STRUCTURE)
+      /* Children can be ignored in favor of parents.  */
+      replacechild = 1;
+    if (!replacechild && !replaceparent)
+      /* no ignoring */
+      continue;
+    /* Decide which one to actually replace */
+    if (replaceparent && replacechild) {
+      /* If both may be replaced, look at obj_type_priority */
+      if (obj_type_priority[type1] >= obj_type_priority[type2])
+	replaceparent = 0;
+      else
+	replacechild = 0;
+    }
+    /* Are these levels actually identical? */
+    if (hwloc_compare_levels_structure(topology, i) < 0)
+      continue;
+    hwloc_debug("may merge levels #%u=%s and #%u=%s\n",
+		i-1, hwloc_type_name(type1), i, hwloc_type_name(type2));
+
+    /* OK, remove intermediate objects from the tree. */
+    for(j=0; j<topology->level_nbobjects[i]; j++) {
+      hwloc_obj_t parent = topology->levels[i-1][j];
+      hwloc_obj_t child = topology->levels[i][j];
+      unsigned k;
+      if (replacechild) {
+	/* move child's children to parent */
+	parent->first_child = child->first_child;
+	parent->last_child = child->last_child;
+	parent->arity = child->arity;
+	free(parent->children);
+	parent->children = child->children;
+	child->children = NULL;
+	/* update children parent */
+	for(k=0; k<parent->arity; k++)
+	  parent->children[k]->parent = parent;
+	/* append child io/misc children to parent */
+	if (child->io_first_child) {
+	  append_siblings_list(&parent->io_first_child, child->io_first_child, parent);
+	  parent->io_arity += child->io_arity;
+	}
+	if (child->misc_first_child) {
+	  append_siblings_list(&parent->misc_first_child, child->misc_first_child, parent);
+	  parent->misc_arity += child->misc_arity;
+	}
+	hwloc_free_unlinked_object(child);
+      } else {
+	/* replace parent with child in grand-parent */
+	if (parent->parent) {
+	  parent->parent->children[parent->sibling_rank] = child;
+	  child->sibling_rank = parent->sibling_rank;
+	  if (!parent->sibling_rank)
+	    parent->parent->first_child = child;
+	  if (parent->sibling_rank == parent->parent->arity-1)
+	    parent->parent->last_child = child;
+	  /* update child parent */
+	  child->parent = parent->parent;
+	} else {
+	  /* make child the new root */
+	  topology->levels[0][0] = child;
+	  child->parent = NULL;
+	}
+	/* prepend parent io/misc children to child */
+	if (parent->io_first_child) {
+	  prepend_siblings_list(&child->io_first_child, parent->io_first_child, child);
+	  child->io_arity += parent->io_arity;
+	}
+	if (parent->misc_first_child) {
+	  prepend_siblings_list(&child->misc_first_child, parent->misc_first_child, child);
+	  child->misc_arity += parent->misc_arity;
+	}
+	hwloc_free_unlinked_object(parent);
+	/* prev/next_sibling will be updated below in another loop */
+      }
+    }
+    if (replaceparent && i>1) {
+      /* Update sibling list within modified parent->parent arrays */
+      for(j=0; j<topology->level_nbobjects[i]; j++) {
+	hwloc_obj_t child = topology->levels[i][j];
+	unsigned rank = child->sibling_rank;
+	child->prev_sibling = rank > 0 ? child->parent->children[rank-1] : NULL;
+	child->next_sibling = rank < child->parent->arity-1 ? child->parent->children[rank+1] : NULL;
+      }
+    }
+
+    /* Update levels so that the next reconnect isn't confused */
+    if (replaceparent) {
+      /* Removing level i-1, so move levels [i..nb_levels-1] to [i-1..] */
+      free(topology->levels[i-1]);
+      memmove(&topology->levels[i-1],
+	      &topology->levels[i],
+	      (topology->nb_levels-i)*sizeof(topology->levels[i]));
+      memmove(&topology->level_nbobjects[i-1],
+	      &topology->level_nbobjects[i],
+	      (topology->nb_levels-i)*sizeof(topology->level_nbobjects[i]));
+      hwloc_debug("removed parent level %s at depth %u\n",
+		  hwloc_type_name(type1), i-1);
+    } else {
+      /* Removing level i, so move levels [i+1..nb_levels-1] and later to [i..] */
+      free(topology->levels[i]);
+      memmove(&topology->levels[i],
+	      &topology->levels[i+1],
+	      (topology->nb_levels-1-i)*sizeof(topology->levels[i]));
+      memmove(&topology->level_nbobjects[i],
+	      &topology->level_nbobjects[i+1],
+	      (topology->nb_levels-1-i)*sizeof(topology->level_nbobjects[i]));
+      hwloc_debug("removed child level %s at depth %u\n",
+		  hwloc_type_name(type2), i);
+    }
+    topology->level_nbobjects[topology->nb_levels-1] = 0;
+    topology->levels[topology->nb_levels-1] = NULL;
+    topology->nb_levels--;
+
+    res++;
+  }
+
+  if (res > 0) {
+    /* Update object and type depths if some levels were removed */
+    for(i=0; i<topology->nb_levels; i++)
+      topology->type_depth[topology->levels[i][0]->type] = HWLOC_TYPE_DEPTH_UNKNOWN;
+    for(i=0; i<topology->nb_levels; i++) {
+      hwloc_obj_type_t type = topology->levels[i][0]->type;
+      for(j=0; j<topology->level_nbobjects[i]; j++)
+	topology->levels[i][j]->depth = i;
+      if (topology->type_depth[type] == HWLOC_TYPE_DEPTH_UNKNOWN)
+	topology->type_depth[type] = i;
+      else
+	topology->type_depth[type] = HWLOC_TYPE_DEPTH_MULTIPLE;
+    }
+  }
+}
+
+/* Set root->symmetric_subtree to 1 when all normal-children subtrees are
+ * themselves symmetric AND structurally identical (same depth and arity
+ * at every level when walking down the first child of each), 0 otherwise.
+ * I/O and Misc children are ignored.
+ */
+static void
+hwloc_propagate_symmetric_subtree(hwloc_topology_t topology, hwloc_obj_t root)
+{
+  hwloc_obj_t child, *array;
+  int ok;
+
+  /* assume we're not symmetric by default */
+  root->symmetric_subtree = 0;
+
+  /* if no child, we are symmetric */
+  if (!root->arity) {
+    root->symmetric_subtree = 1;
+    return;
+  }
+
+  /* look at normal children only, I/O and Misc are ignored.
+   * return if any child is not symmetric.
+   */
+  ok = 1;
+  for(child = root->first_child; child; child = child->next_sibling) {
+    hwloc_propagate_symmetric_subtree(topology, child);
+    if (!child->symmetric_subtree)
+      ok = 0;
+  }
+  if (!ok)
+    return;
+  /* Misc and I/O children do not care about symmetric_subtree */
+
+  /* now check that children subtrees are identical.
+   * just walk down the first child in each tree and compare their depth and arities
+   */
+  array = malloc(root->arity * sizeof(*array));
+  if (!array)
+    /* BUGFIX: malloc result was dereferenced unchecked; on failure,
+     * conservatively leave the subtree marked as not symmetric. */
+    return;
+  memcpy(array, root->children, root->arity * sizeof(*array));
+  while (1) {
+    unsigned i;
+    /* check current level arities and depth */
+    for(i=1; i<root->arity; i++)
+      if (array[i]->depth != array[0]->depth
+	  || array[i]->arity != array[0]->arity) {
+	free(array);
+	return;
+      }
+    if (!array[0]->arity)
+      /* no more children level, we're ok */
+      break;
+    /* look at first child of each element now */
+    for(i=0; i<root->arity; i++)
+      array[i] = array[i]->first_child;
+  }
+  free(array);
+
+  /* everything went fine, we're symmetric */
+  root->symmetric_subtree = 1;
+}
+
+/* Assign group.depth attributes: each Group level, from top to bottom,
+ * gets the next consecutive depth value starting at 0.
+ */
+static void hwloc_set_group_depth(hwloc_topology_t topology)
+{
+  int groupdepth = 0;
+  unsigned i, j;
+  for(i=0; i<topology->nb_levels; i++)
+    if (topology->levels[i][0]->type == HWLOC_OBJ_GROUP) {
+      for (j = 0; j < topology->level_nbobjects[i]; j++)
+	topology->levels[i][j]->attr->group.depth = groupdepth;
+      groupdepth++;
+    }
+}
+
+/*
+ * Initialize handy pointers in the whole topology.
+ * The topology only had first_child and next_sibling pointers.
+ * When this function returns, all parent/children pointers are initialized.
+ * The remaining fields (levels, cousins, logical_index, depth, ...) will
+ * be setup later in hwloc_connect_levels().
+ *
+ * Can be called several times, so may have to update the array.
+ */
+static void
+hwloc_connect_children(hwloc_obj_t parent)
+{
+  unsigned n, oldn = parent->arity;
+  hwloc_obj_t child, prev_child;
+  int ok;
+
+  /* Main children list */
+
+  ok = 1;
+  prev_child = NULL;
+  for (n = 0, child = parent->first_child;
+       child;
+       n++,   prev_child = child, child = child->next_sibling) {
+    child->sibling_rank = n;
+    child->prev_sibling = prev_child;
+    /* already OK in the array? */
+    if (n >= oldn || parent->children[n] != child)
+      ok = 0;
+    /* recurse */
+    hwloc_connect_children(child);
+  }
+  parent->last_child = prev_child;
+  parent->arity = n;
+  if (!n) {
+    /* no need for an array anymore */
+    free(parent->children);
+    parent->children = NULL;
+    goto io;
+  }
+  if (ok)
+    /* array is already OK (even if too large) */
+    goto io;
+
+  /* alloc a larger array if needed */
+  if (oldn < n) {
+    free(parent->children);
+    /* NOTE(review): malloc result unchecked before the refill loop below
+     * dereferences it — assumes allocation succeeds; confirm OOM policy. */
+    parent->children = malloc(n * sizeof(*parent->children));
+  }
+  /* refill */
+  for (n = 0, child = parent->first_child;
+       child;
+       n++,   child = child->next_sibling) {
+    parent->children[n] = child;
+  }
+
+  /* I/O children list */
+ io:
+
+  prev_child = NULL;
+  for (n = 0, child = parent->io_first_child;
+       child;
+       n++,   prev_child = child, child = child->next_sibling) {
+    child->parent = parent;
+    child->sibling_rank = n;
+    child->prev_sibling = prev_child;
+    hwloc_connect_children(child);
+  }
+  parent->io_arity = n;
+
+  /* Misc children list */
+
+  prev_child = NULL;
+  for (n = 0, child = parent->misc_first_child;
+       child;
+       n++,   prev_child = child, child = child->next_sibling) {
+    child->parent = parent;
+    child->sibling_rank = n;
+    child->prev_sibling = prev_child;
+    hwloc_connect_children(child);
+  }
+  parent->misc_arity = n;
+}
+
+/*
+ * Check whether there is an object below ROOT (ROOT included) that has
+ * the same type as OBJ. Returns 1 if found, 0 otherwise.
+ * Only normal children are searched.
+ */
+static int
+find_same_type(hwloc_obj_t root, hwloc_obj_t obj)
+{
+  hwloc_obj_t child;
+
+  if (hwloc_type_cmp(root, obj) == HWLOC_OBJ_EQUAL)
+    return 1;
+
+  for (child = root->first_child; child; child = child->next_sibling)
+    if (find_same_type(child, obj))
+      return 1;
+
+  return 0;
+}
+
+/* Traverse the array of current objects and compare them with top_obj.
+ * If equal, take the object into taken_objs and put its normal children
+ * into remaining_objs; if not equal, put the object itself into
+ * remaining_objs. Returns the number of objects in remaining_objs.
+ * Caller must size taken_objs/remaining_objs correctly beforehand
+ * (n_taken_objs/n_remaining_objs are only checked under HWLOC_DEBUG).
+ */
+static int
+hwloc_level_take_objects(hwloc_obj_t top_obj,
+			 hwloc_obj_t *current_objs, unsigned n_current_objs,
+			 hwloc_obj_t *taken_objs, unsigned n_taken_objs __hwloc_attribute_unused,
+			 hwloc_obj_t *remaining_objs, unsigned n_remaining_objs __hwloc_attribute_unused)
+{
+  unsigned taken_i = 0;
+  unsigned new_i = 0;
+  unsigned i, j;
+
+  for (i = 0; i < n_current_objs; i++)
+    if (hwloc_type_cmp(top_obj, current_objs[i]) == HWLOC_OBJ_EQUAL) {
+      /* Take it, add main children.  */
+      taken_objs[taken_i++] = current_objs[i];
+      for (j = 0; j < current_objs[i]->arity; j++)
+	remaining_objs[new_i++] = current_objs[i]->children[j];
+    } else {
+      /* Leave it.  */
+      remaining_objs[new_i++] = current_objs[i];
+    }
+
+#ifdef HWLOC_DEBUG
+  /* Make sure we didn't mess up.  */
+  assert(taken_i == n_taken_objs);
+  assert(new_i == n_current_objs - n_taken_objs + n_remaining_objs);
+#endif
+
+  return new_i;
+}
+
+/* Build a special level's objs array from its first/next_cousin linked
+ * list: count the objects, allocate the array, and assign consecutive
+ * logical indexes. Always returns 0.
+ */
+static int
+hwloc_build_level_from_list(struct hwloc_special_level_s *slevel)
+{
+  unsigned i, nb;
+  struct hwloc_obj * obj;
+
+  /* count */
+  obj = slevel->first;
+  i = 0;
+  while (obj) {
+    i++;
+    obj = obj->next_cousin;
+  }
+  nb = i;
+
+  if (nb) {
+    /* allocate and fill level */
+    /* NOTE(review): malloc result unchecked before the fill loop —
+     * assumes allocation succeeds; confirm OOM policy. */
+    slevel->objs = malloc(nb * sizeof(struct hwloc_obj *));
+    obj = slevel->first;
+    i = 0;
+    while (obj) {
+      obj->logical_index = i;
+      slevel->objs[i] = obj;
+      i++;
+      obj = obj->next_cousin;
+    }
+  }
+
+  slevel->nbobjs = nb;
+  return 0;
+}
+
+/* Append obj at the tail of a special level's cousin list,
+ * updating first/last pointers and the prev_cousin link.
+ * (obj->next_cousin is expected to be set by the caller.)
+ */
+static void
+hwloc_append_special_object(struct hwloc_special_level_s *level, hwloc_obj_t obj)
+{
+  if (level->first) {
+    obj->prev_cousin = level->last;
+    obj->prev_cousin->next_cousin = obj;
+    level->last = obj;
+  } else {
+    obj->prev_cousin = NULL;
+    level->first = level->last = obj;
+  }
+}
+
+/* Append I/O and Misc objects to their special-level lists and set
+ * their virtual depths (HWLOC_TYPE_DEPTH_MISC/BRIDGE/PCI_DEVICE/OS_DEVICE).
+ * Recurses over the whole tree starting at obj.
+ */
+static void
+hwloc_list_io_misc_objects(hwloc_topology_t topology, hwloc_obj_t obj)
+{
+  hwloc_obj_t child, *temp;
+
+  if (obj->type == HWLOC_OBJ_MISC) {
+    obj->next_cousin = NULL;
+    obj->depth = HWLOC_TYPE_DEPTH_MISC;
+    /* Insert the main Misc list */
+    hwloc_append_special_object(&topology->slevels[HWLOC_SLEVEL_MISC], obj);
+    /* Recurse, Misc only have Misc children */
+    for_each_misc_child_safe(child, obj, temp)
+      hwloc_list_io_misc_objects(topology, child);
+
+  } else if (hwloc_obj_type_is_io(obj->type)) {
+    obj->next_cousin = NULL;
+
+    if (obj->type == HWLOC_OBJ_BRIDGE) {
+      obj->depth = HWLOC_TYPE_DEPTH_BRIDGE;
+      /* Insert in the main bridge list */
+      hwloc_append_special_object(&topology->slevels[HWLOC_SLEVEL_BRIDGE], obj);
+
+    } else if (obj->type == HWLOC_OBJ_PCI_DEVICE) {
+      obj->depth = HWLOC_TYPE_DEPTH_PCI_DEVICE;
+      /* Insert in the main pcidev list */
+      hwloc_append_special_object(&topology->slevels[HWLOC_SLEVEL_PCIDEV], obj);
+
+    } else if (obj->type == HWLOC_OBJ_OS_DEVICE) {
+      obj->depth = HWLOC_TYPE_DEPTH_OS_DEVICE;
+      /* Insert in the main osdev list */
+      hwloc_append_special_object(&topology->slevels[HWLOC_SLEVEL_OSDEV], obj);
+    }
+    /* Recurse, I/O only have I/O and Misc children */
+    for_each_io_child_safe(child, obj, temp)
+      hwloc_list_io_misc_objects(topology, child);
+    for_each_misc_child_safe(child, obj, temp)
+      hwloc_list_io_misc_objects(topology, child);
+
+  } else {
+    /* Recurse */
+    for_each_child_safe(child, obj, temp)
+      hwloc_list_io_misc_objects(topology, child);
+    for_each_io_child_safe(child, obj, temp)
+      hwloc_list_io_misc_objects(topology, child);
+    for_each_misc_child_safe(child, obj, temp)
+      hwloc_list_io_misc_objects(topology, child);
+  }
+}
+
+/* Build I/O and Misc special levels: free and reset any previous
+ * slevels, relink all I/O/Misc objects into their cousin lists,
+ * then materialize the objs arrays.
+ */
+static void
+hwloc_connect_io_misc_levels(hwloc_topology_t topology)
+{
+  unsigned i;
+
+  for(i=0; i<HWLOC_NR_SLEVELS; i++)
+    free(topology->slevels[i].objs);
+  memset(&topology->slevels, 0, sizeof(topology->slevels));
+
+  hwloc_list_io_misc_objects(topology, topology->levels[0][0]);
+
+  for(i=0; i<HWLOC_NR_SLEVELS; i++)
+    hwloc_build_level_from_list(&topology->slevels[i]);
+}
+
+/*
+ * Do the remaining work that hwloc_connect_children() did not do earlier:
+ * rebuild the per-depth level arrays, cousin links, logical indexes and
+ * type depths. Requires object arity and children list to be properly
+ * initialized (by hwloc_connect_children()).
+ * Returns 0 on success, -1 with errno=ENOMEM on allocation failure.
+ */
+static int
+hwloc_connect_levels(hwloc_topology_t topology)
+{
+  unsigned l, i=0;
+  hwloc_obj_t *objs, *taken_objs, *new_objs, top_obj, root;
+  unsigned n_objs, n_taken_objs, n_new_objs;
+
+  /* reset non-root levels (root was initialized during init and will not change here) */
+  for(l=1; l<topology->nb_levels; l++)
+    free(topology->levels[l]);
+  memset(topology->levels+1, 0, (topology->nb_levels-1)*sizeof(*topology->levels));
+  memset(topology->level_nbobjects+1, 0, (topology->nb_levels-1)*sizeof(*topology->level_nbobjects));
+  topology->nb_levels = 1;
+
+  /* initialize all non-IO/non-Misc depths to unknown */
+  for (l = HWLOC_OBJ_SYSTEM; l < HWLOC_OBJ_MISC; l++)
+    topology->type_depth[l] = HWLOC_TYPE_DEPTH_UNKNOWN;
+
+  /* initialize root type depth */
+  root = topology->levels[0][0];
+  root->depth = 0;
+  topology->type_depth[root->type] = 0;
+  /* root level */
+  root->logical_index = 0;
+  root->prev_cousin = NULL;
+  root->next_cousin = NULL;
+  /* root as a child of nothing */
+  root->parent = NULL;
+  root->sibling_rank = 0;
+  root->prev_sibling = NULL;
+  root->next_sibling = NULL;
+
+  /* Start with children of the whole system.  */
+  n_objs = topology->levels[0][0]->arity;
+  objs = malloc(n_objs * sizeof(objs[0]));
+  if (!objs) {
+    errno = ENOMEM;
+    return -1;
+  }
+  memcpy(objs, topology->levels[0][0]->children, n_objs*sizeof(objs[0]));
+
+  /* Keep building levels while there are objects left in OBJS.  */
+  while (n_objs) {
+    /* At this point, the objs array contains only objects that may go into levels */
+
+    /* First find which type of object is the topmost.
+     * Don't use PU if there are other types since we want to keep PU at the bottom.
+     */
+
+    /* Look for the first non-PU object, and use the first PU if we really find nothing else */
+    for (i = 0; i < n_objs; i++)
+      if (objs[i]->type != HWLOC_OBJ_PU)
+        break;
+    top_obj = i == n_objs ? objs[0] : objs[i];
+
+    /* See if this is actually the topmost object */
+    for (i = 0; i < n_objs; i++) {
+      if (hwloc_type_cmp(top_obj, objs[i]) != HWLOC_OBJ_EQUAL) {
+	if (find_same_type(objs[i], top_obj)) {
+	  /* OBJS[i] is strictly above an object of the same type as TOP_OBJ, so it
+	   * is above TOP_OBJ.  */
+	  top_obj = objs[i];
+	}
+      }
+    }
+
+    /* Now peek all objects of the same type, build a level with that and
+     * replace them with their children.  */
+
+    /* First count them.  */
+    n_taken_objs = 0;
+    n_new_objs = 0;
+    for (i = 0; i < n_objs; i++)
+      if (hwloc_type_cmp(top_obj, objs[i]) == HWLOC_OBJ_EQUAL) {
+	n_taken_objs++;
+	n_new_objs += objs[i]->arity;
+      }
+
+    /* New level.  */
+    /* NOTE(review): taken_objs malloc result is unchecked before use below —
+     * assumes allocation succeeds; confirm OOM policy.
+     * +1 slot is for the NULL terminator stored before the level is kept. */
+    taken_objs = malloc((n_taken_objs + 1) * sizeof(taken_objs[0]));
+    /* New list of pending objects.  */
+    if (n_objs - n_taken_objs + n_new_objs) {
+      new_objs = malloc((n_objs - n_taken_objs + n_new_objs) * sizeof(new_objs[0]));
+    } else {
+#ifdef HWLOC_DEBUG
+      assert(!n_new_objs);
+      assert(n_objs == n_taken_objs);
+#endif
+      new_objs = NULL;
+    }
+
+    n_new_objs = hwloc_level_take_objects(top_obj,
+					  objs, n_objs,
+					  taken_objs, n_taken_objs,
+					  new_objs, n_new_objs);
+
+    /* Ok, put numbers in the level and link cousins.  */
+    for (i = 0; i < n_taken_objs; i++) {
+      taken_objs[i]->depth = topology->nb_levels;
+      taken_objs[i]->logical_index = i;
+      if (i) {
+	taken_objs[i]->prev_cousin = taken_objs[i-1];
+	taken_objs[i-1]->next_cousin = taken_objs[i];
+      }
+    }
+    taken_objs[0]->prev_cousin = NULL;
+    taken_objs[n_taken_objs-1]->next_cousin = NULL;
+
+    /* One more level!  */
+    hwloc_debug("--- %s level", hwloc_type_name(top_obj->type));
+    hwloc_debug(" has number %u\n\n", topology->nb_levels);
+
+    if (topology->type_depth[top_obj->type] == HWLOC_TYPE_DEPTH_UNKNOWN)
+      topology->type_depth[top_obj->type] = topology->nb_levels;
+    else
+      topology->type_depth[top_obj->type] = HWLOC_TYPE_DEPTH_MULTIPLE; /* mark as occurring at multiple depths */
+
+    taken_objs[n_taken_objs] = NULL;
+
+    if (topology->nb_levels == topology->nb_levels_allocated) {
+      /* extend the arrays of levels */
+      void *tmplevels, *tmpnbobjs;
+      tmplevels = realloc(topology->levels,
+			  2 * topology->nb_levels_allocated * sizeof(*topology->levels));
+      tmpnbobjs = realloc(topology->level_nbobjects,
+			  2 * topology->nb_levels_allocated * sizeof(*topology->level_nbobjects));
+      if (!tmplevels || !tmpnbobjs) {
+	fprintf(stderr, "hwloc failed to realloc level arrays to %u\n", topology->nb_levels_allocated * 2);
+
+	/* if one realloc succeeded, make sure the caller will free the new buffer */
+	if (tmplevels)
+	  topology->levels = tmplevels;
+	if (tmpnbobjs)
+	  topology->level_nbobjects = tmpnbobjs;
+	/* the realloc that failed left topology->level_foo untouched, will be freed by the caller */
+
+	free(objs);
+	free(taken_objs);
+	free(new_objs);
+	errno = ENOMEM;
+	return -1;
+      }
+      topology->levels = tmplevels;
+      topology->level_nbobjects = tmpnbobjs;
+      memset(topology->levels + topology->nb_levels_allocated,
+	     0, topology->nb_levels_allocated * sizeof(*topology->levels));
+      memset(topology->level_nbobjects + topology->nb_levels_allocated,
+	     0, topology->nb_levels_allocated * sizeof(*topology->level_nbobjects));
+      topology->nb_levels_allocated *= 2;
+    }
+    /* add the new level */
+    topology->level_nbobjects[topology->nb_levels] = n_taken_objs;
+    topology->levels[topology->nb_levels] = taken_objs;
+
+    topology->nb_levels++;
+
+    free(objs);
+
+    /* Switch to new_objs */
+    objs = new_objs;
+    n_objs = n_new_objs;
+  }
+
+  /* It's empty now.  */
+  free(objs);
+
+  return 0;
+}
+
+/* Public entry point: rebuild children arrays, levels and special
+ * levels after the tree was modified.
+ * flags must be 0 (returns -1 with errno=EINVAL otherwise).
+ * No-op when topology->modified is not set.
+ * Returns 0 on success, -1 on failure (errno set by hwloc_connect_levels).
+ */
+int
+hwloc_topology_reconnect(struct hwloc_topology *topology, unsigned long flags)
+{
+  if (flags) {
+    errno = EINVAL;
+    return -1;
+  }
+  if (!topology->modified)
+    return 0;
+
+  hwloc_connect_children(topology->levels[0][0]);
+
+  if (hwloc_connect_levels(topology) < 0)
+    return -1;
+
+  hwloc_connect_io_misc_levels(topology);
+
+  topology->modified = 0;
+
+  return 0;
+}
+
+/* Allocate any missing cpusets/nodesets of obj:
+ * cpuset and the allowed sets start full, the complete sets and the
+ * nodeset start empty. Existing sets are left untouched.
+ */
+void hwloc_alloc_obj_cpusets(hwloc_obj_t obj)
+{
+  if (!obj->cpuset)
+    obj->cpuset = hwloc_bitmap_alloc_full();
+  if (!obj->complete_cpuset)
+    obj->complete_cpuset = hwloc_bitmap_alloc();
+  if (!obj->allowed_cpuset)
+    obj->allowed_cpuset = hwloc_bitmap_alloc_full();
+  if (!obj->nodeset)
+    obj->nodeset = hwloc_bitmap_alloc();
+  if (!obj->complete_nodeset)
+    obj->complete_nodeset = hwloc_bitmap_alloc();
+  if (!obj->allowed_nodeset)
+    obj->allowed_nodeset = hwloc_bitmap_alloc_full();
+}
+
+/* Main discovery loop.
+ *
+ * Runs the CPU/global backends, sanitizes cpusets and nodesets, reconnects
+ * the tree, then runs the remaining backends (I/O, Misc) and applies the
+ * filtering/cleanup passes.
+ * Returns 0 on success, -1 with errno set (EINVAL when no PU was found). */
+static int
+hwloc_discover(struct hwloc_topology *topology)
+{
+  struct hwloc_backend *backend;
+
+  topology->modified = 0; /* no need to reconnect yet */
+
+  /* discover() callbacks should use hwloc_insert to add objects initialized
+   * through hwloc_alloc_setup_object.
+   * For node levels, nodeset and memory must be initialized.
+   * For cache levels, memory and type/depth must be initialized.
+   * For group levels, depth must be initialized.
+   */
+
+  /* There must be at least a PU object for each logical processor, at worst
+   * produced by hwloc_setup_pu_level()
+   */
+
+  /* To be able to just use hwloc_insert_object_by_cpuset to insert the object
+   * in the topology according to the cpuset, the cpuset field must be
+   * initialized.
+   */
+
+  /* A priori, all processors are visible in the topology, and allowed
+   * for the application.
+   *
+   * - If some processors exist but topology information is unknown for them
+   *   (and thus the backend couldn't create objects for them), they should be
+   *   added to the complete_cpuset field of the lowest object where the object
+   *   could reside.
+   *
+   * - If some processors are not allowed for the application (e.g. for
+   *   administration reasons), they should be dropped from the allowed_cpuset
+   *   field.
+   *
+   * The same applies to the node sets complete_nodeset and allowed_nodeset.
+   *
+   * If such field doesn't exist yet, it can be allocated, and initialized to
+   * zero (for complete), or to full (for allowed). The values are
+   * automatically propagated to the whole tree after detection.
+   */
+
+  /*
+   * Discover CPUs first
+   */
+  backend = topology->backends;
+  while (NULL != backend) {
+    if (backend->component->type != HWLOC_DISC_COMPONENT_TYPE_CPU
+	&& backend->component->type != HWLOC_DISC_COMPONENT_TYPE_GLOBAL)
+      /* not yet */
+      goto next_cpubackend;
+    if (!backend->discover)
+      goto next_cpubackend;
+    backend->discover(backend);
+    hwloc_debug_print_objects(0, topology->levels[0][0]);
+
+next_cpubackend:
+    backend = backend->next;
+  }
+
+  /* Update objects cpusets and nodesets now that the CPU/GLOBAL backend populated PUs and nodes */
+  hwloc_debug("%s", "\nRestrict topology cpusets to existing PU and NODE objects\n");
+  collect_proc_cpuset(topology->levels[0][0], NULL);
+
+  /* One backend should have allocated root cpusets with hwloc_alloc_obj_cpusets()
+   * and collect_proc_cpuset() should have set bits based on existing PUs.
+   */
+  if (!topology->levels[0][0]->cpuset || hwloc_bitmap_iszero(topology->levels[0][0]->cpuset)) {
+    hwloc_debug("%s", "No PU added by any CPU and global backend\n");
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (topology->binding_hooks.get_allowed_resources && topology->is_thissystem) {
+    const char *env = getenv("HWLOC_THISSYSTEM_ALLOWED_RESOURCES");
+    if ((env && atoi(env))
+	|| (topology->flags & HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES))
+      topology->binding_hooks.get_allowed_resources(topology);
+  }
+  hwloc_debug("%s", "\nPropagate disallowed cpus down and up\n");
+  hwloc_bitmap_and(topology->levels[0][0]->allowed_cpuset, topology->levels[0][0]->allowed_cpuset, topology->levels[0][0]->cpuset);
+  propagate_unused_cpuset(topology->levels[0][0], NULL);
+
+  /* Backends must allocate root->*nodeset.
+   *
+   * Most of them call hwloc_alloc_obj_cpusets() on the root to do so.
+   * root->complete_nodeset is empty by default, and filled by the core
+   * when NUMA nodes are added with insert_by_cpuset().
+   * root->allowed_nodeset is everything by default, unless reduced by backends.
+   *
+   * The XML backend takes care of everything to properly support old XML input
+   * with missing nodesets and/or NUMA nodes. It checks nodesets and fix them if needed.
+   */
+  assert(topology->levels[0][0]->nodeset);
+  assert(topology->levels[0][0]->complete_nodeset);
+  assert(topology->levels[0][0]->allowed_nodeset);
+  /* If there's no NUMA node, add one with all the memory */
+  if (hwloc_bitmap_iszero(topology->levels[0][0]->complete_nodeset)) {
+    hwloc_obj_t node = hwloc_alloc_setup_object(topology, HWLOC_OBJ_NUMANODE, 0);
+    node->cpuset = hwloc_bitmap_dup(topology->levels[0][0]->cpuset); /* requires root cpuset to be initialized above */
+    node->complete_cpuset = hwloc_bitmap_dup(topology->levels[0][0]->complete_cpuset); /* requires root cpuset to be initialized above */
+    node->allowed_cpuset = hwloc_bitmap_dup(topology->levels[0][0]->allowed_cpuset); /* requires root cpuset to be initialized above */
+    node->nodeset = hwloc_bitmap_alloc();
+    /* other nodesets will be filled below */
+    hwloc_bitmap_set(node->nodeset, 0);
+    memcpy(&node->memory, &topology->levels[0][0]->memory, sizeof(node->memory));
+    memset(&topology->levels[0][0]->memory, 0, sizeof(node->memory));
+    hwloc_insert_object_by_cpuset(topology, node);
+  }
+  hwloc_debug("%s", "\nPropagate nodesets\n");
+  propagate_nodeset(topology->levels[0][0], NULL);
+  propagate_nodesets(topology->levels[0][0]);
+
+  hwloc_debug_print_objects(0, topology->levels[0][0]);
+
+  if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM)) {
+    hwloc_debug("%s", "\nRemoving unauthorized sets from all sets\n");
+    remove_unused_sets(topology->levels[0][0]);
+    hwloc_debug_print_objects(0, topology->levels[0][0]);
+  }
+
+  /* see if we should ignore the root now that we know how many children it has */
+  if (!hwloc_filter_check_keep_object(topology, topology->levels[0][0])
+      && topology->levels[0][0]->first_child && !topology->levels[0][0]->first_child->next_sibling) {
+    hwloc_obj_t oldroot = topology->levels[0][0];
+    hwloc_obj_t newroot = oldroot->first_child;
+    /* switch to the new root */
+    newroot->parent = NULL;
+    topology->levels[0][0] = newroot;
+    /* move oldroot misc/io children before newroot children */
+    if (oldroot->io_first_child)
+      prepend_siblings_list(&newroot->io_first_child, oldroot->io_first_child, newroot);
+    if (oldroot->misc_first_child)
+      prepend_siblings_list(&newroot->misc_first_child, oldroot->misc_first_child, newroot);
+    /* destroy oldroot and use the new one */
+    hwloc_free_unlinked_object(oldroot);
+  }
+
+  /*
+   * All object cpusets and nodesets are properly set now.
+   */
+
+  /* Now connect handy pointers to make remaining discovery easier. */
+  hwloc_debug("%s", "\nOk, finished tweaking, now connect\n");
+  if (hwloc_topology_reconnect(topology, 0) < 0)
+    return -1;
+  hwloc_debug_print_objects(0, topology->levels[0][0]);
+
+  /*
+   * Additional discovery with other backends
+   */
+
+  backend = topology->backends;
+  while (NULL != backend) {
+    if (backend->component->type == HWLOC_DISC_COMPONENT_TYPE_CPU
+	|| backend->component->type == HWLOC_DISC_COMPONENT_TYPE_GLOBAL)
+      /* already done above */
+      goto next_noncpubackend;
+    if (!backend->discover)
+      goto next_noncpubackend;
+    backend->discover(backend);
+    hwloc_debug_print_objects(0, topology->levels[0][0]);
+
+next_noncpubackend:
+    backend = backend->next;
+  }
+
+  hwloc_pci_belowroot_apply_locality(topology);
+
+  hwloc_debug("%s", "\nNow reconnecting\n");
+  hwloc_debug_print_objects(0, topology->levels[0][0]);
+
+  /* Remove some stuff */
+
+  hwloc_debug("%s", "\nRemoving bridge objects if needed\n");
+  hwloc_filter_bridges(topology, topology->levels[0][0]);
+  hwloc_debug_print_objects(0, topology->levels[0][0]);
+
+  hwloc_debug("%s", "\nRemoving empty objects except numa nodes and PCI devices\n");
+  remove_empty(topology, &topology->levels[0][0]);
+  if (!topology->levels[0][0]) {
+    fprintf(stderr, "Topology became empty, aborting!\n");
+    abort();
+  }
+  hwloc_debug_print_objects(0, topology->levels[0][0]);
+
+  /* Reconnect things after all these changes.
+   * Often needed because of Groups inserted for I/Os.
+   * And required for KEEP_STRUCTURE below.
+   */
+  if (hwloc_topology_reconnect(topology, 0) < 0)
+    return -1;
+
+  hwloc_debug("%s", "\nRemoving levels with HWLOC_TYPE_FILTER_KEEP_STRUCTURE\n");
+  hwloc_filter_levels_keep_structure(topology);
+  hwloc_debug_print_objects(0, topology->levels[0][0]);
+
+  /* accumulate children memory in total_memory fields (only once parent is set) */
+  hwloc_debug("%s", "\nPropagate total memory up\n");
+  propagate_total_memory(topology->levels[0][0]);
+
+  /* setup the symmetric_subtree attribute */
+  hwloc_propagate_symmetric_subtree(topology, topology->levels[0][0]);
+
+  /* apply group depths */
+  hwloc_set_group_depth(topology);
+
+  /* add some identification attributes if not loading from XML */
+  if (topology->backends
+      && strcmp(topology->backends->component->name, "xml")) {
+    char *value;
+    /* add a hwlocVersion */
+    hwloc_obj_add_info(topology->levels[0][0], "hwlocVersion", HWLOC_VERSION);
+    /* add a ProcessName */
+    value = hwloc_progname(topology);
+    if (value) {
+      hwloc_obj_add_info(topology->levels[0][0], "ProcessName", value);
+      free(value);
+    }
+  }
+
+  return 0;
+}
+
+/* To be called before discovery is actually launched,
+ * Resets everything in case a previous load initialized some stuff.
+ */
+void
+hwloc_topology_setup_defaults(struct hwloc_topology *topology)
+{
+  struct hwloc_obj *root_obj;
+  unsigned l;
+
+  /* reset support */
+  memset(&topology->binding_hooks, 0, sizeof(topology->binding_hooks));
+  memset(topology->support.discovery, 0, sizeof(*topology->support.discovery));
+  memset(topology->support.cpubind, 0, sizeof(*topology->support.cpubind));
+  memset(topology->support.membind, 0, sizeof(*topology->support.membind));
+
+  /* Only the System object on top by default */
+  topology->next_gp_index = 1; /* keep 0 as an invalid value */
+  topology->nb_levels = 1; /* there's at least SYSTEM */
+  /* NOTE(review): malloc result is not checked; a failure here would make
+   * the levels[0][0] store below dereference NULL — confirm callers accept
+   * abort-on-OOM semantics. */
+  topology->levels[0] = malloc (sizeof (hwloc_obj_t));
+  topology->level_nbobjects[0] = 1;
+
+  /* NULLify other special levels */
+  memset(&topology->slevels, 0, sizeof(topology->slevels));
+  /* assert the indexes of special levels */
+  HWLOC_BUILD_ASSERT(HWLOC_SLEVEL_BRIDGE == HWLOC_SLEVEL_FROM_DEPTH(HWLOC_TYPE_DEPTH_BRIDGE));
+  HWLOC_BUILD_ASSERT(HWLOC_SLEVEL_PCIDEV == HWLOC_SLEVEL_FROM_DEPTH(HWLOC_TYPE_DEPTH_PCI_DEVICE));
+  HWLOC_BUILD_ASSERT(HWLOC_SLEVEL_OSDEV == HWLOC_SLEVEL_FROM_DEPTH(HWLOC_TYPE_DEPTH_OS_DEVICE));
+  HWLOC_BUILD_ASSERT(HWLOC_SLEVEL_MISC == HWLOC_SLEVEL_FROM_DEPTH(HWLOC_TYPE_DEPTH_MISC));
+
+  /* sane values to type_depth */
+  for (l = HWLOC_OBJ_SYSTEM; l < HWLOC_OBJ_MISC; l++)
+    topology->type_depth[l] = HWLOC_TYPE_DEPTH_UNKNOWN;
+  /* special types always live at their virtual negative depth */
+  topology->type_depth[HWLOC_OBJ_BRIDGE] = HWLOC_TYPE_DEPTH_BRIDGE;
+  topology->type_depth[HWLOC_OBJ_PCI_DEVICE] = HWLOC_TYPE_DEPTH_PCI_DEVICE;
+  topology->type_depth[HWLOC_OBJ_OS_DEVICE] = HWLOC_TYPE_DEPTH_OS_DEVICE;
+  topology->type_depth[HWLOC_OBJ_MISC] = HWLOC_TYPE_DEPTH_MISC;
+
+  /* Create the actual machine object, but don't touch its attributes yet
+   * since the OS backend may still change the object into something else
+   * (for instance System)
+   */
+  root_obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_MACHINE, 0);
+  topology->levels[0][0] = root_obj;
+}
+
+static void hwloc__topology_filter_init(struct hwloc_topology *topology);
+
+/* Allocate and initialize an empty topology context.
+ * Returns 0 on success and stores the new topology in *topologyp,
+ * or -1 if the main allocation failed. */
+int
+hwloc_topology_init (struct hwloc_topology **topologyp)
+{
+  struct hwloc_topology *topology;
+
+  topology = malloc (sizeof (struct hwloc_topology));
+  if(!topology)
+    return -1;
+
+  hwloc_components_init();
+  hwloc_backends_init(topology);
+  hwloc_pci_discovery_init(topology); /* make sure both dup() and load() get sane variables */
+
+  /* Setup topology context */
+  topology->is_loaded = 0;
+  topology->flags = 0;
+  topology->is_thissystem = 1;
+  topology->pid = 0;
+  topology->userdata = NULL;
+
+  /* NOTE(review): these mallocs/callocs are unchecked; a failure would
+   * crash in setup_defaults() below — confirm abort-on-OOM is acceptable. */
+  topology->support.discovery = malloc(sizeof(*topology->support.discovery));
+  topology->support.cpubind = malloc(sizeof(*topology->support.cpubind));
+  topology->support.membind = malloc(sizeof(*topology->support.membind));
+
+  topology->nb_levels_allocated = 16; /* enough for default 9 levels = Mach+Pack+NUMA+L3+L2+L1d+L1i+Co+PU */
+  topology->levels = calloc(topology->nb_levels_allocated, sizeof(*topology->levels));
+  topology->level_nbobjects = calloc(topology->nb_levels_allocated, sizeof(*topology->level_nbobjects));
+
+  hwloc__topology_filter_init(topology);
+
+  hwloc_internal_distances_init(topology);
+
+  topology->userdata_export_cb = NULL;
+  topology->userdata_import_cb = NULL;
+  topology->userdata_not_decoded = 0;
+
+  /* Make the topology look like something coherent but empty */
+  hwloc_topology_setup_defaults(topology);
+
+  *topologyp = topology;
+  return 0;
+}
+
+/* Record the pid whose topology should be inspected instead of the
+ * caller's.  Only implemented on Linux (ENOSYS elsewhere);
+ * fails with EBUSY once the topology has been loaded. */
+int
+hwloc_topology_set_pid(struct hwloc_topology *topology __hwloc_attribute_unused,
+                       hwloc_pid_t pid __hwloc_attribute_unused)
+{
+  if (topology->is_loaded) {
+    errno = EBUSY;
+    return -1;
+  }
+
+  /* this does *not* change the backend */
+#ifdef HWLOC_LINUX_SYS
+  topology->pid = pid;
+  return 0;
+#else /* HWLOC_LINUX_SYS */
+  errno = ENOSYS;
+  return -1;
+#endif /* HWLOC_LINUX_SYS */
+}
+
+/* Force the synthetic backend using the given description string.
+ * Must be called before load (EBUSY otherwise). */
+int
+hwloc_topology_set_synthetic(struct hwloc_topology *topology, const char *description)
+{
+  if (topology->is_loaded) {
+    errno = EBUSY;
+    return -1;
+  }
+
+  return hwloc_disc_component_force_enable(topology,
+					   0 /* api */,
+					   -1, "synthetic",
+					   description, NULL, NULL);
+}
+
+/* Force the XML backend loading from the given file path.
+ * Must be called before load (EBUSY otherwise). */
+int
+hwloc_topology_set_xml(struct hwloc_topology *topology,
+		       const char *xmlpath)
+{
+  if (topology->is_loaded) {
+    errno = EBUSY;
+    return -1;
+  }
+
+  return hwloc_disc_component_force_enable(topology,
+					   0 /* api */,
+					   -1, "xml",
+					   xmlpath, NULL, NULL);
+}
+
+/* Force the XML backend loading from an in-memory buffer of the given
+ * size (passed to the backend through the generic void* argument).
+ * Must be called before load (EBUSY otherwise). */
+int
+hwloc_topology_set_xmlbuffer(struct hwloc_topology *topology,
+                             const char *xmlbuffer,
+                             int size)
+{
+  if (topology->is_loaded) {
+    errno = EBUSY;
+    return -1;
+  }
+
+  return hwloc_disc_component_force_enable(topology,
+					   0 /* api */,
+					   -1, "xml", NULL,
+					   xmlbuffer, (void*) (uintptr_t) size);
+}
+
+/* Set topology behavior flags; must be called before load.
+ * Only WHOLE_SYSTEM, IS_THISSYSTEM and THISSYSTEM_ALLOWED_RESOURCES are
+ * accepted, any other bit fails with EINVAL. */
+int
+hwloc_topology_set_flags (struct hwloc_topology *topology, unsigned long flags)
+{
+  if (topology->is_loaded) {
+    /* actually harmless */
+    errno = EBUSY;
+    return -1;
+  }
+
+  if (flags & ~(HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM|HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM|HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES)) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  topology->flags = flags;
+  return 0;
+}
+
+/* Return the flags previously set with hwloc_topology_set_flags(). */
+unsigned long
+hwloc_topology_get_flags (struct hwloc_topology *topology)
+{
+  return topology->flags;
+}
+
+/* Install the default per-type object filters:
+ * keep everything, except instruction caches, Misc and I/O objects
+ * (dropped by default), and Groups (only kept when they add structure). */
+static void
+hwloc__topology_filter_init(struct hwloc_topology *topology)
+{
+  hwloc_obj_type_t type;
+  /* Only ignore useless cruft by default */
+  for(type = HWLOC_OBJ_SYSTEM; type < HWLOC_OBJ_TYPE_MAX; type++)
+    topology->type_filter[type] = HWLOC_TYPE_FILTER_KEEP_ALL;
+  topology->type_filter[HWLOC_OBJ_L1ICACHE] = HWLOC_TYPE_FILTER_KEEP_NONE;
+  topology->type_filter[HWLOC_OBJ_L2ICACHE] = HWLOC_TYPE_FILTER_KEEP_NONE;
+  topology->type_filter[HWLOC_OBJ_L3ICACHE] = HWLOC_TYPE_FILTER_KEEP_NONE;
+  topology->type_filter[HWLOC_OBJ_GROUP] = HWLOC_TYPE_FILTER_KEEP_STRUCTURE;
+  topology->type_filter[HWLOC_OBJ_MISC] = HWLOC_TYPE_FILTER_KEEP_NONE;
+  topology->type_filter[HWLOC_OBJ_BRIDGE] = HWLOC_TYPE_FILTER_KEEP_NONE;
+  topology->type_filter[HWLOC_OBJ_PCI_DEVICE] = HWLOC_TYPE_FILTER_KEEP_NONE;
+  topology->type_filter[HWLOC_OBJ_OS_DEVICE] = HWLOC_TYPE_FILTER_KEEP_NONE;
+}
+
+/* Internal setter: validate and store the filter for one object type.
+ * Rejects combinations that would break the topology invariants
+ * (PU/NUMA levels are mandatory, KEEP_STRUCTURE is meaningless for
+ * I/O/Misc, Groups can never be KEEP_ALL). Returns 0 or -1/EINVAL. */
+static int
+hwloc__topology_set_type_filter(struct hwloc_topology *topology, hwloc_obj_type_t type, enum hwloc_type_filter_e filter)
+{
+  if (type == HWLOC_OBJ_PU || type == HWLOC_OBJ_NUMANODE) {
+    if (filter != HWLOC_TYPE_FILTER_KEEP_ALL) {
+      /* we need the PU and NUMA levels */
+      errno = EINVAL;
+      return -1;
+    }
+  } else if (hwloc_obj_type_is_special(type)) {
+    if (filter == HWLOC_TYPE_FILTER_KEEP_STRUCTURE) {
+      /* I/O and Misc are outside of the main topology structure, makes no sense. */
+      errno = EINVAL;
+      return -1;
+    }
+  } else if (type == HWLOC_OBJ_GROUP) {
+    if (filter == HWLOC_TYPE_FILTER_KEEP_ALL) {
+      /* Groups are always ignored, at least keep_structure */
+      errno = EINVAL;
+      return -1;
+    }
+  }
+
+  /* "important" just means "all" for non-I/O non-Misc */
+  if (!hwloc_obj_type_is_special(type) && filter == HWLOC_TYPE_FILTER_KEEP_IMPORTANT)
+    filter = HWLOC_TYPE_FILTER_KEEP_ALL;
+
+  topology->type_filter[type] = filter;
+  return 0;
+}
+
+/* Public setter for one object type filter.
+ * Rejects out-of-range types (EINVAL, unsigned cast also catches negative
+ * values) and loaded topologies (EBUSY), then defers validation to
+ * hwloc__topology_set_type_filter(). */
+int
+hwloc_topology_set_type_filter(struct hwloc_topology *topology, hwloc_obj_type_t type, enum hwloc_type_filter_e filter)
+{
+  if ((unsigned) type >= HWLOC_OBJ_TYPE_MAX) {
+    errno = EINVAL;
+    return -1;
+  }
+  if (topology->is_loaded) {
+    errno = EBUSY;
+    return -1;
+  }
+  return hwloc__topology_set_type_filter(topology, type, filter);
+}
+
+/* Apply the same filter to every object type.
+ * Per-type return values are deliberately ignored: some types reject
+ * some filters (e.g. PU/NUMA only accept KEEP_ALL) and keep their
+ * previous setting. Fails with EBUSY once loaded. */
+int
+hwloc_topology_set_all_types_filter(struct hwloc_topology *topology, enum hwloc_type_filter_e filter)
+{
+  hwloc_obj_type_t type;
+  if (topology->is_loaded) {
+    errno = EBUSY;
+    return -1;
+  }
+  for(type = HWLOC_OBJ_SYSTEM; type < HWLOC_OBJ_TYPE_MAX; type++)
+    hwloc__topology_set_type_filter(topology, type, filter);
+  return 0;
+}
+
+/* Retrieve the current filter of the given object type into *filterp.
+ * Returns -1/EINVAL when type is out of range. */
+int
+hwloc_topology_get_type_filter(struct hwloc_topology *topology, hwloc_obj_type_t type, enum hwloc_type_filter_e *filterp)
+{
+  /* cast like hwloc_topology_set_type_filter() does: hwloc_obj_type_t may
+   * be a signed enum, and a negative value must be rejected here instead
+   * of indexing type_filter[] out of bounds below */
+  if ((unsigned) type >= HWLOC_OBJ_TYPE_MAX) {
+    errno = EINVAL;
+    return -1;
+  }
+  *filterp = topology->type_filter[type];
+  return 0;
+}
+
+/* Free the object tree, level arrays, special levels and distances,
+ * without freeing the topology structure itself. */
+void
+hwloc_topology_clear (struct hwloc_topology *topology)
+{
+  /* no need to set to NULL after free() since callers will call setup_defaults() or just destroy the rest of the topology */
+  unsigned l;
+  hwloc_internal_distances_destroy(topology);
+  hwloc_free_object_and_children(topology->levels[0][0]);
+  for (l=0; l<topology->nb_levels; l++)
+    free(topology->levels[l]);
+  for(l=0; l<HWLOC_NR_SLEVELS; l++)
+    free(topology->slevels[l].objs);
+}
+
+/* Tear down a topology created by hwloc_topology_init():
+ * disable backends/components, clear the tree, then release the
+ * container arrays, support structs and the topology itself. */
+void
+hwloc_topology_destroy (struct hwloc_topology *topology)
+{
+  hwloc_backends_disable_all(topology);
+  hwloc_components_fini();
+
+  hwloc_topology_clear(topology);
+
+  free(topology->levels);
+  free(topology->level_nbobjects);
+
+  free(topology->support.discovery);
+  free(topology->support.cpubind);
+  free(topology->support.membind);
+  free(topology);
+}
+
+/* Build the actual topology: select and enable backends (honoring the
+ * HWLOC_FSROOT/HWLOC_CPUID_PATH/HWLOC_SYNTHETIC/HWLOC_XMLFILE environment
+ * variables unless HWLOC_COMPONENTS is set), install binding hooks, run
+ * discovery, and mark the topology as loaded.
+ * Fails with EBUSY if already loaded. On discovery failure the topology
+ * is reset to the default empty state and -1 is returned. */
+int
+hwloc_topology_load (struct hwloc_topology *topology)
+{
+  int err;
+
+  if (topology->is_loaded) {
+    errno = EBUSY;
+    return -1;
+  }
+
+  hwloc_internal_distances_prepare(topology);
+
+  if (getenv("HWLOC_XML_USERDATA_NOT_DECODED"))
+    topology->userdata_not_decoded = 1;
+
+  /* Ignore variables if HWLOC_COMPONENTS is set. It will be processed later */
+  if (!getenv("HWLOC_COMPONENTS")) {
+    /* Only apply variables if we have not changed the backend yet.
+     * Only the first one will be kept.
+     * Check for FSROOT first since it's for debugging so likely needs to override everything else.
+     * Check for XML last (that's the one that may be set system-wide by administrators)
+     * so that it's only used if other variables are not set,
+     * to allow users to override easily.
+     */
+    if (!topology->backends) {
+      const char *fsroot_path_env = getenv("HWLOC_FSROOT");
+      if (fsroot_path_env)
+	hwloc_disc_component_force_enable(topology,
+					  1 /* env force */,
+					  HWLOC_DISC_COMPONENT_TYPE_CPU, "linux",
+					  NULL /* backend will getenv again */, NULL, NULL);
+    }
+    if (!topology->backends) {
+      const char *cpuid_path_env = getenv("HWLOC_CPUID_PATH");
+      if (cpuid_path_env)
+	hwloc_disc_component_force_enable(topology,
+					  1 /* env force */,
+					  HWLOC_DISC_COMPONENT_TYPE_CPU, "x86",
+					  NULL /* backend will getenv again */, NULL, NULL);
+    }
+    if (!topology->backends) {
+      const char *synthetic_env = getenv("HWLOC_SYNTHETIC");
+      if (synthetic_env)
+	hwloc_disc_component_force_enable(topology,
+					  1 /* env force */,
+					  -1, "synthetic",
+					  synthetic_env, NULL, NULL);
+    }
+    if (!topology->backends) {
+      const char *xmlpath_env = getenv("HWLOC_XMLFILE");
+      if (xmlpath_env)
+	hwloc_disc_component_force_enable(topology,
+					  1 /* env force */,
+					  -1, "xml",
+					  xmlpath_env, NULL, NULL);
+    }
+  }
+
+  /* instantiate all possible other backends now */
+  hwloc_disc_components_enable_others(topology);
+  /* now that backends are enabled, update the thissystem flag and some callbacks */
+  hwloc_backends_is_thissystem(topology);
+  hwloc_backends_find_callbacks(topology);
+  /*
+   * Now set binding hooks according to topology->is_thissystem
+   * and what the native OS backend offers.
+   */
+  hwloc_set_binding_hooks(topology);
+
+  hwloc_pci_discovery_prepare(topology);
+
+  /* actual topology discovery */
+  err = hwloc_discover(topology);
+  if (err < 0)
+    goto out;
+
+  hwloc_pci_discovery_exit(topology);
+
+#ifndef HWLOC_DEBUG
+  if (getenv("HWLOC_DEBUG_CHECK"))
+#endif
+    hwloc_topology_check(topology);
+
+  /* Mark distances objs arrays as invalid since we may have removed objects
+   * from the topology after adding the distances (remove_empty, etc).
+   * It would be hard to actually verify whether it's needed.
+   * We'll refresh them if users ever actually look at distances.
+   */
+  hwloc_internal_distances_invalidate_cached_objs(topology);
+
+  topology->is_loaded = 1;
+  return 0;
+
+ out:
+  /* discovery failed: go back to a coherent empty topology */
+  hwloc_pci_discovery_exit(topology);
+  hwloc_topology_clear(topology);
+  hwloc_topology_setup_defaults(topology);
+  hwloc_backends_disable_all(topology);
+  return -1;
+}
+
+/* adjust object cpusets according the given droppedcpuset,
+ * drop object whose cpuset becomes empty and that have no children,
+ * and propagate NUMA node removal as nodeset changes in parents.
+ *
+ * Recursive depth-first pass used by hwloc_topology_restrict();
+ * droppednodeset accumulates the os_index of every removed NUMA node
+ * so parents can update their nodesets afterwards.
+ */
+static void
+restrict_object_by_cpuset(hwloc_topology_t topology, unsigned long flags, hwloc_obj_t *pobj,
+			  hwloc_bitmap_t droppedcpuset, hwloc_bitmap_t droppednodeset)
+{
+  hwloc_obj_t obj = *pobj, child, *pchild;
+  /* only recurse into subtrees that actually intersect the dropped set */
+  int modified = hwloc_bitmap_intersects(obj->complete_cpuset, droppedcpuset);
+
+  hwloc_bitmap_andnot(obj->cpuset, obj->cpuset, droppedcpuset);
+  hwloc_bitmap_andnot(obj->complete_cpuset, obj->complete_cpuset, droppedcpuset);
+  hwloc_bitmap_andnot(obj->allowed_cpuset, obj->allowed_cpuset, droppedcpuset);
+
+  if (modified) {
+    for_each_child_safe(child, obj, pchild)
+      restrict_object_by_cpuset(topology, flags, pchild, droppedcpuset, droppednodeset);
+    /* Nothing to restrict under I/O or Misc */
+  }
+
+  if (!obj->first_child /* arity not updated before connect_children() */
+      && hwloc_bitmap_iszero(obj->cpuset)
+      && (obj->type != HWLOC_OBJ_NUMANODE || (flags & HWLOC_RESTRICT_FLAG_REMOVE_CPULESS))) {
+    /* remove object */
+    hwloc_debug("%s", "\nRemoving object during restrict");
+    hwloc_debug_print_object(0, obj);
+
+    if (obj->type == HWLOC_OBJ_NUMANODE)
+      hwloc_bitmap_set(droppednodeset, obj->os_index);
+    if (!(flags & HWLOC_RESTRICT_FLAG_ADAPT_IO)) {
+      hwloc_free_object_siblings_and_children(obj->io_first_child);
+      obj->io_first_child = NULL;
+    }
+    if (!(flags & HWLOC_RESTRICT_FLAG_ADAPT_MISC)) {
+      hwloc_free_object_siblings_and_children(obj->misc_first_child);
+      obj->misc_first_child = NULL;
+    }
+    unlink_and_free_single_object(pobj);
+    /* do not remove children. if they were to be removed, they would have been already */
+    topology->modified = 1;
+
+  } else {
+    /* keep object, update its nodeset if removing CPU-less NUMA-node is enabled */
+    if (flags & HWLOC_RESTRICT_FLAG_REMOVE_CPULESS) {
+      hwloc_bitmap_andnot(obj->nodeset, obj->nodeset, droppednodeset);
+      hwloc_bitmap_andnot(obj->complete_nodeset, obj->complete_nodeset, droppednodeset);
+      hwloc_bitmap_andnot(obj->allowed_nodeset, obj->allowed_nodeset, droppednodeset);
+    }
+  }
+}
+
+/* Restrict the loaded topology to the given cpuset.
+ * Valid flags: REMOVE_CPULESS, ADAPT_MISC, ADAPT_IO.
+ * Fails with EINVAL (topology untouched) when not loaded, on unknown
+ * flags, or when cpuset would empty the topology; on reconnect failure
+ * the topology is reset to the default empty state (unrecoverable). */
+int
+hwloc_topology_restrict(struct hwloc_topology *topology, hwloc_const_cpuset_t cpuset, unsigned long flags)
+{
+  hwloc_bitmap_t droppedcpuset, droppednodeset;
+
+  if (!topology->is_loaded) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (flags & ~(HWLOC_RESTRICT_FLAG_REMOVE_CPULESS
+		|HWLOC_RESTRICT_FLAG_ADAPT_MISC|HWLOC_RESTRICT_FLAG_ADAPT_IO)) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  /* make sure we'll keep something in the topology */
+  if (!hwloc_bitmap_intersects(cpuset, topology->levels[0][0]->cpuset)) {
+    errno = EINVAL; /* easy failure, just don't touch the topology */
+    return -1;
+  }
+
+  droppedcpuset = hwloc_bitmap_alloc();
+  droppednodeset = hwloc_bitmap_alloc();
+
+  /* drop PUs and parents based on the reverse of set,
+   * and fill the droppednodeset when removing NUMA nodes to update parent nodesets
+   */
+  hwloc_bitmap_not(droppedcpuset, cpuset);
+  restrict_object_by_cpuset(topology, flags, &topology->levels[0][0], droppedcpuset, droppednodeset);
+
+  hwloc_bitmap_free(droppedcpuset);
+  hwloc_bitmap_free(droppednodeset);
+
+  if (hwloc_topology_reconnect(topology, 0) < 0)
+    goto out;
+
+  /* some objects may have disappeared, we need to update distances objs arrays */
+  hwloc_internal_distances_invalidate_cached_objs(topology);
+
+  /* recompute the derived attributes invalidated by object removal */
+  hwloc_filter_levels_keep_structure(topology);
+  hwloc_propagate_symmetric_subtree(topology, topology->levels[0][0]);
+  propagate_total_memory(topology->levels[0][0]);
+  return 0;
+
+ out:
+  /* unrecoverable failure, re-init the topology */
+   hwloc_topology_clear(topology);
+   hwloc_topology_setup_defaults(topology);
+   return -1;
+}
+
+/* Whether the topology describes the system we are running on. */
+int
+hwloc_topology_is_thissystem(struct hwloc_topology *topology)
+{
+  return topology->is_thissystem;
+}
+
+/* Number of normal levels in the topology (special I/O/Misc levels excluded). */
+unsigned
+hwloc_topology_get_depth(struct hwloc_topology *topology)
+{
+  return topology->nb_levels;
+}
+
+/* Return the feature-support structure; owned by the topology, do not free. */
+const struct hwloc_topology_support *
+hwloc_topology_get_support(struct hwloc_topology * topology)
+{
+  return &topology->support;
+}
+
+/* Attach an opaque caller-owned pointer to the topology (never dereferenced by hwloc). */
+void hwloc_topology_set_userdata(struct hwloc_topology * topology, const void *userdata)
+{
+  topology->userdata = (void *) userdata;
+}
+
+/* Retrieve the pointer previously stored with hwloc_topology_set_userdata(). */
+void * hwloc_topology_get_userdata(struct hwloc_topology * topology)
+{
+  return topology->userdata;
+}
+
+/****************
+ * Debug Checks *
+ ****************/
+
+static void
+hwloc__check_object(hwloc_topology_t topology, hwloc_obj_t obj);
+
+/* check the normal children under a parent object:
+ * sibling links, ranks, depth ordering, cpuset/nodeset partitioning,
+ * and ordering of children by first bit of complete_cpuset */
+static void
+hwloc__check_children(hwloc_topology_t topology, hwloc_obj_t parent)
+{
+  unsigned j;
+
+  if (!parent->arity) {
+    /* check whether that parent has no children for real */
+    assert(!parent->children);
+    assert(!parent->first_child);
+    assert(!parent->last_child);
+    return;
+  }
+  /* check whether that parent has children for real */
+  assert(parent->children);
+  assert(parent->first_child);
+  assert(parent->last_child);
+
+  /* sibling checks */
+  for(j=0; j<parent->arity; j++) {
+    hwloc_obj_t child = parent->children[j];
+    assert(child->parent == parent);
+    assert(child->sibling_rank == j);
+    if (j)
+      assert(child->prev_sibling == parent->children[j-1]);
+    else
+      assert(!child->prev_sibling);
+    if (j == parent->arity-1)
+      assert(!child->next_sibling);
+    else
+      assert(child->next_sibling == parent->children[j+1]);
+    assert(child->depth > parent->depth);
+    /* recurse */
+    hwloc__check_object(topology, child);
+  }
+  assert(parent->first_child == parent->children[0]);
+  assert(parent->last_child == parent->children[parent->arity-1]);
+
+  /* we already checked in the caller that objects have either all sets or none */
+
+  {
+    /* check that parent->cpuset == exclusive OR of children
+     * (can be wrong for complete_cpuset since disallowed/offline/unknown PUs can be removed)
+     */
+    hwloc_bitmap_t remaining_parent_cpuset = hwloc_bitmap_dup(parent->cpuset);
+    hwloc_bitmap_t remaining_parent_nodeset = hwloc_bitmap_dup(parent->nodeset);
+    for(j=0; j<parent->arity; j++) {
+      if (!parent->children[j]->cpuset)
+	continue;
+      /* check that child cpuset is included in the remainder of the parent */
+      assert(hwloc_bitmap_isincluded(parent->children[j]->cpuset, remaining_parent_cpuset));
+      hwloc_bitmap_andnot(remaining_parent_cpuset, remaining_parent_cpuset, parent->children[j]->cpuset);
+      /* check that child nodeset is included in the parent (multiple children may have the same nodeset when we're below a NUMA node) */
+      assert(hwloc_bitmap_isincluded(parent->children[j]->nodeset, parent->nodeset));
+      hwloc_bitmap_andnot(remaining_parent_nodeset, remaining_parent_nodeset, parent->children[j]->nodeset);
+    }
+
+    if (parent->type == HWLOC_OBJ_PU) {
+      /* if parent is a PU (with Misc children for instance),
+       * its os_index bit may remain in cpuset. */
+      assert(hwloc_bitmap_weight(remaining_parent_cpuset) == 1);
+      assert(hwloc_bitmap_first(remaining_parent_cpuset) == (int)parent->os_index);
+    } else {
+      /* nothing remains */
+      assert(hwloc_bitmap_iszero(remaining_parent_cpuset));
+    }
+    hwloc_bitmap_free(remaining_parent_cpuset);
+
+    if (parent->type == HWLOC_OBJ_NUMANODE)
+      /* if parent is a NUMA node, its os_index bit may remain.
+       * or it could already have been removed by a child. */
+      hwloc_bitmap_clr(remaining_parent_nodeset, parent->os_index);
+    if (parent->type == HWLOC_OBJ_PU) {
+      /* if parent is a PU (with Misc children for instance),
+       * one bit may remain in nodeset. */
+      assert(hwloc_bitmap_weight(remaining_parent_nodeset) == 1);
+    } else {
+      /* nothing remains */
+      assert(hwloc_bitmap_iszero(remaining_parent_nodeset));
+    }
+    hwloc_bitmap_free(remaining_parent_nodeset);
+  }
+
+  /* check that children complete_cpuset are properly ordered, empty ones may be anywhere
+   * (can be wrong for main cpuset since removed PUs can break the ordering).
+   */
+  {
+    int firstchild;
+    int prev_firstchild = -1; /* -1 works fine with first comparisons below */
+    for(j=0; j<parent->arity; j++) {
+      if (!parent->children[j]->complete_cpuset
+	  || hwloc_bitmap_iszero(parent->children[j]->complete_cpuset))
+	continue;
+
+      firstchild = hwloc_bitmap_first(parent->children[j]->complete_cpuset);
+      assert(prev_firstchild < firstchild);
+      prev_firstchild = firstchild;
+    }
+  }
+}
+
+/* check the I/O children list of a parent object:
+ * all children are I/O objects, sibling links and ranks are consistent,
+ * and io_arity matches the list length */
+static void
+hwloc__check_io_children(hwloc_topology_t topology, hwloc_obj_t parent)
+{
+  unsigned j;
+  hwloc_obj_t child, prev;
+
+  if (!parent->io_arity) {
+    /* check whether that parent has no children for real */
+    assert(!parent->io_first_child);
+    return;
+  }
+  /* check whether that parent has children for real */
+  assert(parent->io_first_child);
+
+  for(prev = NULL, child = parent->io_first_child, j = 0;
+      child;
+      prev = child, child = child->next_sibling, j++) {
+    /* all children must be I/O */
+    assert(hwloc_obj_type_is_io(child->type));
+
+    /* check siblings */
+    assert(child->parent == parent);
+    assert(child->sibling_rank == j);
+    if (prev)
+      assert(prev->next_sibling == child);
+    assert(child->prev_sibling == prev);
+    if (j == parent->io_arity-1)
+      assert(child->next_sibling == NULL);
+
+    /* only I/O and Misc children, recurse */
+    assert(!child->first_child);
+    hwloc__check_object(topology, child);
+  }
+  /* check arity */
+  assert(j == parent->io_arity);
+}
+
+/* check the Misc children list of a parent object:
+ * all children are Misc objects with no normal/I/O children of their
+ * own, sibling links and ranks are consistent, and misc_arity matches */
+static void
+hwloc__check_misc_children(hwloc_topology_t topology, hwloc_obj_t parent)
+{
+  unsigned j;
+  hwloc_obj_t child, prev;
+
+  if (!parent->misc_arity) {
+    /* check whether that parent has no children for real */
+    assert(!parent->misc_first_child);
+    return;
+  }
+  /* check whether that parent has children for real */
+  assert(parent->misc_first_child);
+
+  for(prev = NULL, child = parent->misc_first_child, j = 0;
+      child;
+      prev = child, child = child->next_sibling, j++) {
+    /* all children must be Misc */
+    assert(child->type == HWLOC_OBJ_MISC);
+
+    /* check siblings */
+    assert(child->parent == parent);
+    assert(child->sibling_rank == j);
+    if (prev)
+      assert(prev->next_sibling == child);
+    assert(child->prev_sibling == prev);
+    if (j == parent->misc_arity-1)
+      assert(child->next_sibling == NULL);
+
+    /* only Misc children, recurse */
+    assert(!child->first_child);
+    assert(!child->io_first_child);
+    hwloc__check_object(topology, child);
+  }
+  /* check arity */
+  assert(j == parent->misc_arity);
+}
+
+static void
+hwloc__check_object(hwloc_topology_t topology, hwloc_obj_t obj)
+{
+  /* check that sets and depth */
+  if (hwloc_obj_type_is_special(obj->type)) {
+    assert(!obj->cpuset);
+    if (obj->type == HWLOC_OBJ_BRIDGE)
+      assert(obj->depth == (unsigned) HWLOC_TYPE_DEPTH_BRIDGE);
+    else if (obj->type == HWLOC_OBJ_PCI_DEVICE)
+      assert(obj->depth == (unsigned) HWLOC_TYPE_DEPTH_PCI_DEVICE);
+    else if (obj->type == HWLOC_OBJ_OS_DEVICE)
+      assert(obj->depth == (unsigned) HWLOC_TYPE_DEPTH_OS_DEVICE);
+    else if (obj->type == HWLOC_OBJ_MISC)
+      assert(obj->depth == (unsigned) HWLOC_TYPE_DEPTH_MISC);
+  } else {
+    assert(obj->cpuset);
+    assert((int) obj->depth >= 0);
+  }
+
+  /* group depth cannot be -1 anymore in v2.0+ */
+  if (obj->type == HWLOC_OBJ_GROUP) {
+    assert(obj->attr->group.depth != (unsigned) -1);
+  }
+
+  /* there's other cpusets and nodesets if and only if there's a main cpuset */
+  assert(!!obj->cpuset == !!obj->complete_cpuset);
+  assert(!!obj->cpuset == !!obj->allowed_cpuset);
+  assert(!!obj->cpuset == !!obj->nodeset);
+  assert(!!obj->nodeset == !!obj->complete_nodeset);
+  assert(!!obj->nodeset == !!obj->allowed_nodeset);
+
+  /* check that complete/allowed/inline sets are larger than the main sets */
+  if (obj->cpuset) {
+    assert(hwloc_bitmap_isincluded(obj->cpuset, obj->complete_cpuset));
+    assert(hwloc_bitmap_isincluded(obj->nodeset, obj->complete_nodeset));
+    if (topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM) {
+      assert(hwloc_bitmap_isincluded(obj->allowed_cpuset, obj->cpuset));
+      assert(hwloc_bitmap_isincluded(obj->allowed_nodeset, obj->nodeset));
+    } else {
+      assert(hwloc_bitmap_isequal(obj->allowed_cpuset, obj->cpuset));
+      assert(hwloc_bitmap_isequal(obj->allowed_nodeset, obj->nodeset));
+    }
+  }
+
+  /* check cache type/depth vs type */
+
+  /* check children */
+  hwloc__check_children(topology, obj);
+  hwloc__check_io_children(topology, obj);
+  hwloc__check_misc_children(topology, obj);
+}
+
+static void
+hwloc__check_level(struct hwloc_topology *topology, unsigned depth,
+		   hwloc_obj_t first, hwloc_obj_t last)
+{
+  unsigned width = hwloc_get_nbobjs_by_depth(topology, depth);
+  struct hwloc_obj *prev = NULL;
+  hwloc_obj_t obj;
+  unsigned j;
+
+  /* check each object of the level */
+  for(j=0; j<width; j++) {
+    obj = hwloc_get_obj_by_depth(topology, depth, j);
+    /* check that the object is correctly placed horizontally and vertically */
+    assert(obj);
+    assert(obj->depth == depth);
+    assert(obj->logical_index == j);
+    /* check that all objects in the level have the same type */
+    if (prev) {
+      assert(hwloc_type_cmp(obj, prev) == HWLOC_OBJ_EQUAL);
+      assert(prev->next_cousin == obj);
+    }
+    assert(obj->prev_cousin == prev);
+
+    /* check that PUs and NUMA nodes have correct cpuset/nodeset */
+    if (obj->type == HWLOC_OBJ_PU) {
+      assert(hwloc_bitmap_weight(obj->complete_cpuset) == 1);
+      assert(hwloc_bitmap_first(obj->complete_cpuset) == (int) obj->os_index);
+    }
+    if (obj->type == HWLOC_OBJ_NUMANODE) {
+      assert(hwloc_bitmap_weight(obj->complete_nodeset) == 1);
+      assert(hwloc_bitmap_first(obj->complete_nodeset) == (int) obj->os_index);
+    }
+    prev = obj;
+  }
+  if (prev)
+    assert(prev->next_cousin == NULL);
+
+  if (width) {
+    /* check first object of the level */
+    obj = hwloc_get_obj_by_depth(topology, depth, 0);
+    assert(obj);
+    assert(!obj->prev_cousin);
+    /* check type */
+    assert(hwloc_get_depth_type(topology, depth) == obj->type);
+    assert(depth == (unsigned) hwloc_get_type_depth(topology, obj->type)
+	   || HWLOC_TYPE_DEPTH_MULTIPLE == hwloc_get_type_depth(topology, obj->type));
+    /* check last object of the level */
+    obj = hwloc_get_obj_by_depth(topology, depth, width-1);
+    assert(obj);
+    assert(!obj->next_cousin);
+  }
+
+  if ((int) depth < 0) {
+    assert(first == hwloc_get_obj_by_depth(topology, depth, 0));
+    assert(last == hwloc_get_obj_by_depth(topology, depth, width-1));
+  } else {
+    assert(!first);
+    assert(!last);
+  }
+
+  /* check last+1 object of the level */
+  obj = hwloc_get_obj_by_depth(topology, depth, width);
+  assert(!obj);
+}
+
+/* check a whole topology structure */
+void
+hwloc_topology_check(struct hwloc_topology *topology)
+{
+  struct hwloc_obj *obj;
+  hwloc_obj_type_t type;
+  unsigned i, j, depth;
+
+  /* make sure we can use ranges to check types */
+  HWLOC_BUILD_ASSERT(HWLOC_OBJ_L2CACHE == HWLOC_OBJ_L1CACHE + 1);
+  HWLOC_BUILD_ASSERT(HWLOC_OBJ_L3CACHE == HWLOC_OBJ_L2CACHE + 1);
+  HWLOC_BUILD_ASSERT(HWLOC_OBJ_L4CACHE == HWLOC_OBJ_L3CACHE + 1);
+  HWLOC_BUILD_ASSERT(HWLOC_OBJ_L5CACHE == HWLOC_OBJ_L4CACHE + 1);
+  HWLOC_BUILD_ASSERT(HWLOC_OBJ_L1ICACHE == HWLOC_OBJ_L5CACHE + 1);
+  HWLOC_BUILD_ASSERT(HWLOC_OBJ_L2ICACHE == HWLOC_OBJ_L1ICACHE + 1);
+  HWLOC_BUILD_ASSERT(HWLOC_OBJ_L3ICACHE == HWLOC_OBJ_L2ICACHE + 1);
+
+  HWLOC_BUILD_ASSERT(HWLOC_OBJ_MISC + 1 == HWLOC_OBJ_BRIDGE);
+  HWLOC_BUILD_ASSERT(HWLOC_OBJ_BRIDGE + 1 == HWLOC_OBJ_PCI_DEVICE);
+  HWLOC_BUILD_ASSERT(HWLOC_OBJ_PCI_DEVICE + 1 == HWLOC_OBJ_OS_DEVICE);
+
+  /* make sure order and priority arrays have the right size */
+  HWLOC_BUILD_ASSERT(sizeof(obj_type_order)/sizeof(*obj_type_order) == HWLOC_OBJ_TYPE_MAX);
+  HWLOC_BUILD_ASSERT(sizeof(obj_order_type)/sizeof(*obj_order_type) == HWLOC_OBJ_TYPE_MAX);
+  HWLOC_BUILD_ASSERT(sizeof(obj_type_priority)/sizeof(*obj_type_priority) == HWLOC_OBJ_TYPE_MAX);
+
+  /* make sure order arrays are coherent */
+  for(type=0; type<HWLOC_OBJ_TYPE_MAX; type++)
+    assert(obj_order_type[obj_type_order[type]] == type);
+  for(i=0; i<HWLOC_OBJ_TYPE_MAX; i++)
+    assert(obj_type_order[obj_order_type[i]] == i);
+
+  depth = hwloc_topology_get_depth(topology);
+
+  assert(!topology->modified);
+
+  /* check that last level is PU */
+  assert(hwloc_get_depth_type(topology, depth-1) == HWLOC_OBJ_PU);
+  assert(hwloc_get_nbobjs_by_depth(topology, depth-1) > 0);
+  for(j=0; j<hwloc_get_nbobjs_by_depth(topology, depth-1); j++) {
+    obj = hwloc_get_obj_by_depth(topology, depth-1, j);
+    assert(obj);
+    assert(obj->type == HWLOC_OBJ_PU);
+  }
+  /* check that other levels are not PU */
+  for(i=1; i<depth-1; i++)
+    assert(hwloc_get_depth_type(topology, i) != HWLOC_OBJ_PU);
+
+  /* check that we have a NUMA level */
+  j = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
+  assert(j < hwloc_topology_get_depth(topology));
+  assert(hwloc_get_depth_type(topology, j) == HWLOC_OBJ_NUMANODE);
+  /* check that other levels are not NUMA */
+  for(i=0; i<depth-1; i++)
+    if (i != j)
+      assert(hwloc_get_depth_type(topology, i) != HWLOC_OBJ_NUMANODE);
+
+  /* top-level specific checks */
+  assert(hwloc_get_nbobjs_by_depth(topology, 0) == 1);
+  obj = hwloc_get_root_obj(topology);
+  assert(obj);
+  assert(!obj->parent);
+  assert(obj->cpuset);
+  assert(!obj->depth);
+
+  /* check each level */
+  for(i=0; i<depth; i++)
+    hwloc__check_level(topology, i, NULL, NULL);
+  for(i=0; i<HWLOC_NR_SLEVELS; i++)
+    hwloc__check_level(topology, HWLOC_SLEVEL_TO_DEPTH(i), topology->slevels[i].first, topology->slevels[i].last);
+
+  /* recurse and check the tree of children, and type-specific checks */
+  hwloc__check_object(topology, obj);
+
+  /* TODO: check that gp_index are unique across the topology (and >0).
+   * at least check it's unique across each level.
+   * Should only occur if XML is invalid.
+   */
+}
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/hwloc/traversal.c b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/traversal.c
new file mode 100644
index 0000000000..a2a93ecf60
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/hwloc/traversal.c
@@ -0,0 +1,553 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2017 Inria.  All rights reserved.
+ * Copyright © 2009-2010 Université Bordeaux
+ * Copyright © 2009-2011 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+#include <private/autogen/config.h>
+#include <hwloc.h>
+#include <private/private.h>
+#include <private/misc.h>
+#include <private/debug.h>
+#ifdef HAVE_STRINGS_H
+#include <strings.h>
+#endif /* HAVE_STRINGS_H */
+
+int
+hwloc_get_type_depth (struct hwloc_topology *topology, hwloc_obj_type_t type)
+{
+  return topology->type_depth[type];
+}
+
+hwloc_obj_type_t
+hwloc_get_depth_type (hwloc_topology_t topology, unsigned depth)
+{
+  if (depth >= topology->nb_levels)
+    switch (depth) {
+    case HWLOC_TYPE_DEPTH_BRIDGE:
+      return HWLOC_OBJ_BRIDGE;
+    case HWLOC_TYPE_DEPTH_PCI_DEVICE:
+      return HWLOC_OBJ_PCI_DEVICE;
+    case HWLOC_TYPE_DEPTH_OS_DEVICE:
+      return HWLOC_OBJ_OS_DEVICE;
+    case HWLOC_TYPE_DEPTH_MISC:
+      return HWLOC_OBJ_MISC;
+    default:
+      return HWLOC_OBJ_TYPE_NONE;
+    }
+  return topology->levels[depth][0]->type;
+}
+
+unsigned
+hwloc_get_nbobjs_by_depth (struct hwloc_topology *topology, unsigned depth)
+{
+  if (depth >= topology->nb_levels) {
+    unsigned l = HWLOC_SLEVEL_FROM_DEPTH(depth);
+    if (l < HWLOC_NR_SLEVELS)
+      return topology->slevels[l].nbobjs;
+    else
+      return 0;
+  }
+  return topology->level_nbobjects[depth];
+}
+
+struct hwloc_obj *
+hwloc_get_obj_by_depth (struct hwloc_topology *topology, unsigned depth, unsigned idx)
+{
+  if (depth >= topology->nb_levels) {
+    unsigned l = HWLOC_SLEVEL_FROM_DEPTH(depth);
+    if (l < HWLOC_NR_SLEVELS)
+      return idx < topology->slevels[l].nbobjs ? topology->slevels[l].objs[idx] : NULL;
+    else
+      return NULL;
+  }
+  if (idx >= topology->level_nbobjects[depth])
+    return NULL;
+  return topology->levels[depth][idx];
+}
+
+unsigned hwloc_get_closest_objs (struct hwloc_topology *topology, struct hwloc_obj *src, struct hwloc_obj **objs, unsigned max)
+{
+  struct hwloc_obj *parent, *nextparent, **src_objs;
+  int i,src_nbobjects;
+  unsigned stored = 0;
+
+  if (!src->cpuset)
+    return 0;
+
+  src_nbobjects = topology->level_nbobjects[src->depth];
+  src_objs = topology->levels[src->depth];
+
+  parent = src;
+  while (stored < max) {
+    while (1) {
+      nextparent = parent->parent;
+      if (!nextparent)
+	goto out;
+      if (!hwloc_bitmap_isequal(parent->cpuset, nextparent->cpuset))
+	break;
+      parent = nextparent;
+    }
+
+    /* traverse src's objects and find those that are in nextparent and were not in parent */
+    for(i=0; i<src_nbobjects; i++) {
+      if (hwloc_bitmap_isincluded(src_objs[i]->cpuset, nextparent->cpuset)
+	  && !hwloc_bitmap_isincluded(src_objs[i]->cpuset, parent->cpuset)) {
+	objs[stored++] = src_objs[i];
+	if (stored == max)
+	  goto out;
+      }
+    }
+    parent = nextparent;
+  }
+
+ out:
+  return stored;
+}
+
+static int
+hwloc__get_largest_objs_inside_cpuset (struct hwloc_obj *current, hwloc_const_bitmap_t set,
+				       struct hwloc_obj ***res, int *max)
+{
+  int gotten = 0;
+  unsigned i;
+
+  /* the caller must ensure this */
+  if (*max <= 0)
+    return 0;
+
+  if (hwloc_bitmap_isequal(current->cpuset, set)) {
+    **res = current;
+    (*res)++;
+    (*max)--;
+    return 1;
+  }
+
+  for (i=0; i<current->arity; i++) {
+    hwloc_bitmap_t subset;
+    int ret;
+
+    /* split out the cpuset part corresponding to this child and see if there's anything to do */
+    if (!hwloc_bitmap_intersects(set,current->children[i]->cpuset))
+      continue;
+
+    subset = hwloc_bitmap_dup(set);
+    hwloc_bitmap_and(subset, subset, current->children[i]->cpuset);
+    ret = hwloc__get_largest_objs_inside_cpuset (current->children[i], subset, res, max);
+    gotten += ret;
+    hwloc_bitmap_free(subset);
+
+    /* if no more room to store remaining objects, return what we got so far */
+    if (!*max)
+      break;
+  }
+
+  return gotten;
+}
+
+int
+hwloc_get_largest_objs_inside_cpuset (struct hwloc_topology *topology, hwloc_const_bitmap_t set,
+				      struct hwloc_obj **objs, int max)
+{
+  struct hwloc_obj *current = topology->levels[0][0];
+
+  if (!hwloc_bitmap_isincluded(set, current->cpuset))
+    return -1;
+
+  if (max <= 0)
+    return 0;
+
+  return hwloc__get_largest_objs_inside_cpuset (current, set, &objs, &max);
+}
+
+const char *
+hwloc_type_name (hwloc_obj_type_t obj)
+{
+  switch (obj)
+    {
+    case HWLOC_OBJ_SYSTEM: return "System";
+    case HWLOC_OBJ_MACHINE: return "Machine";
+    case HWLOC_OBJ_MISC: return "Misc";
+    case HWLOC_OBJ_GROUP: return "Group";
+    case HWLOC_OBJ_NUMANODE: return "NUMANode";
+    case HWLOC_OBJ_PACKAGE: return "Package";
+    case HWLOC_OBJ_L1CACHE: return "L1Cache";
+    case HWLOC_OBJ_L2CACHE: return "L2Cache";
+    case HWLOC_OBJ_L3CACHE: return "L3Cache";
+    case HWLOC_OBJ_L4CACHE: return "L4Cache";
+    case HWLOC_OBJ_L5CACHE: return "L5Cache";
+    case HWLOC_OBJ_L1ICACHE: return "L1iCache";
+    case HWLOC_OBJ_L2ICACHE: return "L2iCache";
+    case HWLOC_OBJ_L3ICACHE: return "L3iCache";
+    case HWLOC_OBJ_CORE: return "Core";
+    case HWLOC_OBJ_BRIDGE: return "Bridge";
+    case HWLOC_OBJ_PCI_DEVICE: return "PCIDev";
+    case HWLOC_OBJ_OS_DEVICE: return "OSDev";
+    case HWLOC_OBJ_PU: return "PU";
+    default: return "Unknown";
+    }
+}
+
+int
+hwloc_type_sscanf(const char *string, hwloc_obj_type_t *typep,
+		  union hwloc_obj_attr_u *attrp, size_t attrsize)
+{
+  hwloc_obj_type_t type = (hwloc_obj_type_t) -1;
+  unsigned depthattr = (unsigned) -1;
+  hwloc_obj_cache_type_t cachetypeattr = (hwloc_obj_cache_type_t) -1; /* unspecified */
+  hwloc_obj_bridge_type_t ubtype = (hwloc_obj_bridge_type_t) -1;
+  hwloc_obj_osdev_type_t ostype = (hwloc_obj_osdev_type_t) -1;
+  char *end;
+
+  /* never match the ending \0 since we want to match things like core:2 too.
+   * just use hwloc_strncasecmp() everywhere.
+   */
+
+  /* types without a custom depth */
+  if (!hwloc_strncasecmp(string, "system", 2)) {
+    type = HWLOC_OBJ_SYSTEM;
+  } else if (!hwloc_strncasecmp(string, "machine", 2)) {
+    type = HWLOC_OBJ_MACHINE;
+  } else if (!hwloc_strncasecmp(string, "node", 2)
+	     || !hwloc_strncasecmp(string, "numa", 2)) { /* matches node and numanode */
+    type = HWLOC_OBJ_NUMANODE;
+  } else if (!hwloc_strncasecmp(string, "package", 2)
+	     || !hwloc_strncasecmp(string, "socket", 2)) { /* backward compat with v1.10 */
+    type = HWLOC_OBJ_PACKAGE;
+  } else if (!hwloc_strncasecmp(string, "core", 2)) {
+    type = HWLOC_OBJ_CORE;
+  } else if (!hwloc_strncasecmp(string, "pu", 2)) {
+    type = HWLOC_OBJ_PU;
+  } else if (!hwloc_strncasecmp(string, "misc", 4)) {
+    type = HWLOC_OBJ_MISC;
+
+  } else if (!hwloc_strncasecmp(string, "bridge", 4)) {
+    type = HWLOC_OBJ_BRIDGE;
+  } else if (!hwloc_strncasecmp(string, "hostbridge", 6)) {
+    type = HWLOC_OBJ_BRIDGE;
+    ubtype = HWLOC_OBJ_BRIDGE_HOST;
+  } else if (!hwloc_strncasecmp(string, "pcibridge", 5)) {
+    type = HWLOC_OBJ_BRIDGE;
+    ubtype = HWLOC_OBJ_BRIDGE_PCI;
+
+  } else if (!hwloc_strncasecmp(string, "pci", 3)) {
+    type = HWLOC_OBJ_PCI_DEVICE;
+
+  } else if (!hwloc_strncasecmp(string, "os", 2)) {
+    type = HWLOC_OBJ_OS_DEVICE;
+  } else if (!hwloc_strncasecmp(string, "bloc", 4)) {
+    type = HWLOC_OBJ_OS_DEVICE;
+    ostype = HWLOC_OBJ_OSDEV_BLOCK;
+  } else if (!hwloc_strncasecmp(string, "net", 3)) {
+    type = HWLOC_OBJ_OS_DEVICE;
+    ostype = HWLOC_OBJ_OSDEV_NETWORK;
+  } else if (!hwloc_strncasecmp(string, "openfab", 7)) {
+    type = HWLOC_OBJ_OS_DEVICE;
+    ostype = HWLOC_OBJ_OSDEV_OPENFABRICS;
+  } else if (!hwloc_strncasecmp(string, "dma", 3)) {
+    type = HWLOC_OBJ_OS_DEVICE;
+    ostype = HWLOC_OBJ_OSDEV_DMA;
+  } else if (!hwloc_strncasecmp(string, "gpu", 3)) {
+    type = HWLOC_OBJ_OS_DEVICE;
+    ostype = HWLOC_OBJ_OSDEV_GPU;
+  } else if (!hwloc_strncasecmp(string, "copro", 5)
+	     || !hwloc_strncasecmp(string, "co-pro", 6)) {
+    type = HWLOC_OBJ_OS_DEVICE;
+    ostype = HWLOC_OBJ_OSDEV_COPROC;
+
+  /* types with depthattr */
+  } else if ((string[0] == 'l' || string[0] == 'L') && string[1] >= '0' && string[1] <= '9') {
+    depthattr = strtol(string+1, &end, 10);
+    if (*end == 'i') {
+      if (depthattr >= 1 && depthattr <= 3) {
+	type = HWLOC_OBJ_L1ICACHE + depthattr-1;
+	cachetypeattr = HWLOC_OBJ_CACHE_INSTRUCTION;
+      } else
+	return -1;
+    } else {
+      if (depthattr >= 1 && depthattr <= 5) {
+	type = HWLOC_OBJ_L1CACHE + depthattr-1;
+	cachetypeattr = *end == 'd' ? HWLOC_OBJ_CACHE_DATA : HWLOC_OBJ_CACHE_UNIFIED;
+      } else
+	return -1;
+    }
+
+  } else if (!hwloc_strncasecmp(string, "group", 2)) {
+    size_t length;
+    type = HWLOC_OBJ_GROUP;
+    length = strcspn(string, "0123456789");
+    if (length <= 5 && !hwloc_strncasecmp(string, "group", length)
+	&& string[length] >= '0' && string[length] <= '9') {
+      depthattr = strtol(string+length, &end, 10);
+    }
+
+  } else
+    return -1;
+
+  *typep = type;
+  if (attrp) {
+    if (hwloc_obj_type_is_cache(type) && attrsize >= sizeof(attrp->cache)) {
+      attrp->cache.depth = depthattr;
+      attrp->cache.type = cachetypeattr;
+    } else if (type == HWLOC_OBJ_GROUP && attrsize >= sizeof(attrp->group)) {
+      attrp->group.depth = depthattr;
+    } else if (type == HWLOC_OBJ_BRIDGE && attrsize >= sizeof(attrp->bridge)) {
+      attrp->bridge.upstream_type = ubtype;
+      attrp->bridge.downstream_type = HWLOC_OBJ_BRIDGE_PCI; /* nothing else so far */
+    } else if (type == HWLOC_OBJ_OS_DEVICE && attrsize >= sizeof(attrp->osdev)) {
+      attrp->osdev.type = ostype;
+    }
+  }
+  return 0;
+}
+
+int
+hwloc_type_sscanf_as_depth(const char *string, hwloc_obj_type_t *typep,
+			   hwloc_topology_t topology, int *depthp)
+{
+  union hwloc_obj_attr_u attr;
+  hwloc_obj_type_t type;
+  int depth;
+  int err;
+
+  err = hwloc_type_sscanf(string, &type, &attr, sizeof(attr));
+  if (err < 0)
+    return err;
+
+  depth = hwloc_get_type_depth(topology, type);
+  if (type == HWLOC_OBJ_GROUP
+      && depth == HWLOC_TYPE_DEPTH_MULTIPLE
+      && attr.group.depth != (unsigned)-1) {
+    unsigned l;
+    depth = HWLOC_TYPE_DEPTH_UNKNOWN;
+    for(l=0; l<topology->nb_levels; l++) {
+      if (topology->levels[l][0]->type == HWLOC_OBJ_GROUP
+	  && topology->levels[l][0]->attr->group.depth == attr.group.depth) {
+	depth = l;
+	break;
+      }
+    }
+  }
+
+  if (typep)
+    *typep = type;
+  *depthp = (unsigned) depth;
+  return 0;
+}
+
+static const char* hwloc_obj_cache_type_letter(hwloc_obj_cache_type_t type)
+{
+  switch (type) {
+  case HWLOC_OBJ_CACHE_UNIFIED: return "";
+  case HWLOC_OBJ_CACHE_DATA: return "d";
+  case HWLOC_OBJ_CACHE_INSTRUCTION: return "i";
+  default: return "unknown";
+  }
+}
+
+int
+hwloc_obj_type_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t obj, int verbose)
+{
+  hwloc_obj_type_t type = obj->type;
+  switch (type) {
+  case HWLOC_OBJ_MISC:
+  case HWLOC_OBJ_SYSTEM:
+  case HWLOC_OBJ_MACHINE:
+  case HWLOC_OBJ_NUMANODE:
+  case HWLOC_OBJ_PACKAGE:
+  case HWLOC_OBJ_CORE:
+  case HWLOC_OBJ_PU:
+    return hwloc_snprintf(string, size, "%s", hwloc_type_name(type));
+  case HWLOC_OBJ_L1CACHE:
+  case HWLOC_OBJ_L2CACHE:
+  case HWLOC_OBJ_L3CACHE:
+  case HWLOC_OBJ_L4CACHE:
+  case HWLOC_OBJ_L5CACHE:
+  case HWLOC_OBJ_L1ICACHE:
+  case HWLOC_OBJ_L2ICACHE:
+  case HWLOC_OBJ_L3ICACHE:
+    return hwloc_snprintf(string, size, "L%u%s%s", obj->attr->cache.depth,
+			  hwloc_obj_cache_type_letter(obj->attr->cache.type),
+			  verbose ? "Cache" : "");
+  case HWLOC_OBJ_GROUP:
+    if (obj->attr->group.depth != (unsigned) -1)
+      return hwloc_snprintf(string, size, "%s%u", hwloc_type_name(type), obj->attr->group.depth);
+    else
+      return hwloc_snprintf(string, size, "%s", hwloc_type_name(type));
+  case HWLOC_OBJ_BRIDGE:
+    return snprintf(string, size, obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI ? "PCIBridge" : "HostBridge");
+  case HWLOC_OBJ_PCI_DEVICE:
+    return hwloc_snprintf(string, size, "PCI");
+  case HWLOC_OBJ_OS_DEVICE:
+    switch (obj->attr->osdev.type) {
+    case HWLOC_OBJ_OSDEV_BLOCK: return hwloc_snprintf(string, size, "Block");
+    case HWLOC_OBJ_OSDEV_NETWORK: return hwloc_snprintf(string, size, verbose ? "Network" : "Net");
+    case HWLOC_OBJ_OSDEV_OPENFABRICS: return hwloc_snprintf(string, size, "OpenFabrics");
+    case HWLOC_OBJ_OSDEV_DMA: return hwloc_snprintf(string, size, "DMA");
+    case HWLOC_OBJ_OSDEV_GPU: return hwloc_snprintf(string, size, "GPU");
+    case HWLOC_OBJ_OSDEV_COPROC: return hwloc_snprintf(string, size, verbose ? "Co-Processor" : "CoProc");
+    default:
+      if (size > 0)
+	*string = '\0';
+      return 0;
+    }
+    break;
+  default:
+    if (size > 0)
+      *string = '\0';
+    return 0;
+  }
+}
+
+int
+hwloc_obj_attr_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t obj, const char * separator, int verbose)
+{
+  const char *prefix = "";
+  char *tmp = string;
+  ssize_t tmplen = size;
+  int ret = 0;
+  int res;
+
+  /* make sure we output at least an empty string */
+  if (size)
+    *string = '\0';
+
+  /* print memory attributes */
+  res = 0;
+  if (verbose) {
+    if (obj->memory.local_memory)
+      res = hwloc_snprintf(tmp, tmplen, "%slocal=%lu%s%stotal=%lu%s",
+			   prefix,
+			   (unsigned long) hwloc_memory_size_printf_value(obj->memory.local_memory, verbose),
+			   hwloc_memory_size_printf_unit(obj->memory.total_memory, verbose),
+			   separator,
+			   (unsigned long) hwloc_memory_size_printf_value(obj->memory.total_memory, verbose),
+			   hwloc_memory_size_printf_unit(obj->memory.local_memory, verbose));
+    else if (obj->memory.total_memory)
+      res = hwloc_snprintf(tmp, tmplen, "%stotal=%lu%s",
+			   prefix,
+			   (unsigned long) hwloc_memory_size_printf_value(obj->memory.total_memory, verbose),
+			   hwloc_memory_size_printf_unit(obj->memory.total_memory, verbose));
+  } else {
+    if (obj->memory.local_memory)
+      res = hwloc_snprintf(tmp, tmplen, "%s%lu%s",
+			   prefix,
+			   (unsigned long) hwloc_memory_size_printf_value(obj->memory.local_memory, verbose),
+			   hwloc_memory_size_printf_unit(obj->memory.local_memory, verbose));
+  }
+  if (res < 0)
+    return -1;
+  ret += res;
+  if (ret > 0)
+    prefix = separator;
+  if (res >= tmplen)
+    res = tmplen>0 ? (int)tmplen - 1 : 0;
+  tmp += res;
+  tmplen -= res;
+
+  /* printf type-specific attributes */
+  res = 0;
+  switch (obj->type) {
+  case HWLOC_OBJ_L1CACHE:
+  case HWLOC_OBJ_L2CACHE:
+  case HWLOC_OBJ_L3CACHE:
+  case HWLOC_OBJ_L4CACHE:
+  case HWLOC_OBJ_L5CACHE:
+  case HWLOC_OBJ_L1ICACHE:
+  case HWLOC_OBJ_L2ICACHE:
+  case HWLOC_OBJ_L3ICACHE:
+    if (verbose) {
+      char assoc[32];
+      if (obj->attr->cache.associativity == -1)
+	snprintf(assoc, sizeof(assoc), "%sfully-associative", separator);
+      else if (obj->attr->cache.associativity == 0)
+	*assoc = '\0';
+      else
+	snprintf(assoc, sizeof(assoc), "%sways=%d", separator, obj->attr->cache.associativity);
+      res = hwloc_snprintf(tmp, tmplen, "%ssize=%lu%s%slinesize=%u%s",
+			   prefix,
+			   (unsigned long) hwloc_memory_size_printf_value(obj->attr->cache.size, verbose),
+			   hwloc_memory_size_printf_unit(obj->attr->cache.size, verbose),
+			   separator, obj->attr->cache.linesize,
+			   assoc);
+    } else
+      res = hwloc_snprintf(tmp, tmplen, "%s%lu%s",
+			   prefix,
+			   (unsigned long) hwloc_memory_size_printf_value(obj->attr->cache.size, verbose),
+			   hwloc_memory_size_printf_unit(obj->attr->cache.size, verbose));
+    break;
+  case HWLOC_OBJ_BRIDGE:
+    if (verbose) {
+      char up[128], down[64];
+      /* upstream is PCI or HOST */
+      if (obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI) {
+        char linkspeed[64]= "";
+        if (obj->attr->pcidev.linkspeed)
+          snprintf(linkspeed, sizeof(linkspeed), "%slink=%.2fGB/s", separator, obj->attr->pcidev.linkspeed);
+	snprintf(up, sizeof(up), "busid=%04x:%02x:%02x.%01x%sid=%04x:%04x%sclass=%04x(%s)%s",
+		 obj->attr->pcidev.domain, obj->attr->pcidev.bus, obj->attr->pcidev.dev, obj->attr->pcidev.func, separator,
+		 obj->attr->pcidev.vendor_id, obj->attr->pcidev.device_id, separator,
+		 obj->attr->pcidev.class_id, hwloc_pci_class_string(obj->attr->pcidev.class_id), linkspeed);
+      } else
+        *up = '\0';
+      /* downstream is_PCI */
+      snprintf(down, sizeof(down), "buses=%04x:[%02x-%02x]",
+	       obj->attr->bridge.downstream.pci.domain, obj->attr->bridge.downstream.pci.secondary_bus, obj->attr->bridge.downstream.pci.subordinate_bus);
+      if (*up)
+	res = snprintf(string, size, "%s%s%s", up, separator, down);
+      else
+	res = snprintf(string, size, "%s", down);
+    }
+    break;
+  case HWLOC_OBJ_PCI_DEVICE:
+    if (verbose) {
+      char linkspeed[64]= "";
+      if (obj->attr->pcidev.linkspeed)
+        snprintf(linkspeed, sizeof(linkspeed), "%slink=%.2fGB/s", separator, obj->attr->pcidev.linkspeed);
+      res = snprintf(string, size, "busid=%04x:%02x:%02x.%01x%sid=%04x:%04x%sclass=%04x(%s)%s",
+		     obj->attr->pcidev.domain, obj->attr->pcidev.bus, obj->attr->pcidev.dev, obj->attr->pcidev.func, separator,
+		     obj->attr->pcidev.vendor_id, obj->attr->pcidev.device_id, separator,
+		     obj->attr->pcidev.class_id, hwloc_pci_class_string(obj->attr->pcidev.class_id), linkspeed);
+    }
+    break;
+  default:
+    break;
+  }
+  if (res < 0)
+    return -1;
+  ret += res;
+  if (ret > 0)
+    prefix = separator;
+  if (res >= tmplen)
+    res = tmplen>0 ? (int)tmplen - 1 : 0;
+  tmp += res;
+  tmplen -= res;
+
+  /* printf infos */
+  if (verbose) {
+    unsigned i;
+    for(i=0; i<obj->infos_count; i++) {
+      if (strchr(obj->infos[i].value, ' '))
+	res = hwloc_snprintf(tmp, tmplen, "%s%s=\"%s\"",
+			     prefix,
+			     obj->infos[i].name, obj->infos[i].value);
+      else
+	res = hwloc_snprintf(tmp, tmplen, "%s%s=%s",
+			     prefix,
+			     obj->infos[i].name, obj->infos[i].value);
+      if (res < 0)
+        return -1;
+      ret += res;
+      if (res >= tmplen)
+        res = tmplen>0 ? (int)tmplen - 1 : 0;
+      tmp += res;
+      tmplen -= res;
+      if (ret > 0)
+        prefix = separator;
+    }
+  }
+
+  return ret;
+}
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/Makefile.am b/opal/mca/hwloc/hwloc2x/hwloc/include/Makefile.am
new file mode 100644
index 0000000000..6a64939238
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/Makefile.am
@@ -0,0 +1,65 @@
+# Copyright © 2009-2016 Inria.  All rights reserved.
+# Copyright © 2009-2010 Université Bordeaux
+# Copyright © 2009-2014 Cisco Systems, Inc.  All rights reserved.
+# Copyright © 2011      Oracle and/or its affiliates.  All rights reserved.
+# See COPYING in top-level directory.
+
+# Only install the headers if we're in standalone mode (meaning:
+# *don't* install the headers if we're in embedded mode).
+
+if HWLOC_BUILD_STANDALONE
+include_HEADERS = hwloc.h
+
+if BUILD_NETLOCSCOTCH
+include_HEADERS += netloc.h netlocscotch.h
+endif
+
+include_hwlocdir = $(includedir)/hwloc
+include_hwloc_HEADERS = \
+        hwloc/bitmap.h \
+        hwloc/helper.h \
+        hwloc/inlines.h \
+        hwloc/diff.h \
+        hwloc/distances.h \
+        hwloc/export.h \
+        hwloc/myriexpress.h \
+        hwloc/openfabrics-verbs.h \
+        hwloc/opencl.h \
+        hwloc/cuda.h \
+        hwloc/cudart.h \
+        hwloc/nvml.h \
+        hwloc/plugins.h \
+        hwloc/gl.h \
+        hwloc/intel-mic.h \
+        hwloc/rename.h \
+        hwloc/deprecated.h
+include_hwloc_autogendir = $(includedir)/hwloc/autogen
+nodist_include_hwloc_autogen_HEADERS = hwloc/autogen/config.h
+
+noinst_HEADERS = \
+        private/private.h \
+        private/debug.h \
+        private/misc.h \
+        private/xml.h \
+        private/components.h \
+        private/cpuid-x86.h \
+        private/netloc.h \
+        netloc/utarray.h \
+        netloc/uthash.h
+
+if HWLOC_HAVE_LINUX
+include_hwloc_HEADERS += \
+        hwloc/linux.h \
+        hwloc/linux-libnuma.h
+endif HWLOC_HAVE_LINUX
+
+if HWLOC_HAVE_SOLARIS
+include_hwloc_HEADERS += \
+       private/solaris-chiptype.h
+endif HWLOC_HAVE_SOLARIS
+
+if HWLOC_HAVE_SCHED_SETAFFINITY
+include_hwloc_HEADERS += hwloc/glibc-sched.h
+endif HWLOC_HAVE_SCHED_SETAFFINITY
+
+endif HWLOC_BUILD_STANDALONE
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc.h b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc.h
new file mode 100644
index 0000000000..dac14fa657
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc.h
@@ -0,0 +1,2184 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2017 Inria.  All rights reserved.
+ * Copyright © 2009-2012 Université Bordeaux
+ * Copyright © 2009-2011 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/*=====================================================================
+ *                 PLEASE GO READ THE DOCUMENTATION!
+ *         ------------------------------------------------
+ *               $tarball_directory/doc/doxygen-doc/
+ *                                or
+ *           http://www.open-mpi.org/projects/hwloc/doc/
+ *=====================================================================
+ *
+ * FAIR WARNING: Do NOT expect to be able to figure out all the
+ * subtleties of hwloc by simply reading function prototypes and
+ * constant descriptions here in this file.
+ *
+ * Hwloc has wonderful documentation in both PDF and HTML formats for
+ * your reading pleasure.  The formal documentation explains a LOT of
+ * hwloc-specific concepts, provides definitions, and discusses the
+ * "big picture" for many of the things that you'll find here in this
+ * header file.
+ *
+ * The PDF/HTML documentation was generated via Doxygen; much of what
+ * you'll see in there is also here in this file.  BUT THERE IS A LOT
+ * THAT IS IN THE PDF/HTML THAT IS ***NOT*** IN hwloc.h!
+ *
+ * There are entire paragraph-length descriptions, discussions, and
+ * pretty pictures to explain subtle corner cases, provide concrete
+ * examples, etc.
+ *
+ * Please, go read the documentation.  :-)
+ *
+ * Moreover there are several examples of hwloc use under doc/examples
+ * in the source tree.
+ *
+ *=====================================================================*/
+
+/** \file
+ * \brief The hwloc API.
+ *
+ * See hwloc/bitmap.h for bitmap specific macros.
+ * See hwloc/helper.h for high-level topology traversal helpers.
+ * See hwloc/inlines.h for the actual inline code of some functions below.
+ * See hwloc/export.h for exporting topologies to XML or to synthetic descriptions.
+ * See hwloc/distances.h for querying and modifying distances between objects.
+ * See hwloc/diff.h for manipulating differences between similar topologies.
+ */
+
+#ifndef HWLOC_H
+#define HWLOC_H
+
+#include <hwloc/autogen/config.h>
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+
+/*
+ * Symbol transforms
+ */
+#include <hwloc/rename.h>
+
+/*
+ * Bitmap definitions
+ */
+
+#include <hwloc/bitmap.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \defgroup hwlocality_api_version API version
+ * @{
+ */
+
+/** \brief Indicate at build time which hwloc API version is being used. */
+#define HWLOC_API_VERSION 0x00020000
+
+/** \brief Indicate at runtime which hwloc API version was used at build time.
+ *
+ * Should be ::HWLOC_API_VERSION if running on the same version.
+ */
+HWLOC_DECLSPEC unsigned hwloc_get_api_version(void);
+
+/** \brief Current component and plugin ABI version (see hwloc/plugins.h) */
+#define HWLOC_COMPONENT_ABI 5
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_object_sets Object Sets (hwloc_cpuset_t and hwloc_nodeset_t)
+ *
+ * Hwloc uses bitmaps to represent two distinct kinds of object sets:
+ * CPU sets (::hwloc_cpuset_t) and NUMA node sets (::hwloc_nodeset_t).
+ * These types are both typedefs to a common back end type
+ * (::hwloc_bitmap_t), and therefore all the hwloc bitmap functions
+ * are applicable to both ::hwloc_cpuset_t and ::hwloc_nodeset_t (see
+ * \ref hwlocality_bitmap).
+ *
+ * The rationale for having two different types is that even though
+ * the actions one wants to perform on these types are the same (e.g.,
+ * enable and disable individual items in the set/mask), they're used
+ * in very different contexts: one for specifying which processors to
+ * use and one for specifying which NUMA nodes to use.  Hence, the
+ * name difference is really just to reflect the intent of where the
+ * type is used.
+ *
+ * @{
+ */
+
+/** \brief A CPU set is a bitmap whose bits are set according to CPU
+ * physical OS indexes.
+ *
+ * It may be consulted and modified with the bitmap API as any
+ * ::hwloc_bitmap_t (see hwloc/bitmap.h).
+ *
+ * Each bit may be converted into a PU object using
+ * hwloc_get_pu_obj_by_os_index().
+ */
+typedef hwloc_bitmap_t hwloc_cpuset_t;
+/** \brief A non-modifiable ::hwloc_cpuset_t. */
+typedef hwloc_const_bitmap_t hwloc_const_cpuset_t;
+
+/** \brief A node set is a bitmap whose bits are set according to NUMA
+ * memory node physical OS indexes.
+ *
+ * It may be consulted and modified with the bitmap API as any
+ * ::hwloc_bitmap_t (see hwloc/bitmap.h).
+ * Each bit may be converted into a NUMA node object using
+ * hwloc_get_numanode_obj_by_os_index().
+ *
+ * When binding memory on a system without any NUMA node,
+ * the single main memory bank is considered as NUMA node #0.
+ *
+ * See also \ref hwlocality_helper_nodeset_convert.
+ */
+typedef hwloc_bitmap_t hwloc_nodeset_t;
+/** \brief A non-modifiable ::hwloc_nodeset_t.
+ */
+typedef hwloc_const_bitmap_t hwloc_const_nodeset_t;
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_object_types Object Types
+ * @{
+ */
+
+/** \brief Type of topology object.
+ *
+ * \note Do not rely on the ordering or completeness of the values as new ones
+ * may be defined in the future!  If you need to compare types, use
+ * hwloc_compare_types() instead.
+ */
+typedef enum {
+  HWLOC_OBJ_SYSTEM,	/**< \brief Whole system (may be a cluster of machines).
+  			  * The whole system that is accessible to hwloc.
+			  * That may comprise several machines in SSI systems.
+			  * This object type is currently unused in native backends.
+			  */
+  HWLOC_OBJ_MACHINE,	/**< \brief Machine.
+			  * The typical root object type.
+			  * A set of processors and memory with cache
+			  * coherency.
+			  */
+  HWLOC_OBJ_NUMANODE,	/**< \brief NUMA node.
+			  * A set of processors around memory which the
+			  * processors can directly access.
+			  *
			  * There is always at least one such object in the topology
+			  * even if the machine is not NUMA.
+			  */
+  HWLOC_OBJ_PACKAGE,	/**< \brief Physical package, what goes into a socket.
+			  * In the physical meaning, i.e. that you can add
+			  * or remove physically.
+			  */
+
+  HWLOC_OBJ_CORE,	/**< \brief Core.
+			  * A computation unit (may be shared by several
+			  * logical processors).
+			  */
+  HWLOC_OBJ_PU,		/**< \brief Processing Unit, or (Logical) Processor.
+			  * An execution unit (may share a core with some
+			  * other logical processors, e.g. in the case of
+			  * an SMT core).
+			  *
+			  * Objects of this kind are always reported and can
+			  * thus be used as fallback when others are not.
+			  */
+
+  HWLOC_OBJ_L1CACHE,	/**< \brief Level 1 Data (or Unified) Cache. */
+  HWLOC_OBJ_L2CACHE,	/**< \brief Level 2 Data (or Unified) Cache. */
+  HWLOC_OBJ_L3CACHE,	/**< \brief Level 3 Data (or Unified) Cache. */
+  HWLOC_OBJ_L4CACHE,	/**< \brief Level 4 Data (or Unified) Cache. */
+  HWLOC_OBJ_L5CACHE,	/**< \brief Level 5 Data (or Unified) Cache. */
+
+  HWLOC_OBJ_L1ICACHE,	/**< \brief Level 1 instruction Cache (filtered out by default). */
+  HWLOC_OBJ_L2ICACHE,	/**< \brief Level 2 instruction Cache (filtered out by default). */
+  HWLOC_OBJ_L3ICACHE,	/**< \brief Level 3 instruction Cache (filtered out by default). */
+
+  HWLOC_OBJ_GROUP,	/**< \brief Group objects.
+			  * Objects which do not fit in the above but are
+			  * detected by hwloc and are useful to take into
+			  * account for affinity. For instance, some operating systems
+			  * expose their arbitrary processors aggregation this
+			  * way.  And hwloc may insert such objects to group
+			  * NUMA nodes according to their distances.
+			  * See also \ref faq_groups.
+			  *
+			  * These objects are removed when they do not bring
+			  * any structure.
+			  */
+
+  HWLOC_OBJ_MISC,	/**< \brief Miscellaneous objects (filtered out by default).
+			  * Objects without particular meaning, that can e.g. be
+			  * added by the application for its own use, or by hwloc
+			  * for miscellaneous objects such as MemoryModule (DIMMs).
+			  * These objects are not listed in the main children list,
+			  * but rather in the dedicated misc children list.
+			  * Misc objects may only have Misc objects as children,
+			  * and those are in the dedicated misc children list as well.
+			  * Misc objects have NULL CPU and node sets.
+			  */
+
+  HWLOC_OBJ_BRIDGE,	/**< \brief Bridge (filtered out by default).
+			  * Any bridge that connects the host or an I/O bus,
+			  * to another I/O bus.
+			  * They are not added to the topology unless I/O discovery
+			  * is enabled with hwloc_topology_set_flags().
+			  * I/O objects are not listed in the main children list,
+			  * but rather in the dedicated io children list.
+			  * I/O objects have NULL CPU and node sets.
+			  */
+  HWLOC_OBJ_PCI_DEVICE,	/**< \brief PCI device (filtered out by default).
+			  * They are not added to the topology unless I/O discovery
+			  * is enabled with hwloc_topology_set_flags().
+			  * I/O objects are not listed in the main children list,
+			  * but rather in the dedicated io children list.
+			  * I/O objects have NULL CPU and node sets.
+			  */
+  HWLOC_OBJ_OS_DEVICE,	/**< \brief Operating system device (filtered out by default).
+			  * They are not added to the topology unless I/O discovery
+			  * is enabled with hwloc_topology_set_flags().
+			  * I/O objects are not listed in the main children list,
+			  * but rather in the dedicated io children list.
+			  * I/O objects have NULL CPU and node sets.
+			  */
+
+  HWLOC_OBJ_TYPE_MAX    /**< \private Sentinel value */
+} hwloc_obj_type_t;
+
+/** \brief Cache type. */
+typedef enum hwloc_obj_cache_type_e {
+  HWLOC_OBJ_CACHE_UNIFIED,      /**< \brief Unified cache. */
+  HWLOC_OBJ_CACHE_DATA,         /**< \brief Data cache. */
+  HWLOC_OBJ_CACHE_INSTRUCTION   /**< \brief Instruction cache (filtered out by default). */
+} hwloc_obj_cache_type_t;
+
+/** \brief Type of one side (upstream or downstream) of an I/O bridge. */
+typedef enum hwloc_obj_bridge_type_e {
+  HWLOC_OBJ_BRIDGE_HOST,	/**< \brief Host-side of a bridge, only possible upstream. */
+  HWLOC_OBJ_BRIDGE_PCI		/**< \brief PCI-side of a bridge. */
+} hwloc_obj_bridge_type_t;
+
+/** \brief Type of a OS device. */
+typedef enum hwloc_obj_osdev_type_e {
+  HWLOC_OBJ_OSDEV_BLOCK,	/**< \brief Operating system block device.
+				  * For instance "sda" on Linux. */
+  HWLOC_OBJ_OSDEV_GPU,		/**< \brief Operating system GPU device.
+				  * For instance ":0.0" for a GL display,
+				  * "card0" for a Linux DRM device. */
+  HWLOC_OBJ_OSDEV_NETWORK,	/**< \brief Operating system network device.
+				  * For instance the "eth0" interface on Linux. */
+  HWLOC_OBJ_OSDEV_OPENFABRICS,	/**< \brief Operating system openfabrics device.
+				  * For instance the "mlx4_0" InfiniBand HCA,
+				  * or "hfi1_0" Omni-Path interface on Linux. */
+  HWLOC_OBJ_OSDEV_DMA,		/**< \brief Operating system dma engine device.
+				  * For instance the "dma0chan0" DMA channel on Linux. */
+  HWLOC_OBJ_OSDEV_COPROC	/**< \brief Operating system co-processor device.
+				  * For instance "mic0" for a Xeon Phi (MIC) on Linux,
+				  * "opencl0d0" for a OpenCL device,
+				  * "cuda0" for a CUDA device. */
+} hwloc_obj_osdev_type_t;
+
+/** \brief Compare the depth of two object types
+ *
+ * Types shouldn't be compared as they are, since newer ones may be added in
+ * the future.  This function returns less than, equal to, or greater than zero
+ * respectively if \p type1 objects usually include \p type2 objects, are the
+ * same as \p type2 objects, or are included in \p type2 objects. If the types
+ * can not be compared (because neither is usually contained in the other),
+ * ::HWLOC_TYPE_UNORDERED is returned.  Object types containing CPUs can always
+ * be compared (usually, a system contains machines which contain nodes which
+ * contain packages which contain caches, which contain cores, which contain
+ * processors).
+ *
+ * \note ::HWLOC_OBJ_PU will always be the deepest.
+ * \note This does not mean that the actual topology will respect that order:
+ * e.g. as of today cores may also contain caches, and packages may also contain
+ * nodes. This is thus just to be seen as a fallback comparison method.
+ */
+HWLOC_DECLSPEC int hwloc_compare_types (hwloc_obj_type_t type1, hwloc_obj_type_t type2) __hwloc_attribute_const;
+
+enum hwloc_compare_types_e {
+    HWLOC_TYPE_UNORDERED = INT_MAX	/**< \brief Value returned by hwloc_compare_types() when types can not be compared. \hideinitializer */
+};
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_objects Object Structure and Attributes
+ * @{
+ */
+
+union hwloc_obj_attr_u;
+
+/** \brief Object memory */
+struct hwloc_obj_memory_s {
+  hwloc_uint64_t total_memory; /**< \brief Total memory (in bytes) in this object and its children */
+  hwloc_uint64_t local_memory; /**< \brief Local memory (in bytes) */
+
+  /** \brief Size of array \p page_types */
+  unsigned page_types_len;
+  /** \brief Array of local memory page types, \c NULL if no local memory and \p page_types is 0.
+   *
+   * The array is sorted by increasing \p size fields.
+   * It contains \p page_types_len slots.
+   */
+  struct hwloc_obj_memory_page_type_s {
+    hwloc_uint64_t size;	/**< \brief Size of pages */
+    hwloc_uint64_t count;	/**< \brief Number of pages of this size */
+  } * page_types;
+};
+
+/** \brief Structure of a topology object
+ *
+ * Applications must not modify any field except hwloc_obj.userdata.
+ */
+struct hwloc_obj {
+  /* physical information */
+  hwloc_obj_type_t type;		/**< \brief Type of object */
+  char *subtype;			/**< \brief Subtype string to better describe the type field. */
+
+  unsigned os_index;			/**< \brief OS-provided physical index number.
+					 * It is not guaranteed unique across the entire machine,
+					 * except for PUs and NUMA nodes.
+					 */
+  char *name;				/**< \brief Object-specific name if any.
+					 * Mostly used for identifying OS devices and Misc objects where
+					 * a name string is more useful than numerical indexes.
+					 */
+
+  struct hwloc_obj_memory_s memory;	/**< \brief Memory attributes */
+
+  union hwloc_obj_attr_u *attr;		/**< \brief Object type-specific Attributes,
+					 * may be \c NULL if no attribute value was found */
+
+  /* global position */
+  unsigned depth;			/**< \brief Vertical index in the hierarchy.
+					 * If the topology is symmetric, this is equal to the
+					 * parent depth plus one, and also equal to the number
+					 * of parent/child links from the root object to here.
+					 */
+  unsigned logical_index;		/**< \brief Horizontal index in the whole list of similar objects,
+					 * hence guaranteed unique across the entire machine.
+					 * Could be a "cousin_rank" since it's the rank within the "cousin" list below
+					 * Note that this index may change when restricting the topology
+					 * or when inserting a group.
+					 */
+
+  /* cousins are all objects of the same type (and depth) across the entire topology */
+  struct hwloc_obj *next_cousin;	/**< \brief Next object of same type and depth */
+  struct hwloc_obj *prev_cousin;	/**< \brief Previous object of same type and depth */
+
+  /* children of the same parent are siblings, even if they may have different type and depth */
+  struct hwloc_obj *parent;		/**< \brief Parent, \c NULL if root (system object) */
+  unsigned sibling_rank;		/**< \brief Index in parent's \c children[] array. Or the index in parent's I/O or Misc children list. */
+  struct hwloc_obj *next_sibling;	/**< \brief Next object below the same parent */
+  struct hwloc_obj *prev_sibling;	/**< \brief Previous object below the same parent */
+
+  /* children array below this object (except I/O and Misc children) */
+  unsigned arity;			/**< \brief Number of children */
+  struct hwloc_obj **children;		/**< \brief Children, \c children[0 .. arity -1] */
+  struct hwloc_obj *first_child;	/**< \brief First child */
+  struct hwloc_obj *last_child;		/**< \brief Last child */
+
+  int symmetric_subtree;		/**< \brief Set if the subtree of normal objects below this object is symmetric,
+					  * which means all children and their children have identical subtrees.
+					  * I/O and Misc children are ignored.
+					  *
+					  * If set in the topology root object, lstopo may export the topology
+					  * as a synthetic string.
+					  */
+
+  /* specific list of I/O children */
+  unsigned io_arity;			/**< \brief Number of I/O children */
+  struct hwloc_obj *io_first_child;	/**< \brief First I/O child */
+
+  /* specific list of Misc children */
+  unsigned misc_arity;			/**< \brief Number of Misc children */
+  struct hwloc_obj *misc_first_child;	/**< \brief First Misc child */
+
+  /* cpusets and nodesets */
+  hwloc_cpuset_t cpuset;		/**< \brief CPUs covered by this object
+                                          *
+                                          * This is the set of CPUs for which there are PU objects in the topology
+                                          * under this object, i.e. which are known to be physically contained in this
+                                          * object and known how (the children path between this object and the PU
+                                          * objects).
+                                          *
+                                          * If the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM configuration flag is set,
+                                          * some of these CPUs may not be allowed for binding, see allowed_cpuset.
+                                          *
+					  * \note All objects have non-NULL CPU and node sets except Misc and I/O objects.
+					  *
+                                          * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead.
+                                          */
+  hwloc_cpuset_t complete_cpuset;       /**< \brief The complete CPU set of logical processors of this object,
+                                          *
+                                          * This may include not only the same as the cpuset field, but also some CPUs for
                                          * which topology information is unknown or incomplete, some offline CPUs, and
+                                          * the CPUs that are ignored when the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM flag
+                                          * is not set.
+                                          * Thus no corresponding PU object may be found in the topology, because the
+                                          * precise position is undefined. It is however known that it would be somewhere
+                                          * under this object.
+                                          *
+                                          * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead.
+                                          */
+  hwloc_cpuset_t allowed_cpuset;        /**< \brief The CPU set of allowed logical processors
+                                          *
+                                          * This includes the CPUs contained in this object which are allowed for
+                                          * binding, i.e. passing them to the hwloc binding functions should not return
+                                          * permission errors.  This is usually restricted by administration rules.
+                                          *
+                                          * If the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM configuration flag is set,
+                                          * allowed_cpuset may be smaller than cpuset. Otherwise they are identical.
+                                          *
+                                          * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead.
+                                          */
+
+  hwloc_nodeset_t nodeset;              /**< \brief NUMA nodes covered by this object or containing this object
+                                          *
+                                          * This is the set of NUMA nodes for which there are NUMA node objects in the
+                                          * topology under or above this object, i.e. which are known to be physically
+                                          * contained in this object or containing it and known how (the children path
+                                          * between this object and the NUMA node objects).
+                                          *
+                                          * In the end, these nodes are those that are close to the current object.
+                                          *
+                                          * If the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM configuration flag is set,
+                                          * some of these nodes may not be allowed for allocation, see allowed_nodeset.
+                                          *
+                                          * If there are no NUMA nodes in the machine, all the memory is close to this
+                                          * object, so only the first bit may be set in \p nodeset.
+                                          *
+					  * \note All objects have non-NULL CPU and node sets except Misc and I/O objects.
+					  *
+                                          * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead.
+                                          */
+  hwloc_nodeset_t complete_nodeset;     /**< \brief The complete NUMA node set of this object,
+                                          *
+                                          * This may include not only the same as the nodeset field, but also some NUMA
+                                          * nodes for which topology information is unknown or incomplete, some offlines
+                                          * nodes, and the nodes that are ignored when the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM
+                                          * flag is not set.
+                                          * Thus no corresponding NUMA node object may be found in the topology, because the
+                                          * precise position is undefined. It is however known that it would be
+                                          * somewhere under this object.
+                                          *
+                                          * If there are no NUMA nodes in the machine, all the memory is close to this
+                                          * object, so only the first bit is set in \p complete_nodeset.
+                                          *
+                                          * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead.
+                                          */
+  hwloc_nodeset_t allowed_nodeset;      /**< \brief The set of allowed NUMA memory nodes
+                                          *
+                                          * This includes the NUMA memory nodes contained in this object which are
+                                          * allowed for memory allocation, i.e. passing them to NUMA node-directed
+                                          * memory allocation should not return permission errors. This is usually
+                                          * restricted by administration rules.
+                                          *
+                                          * If the ::HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM configuration flag is set,
+                                          * allowed_nodeset may be smaller than nodeset. Otherwise they are identical.
+                                          *
+                                          * If there are no NUMA nodes in the machine, all the memory is close to this
+                                          * object, so only the first bit may be set in \p allowed_nodeset.
+                                          *
+                                          * \note Its value must not be changed, hwloc_bitmap_dup() must be used instead.
+                                          */
+
+  struct hwloc_obj_info_s *infos;	/**< \brief Array of stringified info type=name. */
+  unsigned infos_count;			/**< \brief Size of infos array. */
+
+  /* misc */
+  void *userdata;			/**< \brief Application-given private data pointer,
+					 * initialized to \c NULL, use it as you wish.
+					 * See hwloc_topology_set_userdata_export_callback() in hwloc/export.h
+					 * if you wish to export this field to XML. */
+
+  hwloc_uint64_t gp_index;			/**< \brief Global persistent index.
+					 * Generated by hwloc, unique across the topology (contrary to os_index)
+					 * and persistent across topology changes (contrary to logical_index).
+					 * Mostly used internally, but could also be used by application to identify objects.
+					 */
+};
+/**
+ * \brief Convenience typedef; a pointer to a struct hwloc_obj.
+ */
+typedef struct hwloc_obj * hwloc_obj_t;
+
+/** \brief Object type-specific Attributes */
+union hwloc_obj_attr_u {
+  /** \brief Cache-specific Object Attributes */
+  struct hwloc_cache_attr_s {
+    hwloc_uint64_t size;		  /**< \brief Size of cache in bytes */
+    unsigned depth;			  /**< \brief Depth of cache (e.g., L1, L2, ...etc.) */
+    unsigned linesize;			  /**< \brief Cache-line size in bytes. 0 if unknown */
+    int associativity;			  /**< \brief Ways of associativity,
+    					    *  -1 if fully associative, 0 if unknown */
+    hwloc_obj_cache_type_t type;          /**< \brief Cache type */
+  } cache;
+  /** \brief Group-specific Object Attributes */
+  struct hwloc_group_attr_s {
+    unsigned depth;			  /**< \brief Depth of group object.
+					   *   It may change if intermediate Group objects are added. */
+    unsigned kind;			  /**< \brief Internally-used kind of group. */
+    unsigned subkind;			  /**< \brief Internally-used subkind to distinguish different levels of groups with same kind */
+  } group;
+  /** \brief PCI Device specific Object Attributes */
+  struct hwloc_pcidev_attr_s {
+    unsigned short domain;
+    unsigned char bus, dev, func;
+    unsigned short class_id;
+    unsigned short vendor_id, device_id, subvendor_id, subdevice_id;
+    unsigned char revision;
+    float linkspeed; /* in GB/s */
+  } pcidev;
  /** \brief Bridge specific Object Attributes */
+  struct hwloc_bridge_attr_s {
+    union {
+      struct hwloc_pcidev_attr_s pci;
+    } upstream;
+    hwloc_obj_bridge_type_t upstream_type;
+    union {
+      struct {
+	unsigned short domain;
+	unsigned char secondary_bus, subordinate_bus;
+      } pci;
+    } downstream;
+    hwloc_obj_bridge_type_t downstream_type;
+    unsigned depth;
+  } bridge;
+  /** \brief OS Device specific Object Attributes */
+  struct hwloc_osdev_attr_s {
+    hwloc_obj_osdev_type_t type;
+  } osdev;
+};
+
+/** \brief Object info
+ *
+ * \sa hwlocality_info_attr
+ */
+struct hwloc_obj_info_s {
+  char *name;	/**< \brief Info name */
+  char *value;	/**< \brief Info value */
+};
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_creation Topology Creation and Destruction
+ * @{
+ */
+
+struct hwloc_topology;
+/** \brief Topology context
+ *
+ * To be initialized with hwloc_topology_init() and built with hwloc_topology_load().
+ */
+typedef struct hwloc_topology * hwloc_topology_t;
+
+/** \brief Allocate a topology context.
+ *
+ * \param[out] topologyp is assigned a pointer to the new allocated context.
+ *
+ * \return 0 on success, -1 on error.
+ */
+HWLOC_DECLSPEC int hwloc_topology_init (hwloc_topology_t *topologyp);
+
+/** \brief Build the actual topology
+ *
+ * Build the actual topology once initialized with hwloc_topology_init() and
+ * tuned with \ref hwlocality_configuration and \ref hwlocality_setsource routines.
+ * No other routine may be called earlier using this topology context.
+ *
+ * \param topology is the topology to be loaded with objects.
+ *
+ * \return 0 on success, -1 on error.
+ *
+ * \note On failure, the topology is reinitialized. It should be either
+ * destroyed with hwloc_topology_destroy() or configured and loaded again.
+ *
+ * \note This function may be called only once per topology.
+ *
+ * \note The binding of the current thread or process may temporarily change
+ * during this call but it will be restored before it returns.
+ *
+ * \sa hwlocality_configuration and hwlocality_setsource
+ */
+HWLOC_DECLSPEC int hwloc_topology_load(hwloc_topology_t topology);
+
+/** \brief Terminate and free a topology context
+ *
+ * \param topology is the topology to be freed
+ */
+HWLOC_DECLSPEC void hwloc_topology_destroy (hwloc_topology_t topology);
+
+/** \brief Duplicate a topology.
+ *
+ * The entire topology structure as well as its objects
+ * are duplicated into a new one.
+ *
+ * This is useful for keeping a backup while modifying a topology.
+ *
+ * \note Object userdata is not duplicated since hwloc does not know what it points to.
+ * The objects of both old and new topologies will point to the same userdata.
+ */
+HWLOC_DECLSPEC int hwloc_topology_dup(hwloc_topology_t *newtopology, hwloc_topology_t oldtopology);
+
+/** \brief Run internal checks on a topology structure
+ *
+ * The program aborts if an inconsistency is detected in the given topology.
+ *
+ * \param topology is the topology to be checked
+ *
+ * \note This routine is only useful to developers.
+ *
+ * \note The input topology should have been previously loaded with
+ * hwloc_topology_load().
+ */
+HWLOC_DECLSPEC void hwloc_topology_check(hwloc_topology_t topology);
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_levels Object levels, depths and types
+ * @{
+ *
+ * Be sure to see the figure in \ref termsanddefs that shows a
+ * complete topology tree, including depths, child/sibling/cousin
+ * relationships, and an example of an asymmetric topology where one
+ * package has fewer caches than its peers.
+ */
+
+/** \brief Get the depth of the hierarchical tree of objects.
+ *
+ * This is the depth of ::HWLOC_OBJ_PU objects plus one.
+ *
+ * \note I/O and Misc objects are ignored when computing the depth
+ * of the tree (they are placed on special levels).
+ */
+HWLOC_DECLSPEC unsigned hwloc_topology_get_depth(hwloc_topology_t __hwloc_restrict topology) __hwloc_attribute_pure;
+
+/** \brief Returns the depth of objects of type \p type.
+ *
+ * If no object of this type is present on the underlying architecture, or if
+ * the OS doesn't provide this kind of information, the function returns
+ * ::HWLOC_TYPE_DEPTH_UNKNOWN.
+ *
+ * If type is absent but a similar type is acceptable, see also
+ * hwloc_get_type_or_below_depth() and hwloc_get_type_or_above_depth().
+ *
+ * If ::HWLOC_OBJ_GROUP is given, the function may return ::HWLOC_TYPE_DEPTH_MULTIPLE
+ * if multiple levels of Groups exist.
+ *
+ * If an I/O or Misc object type is given, the function returns a virtual value
+ * because these objects are stored in special levels that are not CPU-related.
+ * This virtual depth may be passed to other hwloc functions such as
+ * hwloc_get_obj_by_depth() but it should not be considered as an actual
+ * depth by the application. In particular, it should not be compared with
+ * any other object depth or with the entire topology depth.
+ *
+ * \sa hwloc_type_sscanf_as_depth() for returning the depth of objects
+ * whose type is given as a string.
+ */
+HWLOC_DECLSPEC int hwloc_get_type_depth (hwloc_topology_t topology, hwloc_obj_type_t type);
+
+enum hwloc_get_type_depth_e {
+    HWLOC_TYPE_DEPTH_UNKNOWN = -1,    /**< \brief No object of given type exists in the topology. \hideinitializer */
+    HWLOC_TYPE_DEPTH_MULTIPLE = -2,   /**< \brief Objects of given type exist at different depth in the topology (only for Groups). \hideinitializer */
+    HWLOC_TYPE_DEPTH_BRIDGE = -3,     /**< \brief Virtual depth for bridge object level. \hideinitializer */
+    HWLOC_TYPE_DEPTH_PCI_DEVICE = -4, /**< \brief Virtual depth for PCI device object level. \hideinitializer */
+    HWLOC_TYPE_DEPTH_OS_DEVICE = -5,  /**< \brief Virtual depth for software device object level. \hideinitializer */
+    HWLOC_TYPE_DEPTH_MISC = -6        /**< \brief Virtual depth for Misc object. \hideinitializer */
+};
+
+/** \brief Returns the depth of objects of type \p type or below
+ *
+ * If no object of this type is present on the underlying architecture, the
+ * function returns the depth of the first "present" object typically found
+ * inside \p type.
+ *
+ * May return ::HWLOC_TYPE_DEPTH_MULTIPLE for ::HWLOC_OBJ_GROUP just like
+ * hwloc_get_type_depth().
+ */
+static __hwloc_inline int
+hwloc_get_type_or_below_depth (hwloc_topology_t topology, hwloc_obj_type_t type) __hwloc_attribute_pure;
+
+/** \brief Returns the depth of objects of type \p type or above
+ *
+ * If no object of this type is present on the underlying architecture, the
+ * function returns the depth of the first "present" object typically
+ * containing \p type.
+ *
+ * May return ::HWLOC_TYPE_DEPTH_MULTIPLE for ::HWLOC_OBJ_GROUP just like
+ * hwloc_get_type_depth().
+ */
+static __hwloc_inline int
+hwloc_get_type_or_above_depth (hwloc_topology_t topology, hwloc_obj_type_t type) __hwloc_attribute_pure;
+
+/** \brief Returns the type of objects at depth \p depth.
+ *
+ * \p depth should be between 0 and hwloc_topology_get_depth()-1.
+ *
+ * \return (hwloc_obj_type_t)-1 if depth \p depth does not exist.
+ */
+HWLOC_DECLSPEC hwloc_obj_type_t hwloc_get_depth_type (hwloc_topology_t topology, unsigned depth) __hwloc_attribute_pure;
+
+/** \brief Returns the width of level at depth \p depth.
+ */
+HWLOC_DECLSPEC unsigned hwloc_get_nbobjs_by_depth (hwloc_topology_t topology, unsigned depth) __hwloc_attribute_pure;
+
+/** \brief Returns the width of level type \p type
+ *
+ * If no object for that type exists, 0 is returned.
+ * If there are several levels with objects of that type, -1 is returned.
+ */
+static __hwloc_inline int
+hwloc_get_nbobjs_by_type (hwloc_topology_t topology, hwloc_obj_type_t type) __hwloc_attribute_pure;
+
+/** \brief Returns the top-object of the topology-tree.
+ *
+ * Its type is typically ::HWLOC_OBJ_MACHINE but it could be different
+ * for complex topologies.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_root_obj (hwloc_topology_t topology) __hwloc_attribute_pure;
+
+/** \brief Returns the topology object at logical index \p idx from depth \p depth */
+HWLOC_DECLSPEC hwloc_obj_t hwloc_get_obj_by_depth (hwloc_topology_t topology, unsigned depth, unsigned idx) __hwloc_attribute_pure;
+
+/** \brief Returns the topology object at logical index \p idx with type \p type
+ *
+ * If no object for that type exists, \c NULL is returned.
+ * If there are several levels with objects of that type (::HWLOC_OBJ_GROUP),
+ * \c NULL is returned and the caller may fallback to hwloc_get_obj_by_depth().
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_obj_by_type (hwloc_topology_t topology, hwloc_obj_type_t type, unsigned idx) __hwloc_attribute_pure;
+
+/** \brief Returns the next object at depth \p depth.
+ *
+ * If \p prev is \c NULL, return the first object at depth \p depth.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_obj_by_depth (hwloc_topology_t topology, unsigned depth, hwloc_obj_t prev);
+
+/** \brief Returns the next object of type \p type.
+ *
+ * If \p prev is \c NULL, return the first object of type \p type.  If
+ * there are multiple or no depths for the given type, return \c NULL and
+ * let the caller fall back to hwloc_get_next_obj_by_depth().
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_obj_by_type (hwloc_topology_t topology, hwloc_obj_type_t type,
+			    hwloc_obj_t prev);
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_object_strings Converting between Object Types and Attributes, and Strings
+ * @{
+ */
+
+/** \brief Return a constant stringified object type.
+ *
+ * This function is the basic way to convert a generic type into a string.
+ * The output string may be parsed back by hwloc_type_sscanf().
+ *
+ * hwloc_obj_type_snprintf() may return a more precise output for a specific
+ * object, but it requires the caller to provide the output buffer.
+ *
+ * \note This function replaces the now deprecated hwloc_obj_type_string().
+ */
+HWLOC_DECLSPEC const char * hwloc_type_name (hwloc_obj_type_t type) __hwloc_attribute_const;
+
+/** \brief Stringify the type of a given topology object into a human-readable form.
+ *
+ * Contrary to hwloc_type_name(), this function includes object-specific
+ * attributes (such as the Group depth, the Bridge type, or OS device type)
+ * in the output, and it requires the caller to provide the output buffer.
+ *
+ * The output is guaranteed to be the same for all objects of a same topology level.
+ *
+ * If \p verbose is 1, longer type names are used, e.g. L1Cache instead of L1.
+ *
+ * The output string may be parsed back by hwloc_type_sscanf().
+ *
+ * If \p size is 0, \p string may safely be \c NULL.
+ *
+ * \return the number of characters that were actually written if not truncating,
+ * or that would have been written (not including the ending \\0).
+ */
+HWLOC_DECLSPEC int hwloc_obj_type_snprintf(char * __hwloc_restrict string, size_t size,
+					   hwloc_obj_t obj,
+					   int verbose);
+
+/** \brief Stringify the attributes of a given topology object into a human-readable form.
+ *
+ * Attribute values are separated by \p separator.
+ *
+ * Only the major attributes are printed in non-verbose mode.
+ *
+ * If \p size is 0, \p string may safely be \c NULL.
+ *
+ * \return the number of characters that were actually written if not truncating,
+ * or that would have been written (not including the ending \\0).
+ */
+HWLOC_DECLSPEC int hwloc_obj_attr_snprintf(char * __hwloc_restrict string, size_t size,
+					   hwloc_obj_t obj, const char * __hwloc_restrict separator,
+					   int verbose);
+
+/** \brief Return an object type and attributes from a type string.
+ *
+ * Convert strings such as "Package" or "L1iCache" into the corresponding types.
+ * Matching is case-insensitive, and only the first letters are actually
+ * required to match.
+ *
+ * The matched object type is set in \p typep (which cannot be \c NULL).
+ *
+ * Type-specific attributes, for instance Cache type, Cache depth, Group depth,
+ * Bridge type or OS Device type may be returned in \p attrp.
+ * Attributes that are not specified in the string (for instance "Group"
+ * without a depth, or "L2Cache" without a cache type) are set to -1.
+ *
+ * \p attrp is only filled if not \c NULL and if its size specified in \p attrsize
+ * is large enough. It should be at least as large as union hwloc_obj_attr_u.
+ *
+ * \return 0 if a type was correctly identified, otherwise -1.
+ *
+ * \note This function is guaranteed to match any string returned by
+ * hwloc_type_name() or hwloc_obj_type_snprintf().
+ *
+ * \note This is an extended version of the now deprecated hwloc_obj_type_sscanf().
+ */
+HWLOC_DECLSPEC int hwloc_type_sscanf(const char *string,
+				     hwloc_obj_type_t *typep,
+				     union hwloc_obj_attr_u *attrp, size_t attrsize);
+
+/** \brief Return an object type and its level depth from a type string.
+ *
+ * Convert strings such as "Package" or "L1iCache" into the corresponding types
+ * and return in \p depthp the depth of the corresponding level in the
+ * topology \p topology.
+ *
+ * If no object of this type is present on the underlying architecture,
+ * ::HWLOC_TYPE_DEPTH_UNKNOWN is returned.
+ *
+ * If multiple such levels exist (for instance if giving Group without any depth),
+ * the function may return ::HWLOC_TYPE_DEPTH_MULTIPLE instead.
+ *
+ * The matched object type is set in \p typep if \p typep is non \c NULL.
+ *
+ * \note This function is similar to hwloc_type_sscanf() followed
+ * by hwloc_get_type_depth() but it also automatically disambiguates
+ * multiple group levels etc.
+ *
+ * \note This function is guaranteed to match any string returned by
+ * hwloc_type_name() or hwloc_obj_type_snprintf().
+ */
+HWLOC_DECLSPEC int hwloc_type_sscanf_as_depth(const char *string,
+					      hwloc_obj_type_t *typep,
+					      hwloc_topology_t topology, int *depthp);
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_info_attr Consulting and Adding Key-Value Info Attributes
+ *
+ * @{
+ */
+
+/** \brief Search the given key name in object infos and return the corresponding value.
+ *
+ * If multiple keys match the given name, only the first one is returned.
+ *
+ * \return \c NULL if no such key exists.
+ */
+static __hwloc_inline const char *
+hwloc_obj_get_info_by_name(hwloc_obj_t obj, const char *name) __hwloc_attribute_pure;
+
+/** \brief Add the given info name and value pair to the given object.
+ *
+ * The info is appended to the existing info array even if another key
+ * with the same name already exists.
+ *
+ * The input strings are copied before being added in the object infos.
+ *
+ * \note This function may be used to enforce object colors in the lstopo
+ * graphical output by using "lstopoStyle" as a name and "Background=#rrggbb"
+ * as a value. See CUSTOM COLORS in the lstopo(1) manpage for details.
+ *
+ * \note If \p value contains some non-printable characters, they will
+ * be dropped when exporting to XML, see hwloc_topology_export_xml() in hwloc/export.h.
+ */
+HWLOC_DECLSPEC void hwloc_obj_add_info(hwloc_obj_t obj, const char *name, const char *value);
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_cpubinding CPU binding
+ *
+ * It is often useful to call hwloc_bitmap_singlify() first so that a single CPU
+ * remains in the set. This way, the process will not even migrate between
+ * different CPUs inside the given set.
+ * Some operating systems also only support that kind of binding.
+ *
+ * Some operating systems do not provide all hwloc-supported
+ * mechanisms to bind processes, threads, etc.
+ * hwloc_topology_get_support() may be used to query about the actual CPU
+ * binding support in the currently used operating system.
+ *
+ * When the requested binding operation is not available and the
+ * ::HWLOC_CPUBIND_STRICT flag was passed, the function returns -1.
+ * \p errno is set to \c ENOSYS when it is not possible to bind the requested kind of objects
+ * (processes/threads). \p errno is set to \c EXDEV when the requested cpuset
+ * can not be enforced (e.g. some systems only allow one CPU, and some
+ * other systems only allow one NUMA node).
+ *
+ * If ::HWLOC_CPUBIND_STRICT was not passed, the function may fail as well,
+ * or the operating system may use a slightly different operation
+ * (with side-effects, smaller binding set, etc.)
+ * when the requested operation is not exactly supported.
+ *
+ * The most portable version that should be preferred over the others,
+ * whenever possible, is the following one which just binds the current program,
+ * assuming it is single-threaded:
+ *
+ * \code
+ * hwloc_set_cpubind(topology, set, 0),
+ * \endcode
+ *
+ * If the program may be multithreaded, the following one should be preferred
+ * to only bind the current thread:
+ *
+ * \code
+ * hwloc_set_cpubind(topology, set, HWLOC_CPUBIND_THREAD),
+ * \endcode
+ *
+ * \sa Some example codes are available under doc/examples/ in the source tree.
+ *
+ * \note To unbind, just call the binding function with either a full cpuset or
+ * a cpuset equal to the system cpuset.
+ *
+ * \note On some operating systems, CPU binding may have effects on memory binding, see
+ * ::HWLOC_CPUBIND_NOMEMBIND
+ *
+ * \note Running lstopo \--top or hwloc-ps can be a very convenient tool to check
+ * how binding actually happened.
+ * @{
+ */
+
+/** \brief Process/Thread binding flags.
+ *
+ * These bit flags can be used to refine the binding policy.
+ *
+ * The default (0) is to bind the current process, assumed to be
+ * single-threaded, in a non-strict way.  This is the most portable
+ * way to bind as all operating systems usually provide it.
+ *
+ * \note Not all systems support all kinds of binding.  See the
+ * "Detailed Description" section of \ref hwlocality_cpubinding for a
+ * description of errors that can occur.
+ */
+typedef enum {
+  /** \brief Bind all threads of the current (possibly) multithreaded process.
+   * \hideinitializer */
+  HWLOC_CPUBIND_PROCESS = (1<<0),
+
+  /** \brief Bind current thread of current process.
+   * \hideinitializer */
+  HWLOC_CPUBIND_THREAD = (1<<1),
+
+  /** \brief Request for strict binding from the OS.
+   *
+   * By default, when the designated CPUs are all busy while other
+   * CPUs are idle, operating systems may execute the thread/process
+   * on those other CPUs instead of the designated CPUs, to let them
+   * progress anyway.  Strict binding means that the thread/process
+   * will _never_ execute on other cpus than the designated CPUs, even
+   * when those are busy with other tasks and other CPUs are idle.
+   *
+   * \note Depending on the operating system, strict binding may not
+   * be possible (e.g., the OS does not implement it) or not allowed
   * (e.g., for administrative reasons), and the function will fail
+   * in that case.
+   *
+   * When retrieving the binding of a process, this flag checks
   * whether all its threads actually have the same binding. If the
+   * flag is not given, the binding of each thread will be
+   * accumulated.
+   *
+   * \note This flag is meaningless when retrieving the binding of a
+   * thread.
+   * \hideinitializer
+   */
+  HWLOC_CPUBIND_STRICT = (1<<2),
+
+  /** \brief Avoid any effect on memory binding
+   *
+   * On some operating systems, some CPU binding function would also
+   * bind the memory on the corresponding NUMA node.  It is often not
+   * a problem for the application, but if it is, setting this flag
+   * will make hwloc avoid using OS functions that would also bind
+   * memory.  This will however reduce the support of CPU bindings,
+   * i.e. potentially return -1 with errno set to ENOSYS in some
+   * cases.
+   *
+   * This flag is only meaningful when used with functions that set
+   * the CPU binding.  It is ignored when used with functions that get
+   * CPU binding information.
+   * \hideinitializer
+   */
+  HWLOC_CPUBIND_NOMEMBIND = (1<<3)
+} hwloc_cpubind_flags_t;
+
+/** \brief Bind current process or thread on cpus given in physical bitmap \p set.
+ *
+ * \return -1 with errno set to ENOSYS if the action is not supported
+ * \return -1 with errno set to EXDEV if the binding cannot be enforced
+ */
+HWLOC_DECLSPEC int hwloc_set_cpubind(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags);
+
+/** \brief Get current process or thread binding.
+ *
+ * Writes into \p set the physical cpuset which the process or thread (according to \e
+ * flags) was last bound to.
+ */
+HWLOC_DECLSPEC int hwloc_get_cpubind(hwloc_topology_t topology, hwloc_cpuset_t set, int flags);
+
+/** \brief Bind a process \p pid on cpus given in physical bitmap \p set.
+ *
+ * \note \p hwloc_pid_t is \p pid_t on Unix platforms,
+ * and \p HANDLE on native Windows platforms.
+ *
+ * \note As a special case on Linux, if a tid (thread ID) is supplied
+ * instead of a pid (process ID) and ::HWLOC_CPUBIND_THREAD is passed in flags,
+ * the binding is applied to that specific thread.
+ *
+ * \note On non-Linux systems, ::HWLOC_CPUBIND_THREAD can not be used in \p flags.
+ */
+HWLOC_DECLSPEC int hwloc_set_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_cpuset_t set, int flags);
+
+/** \brief Get the current physical binding of process \p pid.
+ *
+ * \note \p hwloc_pid_t is \p pid_t on Unix platforms,
+ * and \p HANDLE on native Windows platforms.
+ *
+ * \note As a special case on Linux, if a tid (thread ID) is supplied
+ * instead of a pid (process ID) and HWLOC_CPUBIND_THREAD is passed in flags,
+ * the binding for that specific thread is returned.
+ *
+ * \note On non-Linux systems, HWLOC_CPUBIND_THREAD can not be used in \p flags.
+ */
+HWLOC_DECLSPEC int hwloc_get_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_cpuset_t set, int flags);
+
+#ifdef hwloc_thread_t
+/** \brief Bind a thread \p thread on cpus given in physical bitmap \p set.
+ *
+ * \note \p hwloc_thread_t is \p pthread_t on Unix platforms,
+ * and \p HANDLE on native Windows platforms.
+ *
+ * \note ::HWLOC_CPUBIND_PROCESS can not be used in \p flags.
+ */
+HWLOC_DECLSPEC int hwloc_set_thread_cpubind(hwloc_topology_t topology, hwloc_thread_t thread, hwloc_const_cpuset_t set, int flags);
+#endif
+
+#ifdef hwloc_thread_t
+/** \brief Get the current physical binding of thread \p tid.
+ *
+ * \note \p hwloc_thread_t is \p pthread_t on Unix platforms,
+ * and \p HANDLE on native Windows platforms.
+ *
+ * \note ::HWLOC_CPUBIND_PROCESS can not be used in \p flags.
+ */
+HWLOC_DECLSPEC int hwloc_get_thread_cpubind(hwloc_topology_t topology, hwloc_thread_t thread, hwloc_cpuset_t set, int flags);
+#endif
+
+/** \brief Get the last physical CPU where the current process or thread ran.
+ *
+ * The operating system may move some tasks from one processor
+ * to another at any time according to their binding,
+ * so this function may return something that is already
+ * outdated.
+ *
+ * \p flags can include either ::HWLOC_CPUBIND_PROCESS or ::HWLOC_CPUBIND_THREAD to
+ * specify whether the query should be for the whole process (union of all CPUs
+ * on which all threads are running), or only the current thread. If the
+ * process is single-threaded, flags can be set to zero to let hwloc use
+ * whichever method is available on the underlying OS.
+ */
+HWLOC_DECLSPEC int hwloc_get_last_cpu_location(hwloc_topology_t topology, hwloc_cpuset_t set, int flags);
+
+/** \brief Get the last physical CPU where a process ran.
+ *
+ * The operating system may move some tasks from one processor
+ * to another at any time according to their binding,
+ * so this function may return something that is already
+ * outdated.
+ *
+ * \note \p hwloc_pid_t is \p pid_t on Unix platforms,
+ * and \p HANDLE on native Windows platforms.
+ *
+ * \note As a special case on Linux, if a tid (thread ID) is supplied
+ * instead of a pid (process ID) and ::HWLOC_CPUBIND_THREAD is passed in flags,
+ * the last CPU location of that specific thread is returned.
+ *
+ * \note On non-Linux systems, ::HWLOC_CPUBIND_THREAD can not be used in \p flags.
+ */
+HWLOC_DECLSPEC int hwloc_get_proc_last_cpu_location(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_cpuset_t set, int flags);
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_membinding Memory binding
+ *
+ * Memory binding can be done three ways:
+ *
+ * - explicit memory allocation thanks to hwloc_alloc_membind() and friends:
+ *   the binding will have effect on the memory allocated by these functions.
+ * - implicit memory binding through binding policy: hwloc_set_membind() and
+ *   friends only define the current policy of the process, which will be
+ *   applied to the subsequent calls to malloc() and friends.
+ * - migration of existing memory ranges, thanks to hwloc_set_area_membind()
+ *   and friends, which move already-allocated data.
+ *
+ * Not all operating systems support all three ways.
+ * hwloc_topology_get_support() may be used to query about the actual memory
+ * binding support in the currently used operating system.
+ *
+ * When the requested binding operation is not available and the
+ * ::HWLOC_MEMBIND_STRICT flag was passed, the function returns -1.
+ * \p errno will be set to \c ENOSYS when the system does not support
+ * the specified action or policy
+ * (e.g., some systems only allow binding memory on a per-thread
+ * basis, whereas other systems only allow binding memory for all
+ * threads in a process).
+ * \p errno will be set to EXDEV when the requested set can not be enforced
+ * (e.g., some systems only allow binding memory to a single NUMA node).
+ *
+ * If ::HWLOC_MEMBIND_STRICT was not passed, the function may fail as well,
+ * or the operating system may use a slightly different operation
+ * (with side-effects, smaller binding set, etc.)
+ * when the requested operation is not exactly supported.
+ *
+ * The most portable form that should be preferred over the others
+ * whenever possible is as follows.
+ * It allocates some memory hopefully bound to the specified set.
+ * To do so, hwloc will possibly have to change the current memory
+ * binding policy in order to actually get the memory bound, if the OS
+ * does not provide any other way to simply allocate bound memory
+ * without changing the policy for all allocations. That is the
+ * difference with hwloc_alloc_membind(), which will never change the
+ * current memory binding policy.
+ *
+ * \code
+ * hwloc_alloc_membind_policy(topology, size, set,
+ *                            HWLOC_MEMBIND_BIND, 0);
+ * \endcode
+ *
+ * Each hwloc memory binding function takes a bitmap argument that
+ * is a CPU set by default, or a NUMA memory node set if the flag
+ * ::HWLOC_MEMBIND_BYNODESET is specified.
+ * See \ref hwlocality_object_sets and \ref hwlocality_bitmap for a
+ * discussion of CPU sets and NUMA memory node sets.
+ * It is also possible to convert between CPU set and node set using
+ * hwloc_cpuset_to_nodeset() or hwloc_cpuset_from_nodeset().
+ *
+ * Memory binding by CPU set cannot work for CPU-less NUMA memory nodes.
+ * Binding by nodeset should therefore be preferred whenever possible.
+ *
+ * \sa Some example codes are available under doc/examples/ in the source tree.
+ *
+ * \note On some operating systems, memory binding affects the CPU
+ * binding; see ::HWLOC_MEMBIND_NOCPUBIND
+ * @{
+ */
+
+/** \brief Memory binding policy.
+ *
+ * These constants can be used to choose the binding policy.  Only one policy can
+ * be used at a time (i.e., the values cannot be OR'ed together).
+ *
+ * Not all systems support all kinds of binding.
+ * hwloc_topology_get_support() may be used to query about the actual memory
+ * binding policy support in the currently used operating system.
+ * See the "Detailed Description" section of \ref hwlocality_membinding
+ * for a description of errors that can occur.
+ */
+typedef enum {
+  /** \brief Reset the memory allocation policy to the system default.
+   * Depending on the operating system, this may correspond to
+   * ::HWLOC_MEMBIND_FIRSTTOUCH (Linux),
+   * or ::HWLOC_MEMBIND_BIND (AIX, HP-UX, Solaris, Windows).
+   * This policy is never returned by get membind functions when running
+   * on normal machines.
+   * It is only returned when binding hooks are empty because the topology
+   * was loaded from XML, or HWLOC_THISSYSTEM=0, etc.
+   * \hideinitializer */
+  HWLOC_MEMBIND_DEFAULT =	0,
+
+  /** \brief Allocate memory
+   * but do not immediately bind it to a specific locality. Instead,
+   * each page in the allocation is bound only when it is first
+   * touched. Pages are individually bound to the local NUMA node of
+   * the first thread that touches it. If there is not enough memory
+   * on the node, allocation may be done in the specified nodes
+   * before allocating on other nodes.
+   * \hideinitializer */
+  HWLOC_MEMBIND_FIRSTTOUCH =	1,
+
+  /** \brief Allocate memory on the specified nodes.
+   * \hideinitializer */
+  HWLOC_MEMBIND_BIND =		2,
+
+  /** \brief Allocate memory on the given nodes in an interleaved
+   * / round-robin manner.  The precise layout of the memory across
+   * multiple NUMA nodes is OS/system specific. Interleaving can be
+   * useful when threads distributed across the specified NUMA nodes
+   * will all be accessing the whole memory range concurrently, since
+   * the interleave will then balance the memory references.
+   * \hideinitializer */
+  HWLOC_MEMBIND_INTERLEAVE =	3,
+
+  /** \brief For each page bound with this policy, by next time
+   * it is touched (and next time only), it is moved from its current
+   * location to the local NUMA node of the thread where the memory
+   * reference occurred (if it needs to be moved at all).
+   * \hideinitializer */
+  HWLOC_MEMBIND_NEXTTOUCH =	4,
+
+  /** \brief Returned by get_membind() functions when multiple
+   * threads or parts of a memory area have differing memory binding
+   * policies.
+   * \hideinitializer */
+  HWLOC_MEMBIND_MIXED = -1
+} hwloc_membind_policy_t;
+
+/** \brief Memory binding flags.
+ *
+ * These flags can be used to refine the binding policy.
+ * All flags can be logically OR'ed together with the exception of
+ * ::HWLOC_MEMBIND_PROCESS and ::HWLOC_MEMBIND_THREAD;
+ * these two flags are mutually exclusive.
+ *
+ * Not all systems support all kinds of binding.
+ * hwloc_topology_get_support() may be used to query about the actual memory
+ * binding support in the currently used operating system.
+ * See the "Detailed Description" section of \ref hwlocality_membinding
+ * for a description of errors that can occur.
+ */
+typedef enum {
+  /** \brief Set policy for all threads of the specified (possibly
+   * multithreaded) process.  This flag is mutually exclusive with
+   * ::HWLOC_MEMBIND_THREAD.
+   * \hideinitializer */
+  HWLOC_MEMBIND_PROCESS =       (1<<0),
+
+ /** \brief Set policy for a specific thread of the current process.
+  * This flag is mutually exclusive with ::HWLOC_MEMBIND_PROCESS.
+  * \hideinitializer */
+  HWLOC_MEMBIND_THREAD =        (1<<1),
+
+ /** Request strict binding from the OS.  The function will fail if
+  * the binding can not be guaranteed / completely enforced.
+  *
+  * This flag has slightly different meanings depending on which
+  * function it is used with.
+  * \hideinitializer  */
+  HWLOC_MEMBIND_STRICT =        (1<<2),
+
+ /** \brief Migrate existing allocated memory.  If the memory cannot
+  * be migrated and the ::HWLOC_MEMBIND_STRICT flag is passed, an error
+  * will be returned.
+  * \hideinitializer  */
+  HWLOC_MEMBIND_MIGRATE =       (1<<3),
+
+  /** \brief Avoid any effect on CPU binding.
+   *
+   * On some operating systems, some underlying memory binding
+   * functions also bind the application to the corresponding CPU(s).
+   * Using this flag will cause hwloc to avoid using OS functions that
+   * could potentially affect CPU bindings.  Note, however, that using
+   * NOCPUBIND may reduce hwloc's overall memory binding
+   * support. Specifically: some of hwloc's memory binding functions
+   * may fail with errno set to ENOSYS when used with NOCPUBIND.
+   * \hideinitializer
+   */
+  HWLOC_MEMBIND_NOCPUBIND =     (1<<4),
+
+  /** \brief Consider the bitmap argument as a nodeset.
+   *
+   * The bitmap argument is considered a nodeset if this flag is given,
+   * or a cpuset otherwise by default.
+   *
+   * Memory binding by CPU set cannot work for CPU-less NUMA memory nodes.
+   * Binding by nodeset should therefore be preferred whenever possible.
+   */
+  HWLOC_MEMBIND_BYNODESET =     (1<<5)
+} hwloc_membind_flags_t;
+
+/** \brief Set the default memory binding policy of the current
+ * process or thread to prefer the NUMA node(s) specified by \p set
+ *
+ * If neither ::HWLOC_MEMBIND_PROCESS nor ::HWLOC_MEMBIND_THREAD is
+ * specified, the current process is assumed to be single-threaded.
+ * This is the most portable form as it permits hwloc to use either
+ * process-based OS functions or thread-based OS functions, depending
+ * on which are available.
+ *
+ * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset.
+ * Otherwise it's a cpuset.
+ *
+ * \return -1 with errno set to ENOSYS if the action is not supported
+ * \return -1 with errno set to EXDEV if the binding cannot be enforced
+ */
+HWLOC_DECLSPEC int hwloc_set_membind(hwloc_topology_t topology, hwloc_const_bitmap_t set, hwloc_membind_policy_t policy, int flags);
+
+/** \brief Query the default memory binding policy and physical locality of the
+ * current process or thread.
+ *
+ * This function has two output parameters: \p set and \p policy.
+ * The values returned in these parameters depend on both the \p flags
+ * passed in and the current memory binding policies and nodesets in
+ * the queried target.
+ *
+ * Passing the ::HWLOC_MEMBIND_PROCESS flag specifies that the query
+ * target is the current policies and nodesets for all the threads in
+ * the current process.  Passing ::HWLOC_MEMBIND_THREAD specifies that
+ * the query target is the current policy and nodeset for only the
+ * thread invoking this function.
+ *
+ * If neither of these flags are passed (which is the most portable
+ * method), the process is assumed to be single threaded.  This allows
+ * hwloc to use either process-based OS functions or thread-based OS
+ * functions, depending on which are available.
+ *
+ * ::HWLOC_MEMBIND_STRICT is only meaningful when ::HWLOC_MEMBIND_PROCESS
+ * is also specified.  In this case, hwloc will check the default
+ * memory policies and nodesets for all threads in the process.  If
+ * they are not identical, -1 is returned and errno is set to EXDEV.
+ * If they are identical, the values are returned in \p set and \p
+ * policy.
+ *
+ * Otherwise, if ::HWLOC_MEMBIND_PROCESS is specified (and
+ * ::HWLOC_MEMBIND_STRICT is \em not specified), the default set
+ * from each thread is logically OR'ed together.
+ * If all threads' default policies are the same, \p policy is set to
+ * that policy.  If they are different, \p policy is set to
+ * ::HWLOC_MEMBIND_MIXED.
+ *
+ * In the ::HWLOC_MEMBIND_THREAD case (or when neither
+ * ::HWLOC_MEMBIND_PROCESS nor ::HWLOC_MEMBIND_THREAD is specified), there
+ * is only one set and policy; they are returned in \p set and
+ * \p policy, respectively.
+ *
+ * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset.
+ * Otherwise it's a cpuset.
+ *
+ * If any other flags are specified, -1 is returned and errno is set
+ * to EINVAL.
+ */
+HWLOC_DECLSPEC int hwloc_get_membind(hwloc_topology_t topology, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags);
+
+/** \brief Set the default memory binding policy of the specified
+ * process to prefer the NUMA node(s) specified by \p set
+ *
+ * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset.
+ * Otherwise it's a cpuset.
+ *
+ * \return -1 with errno set to ENOSYS if the action is not supported
+ * \return -1 with errno set to EXDEV if the binding cannot be enforced
+ *
+ * \note \p hwloc_pid_t is \p pid_t on Unix platforms,
+ * and \p HANDLE on native Windows platforms.
+ */
+HWLOC_DECLSPEC int hwloc_set_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_bitmap_t set, hwloc_membind_policy_t policy, int flags);
+
+/** \brief Query the default memory binding policy and physical locality of the
+ * specified process.
+ *
+ * This function has two output parameters: \p set and \p policy.
+ * The values returned in these parameters depend on both the \p flags
+ * passed in and the current memory binding policies and nodesets in
+ * the queried target.
+ *
+ * Passing the ::HWLOC_MEMBIND_PROCESS flag specifies that the query
+ * target is the current policies and nodesets for all the threads in
+ * the specified process.  If ::HWLOC_MEMBIND_PROCESS is not specified
+ * (which is the most portable method), the process is assumed to be
+ * single threaded.  This allows hwloc to use either process-based OS
+ * functions or thread-based OS functions, depending on which are
+ * available.
+ *
+ * Note that it does not make sense to pass ::HWLOC_MEMBIND_THREAD to
+ * this function.
+ *
+ * If ::HWLOC_MEMBIND_STRICT is specified, hwloc will check the default
+ * memory policies and nodesets for all threads in the specified
+ * process.  If they are not identical, -1 is returned and errno is
+ * set to EXDEV.  If they are identical, the values are returned in \p
+ * set and \p policy.
+ *
+ * Otherwise, \p set is set to the logical OR of all threads'
+ * default set.  If all threads' default policies
+ * are the same, \p policy is set to that policy.  If they are
+ * different, \p policy is set to ::HWLOC_MEMBIND_MIXED.
+ *
+ * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset.
+ * Otherwise it's a cpuset.
+ *
+ * If any other flags are specified, -1 is returned and errno is set
+ * to EINVAL.
+ *
+ * \note \p hwloc_pid_t is \p pid_t on Unix platforms,
+ * and \p HANDLE on native Windows platforms.
+ */
+HWLOC_DECLSPEC int hwloc_get_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags);
+
+/** \brief Bind the already-allocated memory identified by (addr, len)
+ * to the NUMA node(s) specified by \p set.
+ *
+ * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset.
+ * Otherwise it's a cpuset.
+ *
+ * \return 0 if \p len is 0.
+ * \return -1 with errno set to ENOSYS if the action is not supported
+ * \return -1 with errno set to EXDEV if the binding cannot be enforced
+ */
+HWLOC_DECLSPEC int hwloc_set_area_membind(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_bitmap_t set, hwloc_membind_policy_t policy, int flags);
+
+/** \brief Query the CPUs near the physical NUMA node(s) and binding policy of
+ * the memory identified by (\p addr, \p len ).
+ *
+ * This function has two output parameters: \p set and \p policy.
+ * The values returned in these parameters depend on both the \p flags
+ * passed in and the memory binding policies and nodesets of the pages
+ * in the address range.
+ *
+ * If ::HWLOC_MEMBIND_STRICT is specified, the target pages are first
+ * checked to see if they all have the same memory binding policy and
+ * nodeset.  If they do not, -1 is returned and errno is set to EXDEV.
+ * If they are identical across all pages, the set and policy are
+ * returned in \p set and \p policy, respectively.
+ *
+ * If ::HWLOC_MEMBIND_STRICT is not specified, the union of all NUMA
+ * node(s) containing pages in the address range is calculated.
+ * If all pages in the target have the same policy, it is returned in
+ * \p policy.  Otherwise, \p policy is set to ::HWLOC_MEMBIND_MIXED.
+ *
+ * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset.
+ * Otherwise it's a cpuset.
+ *
+ * If any other flags are specified, -1 is returned and errno is set
+ * to EINVAL.
+ *
+ * If \p len is 0, -1 is returned and errno is set to EINVAL.
+ */
+HWLOC_DECLSPEC int hwloc_get_area_membind(hwloc_topology_t topology, const void *addr, size_t len, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags);
+
+/** \brief Get the NUMA nodes where memory identified by (\p addr, \p len ) is physically allocated.
+ *
+ * Fills \p set according to the NUMA nodes where the memory area pages
+ * are physically allocated. If no page is actually allocated yet,
+ * \p set may be empty.
+ *
+ * If pages spread to multiple nodes, it is not specified whether they spread
+ * equitably, or whether most of them are on a single node, etc.
+ *
+ * The operating system may move memory pages from one processor
+ * to another at any time according to their binding,
+ * so this function may return something that is already
+ * outdated.
+ *
+ * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset.
+ * Otherwise it's a cpuset.
+ *
+ * If \p len is 0, \p set is emptied.
+ *
+ * Flags are currently unused.
+ */
+HWLOC_DECLSPEC int hwloc_get_area_memlocation(hwloc_topology_t topology, const void *addr, size_t len, hwloc_bitmap_t set, int flags);
+
+/** \brief Allocate some memory
+ *
+ * This is equivalent to malloc(), except that it tries to allocate
+ * page-aligned memory from the OS.
+ *
+ * \note The allocated memory should be freed with hwloc_free().
+ */
+HWLOC_DECLSPEC void *hwloc_alloc(hwloc_topology_t topology, size_t len);
+
+/** \brief Allocate some memory on NUMA memory nodes specified by \p set
+ *
+ * \return NULL with errno set to ENOSYS if the action is not supported
+ * and ::HWLOC_MEMBIND_STRICT is given
+ * \return NULL with errno set to EXDEV if the binding cannot be enforced
+ * and ::HWLOC_MEMBIND_STRICT is given
+ * \return NULL with errno set to ENOMEM if the memory allocation failed
+ * even before trying to bind.
+ *
+ * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset.
+ * Otherwise it's a cpuset.
+ *
+ * \note The allocated memory should be freed with hwloc_free().
+ */
+HWLOC_DECLSPEC void *hwloc_alloc_membind(hwloc_topology_t topology, size_t len, hwloc_const_bitmap_t set, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_malloc;
+
+/** \brief Allocate some memory on NUMA memory nodes specified by \p set
+ *
+ * This is similar to hwloc_alloc_membind_nodeset() except that it is allowed to change
+ * the current memory binding policy, thus providing more binding support, at
+ * the expense of changing the current state.
+ *
+ * If ::HWLOC_MEMBIND_BYNODESET is specified, set is considered a nodeset.
+ * Otherwise it's a cpuset.
+ */
+static __hwloc_inline void *
+hwloc_alloc_membind_policy(hwloc_topology_t topology, size_t len, hwloc_const_bitmap_t set, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_malloc;
+
+/** \brief Free memory that was previously allocated by hwloc_alloc()
+ * or hwloc_alloc_membind().
+ */
+HWLOC_DECLSPEC int hwloc_free(hwloc_topology_t topology, void *addr, size_t len);
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_setsource Changing the Source of Topology Discovery
+ *
+ * If none of the functions below is called, the default is to detect all the objects
+ * of the machine that the caller is allowed to access.
+ *
+ * This default behavior may also be modified through environment variables
+ * if the application did not modify it already.
+ * Setting HWLOC_XMLFILE in the environment enforces the discovery from a XML
+ * file as if hwloc_topology_set_xml() had been called.
+ * Setting HWLOC_SYNTHETIC enforces a synthetic topology as if
+ * hwloc_topology_set_synthetic() had been called.
+ *
+ * Finally, HWLOC_THISSYSTEM enforces the return value of
+ * hwloc_topology_is_thissystem().
+ *
+ * @{
+ */
+
+/** \brief Change which process the topology is viewed from.
+ *
+ * On some systems, processes may have different views of the machine, for
+ * instance the set of allowed CPUs. By default, hwloc exposes the view from
+ * the current process. Calling hwloc_topology_set_pid() permits to make it
+ * expose the topology of the machine from the point of view of another
+ * process.
+ *
+ * \note \p hwloc_pid_t is \p pid_t on Unix platforms,
+ * and \p HANDLE on native Windows platforms.
+ *
+ * \note -1 is returned and errno is set to ENOSYS on platforms that do not
+ * support this feature.
+ */
+HWLOC_DECLSPEC int hwloc_topology_set_pid(hwloc_topology_t __hwloc_restrict topology, hwloc_pid_t pid);
+
+/** \brief Enable synthetic topology.
+ *
+ * Gather topology information from the given \p description,
+ * a space-separated string of <type:number> describing
+ * the object type and arity at each level.
+ * All types may be omitted (space-separated string of numbers) so that
+ * hwloc chooses all types according to usual topologies.
+ * See also the \ref synthetic.
+ *
+ * Setting the environment variable HWLOC_SYNTHETIC
+ * may also result in this behavior.
+ *
+ * If \p description was properly parsed and describes a valid topology
+ * configuration, this function returns 0.
+ * Otherwise -1 is returned and errno is set to EINVAL.
+ *
+ * Note that this function does not actually load topology
+ * information; it just tells hwloc where to load it from.  You'll
+ * still need to invoke hwloc_topology_load() to actually load the
+ * topology information.
+ *
+ * \note For convenience, this backend provides empty binding hooks which just
+ * return success.
+ *
+ * \note On success, the synthetic component replaces the previously enabled
+ * component (if any), but the topology is not actually modified until
+ * hwloc_topology_load().
+ */
+HWLOC_DECLSPEC int hwloc_topology_set_synthetic(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict description);
+
+/** \brief Enable XML-file based topology.
+ *
+ * Gather topology information from the XML file given at \p xmlpath.
+ * Setting the environment variable HWLOC_XMLFILE may also result in this behavior.
+ * This file may have been generated earlier with hwloc_topology_export_xml() in hwloc/export.h,
+ * or lstopo file.xml.
+ *
+ * Note that this function does not actually load topology
+ * information; it just tells hwloc where to load it from.  You'll
+ * still need to invoke hwloc_topology_load() to actually load the
+ * topology information.
+ *
+ * \return -1 with errno set to EINVAL on failure to read the XML file.
+ *
+ * \note See also hwloc_topology_set_userdata_import_callback()
+ * for importing application-specific object userdata.
+ *
+ * \note For convenience, this backend provides empty binding hooks which just
+ * return success.  To have hwloc still actually call OS-specific hooks, the
+ * ::HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM has to be set to assert that the loaded
+ * file is really the underlying system.
+ *
+ * \note On success, the XML component replaces the previously enabled
+ * component (if any), but the topology is not actually modified until
+ * hwloc_topology_load().
+ */
+HWLOC_DECLSPEC int hwloc_topology_set_xml(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict xmlpath);
+
+/** \brief Enable XML based topology using a memory buffer (instead of
+ * a file, as with hwloc_topology_set_xml()).
+ *
+ * Gather topology information from the XML memory buffer given at \p
+ * buffer and of length \p size.  This buffer may have been filled
+ * earlier with hwloc_topology_export_xmlbuffer() in hwloc/export.h.
+ *
+ * Note that this function does not actually load topology
+ * information; it just tells hwloc where to load it from.  You'll
+ * still need to invoke hwloc_topology_load() to actually load the
+ * topology information.
+ *
+ * \return -1 with errno set to EINVAL on failure to read the XML buffer.
+ *
+ * \note See also hwloc_topology_set_userdata_import_callback()
+ * for importing application-specific object userdata.
+ *
+ * \note For convenience, this backend provides empty binding hooks which just
+ * return success.  To have hwloc still actually call OS-specific hooks, the
+ * ::HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM has to be set to assert that the loaded
+ * file is really the underlying system.
+ *
+ * \note On success, the XML component replaces the previously enabled
+ * component (if any), but the topology is not actually modified until
+ * hwloc_topology_load().
+ */
+HWLOC_DECLSPEC int hwloc_topology_set_xmlbuffer(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict buffer, int size);
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_configuration Topology Detection Configuration and Query
+ *
+ * Several functions can optionally be called between hwloc_topology_init() and
+ * hwloc_topology_load() to configure how the detection should be performed,
+ * e.g. to ignore some objects types, define a synthetic topology, etc.
+ *
+ * @{
+ */
+
+/** \brief Flags to be set onto a topology context before load.
+ *
+ * Flags should be given to hwloc_topology_set_flags().
+ * They may also be returned by hwloc_topology_get_flags().
+ */
+enum hwloc_topology_flags_e {
+ /** \brief Detect the whole system, ignore reservations.
+   *
+   * Gather all resources, even if some were disabled by the administrator.
+   * For instance, ignore Linux Cgroup/Cpusets and gather all processors and memory nodes.
+   *
+   * When this flag is not set, PUs that are disallowed are not added to the topology.
+   * Parent objects (package, core, cache, etc.) are added only if some of their children are allowed.
+   * NUMA nodes are always added but their available memory is set to 0 when disallowed.
+   *
+   * When this flag is set, each object has allowed_cpuset <= cpuset <= complete_cpuset.
+   * Otherwise allowed_cpuset = cpuset <= complete_cpuset.
+   * The same applies to nodesets.
+   *
+   * If the current topology is exported to XML and reimported later, this flag
+   * should be set again in the reimported topology so that disallowed resources
+   * are reimported as well.
+   * \hideinitializer
+   */
+  HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM = (1UL<<0),
+
+ /** \brief Assume that the selected backend provides the topology for the
+   * system on which we are running.
+   *
+   * This forces hwloc_topology_is_thissystem() to return 1, i.e. makes hwloc assume that
+   * the selected backend provides the topology for the system on which we are running,
+   * even if it is not the OS-specific backend but the XML backend for instance.
+   * This means making the binding functions actually call the OS-specific
+   * system calls and really do binding, while the XML backend would otherwise
+   * provide empty hooks just returning success.
+   *
+   * Setting the environment variable HWLOC_THISSYSTEM may also result in the
+   * same behavior.
+   *
+   * This can be used for efficiency reasons to first detect the topology once,
+   * save it to an XML file, and quickly reload it later through the XML
+   * backend, but still having binding functions actually do bind.
+   * \hideinitializer
+   */
+  HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM = (1UL<<1),
+
+ /** \brief Get the set of allowed resources from the local operating system even if the topology was loaded from XML or synthetic description.
+   *
+   * If the topology was loaded from XML or from a synthetic string,
+   * restrict it by applying the current process restrictions such as
+   * Linux Cgroup/Cpuset.
+   *
+   * This is useful when the topology is not loaded directly from
+   * the local machine (e.g. for performance reason) and it comes
+   * with all resources, while the running process is restricted
+   * to only parts of the machine.
+   *
+   * This flag is ignored unless ::HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM is
+   * also set since the loaded topology must match the underlying machine
+   * where restrictions will be gathered from.
+   *
+   * Setting the environment variable HWLOC_THISSYSTEM_ALLOWED_RESOURCES
+   * would result in the same behavior.
+   * \hideinitializer
+   */
+  HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES = (1UL<<2)
+};
+
+/** \brief Set OR'ed flags to non-yet-loaded topology.
+ *
+ * Set a OR'ed set of ::hwloc_topology_flags_e onto a topology that was not yet loaded.
+ *
+ * If this function is called multiple times, the last invocation will erase
+ * and replace the set of flags that was previously set.
+ *
+ * The flags set in a topology may be retrieved with hwloc_topology_get_flags()
+ */
+HWLOC_DECLSPEC int hwloc_topology_set_flags (hwloc_topology_t topology, unsigned long flags);
+
+/** \brief Get OR'ed flags of a topology.
+ *
+ * Get the OR'ed set of ::hwloc_topology_flags_e of a topology.
+ *
+ * \return the flags previously set with hwloc_topology_set_flags().
+ */
+HWLOC_DECLSPEC unsigned long hwloc_topology_get_flags (hwloc_topology_t topology);
+
+/** \brief Does the topology context come from this system?
+ *
+ * \return 1 if this topology context was built using the system
+ * running this program.
+ * \return 0 instead (for instance if using another file-system root,
+ * a XML topology file, or a synthetic topology).
+ */
+HWLOC_DECLSPEC int hwloc_topology_is_thissystem(hwloc_topology_t  __hwloc_restrict topology) __hwloc_attribute_pure;
+
+/** \brief Flags describing actual discovery support for this topology. */
+struct hwloc_topology_discovery_support {
+  /** \brief Detecting the number of PU objects is supported. */
+  unsigned char pu;
+};
+
+/** \brief Flags describing actual PU binding support for this topology.
+ *
+ * A flag may be set even if the feature isn't supported in all cases
+ * (e.g. binding to random sets of non-contiguous objects).
+ */
+struct hwloc_topology_cpubind_support {
+  /** Binding the whole current process is supported.  */
+  unsigned char set_thisproc_cpubind;
+  /** Getting the binding of the whole current process is supported.  */
+  unsigned char get_thisproc_cpubind;
+  /** Binding a whole given process is supported.  */
+  unsigned char set_proc_cpubind;
+  /** Getting the binding of a whole given process is supported.  */
+  unsigned char get_proc_cpubind;
+  /** Binding the current thread only is supported.  */
+  unsigned char set_thisthread_cpubind;
+  /** Getting the binding of the current thread only is supported.  */
+  unsigned char get_thisthread_cpubind;
+  /** Binding a given thread only is supported.  */
+  unsigned char set_thread_cpubind;
+  /** Getting the binding of a given thread only is supported.  */
+  unsigned char get_thread_cpubind;
+  /** Getting the last processors where the whole current process ran is supported */
+  unsigned char get_thisproc_last_cpu_location;
+  /** Getting the last processors where a whole process ran is supported */
+  unsigned char get_proc_last_cpu_location;
+  /** Getting the last processors where the current thread ran is supported */
+  unsigned char get_thisthread_last_cpu_location;
+};
+
+/** \brief Flags describing actual memory binding support for this topology.
+ *
+ * A flag may be set even if the feature isn't supported in all cases
+ * (e.g. binding to random sets of non-contiguous objects).
+ */
+struct hwloc_topology_membind_support {
+  /** Binding the whole current process is supported.  */
+  unsigned char set_thisproc_membind;
+  /** Getting the binding of the whole current process is supported.  */
+  unsigned char get_thisproc_membind;
+  /** Binding a whole given process is supported.  */
+  unsigned char set_proc_membind;
+  /** Getting the binding of a whole given process is supported.  */
+  unsigned char get_proc_membind;
+  /** Binding the current thread only is supported.  */
+  unsigned char set_thisthread_membind;
+  /** Getting the binding of the current thread only is supported.  */
+  unsigned char get_thisthread_membind;
+  /** Binding a given memory area is supported. */
+  unsigned char set_area_membind;
+  /** Getting the binding of a given memory area is supported.  */
+  unsigned char get_area_membind;
+  /** Allocating a bound memory area is supported. */
+  unsigned char alloc_membind;
+  /** First-touch policy is supported. */
+  unsigned char firsttouch_membind;
+  /** Bind policy is supported. */
+  unsigned char bind_membind;
+  /** Interleave policy is supported. */
+  unsigned char interleave_membind;
+  /** Next-touch migration policy is supported. */
+  unsigned char nexttouch_membind;
+  /** Migration flags is supported. */
+  unsigned char migrate_membind;
+  /** Getting the last NUMA nodes where a memory area was allocated is supported */
+  unsigned char get_area_memlocation;
+};
+
+/** \brief Set of flags describing actual support for this topology.
+ *
+ * This is retrieved with hwloc_topology_get_support() and will be valid until
+ * the topology object is destroyed.  Note: the values are correct only after
+ * discovery.
+ */
+struct hwloc_topology_support {
+  struct hwloc_topology_discovery_support *discovery;
+  struct hwloc_topology_cpubind_support *cpubind;
+  struct hwloc_topology_membind_support *membind;
+};
+
+/** \brief Retrieve the topology support.
+ *
+ * Each flag indicates whether a feature is supported.
+ * If set to 0, the feature is not supported.
+ * If set to 1, the feature is supported, but the corresponding
+ * call may still fail in some corner cases.
+ *
+ * These features are also listed by hwloc-info \--support
+ */
+HWLOC_DECLSPEC const struct hwloc_topology_support *hwloc_topology_get_support(hwloc_topology_t __hwloc_restrict topology);
+
+/** \brief Type filtering flags.
+ *
+ * By default, most objects are kept (::HWLOC_TYPE_FILTER_KEEP_ALL).
+ * Instruction caches, I/O and Misc objects are ignored by default (::HWLOC_TYPE_FILTER_KEEP_NONE).
+ * Group levels are ignored unless they bring structure (::HWLOC_TYPE_FILTER_KEEP_STRUCTURE).
+ *
+ * Note that group objects are also ignored individually (without the entire level)
+ * when they do not bring structure.
+ */
+enum hwloc_type_filter_e {
+  /** \brief Keep all objects of this type.
+   *
+   * Cannot be set for ::HWLOC_OBJ_GROUP (groups are designed only to add more structure to the topology).
+   * \hideinitializer
+   */
+  HWLOC_TYPE_FILTER_KEEP_ALL = 0,
+
+  /** \brief Ignore all objects of this type.
+   *
+   * The bottom-level type ::HWLOC_OBJ_PU and the ::HWLOC_OBJ_NUMANODE type may not be ignored.
+   * The top-level object of the hierarchy will never actually be removed even if its type is ignored.
+   * \hideinitializer
+   */
+  HWLOC_TYPE_FILTER_KEEP_NONE = 1,
+
+  /** \brief Only ignore objects if their entire level does not bring any structure.
+   *
+   * Keep the entire level of objects if at least one of these objects adds
+   * structure to the topology. An object brings structure when it has multiple
+   * children and it is not the only child of its parent.
+   *
+   * If all objects in the level are the only child of their parent, and if none
+   * of them has multiple children, the entire level is removed.
+   *
+   * Cannot be set for I/O and Misc objects since the topology structure does not matter there.
+   * \hideinitializer
+   */
+  HWLOC_TYPE_FILTER_KEEP_STRUCTURE = 2,
+
+  /** \brief Only keep likely-important objects of the given type.
+   *
+   * It is only useful for I/O object types.
+   * For ::HWLOC_OBJ_PCI_DEVICE and ::HWLOC_OBJ_OS_DEVICE, it means that only objects
+   * of major/common kinds are kept (storage, network, OpenFabrics, Intel MICs, CUDA,
+   * OpenCL, NVML, and displays).
+   * For ::HWLOC_OBJ_BRIDGE, it means that bridges are kept only if they have children.
+   *
+   * This flag is equivalent to ::HWLOC_TYPE_FILTER_KEEP_ALL for normal and Misc types
+   * since they are likely important.
+   * \hideinitializer
+   */
+  HWLOC_TYPE_FILTER_KEEP_IMPORTANT = 3
+};
+
+/** \brief Set the filtering for the given object type.
+ */
+HWLOC_DECLSPEC int hwloc_topology_set_type_filter(hwloc_topology_t topology, hwloc_obj_type_t type, enum hwloc_type_filter_e filter);
+
+/** \brief Get the current filtering for the given object type.
+ */
+HWLOC_DECLSPEC int hwloc_topology_get_type_filter(hwloc_topology_t topology, hwloc_obj_type_t type, enum hwloc_type_filter_e *filter);
+
+/** \brief Set the filtering for all object types.
+ *
+ * If some types do not support this filtering, they are silently ignored.
+ */
+HWLOC_DECLSPEC int hwloc_topology_set_all_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter);
+
+/** \brief Set the filtering for all cache object types.
+ */
+static __hwloc_inline int
+hwloc_topology_set_cache_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter)
+{
+  unsigned i; /* iterate over the contiguous run of cache types in hwloc_obj_type_t */
+  for(i=HWLOC_OBJ_L1CACHE; i<HWLOC_OBJ_L3ICACHE+1; i++) /* +1 so the last type, L3i, is included too */
+    hwloc_topology_set_type_filter(topology, (hwloc_obj_type_t) i, filter);
+  return 0; /* always succeeds; per-type return values are ignored */
+}
+
+/** \brief Set the filtering for all instruction cache object types.
+ */
+static __hwloc_inline int
+hwloc_topology_set_icache_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter)
+{
+  unsigned i; /* iterate over the contiguous L1i..L3i instruction-cache types */
+  for(i=HWLOC_OBJ_L1ICACHE; i<HWLOC_OBJ_L3ICACHE+1; i++) /* +1 so L3i itself is filtered as well */
+    hwloc_topology_set_type_filter(topology, (hwloc_obj_type_t) i, filter);
+  return 0; /* always succeeds; per-type return values are ignored */
+}
+
+/** \brief Set the filtering for I/O and Misc object types.
+ */
+static __hwloc_inline int
+hwloc_topology_set_io_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter)
+{
+  hwloc_topology_set_type_filter(topology, HWLOC_OBJ_MISC, filter); /* Misc is filtered together with the I/O types */
+  hwloc_topology_set_type_filter(topology, HWLOC_OBJ_BRIDGE, filter);
+  hwloc_topology_set_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, filter);
+  hwloc_topology_set_type_filter(topology, HWLOC_OBJ_OS_DEVICE, filter);
+  return 0; /* always succeeds; per-type return values are ignored */
+}
+
+/** \brief Set the topology-specific userdata pointer.
+ *
+ * Each topology may store one application-given private data pointer.
+ * It is initialized to \c NULL.
+ * hwloc will never modify it.
+ *
+ * Use it as you wish, after hwloc_topology_init() and until hwloc_topology_destroy().
+ *
+ * This pointer is not exported to XML.
+ */
+HWLOC_DECLSPEC void hwloc_topology_set_userdata(hwloc_topology_t topology, const void *userdata);
+
+/** \brief Retrieve the topology-specific userdata pointer.
+ *
+ * Retrieve the application-given private data pointer that was
+ * previously set with hwloc_topology_set_userdata().
+ */
+HWLOC_DECLSPEC void * hwloc_topology_get_userdata(hwloc_topology_t topology);
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_tinker Modifying a loaded Topology
+ * @{
+ */
+
+/** \brief Flags to be given to hwloc_topology_restrict(). */
+enum hwloc_restrict_flags_e {
+  /** \brief Remove all objects that became CPU-less.
+   * By default, only objects that contain no PU and no memory are removed.
+   * \hideinitializer
+   */
+  HWLOC_RESTRICT_FLAG_REMOVE_CPULESS = (1<<0),
+
+  /** \brief Move Misc objects to ancestors if their parents are removed during restriction.
+   * If this flag is not set, Misc objects are removed when their parents are removed.
+   * \hideinitializer
+   */
+  HWLOC_RESTRICT_FLAG_ADAPT_MISC = (1<<1),
+
+  /** \brief Move I/O objects to ancestors if their parents are removed during restriction.
+   * If this flag is not set, I/O devices and bridges are removed when their parents are removed.
+   * \hideinitializer
+   */
+  HWLOC_RESTRICT_FLAG_ADAPT_IO = (1<<2)
+};
+
+/** \brief Restrict the topology to the given CPU set.
+ *
+ * Topology \p topology is modified so as to remove all objects that
+ * are not included (or partially included) in the CPU set \p cpuset.
+ * All objects CPU and node sets are restricted accordingly.
+ *
+ * \p flags is a OR'ed set of ::hwloc_restrict_flags_e.
+ *
+ * \note This call may not be reverted by restricting back to a larger
+ * cpuset. Once dropped during restriction, objects may not be brought
+ * back, except by loading another topology with hwloc_topology_load().
+ *
+ * \return 0 on success.
+ *
+ * \return -1 with errno set to EINVAL if the input cpuset is invalid.
+ * The topology is not modified in this case.
+ *
+ * \return -1 with errno set to ENOMEM on failure to allocate internal data.
+ * The topology is reinitialized in this case. It should be either
+ * destroyed with hwloc_topology_destroy() or configured and loaded again.
+ */
+HWLOC_DECLSPEC int hwloc_topology_restrict(hwloc_topology_t __hwloc_restrict topology, hwloc_const_cpuset_t cpuset, unsigned long flags);
+
+/** \brief Add a MISC object as a leaf of the topology
+ *
+ * A new MISC object will be created and inserted into the topology at the
+ * position given by parent. It is appended to the list of existing Misc children,
+ * without ever adding any intermediate hierarchy level. This is useful for
+ * annotating the topology without actually changing the hierarchy.
+ *
+ * \p name is supposed to be unique across all Misc objects in the topology.
+ * It will be duplicated to setup the new object attributes.
+ *
+ * The new leaf object will not have any \p cpuset.
+ *
+ * \return the newly-created object
+ *
+ * \return \c NULL on error.
+ *
+ * \return \c NULL if Misc objects are filtered-out of the topology (::HWLOC_TYPE_FILTER_KEEP_NONE).
+ *
+ * \note If \p name contains some non-printable characters, they will
+ * be dropped when exporting to XML, see hwloc_topology_export_xml() in hwloc/export.h.
+ */
+HWLOC_DECLSPEC hwloc_obj_t hwloc_topology_insert_misc_object(hwloc_topology_t topology, hwloc_obj_t parent, const char *name);
+
+/** \brief Allocate a Group object to insert later with hwloc_topology_insert_group_object().
+ *
+ * This function returns a new Group object.
+ * The caller should (at least) initialize its sets before inserting the object.
+ * See hwloc_topology_insert_group_object().
+ *
+ * The \p subtype object attribute may be set to display something else
+ * than "Group" as the type name for this object in lstopo.
+ * Custom name/value info pairs may be added with hwloc_obj_add_info() after
+ * insertion.
+ *
+ * The \p kind group attribute should be 0. The \p subkind group attribute may
+ * be set to identify multiple Groups of the same level.
+ *
+ * It is recommended not to set any other object attribute before insertion,
+ * since the Group may get discarded during insertion.
+ *
+ * The object will be destroyed if passed to hwloc_topology_insert_group_object()
+ * without any set defined.
+ */
+HWLOC_DECLSPEC hwloc_obj_t hwloc_topology_alloc_group_object(hwloc_topology_t topology);
+
+/** \brief Add more structure to the topology by adding an intermediate Group
+ *
+ * The caller should first allocate a new Group object with hwloc_topology_alloc_group_object().
+ * Then it must setup at least one of its CPU or node sets to specify
+ * the final location of the Group in the topology.
+ * Then the object can be passed to this function for actual insertion in the topology.
+ *
+ * Either the cpuset or nodeset field (or both, if compatible) must be set
+ * to a non-empty bitmap. The complete_cpuset or complete_nodeset may be set
+ * instead if inserting with respect to the complete topology
+ * (including disallowed, offline or unknown objects).
+ *
+ * When grouping several objects, hwloc_obj_add_other_obj_sets() is an easy way
+ * to build the Group sets iteratively.
+ *
+ * These sets cannot be larger than the current topology, or they would get
+ * restricted silently.
+ *
+ * The core will setup the other sets after actual insertion.
+ *
+ * \return The inserted object if it was properly inserted.
+ *
+ * \return An existing object if the Group was discarded because the topology already
+ * contained an object at the same location (the Group did not add any locality information).
+ * Any name/info key pair set before inserting is appended to the existing object.
+ *
+ * \return \c NULL if the insertion failed because of conflicting sets in topology tree.
+ *
+ * \return \c NULL if Group objects are filtered-out of the topology (::HWLOC_TYPE_FILTER_KEEP_NONE).
+ *
+ * \return \c NULL if the object was discarded because no set was initialized in the Group
+ * before insert, or all of them were empty.
+ */
+HWLOC_DECLSPEC hwloc_obj_t hwloc_topology_insert_group_object(hwloc_topology_t topology, hwloc_obj_t group);
+
+/** \brief Setup object cpusets/nodesets by OR'ing another object's sets.
+ *
+ * For each defined cpuset or nodeset in \p src, allocate the corresponding set
+ * in \p dst and add \p src to it by OR'ing sets.
+ *
+ * This function is convenient between hwloc_topology_alloc_group_object()
+ * and hwloc_topology_insert_group_object(). It builds the sets of the new Group
+ * that will be inserted as a new intermediate parent of several objects.
+ */
+HWLOC_DECLSPEC int hwloc_obj_add_other_obj_sets(hwloc_obj_t dst, hwloc_obj_t src);
+
+/** @} */
+
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+/* high-level helpers */
+#include <hwloc/helper.h>
+
+/* inline code of some functions above */
+#include <hwloc/inlines.h>
+
+/* exporting to XML or synthetic */
+#include <hwloc/export.h>
+
+/* distances */
+#include <hwloc/distances.h>
+
+/* topology diffs */
+#include <hwloc/diff.h>
+
+/* deprecated headers */
+#include <hwloc/deprecated.h>
+
+#endif /* HWLOC_H */
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/autogen/config.h.in b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/autogen/config.h.in
new file mode 100644
index 0000000000..e101b0a479
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/autogen/config.h.in
@@ -0,0 +1,201 @@
+/* -*- c -*-
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2014 Inria.  All rights reserved.
+ * Copyright © 2009-2012 Université Bordeaux
+ * Copyright © 2009-2011 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/* The configuration file */
+
+#ifndef HWLOC_CONFIG_H
+#define HWLOC_CONFIG_H
+
+#if (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 95))
+# define __hwloc_restrict __restrict
+#else
+# if __STDC_VERSION__ >= 199901L
+#  define __hwloc_restrict restrict
+# else
+#  define __hwloc_restrict
+# endif
+#endif
+
+/* Note that if we're compiling C++, then just use the "inline"
+   keyword, since it's part of C++ */
+#if defined(c_plusplus) || defined(__cplusplus)
+#  define __hwloc_inline inline
+#elif defined(_MSC_VER) || defined(__HP_cc)
+#  define __hwloc_inline __inline
+#else
+#  define __hwloc_inline __inline__
+#endif
+
+/*
+ * Note: this is public.  We can not assume anything from the compiler used
+ * by the application and thus the HWLOC_HAVE_* macros below are not
+ * fetched from the autoconf result here. We only automatically use a few
+ * well-known easy cases.
+ */
+
+/* Some handy constants to make the logic below a little more readable */
+#if defined(__cplusplus) && \
+    (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR >= 4))
+#define GXX_ABOVE_3_4 1
+#else
+#define GXX_ABOVE_3_4 0
+#endif
+
+#if !defined(__cplusplus) && \
+    (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 95))
+#define GCC_ABOVE_2_95 1
+#else
+#define GCC_ABOVE_2_95 0
+#endif
+
+#if !defined(__cplusplus) && \
+    (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96))
+#define GCC_ABOVE_2_96 1
+#else
+#define GCC_ABOVE_2_96 0
+#endif
+
+#if !defined(__cplusplus) && \
+    (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3))
+#define GCC_ABOVE_3_3 1
+#else
+#define GCC_ABOVE_3_3 0
+#endif
+
+/* Maybe before gcc 2.95 too */
+#ifdef HWLOC_HAVE_ATTRIBUTE_UNUSED
+#define __HWLOC_HAVE_ATTRIBUTE_UNUSED HWLOC_HAVE_ATTRIBUTE_UNUSED 
+#elif defined(__GNUC__)
+# define __HWLOC_HAVE_ATTRIBUTE_UNUSED (GXX_ABOVE_3_4 || GCC_ABOVE_2_95)
+#else
+# define __HWLOC_HAVE_ATTRIBUTE_UNUSED 0
+#endif
+#if __HWLOC_HAVE_ATTRIBUTE_UNUSED
+# define __hwloc_attribute_unused __attribute__((__unused__))
+#else
+# define __hwloc_attribute_unused
+#endif
+
+#ifdef HWLOC_HAVE_ATTRIBUTE_MALLOC
+#define __HWLOC_HAVE_ATTRIBUTE_MALLOC HWLOC_HAVE_ATTRIBUTE_MALLOC 
+#elif defined(__GNUC__)
+# define __HWLOC_HAVE_ATTRIBUTE_MALLOC (GXX_ABOVE_3_4 || GCC_ABOVE_2_96)
+#else
+# define __HWLOC_HAVE_ATTRIBUTE_MALLOC 0
+#endif
+#if __HWLOC_HAVE_ATTRIBUTE_MALLOC
+# define __hwloc_attribute_malloc __attribute__((__malloc__))
+#else
+# define __hwloc_attribute_malloc
+#endif
+
+#ifdef HWLOC_HAVE_ATTRIBUTE_CONST
+#define __HWLOC_HAVE_ATTRIBUTE_CONST HWLOC_HAVE_ATTRIBUTE_CONST 
+#elif defined(__GNUC__)
+# define __HWLOC_HAVE_ATTRIBUTE_CONST (GXX_ABOVE_3_4 || GCC_ABOVE_2_95)
+#else
+# define __HWLOC_HAVE_ATTRIBUTE_CONST 0
+#endif
+#if __HWLOC_HAVE_ATTRIBUTE_CONST
+# define __hwloc_attribute_const __attribute__((__const__))
+#else
+# define __hwloc_attribute_const
+#endif
+
+#ifdef HWLOC_HAVE_ATTRIBUTE_PURE
+#define __HWLOC_HAVE_ATTRIBUTE_PURE HWLOC_HAVE_ATTRIBUTE_PURE 
+#elif defined(__GNUC__)
+# define __HWLOC_HAVE_ATTRIBUTE_PURE (GXX_ABOVE_3_4 || GCC_ABOVE_2_96)
+#else
+# define __HWLOC_HAVE_ATTRIBUTE_PURE 0
+#endif
+#if __HWLOC_HAVE_ATTRIBUTE_PURE
+# define __hwloc_attribute_pure __attribute__((__pure__))
+#else
+# define __hwloc_attribute_pure
+#endif
+
+#ifdef HWLOC_HAVE_ATTRIBUTE_DEPRECATED
+#define __HWLOC_HAVE_ATTRIBUTE_DEPRECATED HWLOC_HAVE_ATTRIBUTE_DEPRECATED 
+#elif defined(__GNUC__)
+# define __HWLOC_HAVE_ATTRIBUTE_DEPRECATED (GXX_ABOVE_3_4 || GCC_ABOVE_3_3)
+#else
+# define __HWLOC_HAVE_ATTRIBUTE_DEPRECATED 0
+#endif
+#if __HWLOC_HAVE_ATTRIBUTE_DEPRECATED
+# define __hwloc_attribute_deprecated __attribute__((__deprecated__))
+#else
+# define __hwloc_attribute_deprecated
+#endif
+
+#ifdef HWLOC_HAVE_ATTRIBUTE_MAY_ALIAS
+#define __HWLOC_HAVE_ATTRIBUTE_MAY_ALIAS HWLOC_HAVE_ATTRIBUTE_MAY_ALIAS
+#elif defined(__GNUC__)
+# define __HWLOC_HAVE_ATTRIBUTE_MAY_ALIAS (GXX_ABOVE_3_4 || GCC_ABOVE_3_3)
+#else
+# define __HWLOC_HAVE_ATTRIBUTE_MAY_ALIAS 0
+#endif
+#if __HWLOC_HAVE_ATTRIBUTE_MAY_ALIAS
+# define __hwloc_attribute_may_alias __attribute__((__may_alias__))
+#else
+# define __hwloc_attribute_may_alias
+#endif
+
+#ifdef HWLOC_C_HAVE_VISIBILITY
+# if HWLOC_C_HAVE_VISIBILITY
+#  define HWLOC_DECLSPEC __attribute__((__visibility__("default")))
+# else
+#  define HWLOC_DECLSPEC
+# endif
+#else
+# define HWLOC_DECLSPEC
+#endif
+
+/* Defined to 1 on Linux */
+#undef HWLOC_LINUX_SYS
+
+/* Defined to 1 if the CPU_SET macro works */
+#undef HWLOC_HAVE_CPU_SET
+
+/* Defined to 1 if you have the `windows.h' header. */
+#undef HWLOC_HAVE_WINDOWS_H
+#undef hwloc_pid_t
+#undef hwloc_thread_t
+
+#ifdef HWLOC_HAVE_WINDOWS_H
+
+#  include <windows.h>
+typedef DWORDLONG hwloc_uint64_t;
+
+#else /* HWLOC_HAVE_WINDOWS_H */
+
+#  ifdef hwloc_thread_t
+#    include <pthread.h>
+#  endif /* hwloc_thread_t */
+
+/* Defined to 1 if you have the <stdint.h> header file. */
+#  undef HWLOC_HAVE_STDINT_H
+
+#  include <unistd.h>
+#  ifdef HWLOC_HAVE_STDINT_H
+#    include <stdint.h>
+#  endif
+typedef uint64_t hwloc_uint64_t;
+
+#endif /* HWLOC_HAVE_WINDOWS_H */
+
+/* Whether we need to re-define all the hwloc public symbols or not */
+#undef HWLOC_SYM_TRANSFORM
+
+/* The hwloc symbol prefix */
+#undef HWLOC_SYM_PREFIX
+
+/* The hwloc symbol prefix in all caps */
+#undef HWLOC_SYM_PREFIX_CAPS
+
+#endif /* HWLOC_CONFIG_H */
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/bitmap.h b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/bitmap.h
new file mode 100644
index 0000000000..19b8b551e9
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/bitmap.h
@@ -0,0 +1,376 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2016 Inria.  All rights reserved.
+ * Copyright © 2009-2012 Université Bordeaux
+ * Copyright © 2009-2011 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief The bitmap API, for use in hwloc itself.
+ */
+
+#ifndef HWLOC_BITMAP_H
+#define HWLOC_BITMAP_H
+
+#include <hwloc/autogen/config.h>
+#include <assert.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \defgroup hwlocality_bitmap The bitmap API
+ *
+ * The ::hwloc_bitmap_t type represents a set of objects, typically OS
+ * processors -- which may actually be hardware threads (represented
+ * by ::hwloc_cpuset_t, which is a typedef for ::hwloc_bitmap_t) -- or
+ * memory nodes (represented by ::hwloc_nodeset_t, which is also a
+ * typedef for ::hwloc_bitmap_t).
+ *
+ * <em>Both CPU and node sets are always indexed by OS physical number.</em>
+ *
+ * \note CPU sets and nodesets are described in \ref hwlocality_object_sets.
+ *
+ * A bitmap may be of infinite size (all bits are set after some point).
+ * A bitmap may even be full if all bits are set.
+ *
+ * \note Several examples of using the bitmap API are available under the
+ * doc/examples/ directory in the source tree.
+ * Regression tests such as tests/hwloc/hwloc_bitmap*.c also make intensive use
+ * of this API.
+ * @{
+ */
+
+
+/** \brief
+ * Set of bits represented as an opaque pointer to an internal bitmap.
+ */
+typedef struct hwloc_bitmap_s * hwloc_bitmap_t;
+/** \brief a non-modifiable ::hwloc_bitmap_t */
+typedef const struct hwloc_bitmap_s * hwloc_const_bitmap_t;
+
+
+/*
+ * Bitmap allocation, freeing and copying.
+ */
+
+/** \brief Allocate a new empty bitmap.
+ *
+ * \returns A valid bitmap or \c NULL.
+ *
+ * The bitmap should be freed by a corresponding call to
+ * hwloc_bitmap_free().
+ */
+HWLOC_DECLSPEC hwloc_bitmap_t hwloc_bitmap_alloc(void) __hwloc_attribute_malloc;
+
+/** \brief Allocate a new full bitmap. */
+HWLOC_DECLSPEC hwloc_bitmap_t hwloc_bitmap_alloc_full(void) __hwloc_attribute_malloc;
+
+/** \brief Free bitmap \p bitmap.
+ *
+ * If \p bitmap is \c NULL, no operation is performed.
+ */
+HWLOC_DECLSPEC void hwloc_bitmap_free(hwloc_bitmap_t bitmap);
+
+/** \brief Duplicate bitmap \p bitmap by allocating a new bitmap and copying \p bitmap contents.
+ *
+ * If \p bitmap is \c NULL, \c NULL is returned.
+ */
+HWLOC_DECLSPEC hwloc_bitmap_t hwloc_bitmap_dup(hwloc_const_bitmap_t bitmap) __hwloc_attribute_malloc;
+
+/** \brief Copy the contents of bitmap \p src into the already allocated bitmap \p dst */
+HWLOC_DECLSPEC void hwloc_bitmap_copy(hwloc_bitmap_t dst, hwloc_const_bitmap_t src);
+
+
+/*
+ * Bitmap/String Conversion
+ */
+
+/** \brief Stringify a bitmap.
+ *
+ * Up to \p buflen characters may be written in buffer \p buf.
+ *
+ * If \p buflen is 0, \p buf may safely be \c NULL.
+ *
+ * \return the number of characters that were actually written if not truncating,
+ * or that would have been written (not including the ending \\0).
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap);
+
+/** \brief Stringify a bitmap into a newly allocated string.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);
+
+/** \brief Parse a bitmap string and stores it in bitmap \p bitmap.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string);
+
+/** \brief Stringify a bitmap in the list format.
+ *
+ * Lists are comma-separated indexes or ranges.
+ * Ranges are dash separated indexes.
+ * The last range may not have an ending index if the bitmap is infinitely set.
+ *
+ * Up to \p buflen characters may be written in buffer \p buf.
+ *
+ * If \p buflen is 0, \p buf may safely be \c NULL.
+ *
+ * \return the number of characters that were actually written if not truncating,
+ * or that would have been written (not including the ending \\0).
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_list_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap);
+
+/** \brief Stringify a bitmap into a newly allocated list string.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_list_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);
+
+/** \brief Parse a list string and stores it in bitmap \p bitmap.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_list_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string);
+
+/** \brief Stringify a bitmap in the taskset-specific format.
+ *
+ * The taskset command manipulates bitmap strings that contain a single
+ * (possibly very long) hexadecimal number starting with 0x.
+ *
+ * Up to \p buflen characters may be written in buffer \p buf.
+ *
+ * If \p buflen is 0, \p buf may safely be \c NULL.
+ *
+ * \return the number of characters that were actually written if not truncating,
+ * or that would have been written (not including the ending \\0).
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap);
+
+/** \brief Stringify a bitmap into a newly allocated taskset-specific string.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_taskset_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);
+
+/** \brief Parse a taskset-specific bitmap string and stores it in bitmap \p bitmap.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_taskset_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string);
+
+
+/*
+ * Building bitmaps.
+ */
+
+/** \brief Empty the bitmap \p bitmap */
+HWLOC_DECLSPEC void hwloc_bitmap_zero(hwloc_bitmap_t bitmap);
+
+/** \brief Fill bitmap \p bitmap with all possible indexes (even if those objects don't exist or are otherwise unavailable) */
+HWLOC_DECLSPEC void hwloc_bitmap_fill(hwloc_bitmap_t bitmap);
+
+/** \brief Empty the bitmap \p bitmap and add bit \p id */
+HWLOC_DECLSPEC void hwloc_bitmap_only(hwloc_bitmap_t bitmap, unsigned id);
+
+/** \brief Fill the bitmap \p bitmap and clear the index \p id */
+HWLOC_DECLSPEC void hwloc_bitmap_allbut(hwloc_bitmap_t bitmap, unsigned id);
+
+/** \brief Setup bitmap \p bitmap from unsigned long \p mask */
+HWLOC_DECLSPEC void hwloc_bitmap_from_ulong(hwloc_bitmap_t bitmap, unsigned long mask);
+
+/** \brief Setup bitmap \p bitmap from unsigned long \p mask used as \p i -th subset */
+HWLOC_DECLSPEC void hwloc_bitmap_from_ith_ulong(hwloc_bitmap_t bitmap, unsigned i, unsigned long mask);
+
+
+/*
+ * Modifying bitmaps.
+ */
+
+/** \brief Add index \p id in bitmap \p bitmap */
+HWLOC_DECLSPEC void hwloc_bitmap_set(hwloc_bitmap_t bitmap, unsigned id);
+
+/** \brief Add indexes from \p begin to \p end in bitmap \p bitmap.
+ *
+ * If \p end is \c -1, the range is infinite.
+ */
+HWLOC_DECLSPEC void hwloc_bitmap_set_range(hwloc_bitmap_t bitmap, unsigned begin, int end);
+
+/** \brief Replace \p i -th subset of bitmap \p bitmap with unsigned long \p mask */
+HWLOC_DECLSPEC void hwloc_bitmap_set_ith_ulong(hwloc_bitmap_t bitmap, unsigned i, unsigned long mask);
+
+/** \brief Remove index \p id from bitmap \p bitmap */
+HWLOC_DECLSPEC void hwloc_bitmap_clr(hwloc_bitmap_t bitmap, unsigned id);
+
+/** \brief Remove indexes from \p begin to \p end in bitmap \p bitmap.
+ *
+ * If \p end is \c -1, the range is infinite.
+ */
+HWLOC_DECLSPEC void hwloc_bitmap_clr_range(hwloc_bitmap_t bitmap, unsigned begin, int end);
+
+/** \brief Keep a single index among those set in bitmap \p bitmap
+ *
+ * May be useful before binding so that the process does not
+ * have a chance of migrating between multiple logical CPUs
+ * in the original mask.
+ */
+HWLOC_DECLSPEC void hwloc_bitmap_singlify(hwloc_bitmap_t bitmap);
+
+
+/*
+ * Consulting bitmaps.
+ */
+
+/** \brief Convert the beginning part of bitmap \p bitmap into unsigned long \p mask */
+HWLOC_DECLSPEC unsigned long hwloc_bitmap_to_ulong(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;
+
+/** \brief Convert the \p i -th subset of bitmap \p bitmap into unsigned long mask */
+HWLOC_DECLSPEC unsigned long hwloc_bitmap_to_ith_ulong(hwloc_const_bitmap_t bitmap, unsigned i) __hwloc_attribute_pure;
+
+/** \brief Test whether index \p id is part of bitmap \p bitmap */
+HWLOC_DECLSPEC int hwloc_bitmap_isset(hwloc_const_bitmap_t bitmap, unsigned id) __hwloc_attribute_pure;
+
+/** \brief Test whether bitmap \p bitmap is empty */
+HWLOC_DECLSPEC int hwloc_bitmap_iszero(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;
+
+/** \brief Test whether bitmap \p bitmap is completely full
+ *
+ * \note A full bitmap is always infinitely set.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_isfull(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;
+
+/** \brief Compute the first index (least significant bit) in bitmap \p bitmap
+ *
+ * \return -1 if no index is set in \p bitmap.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_first(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;
+
+/** \brief Compute the next index in bitmap \p bitmap which is after index \p prev
+ *
+ * If \p prev is -1, the first index is returned.
+ *
+ * \return -1 if no index with higher index is set in \p bitmap.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_next(hwloc_const_bitmap_t bitmap, int prev) __hwloc_attribute_pure;
+
+/** \brief Compute the last index (most significant bit) in bitmap \p bitmap
+ *
+ * \return -1 if no index is set in \p bitmap, or if \p bitmap is infinitely set.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_last(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;
+
+/** \brief Compute the "weight" of bitmap \p bitmap (i.e., number of
+ * indexes that are in the bitmap).
+ *
+ * \return the number of indexes that are in the bitmap.
+ *
+ * \return -1 if \p bitmap is infinitely set.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_weight(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;
+
+/** \brief Loop macro iterating on bitmap \p bitmap
+ *
+ * The loop must start with hwloc_bitmap_foreach_begin() and end
+ * with hwloc_bitmap_foreach_end() followed by a terminating ';'.
+ *
+ * \p index is the loop variable; it should be an unsigned int.  The
+ * first iteration will set \p index to the lowest index in the bitmap.
+ * Successive iterations will iterate through, in order, all remaining
+ * indexes set in the bitmap.  To be specific: each iteration will return a
+ * value for \p index such that hwloc_bitmap_isset(bitmap, index) is true.
+ *
+ * The assert prevents the loop from being infinite if the bitmap is infinitely set.
+ *
+ * \hideinitializer
+ */
+#define hwloc_bitmap_foreach_begin(id, bitmap) \
+do { \
+        assert(hwloc_bitmap_weight(bitmap) != -1); \
+        for (id = hwloc_bitmap_first(bitmap); \
+             (unsigned) id != (unsigned) -1; \
+             id = hwloc_bitmap_next(bitmap, id)) {
+
+/** \brief End of loop macro iterating on a bitmap.
+ *
+ * Needs a terminating ';'.
+ *
+ * \sa hwloc_bitmap_foreach_begin()
+ * \hideinitializer
+ */
+#define hwloc_bitmap_foreach_end()		\
+        } \
+} while (0)
+
+
+/*
+ * Combining bitmaps.
+ */
+
+/** \brief Or bitmaps \p bitmap1 and \p bitmap2 and store the result in bitmap \p res
+ *
+ * \p res can be the same as \p bitmap1 or \p bitmap2
+ */
+HWLOC_DECLSPEC void hwloc_bitmap_or (hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2);
+
+/** \brief And bitmaps \p bitmap1 and \p bitmap2 and store the result in bitmap \p res
+ *
+ * \p res can be the same as \p bitmap1 or \p bitmap2
+ */
+HWLOC_DECLSPEC void hwloc_bitmap_and (hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2);
+
+/** \brief And bitmap \p bitmap1 and the negation of \p bitmap2 and store the result in bitmap \p res
+ *
+ * \p res can be the same as \p bitmap1 or \p bitmap2
+ */
+HWLOC_DECLSPEC void hwloc_bitmap_andnot (hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2);
+
+/** \brief Xor bitmaps \p bitmap1 and \p bitmap2 and store the result in bitmap \p res
+ *
+ * \p res can be the same as \p bitmap1 or \p bitmap2
+ */
+HWLOC_DECLSPEC void hwloc_bitmap_xor (hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2);
+
+/** \brief Negate bitmap \p bitmap and store the result in bitmap \p res
+ *
+ * \p res can be the same as \p bitmap
+ */
+HWLOC_DECLSPEC void hwloc_bitmap_not (hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap);
+
+
+/*
+ * Comparing bitmaps.
+ */
+
+/** \brief Test whether bitmaps \p bitmap1 and \p bitmap2 intersect */
+HWLOC_DECLSPEC int hwloc_bitmap_intersects (hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) __hwloc_attribute_pure;
+
+/** \brief Test whether bitmap \p sub_bitmap is part of bitmap \p super_bitmap.
+ *
+ * \note The empty bitmap is considered included in any other bitmap.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_isincluded (hwloc_const_bitmap_t sub_bitmap, hwloc_const_bitmap_t super_bitmap) __hwloc_attribute_pure;
+
+/** \brief Test whether bitmap \p bitmap1 is equal to bitmap \p bitmap2 */
+HWLOC_DECLSPEC int hwloc_bitmap_isequal (hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) __hwloc_attribute_pure;
+
+/** \brief Compare bitmaps \p bitmap1 and \p bitmap2 using their lowest index.
+ *
+ * Smaller least significant bit is smaller.
+ * The empty bitmap is considered higher than anything.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_compare_first(hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) __hwloc_attribute_pure;
+
+/** \brief Compare bitmaps \p bitmap1 and \p bitmap2 in lexicographic order.
+ *
+ * Lexicographic comparison of bitmaps, starting for their highest indexes.
+ * Compare last indexes first, then second, etc.
+ * The empty bitmap is considered lower than anything.
+ *
+ * \note This is different from the non-existing hwloc_bitmap_compare_last()
+ * which would only compare the highest index of each bitmap.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_compare(hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) __hwloc_attribute_pure;
+
+/** @} */
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_BITMAP_H */
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/cuda.h b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/cuda.h
new file mode 100644
index 0000000000..a5661b9706
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/cuda.h
@@ -0,0 +1,220 @@
+/*
+ * Copyright © 2010-2016 Inria.  All rights reserved.
+ * Copyright © 2010-2011 Université Bordeaux
+ * Copyright © 2011 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief Macros to help interaction between hwloc and the CUDA Driver API.
+ *
+ * Applications that use both hwloc and the CUDA Driver API may want to
+ * include this file so as to get topology information for CUDA devices.
+ *
+ */
+
+#ifndef HWLOC_CUDA_H
+#define HWLOC_CUDA_H
+
+#include <hwloc.h>
+#include <hwloc/autogen/config.h>
+#include <hwloc/helper.h>
+#ifdef HWLOC_LINUX_SYS
+#include <hwloc/linux.h>
+#endif
+
+#include <cuda.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \defgroup hwlocality_cuda Interoperability with the CUDA Driver API
+ *
+ * This interface offers ways to retrieve topology information about
+ * CUDA devices when using the CUDA Driver API.
+ *
+ * @{
+ */
+
+/** \brief Return the domain, bus and device IDs of the CUDA device \p cudevice.
+ *
+ * Device \p cudevice must match the local machine.
+ */
+static __hwloc_inline int
+hwloc_cuda_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused,
+			      CUdevice cudevice, int *domain, int *bus, int *dev)
+{
+  CUresult cres;
+
+#if CUDA_VERSION >= 4000
+  cres = cuDeviceGetAttribute(domain, CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, cudevice);
+  if (cres != CUDA_SUCCESS) {
+    errno = ENOSYS;
+    return -1;
+  }
+#else
+  *domain = 0;
+#endif
+  cres = cuDeviceGetAttribute(bus, CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, cudevice);
+  if (cres != CUDA_SUCCESS) {
+    errno = ENOSYS;
+    return -1;
+  }
+  cres = cuDeviceGetAttribute(dev, CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, cudevice);
+  if (cres != CUDA_SUCCESS) {
+    errno = ENOSYS;
+    return -1;
+  }
+
+  return 0;
+}
+
+/** \brief Get the CPU set of logical processors that are physically
+ * close to device \p cudevice.
+ *
+ * Return the CPU set describing the locality of the CUDA device \p cudevice.
+ *
+ * Topology \p topology and device \p cudevice must match the local machine.
+ * I/O devices detection and the CUDA component are not needed in the topology.
+ *
+ * The function only returns the locality of the device.
+ * If more information about the device is needed, OS objects should
+ * be used instead, see hwloc_cuda_get_device_osdev()
+ * and hwloc_cuda_get_device_osdev_by_index().
+ *
+ * This function is currently only implemented in a meaningful way for
+ * Linux; other systems will simply get a full cpuset.
+ */
+static __hwloc_inline int
+hwloc_cuda_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
+			     CUdevice cudevice, hwloc_cpuset_t set)
+{
+#ifdef HWLOC_LINUX_SYS
+  /* If we're on Linux, use the sysfs mechanism to get the local cpus */
+#define HWLOC_CUDA_DEVICE_SYSFS_PATH_MAX 128
+  char path[HWLOC_CUDA_DEVICE_SYSFS_PATH_MAX];
+  int domainid, busid, deviceid;
+
+  if (hwloc_cuda_get_device_pci_ids(topology, cudevice, &domainid, &busid, &deviceid))
+    return -1;
+
+  if (!hwloc_topology_is_thissystem(topology)) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  sprintf(path, "/sys/bus/pci/devices/%04x:%02x:%02x.0/local_cpus", domainid, busid, deviceid);
+  if (hwloc_linux_read_path_as_cpumask(path, set) < 0
+      || hwloc_bitmap_iszero(set))
+    hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
+#else
+  /* Non-Linux systems simply get a full cpuset */
+  hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
+#endif
+  return 0;
+}
+
+/** \brief Get the hwloc PCI device object corresponding to the
+ * CUDA device \p cudevice.
+ *
+ * Return the PCI device object describing the CUDA device \p cudevice.
+ * Return NULL if there is none.
+ *
+ * Topology \p topology and device \p cudevice must match the local machine.
+ * I/O devices detection must be enabled in topology \p topology.
+ * The CUDA component is not needed in the topology.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_cuda_get_device_pcidev(hwloc_topology_t topology, CUdevice cudevice)
+{
+  int domain, bus, dev;
+
+  if (hwloc_cuda_get_device_pci_ids(topology, cudevice, &domain, &bus, &dev))
+    return NULL;
+
+  return hwloc_get_pcidev_by_busid(topology, domain, bus, dev, 0);
+}
+
+/** \brief Get the hwloc OS device object corresponding to CUDA device \p cudevice.
+ *
+ * Return the hwloc OS device object that describes the given
+ * CUDA device \p cudevice. Return NULL if there is none.
+ *
+ * Topology \p topology and device \p cudevice must match the local machine.
+ * I/O devices detection and the NVML component must be enabled in the topology.
+ * If not, the locality of the object may still be found using
+ * hwloc_cuda_get_device_cpuset().
+ *
+ * \note This function cannot work if PCI devices are filtered out.
+ *
+ * \note The corresponding hwloc PCI device may be found by looking
+ * at the result parent pointer (unless PCI devices are filtered out).
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_cuda_get_device_osdev(hwloc_topology_t topology, CUdevice cudevice)
+{
+	hwloc_obj_t osdev = NULL;
+	int domain, bus, dev;
+
+	if (hwloc_cuda_get_device_pci_ids(topology, cudevice, &domain, &bus, &dev))
+		return NULL;
+
+	osdev = NULL;
+	while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
+		hwloc_obj_t pcidev = osdev->parent;
+		if (strncmp(osdev->name, "cuda", 4))
+			continue;
+		if (pcidev
+		    && pcidev->type == HWLOC_OBJ_PCI_DEVICE
+		    && (int) pcidev->attr->pcidev.domain == domain
+		    && (int) pcidev->attr->pcidev.bus == bus
+		    && (int) pcidev->attr->pcidev.dev == dev
+		    && pcidev->attr->pcidev.func == 0)
+			return osdev;
+		/* if PCI are filtered out, we need a info attr to match on */
+	}
+
+	return NULL;
+}
+
+/** \brief Get the hwloc OS device object corresponding to the
+ * CUDA device whose index is \p idx.
+ *
+ * Return the OS device object describing the CUDA device whose
+ * index is \p idx. Return NULL if there is none.
+ *
+ * The topology \p topology does not necessarily have to match the current
+ * machine. For instance the topology may be an XML import of a remote host.
+ * I/O devices detection and the CUDA component must be enabled in the topology.
+ *
+ * \note The corresponding PCI device object can be obtained by looking
+ * at the OS device parent object (unless PCI devices are filtered out).
+ *
+ * \note This function is identical to hwloc_cudart_get_device_osdev_by_index().
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_cuda_get_device_osdev_by_index(hwloc_topology_t topology, unsigned idx)
+{
+	hwloc_obj_t osdev = NULL;
+	while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
+		if (HWLOC_OBJ_OSDEV_COPROC == osdev->attr->osdev.type
+		    && osdev->name
+		    && !strncmp("cuda", osdev->name, 4)
+		    && atoi(osdev->name + 4) == (int) idx)
+			return osdev;
+	}
+	return NULL;
+}
+
+/** @} */
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_CUDA_H */
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/cudart.h b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/cudart.h
new file mode 100644
index 0000000000..63c7f59c63
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/cudart.h
@@ -0,0 +1,177 @@
+/*
+ * Copyright © 2010-2017 Inria.  All rights reserved.
+ * Copyright © 2010-2011 Université Bordeaux
+ * Copyright © 2011 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief Macros to help interaction between hwloc and the CUDA Runtime API.
+ *
+ * Applications that use both hwloc and the CUDA Runtime API may want to
+ * include this file so as to get topology information for CUDA devices.
+ *
+ */
+
+#ifndef HWLOC_CUDART_H
+#define HWLOC_CUDART_H
+
+#include <hwloc.h>
+#include <hwloc/autogen/config.h>
+#include <hwloc/helper.h>
+#ifdef HWLOC_LINUX_SYS
+#include <hwloc/linux.h>
+#endif
+
+#include <cuda.h> /* for CUDA_VERSION */
+#include <cuda_runtime_api.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \defgroup hwlocality_cudart Interoperability with the CUDA Runtime API
+ *
+ * This interface offers ways to retrieve topology information about
+ * CUDA devices when using the CUDA Runtime API.
+ *
+ * @{
+ */
+
+/** \brief Return the domain, bus and device IDs of the CUDA device whose index is \p idx.
+ *
+ * Device index \p idx must match the local machine.
+ */
+static __hwloc_inline int
+hwloc_cudart_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused,
+				int idx, int *domain, int *bus, int *dev)
+{
+  cudaError_t cerr;
+  struct cudaDeviceProp prop;
+
+  cerr = cudaGetDeviceProperties(&prop, idx);
+  if (cerr) {
+    errno = ENOSYS;
+    return -1;
+  }
+
+#if CUDA_VERSION >= 4000
+  *domain = prop.pciDomainID;
+#else
+  *domain = 0;
+#endif
+
+  *bus = prop.pciBusID;
+  *dev = prop.pciDeviceID;
+
+  return 0;
+}
+
+/** \brief Get the CPU set of logical processors that are physically
+ * close to device \p idx.
+ *
+ * Return the CPU set describing the locality of the CUDA device
+ * whose index is \p idx.
+ *
+ * Topology \p topology and device \p idx must match the local machine.
+ * I/O devices detection and the CUDA component are not needed in the topology.
+ *
+ * The function only returns the locality of the device.
+ * If more information about the device is needed, OS objects should
+ * be used instead, see hwloc_cudart_get_device_osdev_by_index().
+ *
+ * This function is currently only implemented in a meaningful way for
+ * Linux; other systems will simply get a full cpuset.
+ */
+static __hwloc_inline int
+hwloc_cudart_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
+			       int idx, hwloc_cpuset_t set)
+{
+#ifdef HWLOC_LINUX_SYS
+  /* If we're on Linux, use the sysfs mechanism to get the local cpus */
+#define HWLOC_CUDART_DEVICE_SYSFS_PATH_MAX 128
+  char path[HWLOC_CUDART_DEVICE_SYSFS_PATH_MAX];
+  int domain, bus, dev;
+
+  if (hwloc_cudart_get_device_pci_ids(topology, idx, &domain, &bus, &dev))
+    return -1;
+
+  if (!hwloc_topology_is_thissystem(topology)) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  sprintf(path, "/sys/bus/pci/devices/%04x:%02x:%02x.0/local_cpus", (unsigned) domain, (unsigned) bus, (unsigned) dev);
+  if (hwloc_linux_read_path_as_cpumask(path, set) < 0
+      || hwloc_bitmap_iszero(set))
+    hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
+#else
+  /* Non-Linux systems simply get a full cpuset */
+  hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
+#endif
+  return 0;
+}
+
+/** \brief Get the hwloc PCI device object corresponding to the
+ * CUDA device whose index is \p idx.
+ *
+ * Return the PCI device object describing the CUDA device whose
+ * index is \p idx. Return NULL if there is none.
+ *
+ * Topology \p topology and device \p idx must match the local machine.
+ * I/O devices detection must be enabled in topology \p topology.
+ * The CUDA component is not needed in the topology.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_cudart_get_device_pcidev(hwloc_topology_t topology, int idx)
+{
+  int domain, bus, dev;
+
+  if (hwloc_cudart_get_device_pci_ids(topology, idx, &domain, &bus, &dev))
+    return NULL;
+
+  return hwloc_get_pcidev_by_busid(topology, domain, bus, dev, 0);
+}
+
+/** \brief Get the hwloc OS device object corresponding to the
+ * CUDA device whose index is \p idx.
+ *
+ * Return the OS device object describing the CUDA device whose
+ * index is \p idx. Return NULL if there is none.
+ *
+ * The topology \p topology does not necessarily have to match the current
+ * machine. For instance the topology may be an XML import of a remote host.
+ * I/O devices detection and the CUDA component must be enabled in the topology.
+ * If not, the locality of the object may still be found using
+ * hwloc_cudart_get_device_cpuset().
+ *
+ * \note The corresponding PCI device object can be obtained by looking
+ * at the OS device parent object (unless PCI devices are filtered out).
+ *
+ * \note This function is identical to hwloc_cuda_get_device_osdev_by_index().
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_cudart_get_device_osdev_by_index(hwloc_topology_t topology, unsigned idx)
+{
+	hwloc_obj_t osdev = NULL;
+	while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
+		if (HWLOC_OBJ_OSDEV_COPROC == osdev->attr->osdev.type
+		    && osdev->name
+		    && !strncmp("cuda", osdev->name, 4)
+		    && atoi(osdev->name + 4) == (int) idx)
+			return osdev;
+	}
+	return NULL;
+}
+
+/** @} */
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_CUDART_H */
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/deprecated.h b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/deprecated.h
new file mode 100644
index 0000000000..69b16b6617
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/deprecated.h
@@ -0,0 +1,216 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2016 Inria.  All rights reserved.
+ * Copyright © 2009-2012 Université Bordeaux
+ * Copyright © 2009-2010 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/**
+ * This file contains the inline code of functions declared in hwloc.h
+ */
+
+#ifndef HWLOC_DEPRECATED_H
+#define HWLOC_DEPRECATED_H
+
+#ifndef HWLOC_H
+#error Please include the main hwloc.h instead
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* backward compat with v1.10 before Socket->Package renaming */
+#define HWLOC_OBJ_SOCKET HWLOC_OBJ_PACKAGE
+/* backward compat with v1.10 before Node->NUMANode clarification */
+#define HWLOC_OBJ_NODE HWLOC_OBJ_NUMANODE
+
+/** \brief Insert a misc object by parent.
+ *
+ * Identical to hwloc_topology_insert_misc_object().
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_topology_insert_misc_object_by_parent(hwloc_topology_t topology, hwloc_obj_t parent, const char *name) __hwloc_attribute_deprecated;
+static __hwloc_inline hwloc_obj_t
+hwloc_topology_insert_misc_object_by_parent(hwloc_topology_t topology, hwloc_obj_t parent, const char *name)
+{
+  return hwloc_topology_insert_misc_object(topology, parent, name);
+}
+
+/** \brief Stringify the cpuset containing a set of objects.
+ *
+ * If \p size is 0, \p string may safely be \c NULL.
+ *
+ * \return the number of characters that were actually written if not truncating,
+ * or that would have been written (not including the ending \\0).
+ */
+static __hwloc_inline int
+hwloc_obj_cpuset_snprintf(char *str, size_t size, size_t nobj, struct hwloc_obj * const *objs) __hwloc_attribute_deprecated;
+static __hwloc_inline int
+hwloc_obj_cpuset_snprintf(char *str, size_t size, size_t nobj, struct hwloc_obj * const *objs)
+{
+  hwloc_bitmap_t set = hwloc_bitmap_alloc();
+  int res;
+  unsigned i;
+
+  hwloc_bitmap_zero(set);
+  for(i=0; i<nobj; i++)
+    if (objs[i]->cpuset)
+      hwloc_bitmap_or(set, set, objs[i]->cpuset);
+
+  res = hwloc_bitmap_snprintf(str, size, set);
+  hwloc_bitmap_free(set);
+  return res;
+}
+
+/** \brief Return a stringified topology object type.
+ *
+ * Deprecated by the identical hwloc_type_name()
+ */
+static __hwloc_inline const char *
+hwloc_obj_type_string (hwloc_obj_type_t type) __hwloc_attribute_const; /* not deprecated in early 2.x releases because widely used and prototype unchanged */
+static __hwloc_inline const char *
+hwloc_obj_type_string (hwloc_obj_type_t type)
+{
+  return hwloc_type_name(type);
+}
+
+/** \brief Convert a type string into a type and some attributes.
+ *
+ * Deprecated by hwloc_type_sscanf()
+ */
+static __hwloc_inline int
+hwloc_obj_type_sscanf(const char *string, hwloc_obj_type_t *typep, int *depthattrp, void *typeattrp, size_t typeattrsize) __hwloc_attribute_deprecated;
+static __hwloc_inline int
+hwloc_obj_type_sscanf(const char *string, hwloc_obj_type_t *typep, int *depthattrp, void *typeattrp, size_t typeattrsize)
+{
+  union hwloc_obj_attr_u attr;
+  int err = hwloc_type_sscanf(string, typep, &attr, sizeof(attr));
+  if (err < 0)
+    return err;
+  if (hwloc_obj_type_is_cache(*typep)) {
+    if (depthattrp)
+      *depthattrp = attr.cache.depth;
+    if (typeattrp && typeattrsize >= sizeof(hwloc_obj_cache_type_t))
+      memcpy(typeattrp, &attr.cache.type, sizeof(hwloc_obj_cache_type_t));
+  } else if (*typep == HWLOC_OBJ_GROUP) {
+    if (depthattrp)
+      *depthattrp = attr.group.depth;
+  }
+  return 0;
+}
+
+/** \brief Set the default memory binding policy of the current
+ * process or thread to prefer the NUMA node(s) specified by physical \p nodeset
+ */
+static __hwloc_inline int
+hwloc_set_membind_nodeset(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_deprecated;
+static __hwloc_inline int
+hwloc_set_membind_nodeset(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
+{
+  return hwloc_set_membind(topology, nodeset, policy, flags | HWLOC_MEMBIND_BYNODESET);
+}
+
+/** \brief Query the default memory binding policy and physical locality of the
+ * current process or thread.
+ */
+static __hwloc_inline int
+hwloc_get_membind_nodeset(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags) __hwloc_attribute_deprecated;
+static __hwloc_inline int
+hwloc_get_membind_nodeset(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags)
+{
+  return hwloc_get_membind(topology, nodeset, policy, flags | HWLOC_MEMBIND_BYNODESET);
+}
+
+/** \brief Set the default memory binding policy of the specified
+ * process to prefer the NUMA node(s) specified by physical \p nodeset
+ */
+static __hwloc_inline int
+hwloc_set_proc_membind_nodeset(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_deprecated;
+static __hwloc_inline int
+hwloc_set_proc_membind_nodeset(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
+{
+  return hwloc_set_proc_membind(topology, pid, nodeset, policy, flags | HWLOC_MEMBIND_BYNODESET);
+}
+
+/** \brief Query the default memory binding policy and physical locality of the
+ * specified process.
+ */
+static __hwloc_inline int
+hwloc_get_proc_membind_nodeset(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags) __hwloc_attribute_deprecated;
+static __hwloc_inline int
+hwloc_get_proc_membind_nodeset(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags)
+{
+  return hwloc_get_proc_membind(topology, pid, nodeset, policy, flags | HWLOC_MEMBIND_BYNODESET);
+}
+
+/** \brief Bind the already-allocated memory identified by (addr, len)
+ * to the NUMA node(s) in physical \p nodeset.
+ */
+static __hwloc_inline int
+hwloc_set_area_membind_nodeset(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_deprecated;
+static __hwloc_inline int
+hwloc_set_area_membind_nodeset(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
+{
+  return hwloc_set_area_membind(topology, addr, len, nodeset, policy, flags | HWLOC_MEMBIND_BYNODESET);
+}
+
+/** \brief Query the physical NUMA node(s) and binding policy of the memory
+ * identified by (\p addr, \p len ).
+ */
+static __hwloc_inline int
+hwloc_get_area_membind_nodeset(hwloc_topology_t topology, const void *addr, size_t len, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags) __hwloc_attribute_deprecated;
+static __hwloc_inline int
+hwloc_get_area_membind_nodeset(hwloc_topology_t topology, const void *addr, size_t len, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags)
+{
+  return hwloc_get_area_membind(topology, addr, len, nodeset, policy, flags | HWLOC_MEMBIND_BYNODESET);
+}
+
+/** \brief Allocate some memory on the given physical nodeset \p nodeset
+ */
+static __hwloc_inline void *
+hwloc_alloc_membind_nodeset(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_malloc __hwloc_attribute_deprecated;
+static __hwloc_inline void *
+hwloc_alloc_membind_nodeset(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
+{
+  return hwloc_alloc_membind(topology, len, nodeset, policy, flags | HWLOC_MEMBIND_BYNODESET);
+}
+
+/** \brief Allocate some memory on the given nodeset \p nodeset.
+ */
+static __hwloc_inline void *
+hwloc_alloc_membind_policy_nodeset(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_malloc __hwloc_attribute_deprecated;
+static __hwloc_inline void *
+hwloc_alloc_membind_policy_nodeset(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
+{
+  return hwloc_alloc_membind_policy(topology, len, nodeset, policy, flags | HWLOC_MEMBIND_BYNODESET);
+}
+
+/** \brief Convert a CPU set into a NUMA node set and handle non-NUMA cases
+ */
+static __hwloc_inline void
+hwloc_cpuset_to_nodeset_strict(hwloc_topology_t topology, hwloc_const_cpuset_t _cpuset, hwloc_nodeset_t nodeset) __hwloc_attribute_deprecated;
+static __hwloc_inline void
+hwloc_cpuset_to_nodeset_strict(hwloc_topology_t topology, hwloc_const_cpuset_t _cpuset, hwloc_nodeset_t nodeset)
+{
+  hwloc_cpuset_to_nodeset(topology, _cpuset, nodeset);
+}
+
+/** \brief Convert a NUMA node set into a CPU set and handle non-NUMA cases
+ */
+static __hwloc_inline void
+hwloc_cpuset_from_nodeset_strict(hwloc_topology_t topology, hwloc_cpuset_t _cpuset, hwloc_const_nodeset_t nodeset) __hwloc_attribute_deprecated;
+static __hwloc_inline void
+hwloc_cpuset_from_nodeset_strict(hwloc_topology_t topology, hwloc_cpuset_t _cpuset, hwloc_const_nodeset_t nodeset)
+{
+  hwloc_cpuset_from_nodeset(topology, _cpuset, nodeset);
+}
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_DEPRECATED_H */
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/diff.h b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/diff.h
new file mode 100644
index 0000000000..98f791e3f3
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/diff.h
@@ -0,0 +1,284 @@
+/*
+ * Copyright © 2013-2016 Inria.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief Topology differences.
+ */
+
+#ifndef HWLOC_DIFF_H
+#define HWLOC_DIFF_H
+
+#ifndef HWLOC_H
+#error Please include the main hwloc.h instead
+#endif
+
+
+#ifdef __cplusplus
+extern "C" {
+#elif 0
+}
+#endif
+
+
+/** \defgroup hwlocality_diff Topology differences
+ *
+ * Applications that manipulate many similar topologies, for instance
+ * one for each node of a homogeneous cluster, may want to compress
+ * topologies to reduce the memory footprint.
+ *
+ * This file offers a way to manipulate the difference between topologies
+ * and export/import it to/from XML.
+ * Compression may therefore be achieved by storing one topology
+ * entirely while the others are only described by their differences
+ * with the former.
+ * The actual topology can be reconstructed when actually needed by
+ * applying the precomputed difference to the reference topology.
+ *
+ * This interface targets very similar nodes.
+ * Only very simple differences between topologies are actually
+ * supported, for instance a change in the memory size, the name
+ * of the object, or some info attribute.
+ * More complex differences such as adding or removing objects cannot
+ * be represented in the difference structures and therefore return
+ * errors.
+ *
+ * It means that there is no need to apply the difference when
+ * looking at the tree organization (how many levels, how many
+ * objects per level, what kind of objects, CPU and node sets, etc)
+ * and when binding to objects.
+ * However the difference must be applied when looking at object
+ * attributes such as the name, the memory size or info attributes.
+ *
+ * @{
+ */
+
+
+/** \brief Type of one object attribute difference.
+ */
+typedef enum hwloc_topology_diff_obj_attr_type_e {
+  /** \brief The object local memory is modified.
+   * The union is a hwloc_topology_diff_obj_attr_u::hwloc_topology_diff_obj_attr_uint64_s
+   * (and the index field is ignored).
+   */
+  HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_SIZE,
+
+  /** \brief The object name is modified.
+   * The union is a hwloc_topology_diff_obj_attr_u::hwloc_topology_diff_obj_attr_string_s
+   * (and the name field is ignored).
+   */
+
+  HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_NAME,
+  /** \brief the value of an info attribute is modified.
+   * The union is a hwloc_topology_diff_obj_attr_u::hwloc_topology_diff_obj_attr_string_s.
+   */
+  HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO
+} hwloc_topology_diff_obj_attr_type_t;
+
+/** \brief One object attribute difference.
+ */
+union hwloc_topology_diff_obj_attr_u {
+  struct hwloc_topology_diff_obj_attr_generic_s {
+    /* each part of the union must start with these */
+    hwloc_topology_diff_obj_attr_type_t type;
+  } generic;
+
+  /** \brief Integer attribute modification with an optional index. */
+  struct hwloc_topology_diff_obj_attr_uint64_s {
+    /* used for storing integer attributes */
+    hwloc_topology_diff_obj_attr_type_t type;
+    hwloc_uint64_t index; /* not used for SIZE */
+    hwloc_uint64_t oldvalue;
+    hwloc_uint64_t newvalue;
+  } uint64;
+
+  /** \brief String attribute modification with an optional name */
+  struct hwloc_topology_diff_obj_attr_string_s {
+    /* used for storing name and info pairs */
+    hwloc_topology_diff_obj_attr_type_t type;
+    char *name; /* not used for NAME */
+    char *oldvalue;
+    char *newvalue;
+  } string;
+};
+
+
+/** \brief Type of one element of a difference list.
+ */
+typedef enum hwloc_topology_diff_type_e {
+  /** \brief An object attribute was changed.
+   * The union is a hwloc_topology_diff_obj_attr_u::hwloc_topology_diff_obj_attr_s.
+   */
+  HWLOC_TOPOLOGY_DIFF_OBJ_ATTR,
+
+  /** \brief The difference is too complex,
+   * it cannot be represented. The difference below
+   * this object has not been checked.
+   * hwloc_topology_diff_build() will return 1.
+   *
+   * The union is a hwloc_topology_diff_obj_attr_u::hwloc_topology_diff_too_complex_s.
+   */
+  HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX
+} hwloc_topology_diff_type_t;
+
+/** \brief One element of a difference list between two topologies.
+ */
+typedef union hwloc_topology_diff_u {
+  struct hwloc_topology_diff_generic_s {
+    /* each part of the union must start with these */
+    hwloc_topology_diff_type_t type;
+    union hwloc_topology_diff_u * next; /* pointer to the next element of the list, or NULL */
+  } generic;
+
+  /* A difference in an object attribute. */
+  struct hwloc_topology_diff_obj_attr_s {
+    hwloc_topology_diff_type_t type; /* must be ::HWLOC_TOPOLOGY_DIFF_OBJ_ATTR */
+    union hwloc_topology_diff_u * next;
+    /* List of attribute differences for a single object */
+    unsigned obj_depth;
+    unsigned obj_index;
+    union hwloc_topology_diff_obj_attr_u diff;
+  } obj_attr;
+
+  /* A difference that is too complex. */
+  struct hwloc_topology_diff_too_complex_s {
+    hwloc_topology_diff_type_t type; /* must be ::HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX */
+    union hwloc_topology_diff_u * next;
+    /* Where we had to stop computing the diff in the first topology */
+    unsigned obj_depth;
+    unsigned obj_index;
+  } too_complex;
+} * hwloc_topology_diff_t;
+
+
+/** \brief Compute the difference between 2 topologies.
+ *
+ * The difference is stored as a list of ::hwloc_topology_diff_t entries
+ * starting at \p diff.
+ * It is computed by doing a depth-first traversal of both topology trees
+ * simultaneously.
+ *
+ * If the difference between 2 objects is too complex to be represented
+ * (for instance if some objects have different types, or different numbers
+ * of children), a special diff entry of type ::HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX
+ * is queued.
+ * The computation of the diff does not continue below these objects.
+ * So each such diff entry means that the difference between two subtrees
+ * could not be computed.
+ *
+ * \return 0 if the difference can be represented properly.
+ *
+ * \return 0 with \p diff pointing to NULL if there is no difference
+ * between the topologies.
+ *
+ * \return 1 if the difference is too complex (see above). Some entries in
+ * the list will be of type ::HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX.
+ *
+ * \return -1 on any other error.
+ *
+ * \note \p flags is currently not used. It should be 0.
+ *
+ * \note The output diff has to be freed with hwloc_topology_diff_destroy().
+ *
+ * \note The output diff can only be exported to XML or passed to
+ * hwloc_topology_diff_apply() if 0 was returned, i.e. if no entry of type
+ * ::HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX is listed.
+ *
+ * \note The output diff may be modified by removing some entries from
+ * the list. The removed entries should be freed by passing them to
+ * hwloc_topology_diff_destroy() (possible as another list).
+*/
+HWLOC_DECLSPEC int hwloc_topology_diff_build(hwloc_topology_t topology, hwloc_topology_t newtopology, unsigned long flags, hwloc_topology_diff_t *diff);
+
+/** \brief Flags to be given to hwloc_topology_diff_apply().
+ */
+enum hwloc_topology_diff_apply_flags_e {
+  /** \brief Apply topology diff in reverse direction.
+   * \hideinitializer
+   */
+  HWLOC_TOPOLOGY_DIFF_APPLY_REVERSE = (1UL<<0)
+};
+
+/** \brief Apply a topology diff to an existing topology.
+ *
+ * \p flags is an OR'ed set of ::hwloc_topology_diff_apply_flags_e.
+ *
+ * The new topology is modified in place. hwloc_topology_dup()
+ * may be used to duplicate it before patching.
+ *
+ * If the difference cannot be applied entirely, all previous applied
+ * elements are unapplied before returning.
+ *
+ * \return 0 on success.
+ *
+ * \return -N if applying the difference failed while trying
+ * to apply the N-th part of the difference. For instance -1
+ * is returned if the very first difference element could not
+ * be applied.
+ */
+HWLOC_DECLSPEC int hwloc_topology_diff_apply(hwloc_topology_t topology, hwloc_topology_diff_t diff, unsigned long flags);
+
+/** \brief Destroy a list of topology differences.
+ */
+HWLOC_DECLSPEC int hwloc_topology_diff_destroy(hwloc_topology_diff_t diff);
+
+/** \brief Load a list of topology differences from a XML file.
+ *
+ * If not \c NULL, \p refname will be filled with the identifier
+ * string of the reference topology for the difference file,
+ * if any was specified in the XML file.
+ * This identifier is usually the name of the other XML file
+ * that contains the reference topology.
+ *
+ * \note the pointer returned in refname should later be freed
+ * by the caller.
+ */
+HWLOC_DECLSPEC int hwloc_topology_diff_load_xml(const char *xmlpath, hwloc_topology_diff_t *diff, char **refname);
+
+/** \brief Export a list of topology differences to a XML file.
+ *
+ * If not \c NULL, \p refname defines an identifier string
+ * for the reference topology which was used as a base when
+ * computing this difference.
+ * This identifier is usually the name of the other XML file
+ * that contains the reference topology.
+ * This attribute is given back when reading the diff from XML.
+ */
+HWLOC_DECLSPEC int hwloc_topology_diff_export_xml(hwloc_topology_diff_t diff, const char *refname, const char *xmlpath);
+
+/** \brief Load a list of topology differences from a XML buffer.
+ *
+ * If not \c NULL, \p refname will be filled with the identifier
+ * string of the reference topology for the difference file,
+ * if any was specified in the XML file.
+ * This identifier is usually the name of the other XML file
+ * that contains the reference topology.
+ *
+ * \note the pointer returned in refname should later be freed
+ * by the caller.
+ */
+HWLOC_DECLSPEC int hwloc_topology_diff_load_xmlbuffer(const char *xmlbuffer, int buflen, hwloc_topology_diff_t *diff, char **refname);
+
+/** \brief Export a list of topology differences to a XML buffer.
+ *
+ * If not \c NULL, \p refname defines an identifier string
+ * for the reference topology which was used as a base when
+ * computing this difference.
+ * This identifier is usually the name of the other XML file
+ * that contains the reference topology.
+ * This attribute is given back when reading the diff from XML.
+ *
+ * \note The XML buffer should later be freed with hwloc_free_xmlbuffer().
+ */
+HWLOC_DECLSPEC int hwloc_topology_diff_export_xmlbuffer(hwloc_topology_diff_t diff, const char *refname, char **xmlbuffer, int *buflen);
+
+/** @} */
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_DIFF_H */
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/distances.h b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/distances.h
new file mode 100644
index 0000000000..bcda3a2e67
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/distances.h
@@ -0,0 +1,223 @@
+/*
+ * Copyright © 2010-2017 Inria.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief Object distances.
+ */
+
+#ifndef HWLOC_DISTANCES_H
+#define HWLOC_DISTANCES_H
+
+#ifndef HWLOC_H
+#error Please include the main hwloc.h instead
+#endif
+
+
+#ifdef __cplusplus
+extern "C" {
+#elif 0
+}
+#endif
+
+
+/** \defgroup hwlocality_distances_get Retrieve distances between objects
+ * @{
+ */
+
+/** \brief Matrix of distances between a set of objects.
+ *
+ * This matrix often contains latencies between NUMA nodes
+ * (as reported in the System Locality Distance Information Table (SLIT)
+ * in the ACPI specification), which may or may not be physically accurate.
+ * It corresponds to the latency for accessing the memory of one node
+ * from a core in another node.
+ * The corresponding kind is ::HWLOC_DISTANCES_KIND_FROM_OS | ::HWLOC_DISTANCES_KIND_FROM_USER.
+ *
+ * The matrix may also contain bandwidths between random sets of objects,
+ * possibly provided by the user, as specified in the \p kind attribute.
+ */
+struct hwloc_distances_s {
+  unsigned nbobjs;		/**< \brief Number of objects described by the distance matrix. */
+  hwloc_obj_t *objs;		/**< \brief Array of objects described by the distance matrix. */
+  unsigned long kind;		/**< \brief OR'ed set of ::hwloc_distances_kind_e. */
+  hwloc_uint64_t *values;	/**< \brief Matrix of distances between objects, stored as a one-dimension array.
+				 *
+				 * Distance from i-th to j-th object is stored in slot i*nbobjs+j.
+				 * The meaning of the value depends on the \p kind attribute.
+				 */
+};
+
+/** \brief Kinds of distance matrices.
+ *
+ * The \p kind attribute of struct hwloc_distances_s is a OR'ed set
+ * of kinds.
+ *
+ * A kind of format HWLOC_DISTANCES_KIND_FROM_* specifies where the
+ * distance information comes from, if known.
+ *
+ * A kind of format HWLOC_DISTANCES_KIND_MEANS_* specifies whether
+ * values are latencies or bandwidths, if applicable.
+ */
+enum hwloc_distances_kind_e {
+  /** \brief These distances were obtained from the operating system or hardware.
+   * \hideinitializer
+   */
+  HWLOC_DISTANCES_KIND_FROM_OS = (1UL<<0),
+  /** \brief These distances were provided by the user.
+   * \hideinitializer
+   */
+  HWLOC_DISTANCES_KIND_FROM_USER = (1UL<<1),
+
+  /** \brief Distance values are similar to latencies between objects.
+   * Values are smaller for closer objects, hence minimal on the diagonal
+   * of the matrix (distance between an object and itself).
+   * It could also be the number of network hops between objects, etc.
+   * \hideinitializer
+   */
+  HWLOC_DISTANCES_KIND_MEANS_LATENCY = (1UL<<2),
+  /** \brief Distance values are similar to bandwidths between objects.
+   * Values are higher for closer objects, hence maximal on the diagonal
+   * of the matrix (distance between an object and itself).
+   * Such values are currently ignored for distance-based grouping.
+   * \hideinitializer
+   */
+  HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH = (1UL<<3)
+};
+
+/** \brief Retrieve distance matrices.
+ *
+ * Retrieve distance matrices from the topology into the \p distances array.
+ *
+ * \p flags is currently unused, should be \c 0.
+ *
+ * \p kind serves as a filter. If \c 0, all distance matrices are returned.
+ * If it contains some HWLOC_DISTANCES_KIND_FROM_*, only distances whose kind
+ * matches one of these are returned.
+ * If it contains some HWLOC_DISTANCES_KIND_MEANS_*, only distances whose kind
+ * matches one of these are returned.
+ *
+ * On input, \p nr points to the number of distances that may be stored in \p distances.
+ * On output, \p nr points to the number of distances that were actually found,
+ * even if some of them couldn't be stored in \p distances.
+ * Distances that couldn't be stored are ignored, but the function still returns
+ * success (\c 0). The caller may find out by comparing the value pointed by \p nr
+ * before and after the function call.
+ *
+ * Each distance structure returned in the \p distances array should be released
+ * by the caller using hwloc_distances_release().
+ */
+HWLOC_DECLSPEC int
+hwloc_distances_get(hwloc_topology_t topology,
+		    unsigned *nr, struct hwloc_distances_s **distances,
+		    unsigned long kind, unsigned long flags);
+
+/** \brief Retrieve distance matrices for object at a specific depth in the topology.
+ *
+ * Identical to hwloc_distances_get() with the additional \p depth filter.
+ */
+HWLOC_DECLSPEC int
+hwloc_distances_get_by_depth(hwloc_topology_t topology, unsigned depth,
+			     unsigned *nr, struct hwloc_distances_s **distances,
+			     unsigned long kind, unsigned long flags);
+
+/** \brief Retrieve distance matrices for object of a specific type.
+ *
+ * Identical to hwloc_distances_get() with the additional \p type filter.
+ */
+static __hwloc_inline int
+hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type,
+			    unsigned *nr, struct hwloc_distances_s **distances,
+			    unsigned long kind, unsigned long flags)
+{
+  int depth = hwloc_get_type_depth(topology, type);
+  if (depth < 0) {
+    *nr = 0;
+    return 0;
+  }
+  return hwloc_distances_get_by_depth(topology, depth, nr, distances, kind, flags);
+}
+
+/** \brief Release a distance structure previously returned by hwloc_distances_get(). */
+HWLOC_DECLSPEC void
+hwloc_distances_release(hwloc_topology_t topology, struct hwloc_distances_s *distances);
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_distances_add Add or remove distances between objects
+ * @{
+ */
+
+/** \brief Flags for adding a new distances to a topology. */
+enum hwloc_distances_flag_e {
+  /** \brief Try to group objects based on the newly provided distance information.
+   * \hideinitializer
+   */
+  HWLOC_DISTANCES_FLAG_GROUP = (1UL<<0),
+  /** \brief If grouping, consider the distance values as inaccurate and relax the
+   * comparisons during the grouping algorithms. The actual accuracy may be modified
+   * through the HWLOC_GROUPING_ACCURACY environment variable (see \ref envvar).
+   * \hideinitializer
+   */
+  HWLOC_DISTANCES_FLAG_GROUP_INACCURATE = (1UL<<1)
+};
+
+/** \brief Provide a distance matrix.
+ *
+ * Provide the matrix of distances between a set of objects given by \p nbobjs
+ * and the \p objs array. \p nbobjs must be at least 2.
+ * The distances are stored as a one-dimension array in \p values.
+ * The distance from object i to object j is in slot i*nbobjs+j.
+ *
+ * \p kind specifies the kind of distance as a OR'ed set of ::hwloc_distances_kind_e.
+ *
+ * \p flags configures the behavior of the function using an optional OR'ed set of
+ * ::hwloc_distances_flag_e.
+ *
+ * Objects must be of the same type. They cannot be of type Group.
+ */
+HWLOC_DECLSPEC int hwloc_distances_add(hwloc_topology_t topology,
+				       unsigned nbobjs, hwloc_obj_t *objs, hwloc_uint64_t *values,
+				       unsigned long kind, unsigned long flags);
+
+/** \brief Remove all distance matrices from a topology.
+ *
+ * Remove all distance matrices, either provided by the user or
+ * gathered through the OS.
+ *
+ * If these distances were used to group objects, these additional
+ * Group objects are not removed from the topology.
+ */
+HWLOC_DECLSPEC int hwloc_distances_remove(hwloc_topology_t topology);
+
+/** \brief Remove distance matrices for objects at a specific depth in the topology.
+ *
+ * Identical to hwloc_distances_remove() but only applies to one level of the topology.
+ */
+HWLOC_DECLSPEC int hwloc_distances_remove_by_depth(hwloc_topology_t topology, unsigned depth);
+
+/** \brief Remove distance matrices for objects of a specific type in the topology.
+ *
+ * Identical to hwloc_distances_remove() but only applies to one level of the topology.
+ */
+static __hwloc_inline int
+hwloc_distances_remove_by_type(hwloc_topology_t topology, hwloc_obj_type_t type)
+{
+  int depth = hwloc_get_type_depth(topology, type);
+  if (depth < 0)
+    return 0;
+  return hwloc_distances_remove_by_depth(topology, depth);
+}
+
+/** @} */
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_DISTANCES_H */
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/export.h b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/export.h
new file mode 100644
index 0000000000..3b6a951dcd
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/export.h
@@ -0,0 +1,236 @@
+/*
+ * Copyright © 2009-2016 Inria.  All rights reserved.
+ * Copyright © 2009-2012 Université Bordeaux
+ * Copyright © 2009-2011 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief Exporting Topologies to XML or to Synthetic strings.
+ */
+
+#ifndef HWLOC_EXPORT_H
+#define HWLOC_EXPORT_H
+
+#ifndef HWLOC_H
+#error Please include the main hwloc.h instead
+#endif
+
+
+#ifdef __cplusplus
+extern "C" {
+#elif 0
+}
+#endif
+
+
+/** \defgroup hwlocality_xmlexport Exporting Topologies to XML
+ * @{
+ */
+
+/** \brief Flags for exporting XML topologies.
+ *
+ * Flags to be given as a OR'ed set to hwloc_topology_export_xml().
+ */
+enum hwloc_topology_export_xml_flags_e {
+ /** \brief Export XML that is loadable by hwloc v1.x.
+  * \hideinitializer
+  */
+ HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1 = (1UL<<0)
+};
+
+/** \brief Export the topology into an XML file.
+ *
+ * This file may be loaded later through hwloc_topology_set_xml().
+ *
+ * \p flags is a OR'ed set of ::hwloc_topology_export_xml_flags_e.
+ *
+ * \return -1 if a failure occurred.
+ *
+ * \note See also hwloc_topology_set_userdata_export_callback()
+ * for exporting application-specific object userdata.
+ *
+ * \note The topology-specific userdata pointer is ignored when exporting to XML.
+ *
+ * \note Only printable characters may be exported to XML string attributes.
+ * Any other character, especially any non-ASCII character, will be silently
+ * dropped.
+ *
+ * \note If \p name is "-", the XML output is sent to the standard output.
+ */
+HWLOC_DECLSPEC int hwloc_topology_export_xml(hwloc_topology_t topology, const char *xmlpath, unsigned long flags);
+
+/** \brief Export the topology into a newly-allocated XML memory buffer.
+ *
+ * \p xmlbuffer is allocated by the callee and should be freed with
+ * hwloc_free_xmlbuffer() later in the caller.
+ *
+ * This memory buffer may be loaded later through hwloc_topology_set_xmlbuffer().
+ *
+ * \p flags is a OR'ed set of ::hwloc_topology_export_xml_flags_e.
+ *
+ * \return -1 if a failure occurred.
+ *
+ * \note See also hwloc_topology_set_userdata_export_callback()
+ * for exporting application-specific object userdata.
+ *
+ * \note The topology-specific userdata pointer is ignored when exporting to XML.
+ *
+ * \note Only printable characters may be exported to XML string attributes.
+ * Any other character, especially any non-ASCII character, will be silently
+ * dropped.
+ */
+HWLOC_DECLSPEC int hwloc_topology_export_xmlbuffer(hwloc_topology_t topology, char **xmlbuffer, int *buflen, unsigned long flags);
+
+/** \brief Free a buffer allocated by hwloc_topology_export_xmlbuffer() */
+HWLOC_DECLSPEC void hwloc_free_xmlbuffer(hwloc_topology_t topology, char *xmlbuffer);
+
+/** \brief Set the application-specific callback for exporting object userdata
+ *
+ * The object userdata pointer is not exported to XML by default because hwloc
+ * does not know what it contains.
+ *
+ * This function lets applications set \p export_cb to a callback function
+ * that converts this opaque userdata into an exportable string.
+ *
+ * \p export_cb is invoked during XML export for each object whose
+ * \p userdata pointer is not \c NULL.
+ * The callback should use hwloc_export_obj_userdata() or
+ * hwloc_export_obj_userdata_base64() to actually export
+ * something to XML (possibly multiple times per object).
+ *
+ * \p export_cb may be set to \c NULL if userdata should not be exported to XML.
+ *
+ * \note The topology-specific userdata pointer is ignored when exporting to XML.
+ */
+HWLOC_DECLSPEC void hwloc_topology_set_userdata_export_callback(hwloc_topology_t topology,
+								void (*export_cb)(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj));
+
+/** \brief Export some object userdata to XML
+ *
+ * This function may only be called from within the export() callback passed
+ * to hwloc_topology_set_userdata_export_callback().
+ * It may be invoked one or multiple times to export some userdata to XML.
+ * The \p buffer content of length \p length is stored with optional name
+ * \p name.
+ *
+ * When importing this XML file, the import() callback (if set) will be
+ * called exactly as many times as hwloc_export_obj_userdata() was called
+ * during export(). It will receive the corresponding \p name, \p buffer
+ * and \p length arguments.
+ *
+ * \p reserved, \p topology and \p obj must be the first three parameters
+ * that were given to the export callback.
+ *
+ * Only printable characters may be exported to XML string attributes.
+ * If a non-printable character is passed in \p name or \p buffer,
+ * the function returns -1 with errno set to EINVAL.
+ *
+ * If exporting binary data, the application should first encode into
+ * printable characters only (or use hwloc_export_obj_userdata_base64()).
+ * It should also take care of portability issues if the export may
+ * be reimported on a different architecture.
+ */
+HWLOC_DECLSPEC int hwloc_export_obj_userdata(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length);
+
+/** \brief Encode and export some object userdata to XML
+ *
+ * This function is similar to hwloc_export_obj_userdata() but it encodes
+ * the input buffer into printable characters before exporting.
+ * On import, decoding is automatically performed before the data is given
+ * to the import() callback if any.
+ *
+ * This function may only be called from within the export() callback passed
+ * to hwloc_topology_set_userdata_export_callback().
+ *
+ * The function does not take care of portability issues if the export
+ * may be reimported on a different architecture.
+ */
+HWLOC_DECLSPEC int hwloc_export_obj_userdata_base64(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length);
+
+/** \brief Set the application-specific callback for importing userdata
+ *
+ * On XML import, userdata is ignored by default because hwloc does not know
+ * how to store it in memory.
+ *
+ * This function lets applications set \p import_cb to a callback function
+ * that will get the XML-stored userdata and store it in the object as expected
+ * by the application.
+ *
+ * \p import_cb is called during hwloc_topology_load() as many times as
+ * hwloc_export_obj_userdata() was called during export. The topology
+ * is not entirely setup yet. Object attributes are ready to consult,
+ * but links between objects are not.
+ *
+ * \p import_cb may be \c NULL if userdata should be ignored during import.
+ *
+ * \note \p buffer contains \p length characters followed by a null byte ('\0').
+ *
+ * \note This function should be called before hwloc_topology_load().
+ *
+ * \note The topology-specific userdata pointer is ignored when importing from XML.
+ */
+HWLOC_DECLSPEC void hwloc_topology_set_userdata_import_callback(hwloc_topology_t topology,
+								void (*import_cb)(hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length));
+
+/** @} */
+
+
+/** \defgroup hwlocality_syntheticexport Exporting Topologies to Synthetic
+ * @{
+ */
+
+/** \brief Flags for exporting synthetic topologies.
+ *
+ * Flags to be given as a OR'ed set to hwloc_topology_export_synthetic().
+ */
+enum hwloc_topology_export_synthetic_flags_e {
+ /** \brief Export extended types such as L2dcache as basic types such as Cache.
+  *
+  * This is required if loading the synthetic description with hwloc < 1.9.
+  * \hideinitializer
+  */
+ HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES = (1UL<<0),
+
+ /** \brief Do not export level attributes.
+  *
+  * Ignore level attributes such as memory/cache sizes or PU indexes.
+  * This is required if loading the synthetic description with hwloc < 1.10.
+  * \hideinitializer
+  */
+ HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS = (1UL<<1)
+};
+
+/** \brief Export the topology as a synthetic string.
+ *
+ * At most \p buflen characters will be written in \p buffer,
+ * including the terminating \0.
+ *
+ * This exported string may be given back to hwloc_topology_set_synthetic().
+ *
+ * \p flags is a OR'ed set of ::hwloc_topology_export_synthetic_flags_e.
+ *
+ * \return The number of characters that were written,
+ * not including the terminating \0.
+ *
+ * \return -1 if the topology could not be exported,
+ * for instance if it is not symmetric.
+ *
+ * \note I/O and Misc children are ignored, the synthetic string only
+ * describes normal children.
+ *
+ * \note A 1024-byte buffer should be large enough for exporting
+ * topologies in the vast majority of cases.
+ */
+HWLOC_DECLSPEC int hwloc_topology_export_synthetic(hwloc_topology_t topology, char *buffer, size_t buflen, unsigned long flags);
+
+/** @} */
+
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_EXPORT_H */
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/gl.h b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/gl.h
new file mode 100644
index 0000000000..3e643fa9a2
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/gl.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright © 2012 Blue Brain Project, EPFL. All rights reserved.
+ * Copyright © 2012-2013 Inria.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief Macros to help interaction between hwloc and OpenGL displays.
+ *
+ * Applications that use both hwloc and OpenGL may want to include
+ * this file so as to get topology information for OpenGL displays.
+ */
+
+#ifndef HWLOC_GL_H
+#define HWLOC_GL_H
+
+#include <hwloc.h>
+
+#include <stdio.h>
+#include <string.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \defgroup hwlocality_gl Interoperability with OpenGL displays
+ *
+ * This interface offers ways to retrieve topology information about
+ * OpenGL displays.
+ *
+ * Only the NVIDIA display locality information is currently available,
+ * using the NV-CONTROL X11 extension and the NVCtrl library.
+ *
+ * @{
+ */
+
+/** \brief Get the hwloc OS device object corresponding to the
+ * OpenGL display given by port and device index.
+ *
+ * Return the OS device object describing the OpenGL display
+ * whose port (server) is \p port and device (screen) is \p device.
+ * Return NULL if there is none.
+ *
+ * The topology \p topology does not necessarily have to match the current
+ * machine. For instance the topology may be an XML import of a remote host.
+ * I/O devices detection and the GL component must be enabled in the topology.
+ *
+ * \note The corresponding PCI device object can be obtained by looking
+ * at the OS device parent object (unless PCI devices are filtered out).
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_gl_get_display_osdev_by_port_device(hwloc_topology_t topology,
+					  unsigned port, unsigned device)
+{
+        unsigned x = (unsigned) -1, y = (unsigned) -1;
+        hwloc_obj_t osdev = NULL;
+        while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
+                if (HWLOC_OBJ_OSDEV_GPU == osdev->attr->osdev.type
+                    && osdev->name
+                    && sscanf(osdev->name, ":%u.%u", &x, &y) == 2
+                    && port == x && device == y)
+                        return osdev;
+        }
+	errno = EINVAL;
+        return NULL;
+}
+
+/** \brief Get the hwloc OS device object corresponding to the
+ * OpenGL display given by name.
+ *
+ * Return the OS device object describing the OpenGL display
+ * whose name is \p name, built as ":port.device" such as ":0.0" .
+ * Return NULL if there is none.
+ *
+ * The topology \p topology does not necessarily have to match the current
+ * machine. For instance the topology may be an XML import of a remote host.
+ * I/O devices detection and the GL component must be enabled in the topology.
+ *
+ * \note The corresponding PCI device object can be obtained by looking
+ * at the OS device parent object (unless PCI devices are filtered out).
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_gl_get_display_osdev_by_name(hwloc_topology_t topology,
+				   const char *name)
+{
+        hwloc_obj_t osdev = NULL;
+        while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
+                if (HWLOC_OBJ_OSDEV_GPU == osdev->attr->osdev.type
+                    && osdev->name
+                    && !strcmp(name, osdev->name))
+                        return osdev;
+        }
+	errno = EINVAL;
+        return NULL;
+}
+
+/** \brief Get the OpenGL display port and device corresponding
+ * to the given hwloc OS object.
+ *
+ * Return the OpenGL display port (server) in \p port and device (screen)
+ * in \p screen that correspond to the given hwloc OS device object.
+ * Return \c -1 if there is none.
+ *
+ * The topology \p topology does not necessarily have to match the current
+ * machine. For instance the topology may be an XML import of a remote host.
+ * I/O devices detection and the GL component must be enabled in the topology.
+ */
+static __hwloc_inline int
+hwloc_gl_get_display_by_osdev(hwloc_topology_t topology __hwloc_attribute_unused,
+			      hwloc_obj_t osdev,
+			      unsigned *port, unsigned *device)
+{
+	unsigned x = -1, y = -1;
+	if (HWLOC_OBJ_OSDEV_GPU == osdev->attr->osdev.type
+	    && sscanf(osdev->name, ":%u.%u", &x, &y) == 2) {
+		*port = x;
+		*device = y;
+		return 0;
+	}
+	errno = EINVAL;
+	return -1;
+}
+
+/** @} */
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_GL_H */
+
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/glibc-sched.h b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/glibc-sched.h
new file mode 100644
index 0000000000..1f9ba7cddc
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/glibc-sched.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2013 inria.  All rights reserved.
+ * Copyright © 2009-2011 Université Bordeaux
+ * Copyright © 2011 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief Macros to help interaction between hwloc and glibc scheduling routines.
+ *
+ * Applications that use both hwloc and glibc scheduling routines such as
+ * sched_getaffinity() or pthread_attr_setaffinity_np() may want to include
+ * this file so as to ease conversion between their respective types.
+ */
+
+#ifndef HWLOC_GLIBC_SCHED_H
+#define HWLOC_GLIBC_SCHED_H
+
+#include <hwloc.h>
+#include <hwloc/helper.h>
+#include <assert.h>
+
+#if !defined _GNU_SOURCE || !defined _SCHED_H || (!defined CPU_SETSIZE && !defined sched_priority)
+#error Please make sure to include sched.h before including glibc-sched.h, and define _GNU_SOURCE before any inclusion of sched.h
+#endif
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+#ifdef HWLOC_HAVE_CPU_SET
+
+
+/** \defgroup hwlocality_glibc_sched Interoperability with glibc sched affinity
+ *
+ * This interface offers ways to convert between hwloc cpusets and glibc cpusets
+ * such as those manipulated by sched_getaffinity() or pthread_attr_setaffinity_np().
+ *
+ * \note Topology \p topology must match the current machine.
+ *
+ * @{
+ */
+
+
+/** \brief Convert hwloc CPU set \p toposet into glibc sched affinity CPU set \p schedset
+ *
+ * This function may be used before calling sched_setaffinity or any other function
+ * that takes a cpu_set_t as input parameter.
+ *
+ * \p schedsetsize should be sizeof(cpu_set_t) unless \p schedset was dynamically allocated with CPU_ALLOC
+ */
+static __hwloc_inline int
+hwloc_cpuset_to_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t hwlocset,
+				    cpu_set_t *schedset, size_t schedsetsize)
+{
+#ifdef CPU_ZERO_S
+  unsigned cpu;
+  CPU_ZERO_S(schedsetsize, schedset);
+  hwloc_bitmap_foreach_begin(cpu, hwlocset)
+    CPU_SET_S(cpu, schedsetsize, schedset);
+  hwloc_bitmap_foreach_end();
+#else /* !CPU_ZERO_S */
+  unsigned cpu;
+  CPU_ZERO(schedset);
+  assert(schedsetsize == sizeof(cpu_set_t));
+  hwloc_bitmap_foreach_begin(cpu, hwlocset)
+    CPU_SET(cpu, schedset);
+  hwloc_bitmap_foreach_end();
+#endif /* !CPU_ZERO_S */
+  return 0;
+}
+
+/** \brief Convert glibc sched affinity CPU set \p schedset into hwloc CPU set
+ *
+ * This function may be used before calling sched_setaffinity  or any other function
+ * that takes a cpu_set_t  as input parameter.
+ *
+ * \p schedsetsize should be sizeof(cpu_set_t) unless \p schedset was dynamically allocated with CPU_ALLOC
+ */
+static __hwloc_inline int
+hwloc_cpuset_from_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_cpuset_t hwlocset,
+                                       const cpu_set_t *schedset, size_t schedsetsize)
+{
+  int cpu;
+#ifdef CPU_ZERO_S
+  int count;
+#endif
+  hwloc_bitmap_zero(hwlocset);
+#ifdef CPU_ZERO_S
+  count = CPU_COUNT_S(schedsetsize, schedset);
+  cpu = 0;
+  while (count) {
+    if (CPU_ISSET_S(cpu, schedsetsize, schedset)) {
+      hwloc_bitmap_set(hwlocset, cpu);
+      count--;
+    }
+    cpu++;
+  }
+#else /* !CPU_ZERO_S */
+  /* sched.h does not support dynamic cpu_set_t (introduced in glibc 2.7),
+   * assume we have a very old interface without CPU_COUNT (added in 2.6)
+   */
+  assert(schedsetsize == sizeof(cpu_set_t));
+  for(cpu=0; cpu<CPU_SETSIZE; cpu++)
+    if (CPU_ISSET(cpu, schedset))
+      hwloc_bitmap_set(hwlocset, cpu);
+#endif /* !CPU_ZERO_S */
+  return 0;
+}
+
+/** @} */
+
+
+#endif /* CPU_SET */
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_GLIBC_SCHED_H */
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/helper.h b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/helper.h
new file mode 100644
index 0000000000..f942a8f188
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/helper.h
@@ -0,0 +1,1081 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2016 Inria.  All rights reserved.
+ * Copyright © 2009-2012 Université Bordeaux
+ * Copyright © 2009-2010 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief High-level hwloc traversal helpers.
+ */
+
+#ifndef HWLOC_HELPER_H
+#define HWLOC_HELPER_H
+
+#ifndef HWLOC_H
+#error Please include the main hwloc.h instead
+#endif
+
+#include <stdlib.h>
+#include <errno.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \defgroup hwlocality_helper_find_inside Finding Objects inside a CPU set
+ * @{
+ */
+
+/** \brief Get the first largest object included in the given cpuset \p set.
+ *
+ * \return the first object that is included in \p set and whose parent is not.
+ *
+ * This is convenient for iterating over all largest objects within a CPU set
+ * by doing a loop getting the first largest object and clearing its CPU set
+ * from the remaining CPU set.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_first_largest_obj_inside_cpuset(hwloc_topology_t topology, hwloc_const_cpuset_t set)
+{
+  hwloc_obj_t obj = hwloc_get_root_obj(topology);
+  if (!hwloc_bitmap_intersects(obj->cpuset, set))
+    return NULL;
+  while (!hwloc_bitmap_isincluded(obj->cpuset, set)) {
+    /* while the object intersects without being included, look at its children */
+    hwloc_obj_t child = obj->first_child;
+    while (child) {
+      if (hwloc_bitmap_intersects(child->cpuset, set))
+	break;
+      child = child->next_sibling;
+    }
+    if (!child)
+      /* no child intersects, return their father */
+      return obj;
+    /* found one intersecting child, look at its children */
+    obj = child;
+  }
+  /* obj is included, return it */
+  return obj;
+}
+
+/** \brief Get the set of largest objects covering exactly a given cpuset \p set
+ *
+ * \return the number of objects returned in \p objs.
+ */
+HWLOC_DECLSPEC int hwloc_get_largest_objs_inside_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set,
+						 hwloc_obj_t * __hwloc_restrict objs, int max);
+
+/** \brief Return the next object at depth \p depth included in CPU set \p set.
+ *
+ * If \p prev is \c NULL, return the first object at depth \p depth
+ * included in \p set.  The next invocation should pass the previous
+ * return value in \p prev so as to obtain the next object in \p set.
+ *
+ * \note Objects with empty CPU sets are ignored
+ * (otherwise they would be considered included in any given set).
+ *
+ * \note This function cannot work if objects at the given depth do
+ * not have CPU sets (I/O or Misc objects).
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_obj_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set,
+					   unsigned depth, hwloc_obj_t prev)
+{
+  hwloc_obj_t next = hwloc_get_next_obj_by_depth(topology, depth, prev);
+  if (!next)
+    return NULL;
+  while (next && (hwloc_bitmap_iszero(next->cpuset) || !hwloc_bitmap_isincluded(next->cpuset, set)))
+    next = next->next_cousin;
+  return next;
+}
+
+/** \brief Return the next object of type \p type included in CPU set \p set.
+ *
+ * If there are multiple or no depth for given type, return \c NULL
+ * and let the caller fallback to
+ * hwloc_get_next_obj_inside_cpuset_by_depth().
+ *
+ * \note Objects with empty CPU sets are ignored
+ * (otherwise they would be considered included in any given set).
+ *
+ * \note This function cannot work if objects of the given type do
+ * not have CPU sets (I/O or Misc objects).
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_obj_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set,
+					  hwloc_obj_type_t type, hwloc_obj_t prev)
+{
+  int depth = hwloc_get_type_depth(topology, type);
+  if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE)
+    return NULL;
+  return hwloc_get_next_obj_inside_cpuset_by_depth(topology, set, depth, prev);
+}
+
+/** \brief Return the (logically) \p idx -th object at depth \p depth included in CPU set \p set.
+ *
+ * \note Objects with empty CPU sets are ignored
+ * (otherwise they would be considered included in any given set).
+ *
+ * \note This function cannot work if objects at the given depth do
+ * not have CPU sets (I/O or Misc objects).
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_obj_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set,
+				      unsigned depth, unsigned idx) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_obj_t
+hwloc_get_obj_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set,
+				      unsigned depth, unsigned idx)
+{
+  hwloc_obj_t obj = hwloc_get_obj_by_depth (topology, depth, 0);
+  unsigned count = 0;
+  if (!obj)
+    return NULL;
+  while (obj) {
+    if (!hwloc_bitmap_iszero(obj->cpuset) && hwloc_bitmap_isincluded(obj->cpuset, set)) {
+      if (count == idx)
+	return obj;
+      count++;
+    }
+    obj = obj->next_cousin;
+  }
+  return NULL;
+}
+
+/** \brief Return the \p idx -th object of type \p type included in CPU set \p set.
+ *
+ * If there are multiple or no depth for given type, return \c NULL
+ * and let the caller fallback to
+ * hwloc_get_obj_inside_cpuset_by_depth().
+ *
+ * \note Objects with empty CPU sets are ignored
+ * (otherwise they would be considered included in any given set).
+ *
+ * \note This function cannot work if objects of the given type do
+ * not have CPU sets (I/O or Misc objects).
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_obj_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set,
+				     hwloc_obj_type_t type, unsigned idx) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_obj_t
+hwloc_get_obj_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set,
+				     hwloc_obj_type_t type, unsigned idx)
+{
+  int depth = hwloc_get_type_depth(topology, type);
+  if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE)
+    return NULL;
+  return hwloc_get_obj_inside_cpuset_by_depth(topology, set, depth, idx);
+}
+
+/** \brief Return the number of objects at depth \p depth included in CPU set \p set.
+ *
+ * \note Objects with empty CPU sets are ignored
+ * (otherwise they would be considered included in any given set).
+ *
+ * \note This function cannot work if objects at the given depth do
+ * not have CPU sets (I/O or Misc objects).
+ */
+static __hwloc_inline unsigned
+hwloc_get_nbobjs_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set,
+					 unsigned depth) __hwloc_attribute_pure;
+static __hwloc_inline unsigned
+hwloc_get_nbobjs_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set,
+					 unsigned depth)
+{
+  hwloc_obj_t obj = hwloc_get_obj_by_depth (topology, depth, 0);
+  unsigned count = 0;
+  if (!obj)
+    return 0;
+  while (obj) {
+    if (!hwloc_bitmap_iszero(obj->cpuset) && hwloc_bitmap_isincluded(obj->cpuset, set))
+      count++;
+    obj = obj->next_cousin;
+  }
+  return count;
+}
+
+/** \brief Return the number of objects of type \p type included in CPU set \p set.
+ *
+ * If no object for that type exists inside CPU set \p set, 0 is
+ * returned.  If there are several levels with objects of that type
+ * inside CPU set \p set, -1 is returned.
+ *
+ * \note Objects with empty CPU sets are ignored
+ * (otherwise they would be considered included in any given set).
+ *
+ * \note This function cannot work if objects of the given type do
+ * not have CPU sets (I/O objects).
+ */
+static __hwloc_inline int
+hwloc_get_nbobjs_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set,
+					hwloc_obj_type_t type) __hwloc_attribute_pure;
+static __hwloc_inline int
+hwloc_get_nbobjs_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set,
+					hwloc_obj_type_t type)
+{
+  int depth = hwloc_get_type_depth(topology, type);
+  if (depth == HWLOC_TYPE_DEPTH_UNKNOWN)
+    return 0;
+  if (depth == HWLOC_TYPE_DEPTH_MULTIPLE)
+    return -1; /* FIXME: aggregate nbobjs from different levels? */
+  return hwloc_get_nbobjs_inside_cpuset_by_depth(topology, set, depth);
+}
+
+/** \brief Return the logical index among the objects included in CPU set \p set.
+ *
+ * Consult all objects in the same level as \p obj and inside CPU set \p set
+ * in the logical order, and return the index of \p obj within them.
+ * If \p set covers the entire topology, this is the logical index of \p obj.
+ * Otherwise, this is similar to a logical index within the part of the topology
+ * defined by CPU set \p set.
+ *
+ * \note Objects with empty CPU sets are ignored
+ * (otherwise they would be considered included in any given set).
+ *
+ * \note This function cannot work if obj does not have CPU sets (I/O objects).
+ */
+static __hwloc_inline int
+hwloc_get_obj_index_inside_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set,
+				   hwloc_obj_t obj) __hwloc_attribute_pure;
+static __hwloc_inline int
+hwloc_get_obj_index_inside_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set,
+				   hwloc_obj_t obj)
+{
+  int idx = 0;
+  if (!hwloc_bitmap_isincluded(obj->cpuset, set))
+    return -1;
+  /* count how many objects are inside the cpuset on the way from us to the beginning of the level */
+  while ((obj = obj->prev_cousin) != NULL)
+    if (!hwloc_bitmap_iszero(obj->cpuset) && hwloc_bitmap_isincluded(obj->cpuset, set))
+      idx++;
+  return idx;
+}
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_helper_find_covering Finding Objects covering at least CPU set
+ * @{
+ */
+
+/** \brief Get the child covering at least CPU set \p set.
+ *
+ * \return \c NULL if no child matches or if \p set is empty.
+ *
+ * \note This function cannot work if parent does not have a CPU set (I/O or Misc objects).
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_child_covering_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set,
+				hwloc_obj_t parent) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_obj_t
+hwloc_get_child_covering_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set,
+				hwloc_obj_t parent)
+{
+  hwloc_obj_t child;
+  if (hwloc_bitmap_iszero(set))
+    return NULL;
+  child = parent->first_child;
+  while (child) {
+    if (child->cpuset && hwloc_bitmap_isincluded(set, child->cpuset))
+      return child;
+    child = child->next_sibling;
+  }
+  return NULL;
+}
+
+/** \brief Get the lowest object covering at least CPU set \p set
+ *
+ * \return \c NULL if no object matches or if \p set is empty.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_obj_covering_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_obj_t
+hwloc_get_obj_covering_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set)
+{
+  struct hwloc_obj *current = hwloc_get_root_obj(topology);
+  if (hwloc_bitmap_iszero(set) || !hwloc_bitmap_isincluded(set, current->cpuset))
+    return NULL;
+  while (1) {
+    hwloc_obj_t child = hwloc_get_child_covering_cpuset(topology, set, current);
+    if (!child)
+      return current;
+    current = child;
+  }
+}
+
+/** \brief Iterate through same-depth objects covering at least CPU set \p set
+ *
+ * If object \p prev is \c NULL, return the first object at depth \p
+ * depth covering at least part of CPU set \p set.  The next
+ * invocation should pass the previous return value in \p prev so as
+ * to obtain the next object covering at least another part of \p set.
+ *
+ * \note This function cannot work if objects at the given depth do
+ * not have CPU sets (I/O or Misc objects).
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_obj_covering_cpuset_by_depth(hwloc_topology_t topology, hwloc_const_cpuset_t set,
+					    unsigned depth, hwloc_obj_t prev)
+{
+  hwloc_obj_t next = hwloc_get_next_obj_by_depth(topology, depth, prev);
+  if (!next)
+    return NULL;
+  while (next && !hwloc_bitmap_intersects(set, next->cpuset))
+    next = next->next_cousin;
+  return next;
+}
+
+/** \brief Iterate through same-type objects covering at least CPU set \p set
+ *
+ * If object \p prev is \c NULL, return the first object of type \p
+ * type covering at least part of CPU set \p set.  The next invocation
+ * should pass the previous return value in \p prev so as to obtain
+ * the next object of type \p type covering at least another part of
+ * \p set.
+ *
+ * If there are no or multiple depths for type \p type, \c NULL is returned.
+ * The caller may fallback to hwloc_get_next_obj_covering_cpuset_by_depth()
+ * for each depth.
+ *
+ * \note This function cannot work if objects of the given type do
+ * not have CPU sets (I/O or Misc objects).
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_obj_covering_cpuset_by_type(hwloc_topology_t topology, hwloc_const_cpuset_t set,
+					   hwloc_obj_type_t type, hwloc_obj_t prev)
+{
+  int depth = hwloc_get_type_depth(topology, type);
+  if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE)
+    return NULL;
+  return hwloc_get_next_obj_covering_cpuset_by_depth(topology, set, depth, prev);
+}
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_helper_ancestors Looking at Ancestor and Child Objects
+ * @{
+ *
+ * Be sure to see the figure in \ref termsanddefs that shows a
+ * complete topology tree, including depths, child/sibling/cousin
+ * relationships, and an example of an asymmetric topology where one
+ * package has fewer caches than its peers.
+ */
+
+/** \brief Returns the ancestor object of \p obj at depth \p depth. */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_ancestor_obj_by_depth (hwloc_topology_t topology __hwloc_attribute_unused, unsigned depth, hwloc_obj_t obj) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_obj_t
+hwloc_get_ancestor_obj_by_depth (hwloc_topology_t topology __hwloc_attribute_unused, unsigned depth, hwloc_obj_t obj)
+{
+  hwloc_obj_t ancestor = obj;
+  if (obj->depth < depth)
+    return NULL;
+  while (ancestor && ancestor->depth > depth)
+    ancestor = ancestor->parent;
+  return ancestor;
+}
+
+/** \brief Returns the ancestor object of \p obj with type \p type. */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_ancestor_obj_by_type (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_type_t type, hwloc_obj_t obj) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_obj_t
+hwloc_get_ancestor_obj_by_type (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_type_t type, hwloc_obj_t obj)
+{
+  hwloc_obj_t ancestor = obj->parent;
+  while (ancestor && ancestor->type != type)
+    ancestor = ancestor->parent;
+  return ancestor;
+}
+
+/** \brief Returns the common parent object to objects \p obj1 and \p obj2 */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_common_ancestor_obj (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj1, hwloc_obj_t obj2) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_obj_t
+hwloc_get_common_ancestor_obj (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj1, hwloc_obj_t obj2)
+{
+  /* the loop isn't so easy since intermediate ancestors may have
+   * different depth, causing us to alternate between using obj1->parent
+   * and obj2->parent. Also, even if at some point we find ancestors
+   * of the same depth, their ancestors may have different depth again.
+   */
+  while (obj1 != obj2) {
+    while (obj1->depth > obj2->depth)
+      obj1 = obj1->parent;
+    while (obj2->depth > obj1->depth)
+      obj2 = obj2->parent;
+    if (obj1 != obj2 && obj1->depth == obj2->depth) {
+      obj1 = obj1->parent;
+      obj2 = obj2->parent;
+    }
+  }
+  return obj1;
+}
+
+/** \brief Returns true if \p obj is inside the subtree beginning with ancestor object \p subtree_root.
+ *
+ * \note This function cannot work if \p obj and \p subtree_root objects do
+ * not have CPU sets (I/O or Misc objects).
+ */
+static __hwloc_inline int
+hwloc_obj_is_in_subtree (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj, hwloc_obj_t subtree_root) __hwloc_attribute_pure;
+static __hwloc_inline int
+hwloc_obj_is_in_subtree (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj, hwloc_obj_t subtree_root)
+{
+  return obj->cpuset && subtree_root->cpuset && hwloc_bitmap_isincluded(obj->cpuset, subtree_root->cpuset);
+}
+
+/** \brief Return the next child.
+ *
+ * Return the next child among the normal children list, then among the I/O
+ * children list, then among the Misc children list.
+ *
+ * If \p prev is \c NULL, return the first child.
+ *
+ * Return \c NULL when there is no next child.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_child (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t parent, hwloc_obj_t prev)
+{
+  hwloc_obj_t obj;
+  int state = 0;
+  if (prev) {
+    if (prev->type == HWLOC_OBJ_MISC)
+      state = 2;
+    else if (prev->type == HWLOC_OBJ_BRIDGE || prev->type == HWLOC_OBJ_PCI_DEVICE || prev->type == HWLOC_OBJ_OS_DEVICE)
+      state = 1;
+    obj = prev->next_sibling;
+  } else {
+    obj = parent->first_child;
+  }
+  if (!obj && state == 0) {
+    obj = parent->io_first_child;
+    state = 1;
+  }
+  if (!obj && state == 1) {
+    obj = parent->misc_first_child;
+    state = 2;
+  }
+  return obj;
+}
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_helper_find_cache Looking at Cache Objects
+ * @{
+ */
+
+/** \brief Check whether an object is a Cache (Data, Unified or Instruction). */
+static __hwloc_inline int
+hwloc_obj_type_is_cache(hwloc_obj_type_t type)
+{
+  return (type >= HWLOC_OBJ_L1CACHE && type <= HWLOC_OBJ_L3ICACHE);
+}
+
+/** \brief Check whether an object is a Data or Unified Cache. */
+static __hwloc_inline int
+hwloc_obj_type_is_dcache(hwloc_obj_type_t type)
+{
+  return (type >= HWLOC_OBJ_L1CACHE && type <= HWLOC_OBJ_L5CACHE);
+}
+
+/** \brief Check whether an object is an Instruction Cache. */
+static __hwloc_inline int
+hwloc_obj_type_is_icache(hwloc_obj_type_t type)
+{
+  return (type >= HWLOC_OBJ_L1ICACHE && type <= HWLOC_OBJ_L3ICACHE);
+}
+
+/** \brief Find the depth of cache objects matching cache level and type.
+ *
+ * Return the depth of the topology level that contains cache objects
+ * whose attributes match \p cachelevel and \p cachetype.
+ *
+ * This function is identical to calling hwloc_get_type_depth() with the
+ * corresponding type such as ::HWLOC_OBJ_L1ICACHE, except that it may
+ * also return a Unified cache when looking for an instruction cache.
+ *
+ * If no cache level matches, ::HWLOC_TYPE_DEPTH_UNKNOWN is returned.
+ *
+ * If \p cachetype is ::HWLOC_OBJ_CACHE_UNIFIED, the depth of the
+ * unique matching unified cache level is returned.
+ *
+ * If \p cachetype is ::HWLOC_OBJ_CACHE_DATA or ::HWLOC_OBJ_CACHE_INSTRUCTION,
+ * either a matching cache, or a unified cache is returned.
+ *
+ * If \p cachetype is \c -1, it is ignored and multiple levels may
+ * match. The function returns either the depth of a uniquely matching
+ * level or ::HWLOC_TYPE_DEPTH_MULTIPLE.
+ */
+static __hwloc_inline int
+hwloc_get_cache_type_depth (hwloc_topology_t topology,
+			    unsigned cachelevel, hwloc_obj_cache_type_t cachetype)
+{
+  int depth;
+  int found = HWLOC_TYPE_DEPTH_UNKNOWN;
+  for (depth=0; ; depth++) {
+    hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, 0);
+    if (!obj)
+      break;
+    if (!hwloc_obj_type_is_dcache(obj->type) || obj->attr->cache.depth != cachelevel)
+      /* doesn't match, try next depth */
+      continue;
+    if (cachetype == (hwloc_obj_cache_type_t) -1) {
+      if (found != HWLOC_TYPE_DEPTH_UNKNOWN) {
+	/* second match, return MULTIPLE */
+        return HWLOC_TYPE_DEPTH_MULTIPLE;
+      }
+      /* first match, mark it as found */
+      found = depth;
+      continue;
+    }
+    if (obj->attr->cache.type == cachetype || obj->attr->cache.type == HWLOC_OBJ_CACHE_UNIFIED)
+      /* exact match (either unified is alone, or we match instruction or data), return immediately */
+      return depth;
+  }
+  /* went to the bottom, return what we found */
+  return found;
+}
+
+/** \brief Get the first data (or unified) cache covering a cpuset \p set
+ *
+ * \return \c NULL if no cache matches.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_cache_covering_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_obj_t
+hwloc_get_cache_covering_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set)
+{
+  hwloc_obj_t current = hwloc_get_obj_covering_cpuset(topology, set);
+  while (current) {
+    if (hwloc_obj_type_is_dcache(current->type))
+      return current;
+    current = current->parent;
+  }
+  return NULL;
+}
+
+/** \brief Get the first data (or unified) cache shared between an object and somebody else.
+ *
+ * \return \c NULL if no cache matches or if an invalid object is given.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_shared_cache_covering_obj (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_obj_t
+hwloc_get_shared_cache_covering_obj (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj)
+{
+  hwloc_obj_t current = obj->parent;
+  if (!obj->cpuset)
+    return NULL;
+  while (current) {
+    if (!hwloc_bitmap_isequal(current->cpuset, obj->cpuset)
+        && hwloc_obj_type_is_dcache(current->type))
+      return current;
+    current = current->parent;
+  }
+  return NULL;
+}
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_helper_find_misc Finding objects, miscellaneous helpers
+ * @{
+ *
+ * Be sure to see the figure in \ref termsanddefs that shows a
+ * complete topology tree, including depths, child/sibling/cousin
+ * relationships, and an example of an asymmetric topology where one
+ * package has fewer caches than its peers.
+ */
+
+/** \brief Returns the object of type ::HWLOC_OBJ_PU with \p os_index.
+ *
+ * This function is useful for converting a CPU set into the PU
+ * objects it contains.
+ * When retrieving the current binding (e.g. with hwloc_get_cpubind()),
+ * one may iterate over the bits of the resulting CPU set with
+ * hwloc_bitmap_foreach_begin(), and find the corresponding PUs
+ * with this function.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_pu_obj_by_os_index(hwloc_topology_t topology, unsigned os_index) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_obj_t
+hwloc_get_pu_obj_by_os_index(hwloc_topology_t topology, unsigned os_index)
+{
+  hwloc_obj_t obj = NULL;
+  while ((obj = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_PU, obj)) != NULL)
+    if (obj->os_index == os_index)
+      return obj;
+  return NULL;
+}
+
+/** \brief Returns the object of type ::HWLOC_OBJ_NUMANODE with \p os_index.
+ *
+ * This function is useful for converting a nodeset into the NUMA node
+ * objects it contains.
+ * When retrieving the current binding (e.g. with hwloc_get_membind() with HWLOC_MEMBIND_BYNODESET),
+ * one may iterate over the bits of the resulting nodeset with
+ * hwloc_bitmap_foreach_begin(), and find the corresponding NUMA nodes
+ * with this function.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_numanode_obj_by_os_index(hwloc_topology_t topology, unsigned os_index) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_obj_t
+hwloc_get_numanode_obj_by_os_index(hwloc_topology_t topology, unsigned os_index)
+{
+  hwloc_obj_t obj = NULL;
+  while ((obj = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, obj)) != NULL)
+    if (obj->os_index == os_index)
+      return obj;
+  return NULL;
+}
+
+/** \brief Do a depth-first traversal of the topology to find and sort
+ *
+ * all objects that are at the same depth as \p src.
+ * Report in \p objs up to \p max physically closest ones to \p src.
+ *
+ * \return the number of objects returned in \p objs.
+ *
+ * \return 0 if \p src is an I/O object.
+ *
+ * \note This function requires the \p src object to have a CPU set.
+ */
+/* TODO: rather provide an iterator? Provide a way to know how much should be allocated? By returning the total number of objects instead? */
+HWLOC_DECLSPEC unsigned hwloc_get_closest_objs (hwloc_topology_t topology, hwloc_obj_t src, hwloc_obj_t * __hwloc_restrict objs, unsigned max);
+
+/** \brief Find an object below another object, both specified by types and indexes.
+ *
+ * Start from the top system object and find object of type \p type1
+ * and logical index \p idx1.  Then look below this object and find another
+ * object of type \p type2 and logical index \p idx2.  Indexes are specified
+ * within the parent, not within the entire system.
+ *
+ * For instance, if type1 is PACKAGE, idx1 is 2, type2 is CORE and idx2
+ * is 3, return the fourth core object below the third package.
+ *
+ * \note This function requires these objects to have a CPU set.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_obj_below_by_type (hwloc_topology_t topology,
+			     hwloc_obj_type_t type1, unsigned idx1,
+			     hwloc_obj_type_t type2, unsigned idx2) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_obj_t
+hwloc_get_obj_below_by_type (hwloc_topology_t topology,
+			     hwloc_obj_type_t type1, unsigned idx1,
+			     hwloc_obj_type_t type2, unsigned idx2)
+{
+  hwloc_obj_t obj;
+  obj = hwloc_get_obj_by_type (topology, type1, idx1);
+  if (!obj)
+    return NULL;
+  return hwloc_get_obj_inside_cpuset_by_type(topology, obj->cpuset, type2, idx2);
+}
+
+/** \brief Find an object below a chain of objects specified by types and indexes.
+ *
+ * This is a generalized version of hwloc_get_obj_below_by_type().
+ *
+ * Arrays \p typev and \p idxv must contain \p nr types and indexes.
+ *
+ * Start from the top system object and walk the arrays \p typev and \p idxv.
+ * For each type and logical index couple in the arrays, look under the previously found
+ * object to find the index-th object of the given type.
+ * Indexes are specified within the parent, not within the entire system.
+ *
+ * For instance, if nr is 3, typev contains NODE, PACKAGE and CORE,
+ * and idxv contains 0, 1 and 2, return the third core object below
+ * the second package below the first NUMA node.
+ *
+ * \note This function requires all these objects and the root object
+ * to have a CPU set.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_obj_below_array_by_type (hwloc_topology_t topology, int nr, hwloc_obj_type_t *typev, unsigned *idxv) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_obj_t
+hwloc_get_obj_below_array_by_type (hwloc_topology_t topology, int nr, hwloc_obj_type_t *typev, unsigned *idxv)
+{
+  hwloc_obj_t obj = hwloc_get_root_obj(topology);
+  int i;
+  for(i=0; i<nr; i++) {
+    if (!obj)
+      return NULL;
+    obj = hwloc_get_obj_inside_cpuset_by_type(topology, obj->cpuset, typev[i], idxv[i]);
+  }
+  return obj;
+}
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_helper_distribute Distributing items over a topology
+ * @{
+ */
+
+/** \brief Flags to be given to hwloc_distrib().
+ */
+enum hwloc_distrib_flags_e {
+  /** \brief Distribute in reverse order, starting from the last objects.
+   * \hideinitializer
+   */
+  HWLOC_DISTRIB_FLAG_REVERSE = (1UL<<0)
+};
+
+/** \brief Distribute \p n items over the topology under \p roots
+ *
+ * Array \p set will be filled with \p n cpusets recursively distributed
+ * linearly over the topology under objects \p roots, down to depth \p until
+ * (which can be INT_MAX to distribute down to the finest level).
+ *
+ * \p n_roots is usually 1 and \p roots only contains the topology root object
+ * so as to distribute over the entire topology.
+ *
+ * This is typically useful when an application wants to distribute \p n
+ * threads over a machine, giving each of them as much private cache as
+ * possible and keeping them locally in number order.
+ *
+ * The caller may typically want to also call hwloc_bitmap_singlify()
+ * before binding a thread so that it does not move at all.
+ *
+ * \p flags should be 0 or a OR'ed set of ::hwloc_distrib_flags_e.
+ *
+ * \note This function requires the \p roots objects to have a CPU set.
+ *
+ * \note This function replaces the now deprecated hwloc_distribute()
+ * and hwloc_distributev() functions.
+ */
+static __hwloc_inline int
+hwloc_distrib(hwloc_topology_t topology,
+	      hwloc_obj_t *roots, unsigned n_roots,
+	      hwloc_cpuset_t *set,
+	      unsigned n,
+	      unsigned until, unsigned long flags)
+{
+  unsigned i;
+  unsigned tot_weight;
+  unsigned given, givenweight;
+  hwloc_cpuset_t *cpusetp = set;
+
+  if (flags & ~HWLOC_DISTRIB_FLAG_REVERSE) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  tot_weight = 0;
+  for (i = 0; i < n_roots; i++)
+    tot_weight += hwloc_bitmap_weight(roots[i]->cpuset);
+
+  for (i = 0, given = 0, givenweight = 0; i < n_roots; i++) {
+    unsigned chunk, weight;
+    hwloc_obj_t root = roots[flags & HWLOC_DISTRIB_FLAG_REVERSE ? n_roots-1-i : i];
+    hwloc_cpuset_t cpuset = root->cpuset;
+    weight = hwloc_bitmap_weight(cpuset);
+    if (!weight)
+      continue;
+    /* Give to root a chunk proportional to its weight.
+     * If previous chunks got rounded-up, we may get a bit less. */
+    chunk = (( (givenweight+weight) * n  + tot_weight-1) / tot_weight)
+          - ((  givenweight         * n  + tot_weight-1) / tot_weight);
+    if (!root->arity || chunk <= 1 || root->depth >= until) {
+      /* We can't split any more, put everything there.  */
+      if (chunk) {
+	/* Fill cpusets with ours */
+	unsigned j;
+	for (j=0; j < chunk; j++)
+	  cpusetp[j] = hwloc_bitmap_dup(cpuset);
+      } else {
+	/* We got no chunk, just merge our cpuset to a previous one
+	 * (the first chunk cannot be empty)
+	 * so that this root doesn't get ignored.
+	 */
+	assert(given);
+	hwloc_bitmap_or(cpusetp[-1], cpusetp[-1], cpuset);
+      }
+    } else {
+      /* Still more to distribute, recurse into children */
+      hwloc_distrib(topology, root->children, root->arity, cpusetp, chunk, until, flags);
+    }
+    cpusetp += chunk;
+    given += chunk;
+    givenweight += weight;
+  }
+
+  return 0;
+}
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_helper_topology_sets CPU and node sets of entire topologies
+ * @{
+ */
+/** \brief Get complete CPU set
+ *
+ * \return the complete CPU set of logical processors of the system.
+ *
+ * \note The returned cpuset is not newly allocated and should thus not be
+ * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy.
+ */
+static __hwloc_inline hwloc_const_cpuset_t
+hwloc_topology_get_complete_cpuset(hwloc_topology_t topology) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_const_cpuset_t
+hwloc_topology_get_complete_cpuset(hwloc_topology_t topology)
+{
+  return hwloc_get_root_obj(topology)->complete_cpuset;
+}
+
+/** \brief Get topology CPU set
+ *
+ * \return the CPU set of logical processors of the system for which hwloc
+ * provides topology information. This is equivalent to the cpuset of the
+ * system object.
+ *
+ * \note The returned cpuset is not newly allocated and should thus not be
+ * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy.
+ */
+static __hwloc_inline hwloc_const_cpuset_t
+hwloc_topology_get_topology_cpuset(hwloc_topology_t topology) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_const_cpuset_t
+hwloc_topology_get_topology_cpuset(hwloc_topology_t topology)
+{
+  return hwloc_get_root_obj(topology)->cpuset;
+}
+
+/** \brief Get allowed CPU set
+ *
+ * \return the CPU set of allowed logical processors of the system.
+ *
+ * \note The returned cpuset is not newly allocated and should thus not be
+ * changed or freed, hwloc_bitmap_dup() must be used to obtain a local copy.
+ */
+static __hwloc_inline hwloc_const_cpuset_t
+hwloc_topology_get_allowed_cpuset(hwloc_topology_t topology) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_const_cpuset_t
+hwloc_topology_get_allowed_cpuset(hwloc_topology_t topology)
+{
+  return hwloc_get_root_obj(topology)->allowed_cpuset;
+}
+
+/** \brief Get complete node set
+ *
+ * \return the complete node set of memory of the system.
+ *
+ * \note The returned nodeset is not newly allocated and should thus not be
+ * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy.
+ */
+static __hwloc_inline hwloc_const_nodeset_t
+hwloc_topology_get_complete_nodeset(hwloc_topology_t topology) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_const_nodeset_t
+hwloc_topology_get_complete_nodeset(hwloc_topology_t topology)
+{
+  return hwloc_get_root_obj(topology)->complete_nodeset;
+}
+
+/** \brief Get topology node set
+ *
+ * \return the node set of memory of the system for which hwloc
+ * provides topology information. This is equivalent to the nodeset of the
+ * system object.
+ *
+ * \note The returned nodeset is not newly allocated and should thus not be
+ * changed or freed; hwloc_bitmap_dup() must be used to obtain a local copy.
+ */
+static __hwloc_inline hwloc_const_nodeset_t
+hwloc_topology_get_topology_nodeset(hwloc_topology_t topology) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_const_nodeset_t
+hwloc_topology_get_topology_nodeset(hwloc_topology_t topology)
+{
+  return hwloc_get_root_obj(topology)->nodeset;
+}
+
+/** \brief Get allowed node set
+ *
+ * \return the node set of allowed memory of the system.
+ *
+ * \note The returned nodeset is not newly allocated and should thus not be
+ * changed or freed, hwloc_bitmap_dup() must be used to obtain a local copy.
+ */
+static __hwloc_inline hwloc_const_nodeset_t
+hwloc_topology_get_allowed_nodeset(hwloc_topology_t topology) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_const_nodeset_t
+hwloc_topology_get_allowed_nodeset(hwloc_topology_t topology)
+{
+  return hwloc_get_root_obj(topology)->allowed_nodeset;
+}
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_helper_nodeset_convert Converting between CPU sets and node sets
+ *
+ * @{
+ */
+
+/** \brief Convert a CPU set into a NUMA node set and handle non-NUMA cases
+ *
+ * If some NUMA nodes have no CPUs at all, this function never sets their
+ * indexes in the output node set, even if a full CPU set is given in input.
+ *
+ * If the topology contains no NUMA nodes, the machine is considered
+ * as a single memory node, and the following behavior is used:
+ * If \p cpuset is empty, \p nodeset will be emptied as well.
+ * Otherwise \p nodeset will be entirely filled.
+ */
+static __hwloc_inline void
+hwloc_cpuset_to_nodeset(hwloc_topology_t topology, hwloc_const_cpuset_t _cpuset, hwloc_nodeset_t nodeset)
+{
+	int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
+	hwloc_obj_t obj = NULL;
+	assert(depth != HWLOC_TYPE_DEPTH_UNKNOWN);
+	hwloc_bitmap_zero(nodeset);
+	while ((obj = hwloc_get_next_obj_covering_cpuset_by_depth(topology, _cpuset, depth, obj)) != NULL)
+		hwloc_bitmap_set(nodeset, obj->os_index);
+}
+
+/** \brief Convert a NUMA node set into a CPU set and handle non-NUMA cases
+ *
+ * If the topology contains no NUMA nodes, the machine is considered
+ * as a single memory node, and the following behavior is used:
+ * If \p nodeset is empty, \p cpuset will be emptied as well.
+ * Otherwise \p cpuset will be entirely filled.
+ * This is useful for manipulating memory binding sets.
+ */
+static __hwloc_inline void
+hwloc_cpuset_from_nodeset(hwloc_topology_t topology, hwloc_cpuset_t _cpuset, hwloc_const_nodeset_t nodeset)
+{
+	int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
+	hwloc_obj_t obj = NULL;
+	assert(depth != HWLOC_TYPE_DEPTH_UNKNOWN);
+	hwloc_bitmap_zero(_cpuset);
+	while ((obj = hwloc_get_next_obj_by_depth(topology, depth, obj)) != NULL) {
+		if (hwloc_bitmap_isset(nodeset, obj->os_index))
+			/* no need to check obj->cpuset because objects in levels always have a cpuset */
+			hwloc_bitmap_or(_cpuset, _cpuset, obj->cpuset);
+	}
+}
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_advanced_io Finding I/O objects
+ * @{
+ */
+
+/** \brief Get the first non-I/O ancestor object.
+ *
+ * Given the I/O object \p ioobj, find the smallest non-I/O ancestor
+ * object. This regular object may then be used for binding because
+ * its locality is the same as \p ioobj.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_non_io_ancestor_obj(hwloc_topology_t topology __hwloc_attribute_unused,
+			      hwloc_obj_t ioobj)
+{
+  hwloc_obj_t obj = ioobj;
+  while (obj && !obj->cpuset) {
+    obj = obj->parent;
+  }
+  return obj;
+}
+
+/** \brief Get the next PCI device in the system.
+ *
+ * \return the first PCI device if \p prev is \c NULL.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_pcidev(hwloc_topology_t topology, hwloc_obj_t prev)
+{
+  return hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_PCI_DEVICE, prev);
+}
+
+/** \brief Find the PCI device object matching the PCI bus id
+ * given domain, bus device and function PCI bus id.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_pcidev_by_busid(hwloc_topology_t topology,
+			  unsigned domain, unsigned bus, unsigned dev, unsigned func)
+{
+  hwloc_obj_t obj = NULL;
+  while ((obj = hwloc_get_next_pcidev(topology, obj)) != NULL) {
+    if (obj->attr->pcidev.domain == domain
+	&& obj->attr->pcidev.bus == bus
+	&& obj->attr->pcidev.dev == dev
+	&& obj->attr->pcidev.func == func)
+      return obj;
+  }
+  return NULL;
+}
+
+/** \brief Find the PCI device object matching the PCI bus id
+ * given as a string xxxx:yy:zz.t or yy:zz.t.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_pcidev_by_busidstring(hwloc_topology_t topology, const char *busid)
+{
+  unsigned domain = 0; /* default */
+  unsigned bus, dev, func;
+
+  if (sscanf(busid, "%x:%x.%x", &bus, &dev, &func) != 3
+      && sscanf(busid, "%x:%x:%x.%x", &domain, &bus, &dev, &func) != 4) {
+    errno = EINVAL;
+    return NULL;
+  }
+
+  return hwloc_get_pcidev_by_busid(topology, domain, bus, dev, func);
+}
+
+/** \brief Get the next OS device in the system.
+ *
+ * \return the first OS device if \p prev is \c NULL.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_osdev(hwloc_topology_t topology, hwloc_obj_t prev)
+{
+  return hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_OS_DEVICE, prev);
+}
+
+/** \brief Get the next bridge in the system.
+ *
+ * \return the first bridge if \p prev is \c NULL.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_bridge(hwloc_topology_t topology, hwloc_obj_t prev)
+{
+  return hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_BRIDGE, prev);
+}
+
+/** \brief Checks whether a given bridge covers a given PCI bus.
+ */
+static __hwloc_inline int
+hwloc_bridge_covers_pcibus(hwloc_obj_t bridge,
+			   unsigned domain, unsigned bus)
+{
+  return bridge->type == HWLOC_OBJ_BRIDGE
+    && bridge->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI
+    && bridge->attr->bridge.downstream.pci.domain == domain
+    && bridge->attr->bridge.downstream.pci.secondary_bus <= bus
+    && bridge->attr->bridge.downstream.pci.subordinate_bus >= bus;
+}
+
+/** @} */
+
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_HELPER_H */
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/inlines.h b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/inlines.h
new file mode 100644
index 0000000000..a4808dbba5
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/inlines.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2016 Inria.  All rights reserved.
+ * Copyright © 2009-2012 Université Bordeaux
+ * Copyright © 2009-2010 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/**
+ * This file contains the inline code of functions declared in hwloc.h
+ */
+
+#ifndef HWLOC_INLINES_H
+#define HWLOC_INLINES_H
+
+#ifndef HWLOC_H
+#error Please include the main hwloc.h instead
+#endif
+
+#include <stdlib.h>
+#include <errno.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static __hwloc_inline int
+hwloc_get_type_or_below_depth (hwloc_topology_t topology, hwloc_obj_type_t type)
+{
+  int depth = hwloc_get_type_depth(topology, type);
+
+  if (depth != HWLOC_TYPE_DEPTH_UNKNOWN)
+    return depth;
+
+  /* find the highest existing level with type order >= */
+  for(depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PU); ; depth--)
+    if (hwloc_compare_types(hwloc_get_depth_type(topology, depth), type) < 0)
+      return depth+1;
+
+  /* Shouldn't ever happen, as there is always a SYSTEM level with lower order and known depth.  */
+  /* abort(); */
+}
+
+static __hwloc_inline int
+hwloc_get_type_or_above_depth (hwloc_topology_t topology, hwloc_obj_type_t type)
+{
+  int depth = hwloc_get_type_depth(topology, type);
+
+  if (depth != HWLOC_TYPE_DEPTH_UNKNOWN)
+    return depth;
+
+  /* find the lowest existing level with type order <= */
+  for(depth = 0; ; depth++)
+    if (hwloc_compare_types(hwloc_get_depth_type(topology, depth), type) > 0)
+      return depth-1;
+
+  /* Shouldn't ever happen, as there is always a PU level with higher order and known depth.  */
+  /* abort(); */
+}
+
+static __hwloc_inline int
+hwloc_get_nbobjs_by_type (hwloc_topology_t topology, hwloc_obj_type_t type)
+{
+  int depth = hwloc_get_type_depth(topology, type);
+  if (depth == HWLOC_TYPE_DEPTH_UNKNOWN)
+    return 0;
+  if (depth == HWLOC_TYPE_DEPTH_MULTIPLE)
+    return -1; /* FIXME: aggregate nbobjs from different levels? */
+  return hwloc_get_nbobjs_by_depth(topology, depth);
+}
+
+static __hwloc_inline hwloc_obj_t
+hwloc_get_obj_by_type (hwloc_topology_t topology, hwloc_obj_type_t type, unsigned idx)
+{
+  int depth = hwloc_get_type_depth(topology, type);
+  if (depth == HWLOC_TYPE_DEPTH_UNKNOWN)
+    return NULL;
+  if (depth == HWLOC_TYPE_DEPTH_MULTIPLE)
+    return NULL;
+  return hwloc_get_obj_by_depth(topology, depth, idx);
+}
+
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_obj_by_depth (hwloc_topology_t topology, unsigned depth, hwloc_obj_t prev)
+{
+  if (!prev)
+    return hwloc_get_obj_by_depth (topology, depth, 0);
+  if (prev->depth != depth)
+    return NULL;
+  return prev->next_cousin;
+}
+
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_obj_by_type (hwloc_topology_t topology, hwloc_obj_type_t type,
+			    hwloc_obj_t prev)
+{
+  int depth = hwloc_get_type_depth(topology, type);
+  if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE)
+    return NULL;
+  return hwloc_get_next_obj_by_depth (topology, depth, prev);
+}
+
+static __hwloc_inline hwloc_obj_t
+hwloc_get_root_obj (hwloc_topology_t topology)
+{
+  return hwloc_get_obj_by_depth (topology, 0, 0);
+}
+
+static __hwloc_inline const char *
+hwloc_obj_get_info_by_name(hwloc_obj_t obj, const char *name)
+{
+  unsigned i;
+  for(i=0; i<obj->infos_count; i++)
+    if (!strcmp(obj->infos[i].name, name))
+      return obj->infos[i].value;
+  return NULL;
+}
+
+static __hwloc_inline void *
+hwloc_alloc_membind_policy(hwloc_topology_t topology, size_t len, hwloc_const_cpuset_t set, hwloc_membind_policy_t policy, int flags)
+{
+  void *p = hwloc_alloc_membind(topology, len, set, policy, flags);
+  if (p)
+    return p;
+  hwloc_set_membind(topology, set, policy, flags);
+  p = hwloc_alloc(topology, len);
+  if (p && policy != HWLOC_MEMBIND_FIRSTTOUCH)
+    /* Enforce the binding by touching the data */
+    memset(p, 0, len);
+  return p;
+}
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_INLINES_H */
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/intel-mic.h b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/intel-mic.h
new file mode 100644
index 0000000000..6f6f9d1b3a
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/intel-mic.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright © 2013-2016 Inria.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief Macros to help interaction between hwloc and Intel Xeon Phi (MIC).
+ *
+ * Applications that use both hwloc and Intel Xeon Phi (MIC) may want to
+ * include this file so as to get topology information for MIC devices.
+ */
+
+#ifndef HWLOC_INTEL_MIC_H
+#define HWLOC_INTEL_MIC_H
+
+#include <hwloc.h>
+#include <hwloc/autogen/config.h>
+#include <hwloc/helper.h>
+#ifdef HWLOC_LINUX_SYS
+#include <hwloc/linux.h>
+#include <dirent.h>
+#include <string.h>
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \defgroup hwlocality_intel_mic Interoperability with Intel Xeon Phi (MIC)
+ *
+ * This interface offers ways to retrieve topology information about
+ * Intel Xeon Phi (MIC) devices.
+ *
+ * @{
+ */
+
+/** \brief Get the CPU set of logical processors that are physically
+ * close to MIC device whose index is \p idx.
+ *
+ * Return the CPU set describing the locality of the MIC device whose index is \p idx.
+ *
+ * Topology \p topology and device index \p idx must match the local machine.
+ * I/O devices detection is not needed in the topology.
+ *
+ * The function only returns the locality of the device.
+ * If more information about the device is needed, OS objects should
+ * be used instead, see hwloc_intel_mic_get_device_osdev_by_index().
+ *
+ * This function is currently only implemented in a meaningful way for
+ * Linux; other systems will simply get a full cpuset.
+ */
+static __hwloc_inline int
+hwloc_intel_mic_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
+				  int idx __hwloc_attribute_unused,
+				  hwloc_cpuset_t set)
+{
+#ifdef HWLOC_LINUX_SYS
+	/* If we're on Linux, use the sysfs mechanism to get the local cpus */
+#define HWLOC_INTEL_MIC_DEVICE_SYSFS_PATH_MAX 128
+	char path[HWLOC_INTEL_MIC_DEVICE_SYSFS_PATH_MAX];
+	DIR *sysdir = NULL;
+	struct dirent *dirent;
+	unsigned pcibus, pcidev, pcifunc;
+
+	if (!hwloc_topology_is_thissystem(topology)) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	sprintf(path, "/sys/class/mic/mic%d", idx);
+	sysdir = opendir(path);
+	if (!sysdir)
+		return -1;
+
+	while ((dirent = readdir(sysdir)) != NULL) {
+		if (sscanf(dirent->d_name, "pci_%02x:%02x.%02x", &pcibus, &pcidev, &pcifunc) == 3) {
+			sprintf(path, "/sys/class/mic/mic%d/pci_%02x:%02x.%02x/local_cpus", idx, pcibus, pcidev, pcifunc);
+			if (hwloc_linux_read_path_as_cpumask(path, set) < 0
+			    || hwloc_bitmap_iszero(set))
+				hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
+			break;
+		}
+	}
+
+	closedir(sysdir);
+#else
+	/* Non-Linux systems simply get a full cpuset */
+	hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
+#endif
+	return 0;
+}
+
+/** \brief Get the hwloc OS device object corresponding to the
+ * MIC device for the given index.
+ *
+ * Return the OS device object describing the MIC device whose index is \p idx.
+ * Return NULL if there is none.
+ *
+ * The topology \p topology does not necessarily have to match the current
+ * machine. For instance the topology may be an XML import of a remote host.
+ * I/O devices detection must be enabled in the topology.
+ *
+ * \note The corresponding PCI device object can be obtained by looking
+ * at the OS device parent object.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_intel_mic_get_device_osdev_by_index(hwloc_topology_t topology,
+					  unsigned idx)
+{
+	hwloc_obj_t osdev = NULL;
+	while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
+		if (HWLOC_OBJ_OSDEV_COPROC == osdev->attr->osdev.type
+                    && osdev->name
+		    && !strncmp("mic", osdev->name, 3)
+		    && atoi(osdev->name + 3) == (int) idx)
+                        return osdev;
+        }
+        return NULL;
+}
+
+/** @} */
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_INTEL_MIC_H */
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/linux-libnuma.h b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/linux-libnuma.h
new file mode 100644
index 0000000000..0ce25910a5
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/linux-libnuma.h
@@ -0,0 +1,273 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2014 Inria.  All rights reserved.
+ * Copyright © 2009-2010, 2012 Université Bordeaux
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief Macros to help interaction between hwloc and Linux libnuma.
+ *
+ * Applications that use both Linux libnuma and hwloc may want to
+ * include this file so as to ease conversion between their respective types.
+*/
+
+#ifndef HWLOC_LINUX_LIBNUMA_H
+#define HWLOC_LINUX_LIBNUMA_H
+
+#include <hwloc.h>
+#include <numa.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \defgroup hwlocality_linux_libnuma_ulongs Interoperability with Linux libnuma unsigned long masks
+ *
+ * This interface helps converting between Linux libnuma unsigned long masks
+ * and hwloc cpusets and nodesets.
+ *
+ * \note Topology \p topology must match the current machine.
+ *
+ * \note The behavior of libnuma is undefined if the kernel is not NUMA-aware.
+ * (when CONFIG_NUMA is not set in the kernel configuration).
+ * This helper and libnuma may thus not be strictly compatible in this case,
+ * which may be detected by checking whether numa_available() returns -1.
+ *
+ * @{
+ */
+
+
+/** \brief Convert hwloc CPU set \p cpuset into the array of unsigned long \p mask
+ *
+ * \p mask is the array of unsigned long that will be filled.
+ * \p maxnode contains the maximal node number that may be stored in \p mask.
+ * \p maxnode will be set to the maximal node number that was found, plus one.
+ *
+ * This function may be used before calling set_mempolicy, mbind, migrate_pages
+ * or any other function that takes an array of unsigned long and a maximal
+ * node number as input parameter.
+ */
+static __hwloc_inline int
+hwloc_cpuset_to_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset,
+				    unsigned long *mask, unsigned long *maxnode)
+{
+  int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
+  unsigned long outmaxnode = -1;
+  hwloc_obj_t node = NULL;
+
+  /* round-up to the next ulong and clear all bytes */
+  *maxnode = (*maxnode + 8*sizeof(*mask) - 1) & ~(8*sizeof(*mask) - 1);
+  memset(mask, 0, *maxnode/8);
+
+  while ((node = hwloc_get_next_obj_covering_cpuset_by_depth(topology, cpuset, depth, node)) != NULL) {
+    if (node->os_index >= *maxnode)
+      continue;
+    mask[node->os_index/sizeof(*mask)/8] |= 1UL << (node->os_index % (sizeof(*mask)*8));
+    if (outmaxnode == (unsigned long) -1 || outmaxnode < node->os_index)
+      outmaxnode = node->os_index;
+  }
+
+  *maxnode = outmaxnode+1;
+  return 0;
+}
+
+/** \brief Convert hwloc NUMA node set \p nodeset into the array of unsigned long \p mask
+ *
+ * \p mask is the array of unsigned long that will be filled.
+ * \p maxnode contains the maximal node number that may be stored in \p mask.
+ * \p maxnode will be set to the maximal node number that was found, plus one.
+ *
+ * This function may be used before calling set_mempolicy, mbind, migrate_pages
+ * or any other function that takes an array of unsigned long and a maximal
+ * node number as input parameter.
+ */
+static __hwloc_inline int
+hwloc_nodeset_to_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset,
+				      unsigned long *mask, unsigned long *maxnode)
+{
+  int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
+  unsigned long outmaxnode = -1;
+  hwloc_obj_t node = NULL;
+
+  /* round-up to the next ulong and clear all bytes */
+  *maxnode = (*maxnode + 8*sizeof(*mask) - 1) & ~(8*sizeof(*mask) - 1);
+  memset(mask, 0, *maxnode/8);
+
+  while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL) {
+    if (node->os_index >= *maxnode)
+      continue;
+    if (!hwloc_bitmap_isset(nodeset, node->os_index))
+      continue;
+    mask[node->os_index/sizeof(*mask)/8] |= 1UL << (node->os_index % (sizeof(*mask)*8));
+    if (outmaxnode == (unsigned long) -1 || outmaxnode < node->os_index)
+      outmaxnode = node->os_index;
+  }
+
+  *maxnode = outmaxnode+1;
+  return 0;
+}
+
+/** \brief Convert the array of unsigned long \p mask into hwloc CPU set
+ *
+ * \p mask is an array of unsigned long that will be read.
+ * \p maxnode contains the maximal node number that may be read in \p mask.
+ *
+ * This function may be used after calling get_mempolicy or any other function
+ * that takes an array of unsigned long as output parameter (and possibly
+ * a maximal node number as input parameter).
+ */
+static __hwloc_inline int
+hwloc_cpuset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_cpuset_t cpuset,
+				      const unsigned long *mask, unsigned long maxnode)
+{
+  int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
+  hwloc_obj_t node = NULL;
+  hwloc_bitmap_zero(cpuset);
+  while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL)
+    if (node->os_index < maxnode
+	&& (mask[node->os_index/sizeof(*mask)/8] & (1UL << (node->os_index % (sizeof(*mask)*8)))))
+      hwloc_bitmap_or(cpuset, cpuset, node->cpuset);
+  return 0;
+}
+
+/** \brief Convert the array of unsigned long \p mask into hwloc NUMA node set
+ *
+ * \p mask is an array of unsigned long that will be read.
+ * \p maxnode contains the maximal node number that may be read in \p mask.
+ *
+ * This function may be used after calling get_mempolicy or any other function
+ * that takes an array of unsigned long as output parameter (and possibly
+ * a maximal node number as input parameter).
+ */
+static __hwloc_inline int
+hwloc_nodeset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_nodeset_t nodeset,
+					const unsigned long *mask, unsigned long maxnode)
+{
+  int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
+  hwloc_obj_t node = NULL;
+  hwloc_bitmap_zero(nodeset);
+  while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL)
+    if (node->os_index < maxnode
+	&& (mask[node->os_index/sizeof(*mask)/8] & (1UL << (node->os_index % (sizeof(*mask)*8)))))
+      hwloc_bitmap_set(nodeset, node->os_index);
+  return 0;
+}
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_linux_libnuma_bitmask Interoperability with Linux libnuma bitmask
+ *
+ * This interface helps converting between Linux libnuma bitmasks
+ * and hwloc cpusets and nodesets.
+ *
+ * \note Topology \p topology must match the current machine.
+ *
+ * \note The behavior of libnuma is undefined if the kernel is not NUMA-aware.
+ * (when CONFIG_NUMA is not set in the kernel configuration).
+ * This helper and libnuma may thus not be strictly compatible in this case,
+ * which may be detected by checking whether numa_available() returns -1.
+ *
+ * @{
+ */
+
+
+/** \brief Convert hwloc CPU set \p cpuset into the returned libnuma bitmask
+ *
+ * The returned bitmask should later be freed with numa_bitmask_free.
+ *
+ * This function may be used before calling many numa_ functions
+ * that use a struct bitmask as an input parameter.
+ *
+ * \return newly allocated struct bitmask.
+ */
+static __hwloc_inline struct bitmask *
+hwloc_cpuset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset) __hwloc_attribute_malloc;
+static __hwloc_inline struct bitmask *
+hwloc_cpuset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset)
+{
+  int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
+  hwloc_obj_t node = NULL;
+  struct bitmask *bitmask = numa_allocate_cpumask();
+  if (!bitmask)
+    return NULL;
+  while ((node = hwloc_get_next_obj_covering_cpuset_by_depth(topology, cpuset, depth, node)) != NULL)
+    if (node->memory.local_memory)
+      numa_bitmask_setbit(bitmask, node->os_index);
+  return bitmask;
+}
+
+/** \brief Convert hwloc NUMA node set \p nodeset into the returned libnuma bitmask
+ *
+ * The returned bitmask should later be freed with numa_bitmask_free.
+ *
+ * This function may be used before calling many numa_ functions
+ * that use a struct bitmask as an input parameter.
+ *
+ * \return newly allocated struct bitmask.
+ */
+static __hwloc_inline struct bitmask *
+hwloc_nodeset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset) __hwloc_attribute_malloc;
+static __hwloc_inline struct bitmask *
+hwloc_nodeset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset)
+{
+  int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
+  hwloc_obj_t node = NULL;
+  struct bitmask *bitmask = numa_allocate_cpumask();
+  if (!bitmask)
+    return NULL;
+  while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL)
+    if (hwloc_bitmap_isset(nodeset, node->os_index) && node->memory.local_memory)
+      numa_bitmask_setbit(bitmask, node->os_index);
+  return bitmask;
+}
+
+/** \brief Convert libnuma bitmask \p bitmask into hwloc CPU set \p cpuset
+ *
+ * This function may be used after calling many numa_ functions
+ * that use a struct bitmask as an output parameter.
+ */
+static __hwloc_inline int
+hwloc_cpuset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_cpuset_t cpuset,
+					const struct bitmask *bitmask)
+{
+  int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
+  hwloc_obj_t node = NULL;
+  hwloc_bitmap_zero(cpuset);
+  while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL)
+    if (numa_bitmask_isbitset(bitmask, node->os_index))
+      hwloc_bitmap_or(cpuset, cpuset, node->cpuset);
+  return 0;
+}
+
+/** \brief Convert libnuma bitmask \p bitmask into hwloc NUMA node set \p nodeset
+ *
+ * This function may be used after calling many numa_ functions
+ * that use a struct bitmask as an output parameter.
+ */
+static __hwloc_inline int
+hwloc_nodeset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_nodeset_t nodeset,
+					 const struct bitmask *bitmask)
+{
+  int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
+  hwloc_obj_t node = NULL;
+  hwloc_bitmap_zero(nodeset);
+  while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL)
+    if (numa_bitmask_isbitset(bitmask, node->os_index))
+      hwloc_bitmap_set(nodeset, node->os_index);
+  return 0;
+}
+
+/** @} */
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_LINUX_LIBNUMA_H */
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/linux.h b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/linux.h
new file mode 100644
index 0000000000..c409e1c2af
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/linux.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2016 Inria.  All rights reserved.
+ * Copyright © 2009-2011 Université Bordeaux
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief Macros to help interaction between hwloc and Linux.
+ *
+ * Applications that use hwloc on Linux may want to include this file
+ * if using some low-level Linux features.
+ */
+
+#ifndef HWLOC_LINUX_H
+#define HWLOC_LINUX_H
+
+#include <hwloc.h>
+#include <stdio.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \defgroup hwlocality_linux Linux-specific helpers
+ *
+ * This includes helpers for manipulating Linux kernel cpumap files, and hwloc
+ * equivalents of the Linux sched_setaffinity and sched_getaffinity system calls.
+ *
+ * @{
+ */
+
+/** \brief Bind a thread \p tid on cpus given in cpuset \p set
+ *
+ * The behavior is exactly the same as the Linux sched_setaffinity system call,
+ * but uses a hwloc cpuset.
+ *
+ * \note This is equivalent to calling hwloc_set_proc_cpubind() with
+ * HWLOC_CPUBIND_THREAD as flags.
+ */
+HWLOC_DECLSPEC int hwloc_linux_set_tid_cpubind(hwloc_topology_t topology, pid_t tid, hwloc_const_cpuset_t set);
+
+/** \brief Get the current binding of thread \p tid
+ *
+ * The behavior is exactly the same as the Linux sched_getaffinity system call,
+ * but uses a hwloc cpuset.
+ *
+ * \note This is equivalent to calling hwloc_get_proc_cpubind() with
+ * ::HWLOC_CPUBIND_THREAD as flags.
+ */
+HWLOC_DECLSPEC int hwloc_linux_get_tid_cpubind(hwloc_topology_t topology, pid_t tid, hwloc_cpuset_t set);
+
+/** \brief Get the last physical CPU where thread \p tid ran.
+ *
+ * \note This is equivalent to calling hwloc_get_proc_last_cpu_location() with
+ * ::HWLOC_CPUBIND_THREAD as flags.
+ */
+HWLOC_DECLSPEC int hwloc_linux_get_tid_last_cpu_location(hwloc_topology_t topology, pid_t tid, hwloc_bitmap_t set);
+
+/** \brief Convert a linux kernel cpumask file \p path into a hwloc bitmap \p set.
+ *
+ * Might be used when reading CPU set from sysfs attributes such as topology
+ * and caches for processors, or local_cpus for devices.
+ *
+ * \note This function ignores the HWLOC_FSROOT environment variable.
+ */
+HWLOC_DECLSPEC int hwloc_linux_read_path_as_cpumask(const char *path, hwloc_bitmap_t set);
+
+/** @} */
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_LINUX_H */
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/myriexpress.h b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/myriexpress.h
new file mode 100644
index 0000000000..68ff88f5ad
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/myriexpress.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright © 2010-2014 Inria.  All rights reserved.
+ * Copyright © 2011 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief Macros to help interaction between hwloc and Myrinet Express.
+ *
+ * Applications that use both hwloc and Myrinet Express verbs may want to
+ * include this file so as to get topology information for Myrinet hardware.
+ *
+ */
+
+#ifndef HWLOC_MYRIEXPRESS_H
+#define HWLOC_MYRIEXPRESS_H
+
+#include <hwloc.h>
+#include <hwloc/autogen/config.h>
+
+#include <myriexpress.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \defgroup hwlocality_myriexpress Interoperability with Myrinet Express
+ *
+ * This interface offers ways to retrieve topology information about
+ * Myrinet Express hardware.
+ *
+ * @{
+ */
+
+/** \brief Get the CPU set of logical processors that are physically
+ * close to the MX board \p id.
+ *
+ * Return the CPU set describing the locality of the Myrinet Express
+ * board whose index is \p id.
+ *
+ * Topology \p topology and device \p id must match the local machine.
+ * I/O devices detection is not needed in the topology.
+ *
+ * The function only returns the locality of the device.
+ * No additional information about the device is available.
+ */
+static __hwloc_inline int
+hwloc_mx_board_get_device_cpuset(hwloc_topology_t topology,
+				 unsigned id, hwloc_cpuset_t set)
+{
+  uint32_t in, out;
+
+  if (!hwloc_topology_is_thissystem(topology)) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  in = id;
+  if (mx_get_info(NULL, MX_NUMA_NODE, &in, sizeof(in), &out, sizeof(out)) != MX_SUCCESS) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (out != (uint32_t) -1) {
+    hwloc_obj_t obj = NULL;
+    while ((obj = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, obj)) != NULL)
+      if (obj->os_index == out) {
+	hwloc_bitmap_copy(set, obj->cpuset);
+	goto out;
+      }
+  }
+  /* fallback to the full topology cpuset */
+  hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
+
+ out:
+  return 0;
+}
+
+/** \brief Get the CPU set of logical processors that are physically
+ * close to the MX endpoint \p endpoint.
+ *
+ * Return the CPU set describing the locality of the Myrinet Express
+ * board that runs the MX endpoint \p endpoint.
+ *
+ * Topology \p topology and device \p id must match the local machine.
+ * I/O devices detection is not needed in the topology.
+ *
+ * The function only returns the locality of the endpoint.
+ * No additional information about the endpoint or device is available.
+ */
+static __hwloc_inline int
+hwloc_mx_endpoint_get_device_cpuset(hwloc_topology_t topology,
+				    mx_endpoint_t endpoint, hwloc_cpuset_t set)
+{
+  uint64_t nid;
+  uint32_t nindex, eid;
+  mx_endpoint_addr_t eaddr;
+
+  if (mx_get_endpoint_addr(endpoint, &eaddr) != MX_SUCCESS) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (mx_decompose_endpoint_addr(eaddr, &nid, &eid) != MX_SUCCESS) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (mx_nic_id_to_board_number(nid, &nindex) != MX_SUCCESS) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  return hwloc_mx_board_get_device_cpuset(topology, nindex, set);
+}
+
+/** @} */
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_MYRIEXPRESS_H */
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/nvml.h b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/nvml.h
new file mode 100644
index 0000000000..197108660e
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/nvml.h
@@ -0,0 +1,181 @@
+/*
+ * Copyright © 2012-2016 Inria.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief Macros to help interaction between hwloc and the NVIDIA Management Library.
+ *
+ * Applications that use both hwloc and the NVIDIA Management Library may want to
+ * include this file so as to get topology information for NVML devices.
+ */
+
+#ifndef HWLOC_NVML_H
+#define HWLOC_NVML_H
+
+#include <hwloc.h>
+#include <hwloc/autogen/config.h>
+#include <hwloc/helper.h>
+#ifdef HWLOC_LINUX_SYS
+#include <hwloc/linux.h>
+#endif
+
+#include <nvml.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \defgroup hwlocality_nvml Interoperability with the NVIDIA Management Library
+ *
+ * This interface offers ways to retrieve topology information about
+ * devices managed by the NVIDIA Management Library (NVML).
+ *
+ * @{
+ */
+
+/** \brief Get the CPU set of logical processors that are physically
+ * close to NVML device \p device.
+ *
+ * Return the CPU set describing the locality of the NVML device \p device.
+ *
+ * Topology \p topology and device \p device must match the local machine.
+ * I/O devices detection and the NVML component are not needed in the topology.
+ *
+ * The function only returns the locality of the device.
+ * If more information about the device is needed, OS objects should
+ * be used instead, see hwloc_nvml_get_device_osdev()
+ * and hwloc_nvml_get_device_osdev_by_index().
+ *
+ * This function is currently only implemented in a meaningful way for
+ * Linux; other systems will simply get a full cpuset.
+ */
+static __hwloc_inline int
+hwloc_nvml_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
+			     nvmlDevice_t device, hwloc_cpuset_t set)
+{
+#ifdef HWLOC_LINUX_SYS
+  /* If we're on Linux, use the sysfs mechanism to get the local cpus */
+#define HWLOC_NVML_DEVICE_SYSFS_PATH_MAX 128
+  char path[HWLOC_NVML_DEVICE_SYSFS_PATH_MAX];
+  nvmlReturn_t nvres;
+  nvmlPciInfo_t pci;
+
+  if (!hwloc_topology_is_thissystem(topology)) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  nvres = nvmlDeviceGetPciInfo(device, &pci);
+  if (NVML_SUCCESS != nvres) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  sprintf(path, "/sys/bus/pci/devices/%04x:%02x:%02x.0/local_cpus", pci.domain, pci.bus, pci.device);
+  if (hwloc_linux_read_path_as_cpumask(path, set) < 0
+      || hwloc_bitmap_iszero(set))
+    hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
+#else
+  /* Non-Linux systems simply get a full cpuset */
+  hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
+#endif
+  return 0;
+}
+
+/** \brief Get the hwloc OS device object corresponding to the
+ * NVML device whose index is \p idx.
+ *
+ * Return the OS device object describing the NVML device whose
+ * index is \p idx. Returns NULL if there is none.
+ *
+ * The topology \p topology does not necessarily have to match the current
+ * machine. For instance the topology may be an XML import of a remote host.
+ * I/O devices detection and the NVML component must be enabled in the topology.
+ *
+ * \note The corresponding PCI device object can be obtained by looking
+ * at the OS device parent object (unless PCI devices are filtered out).
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_nvml_get_device_osdev_by_index(hwloc_topology_t topology, unsigned idx)
+{
+	hwloc_obj_t osdev = NULL;
+	while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
+                if (HWLOC_OBJ_OSDEV_GPU == osdev->attr->osdev.type
+                    && osdev->name
+		    && !strncmp("nvml", osdev->name, 4)
+		    && atoi(osdev->name + 4) == (int) idx)
+                        return osdev;
+        }
+        return NULL;
+}
+
+/** \brief Get the hwloc OS device object corresponding to NVML device \p device.
+ *
+ * Return the hwloc OS device object that describes the given
+ * NVML device \p device. Return NULL if there is none.
+ *
+ * Topology \p topology and device \p device must match the local machine.
+ * I/O devices detection and the NVML component must be enabled in the topology.
+ * If not, the locality of the object may still be found using
+ * hwloc_nvml_get_device_cpuset().
+ *
+ * \note The corresponding hwloc PCI device may be found by looking
+ * at the result parent pointer (unless PCI devices are filtered out).
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_nvml_get_device_osdev(hwloc_topology_t topology, nvmlDevice_t device)
+{
+	hwloc_obj_t osdev;
+	nvmlReturn_t nvres;
+	nvmlPciInfo_t pci;
+	char uuid[64];
+
+	if (!hwloc_topology_is_thissystem(topology)) {
+		errno = EINVAL;
+		return NULL;
+	}
+
+	nvres = nvmlDeviceGetPciInfo(device, &pci);
+	if (NVML_SUCCESS != nvres)
+		return NULL;
+
+	nvres = nvmlDeviceGetUUID(device, uuid, sizeof(uuid));
+	if (NVML_SUCCESS != nvres)
+		uuid[0] = '\0';
+
+	osdev = NULL;
+	while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
+		hwloc_obj_t pcidev = osdev->parent;
+		const char *info;
+
+		if (strncmp(osdev->name, "nvml", 4))
+			continue;
+
+		if (pcidev
+		    && pcidev->type == HWLOC_OBJ_PCI_DEVICE
+		    && pcidev->attr->pcidev.domain == pci.domain
+		    && pcidev->attr->pcidev.bus == pci.bus
+		    && pcidev->attr->pcidev.dev == pci.device
+		    && pcidev->attr->pcidev.func == 0)
+			return osdev;
+
+		info = hwloc_obj_get_info_by_name(osdev, "NVIDIAUUID");
+		if (info && !strcmp(info, uuid))
+			return osdev;
+	}
+
+	return NULL;
+}
+
+/** @} */
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_NVML_H */
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/opencl.h b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/opencl.h
new file mode 100644
index 0000000000..d881427bbb
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/opencl.h
@@ -0,0 +1,196 @@
+/*
+ * Copyright © 2012-2017 Inria.  All rights reserved.
+ * Copyright © 2013 Université Bordeaux.  All right reserved.
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief Macros to help interaction between hwloc and the OpenCL interface.
+ *
+ * Applications that use both hwloc and OpenCL may want to
+ * include this file so as to get topology information for OpenCL devices.
+ */
+
+#ifndef HWLOC_OPENCL_H
+#define HWLOC_OPENCL_H
+
+#include <hwloc.h>
+#include <hwloc/autogen/config.h>
+#include <hwloc/helper.h>
+#ifdef HWLOC_LINUX_SYS
+#include <hwloc/linux.h>
+#endif
+
+#include <CL/cl.h>
+#include <CL/cl_ext.h>
+
+#include <stdio.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \defgroup hwlocality_opencl Interoperability with OpenCL
+ *
+ * This interface offers ways to retrieve topology information about
+ * OpenCL devices.
+ *
+ * Only the AMD OpenCL interface currently offers useful locality information
+ * about its devices.
+ *
+ * @{
+ */
+
+/** \brief Get the CPU set of logical processors that are physically
+ * close to OpenCL device \p device.
+ *
+ * Return the CPU set describing the locality of the OpenCL device \p device.
+ *
+ * Topology \p topology and device \p device must match the local machine.
+ * I/O devices detection and the OpenCL component are not needed in the topology.
+ *
+ * The function only returns the locality of the device.
+ * If more information about the device is needed, OS objects should
+ * be used instead, see hwloc_opencl_get_device_osdev()
+ * and hwloc_opencl_get_device_osdev_by_index().
+ *
+ * This function is currently only implemented in a meaningful way for
+ * Linux with the AMD OpenCL implementation; other systems will simply
+ * get a full cpuset.
+ */
+static __hwloc_inline int
+hwloc_opencl_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
+			       cl_device_id device __hwloc_attribute_unused,
+			       hwloc_cpuset_t set)
+{
+#if (defined HWLOC_LINUX_SYS) && (defined CL_DEVICE_TOPOLOGY_AMD)
+	/* If we're on Linux + AMD OpenCL, use the AMD extension + the sysfs mechanism to get the local cpus */
+#define HWLOC_OPENCL_DEVICE_SYSFS_PATH_MAX 128
+	char path[HWLOC_OPENCL_DEVICE_SYSFS_PATH_MAX];
+	cl_device_topology_amd amdtopo;
+	cl_int clret;
+
+	if (!hwloc_topology_is_thissystem(topology)) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	clret = clGetDeviceInfo(device, CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL);
+	if (CL_SUCCESS != clret) {
+		hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
+		return 0;
+	}
+	if (CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD != amdtopo.raw.type) {
+		hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
+		return 0;
+	}
+
+	sprintf(path, "/sys/bus/pci/devices/0000:%02x:%02x.%01x/local_cpus",
+		(unsigned) amdtopo.pcie.bus, (unsigned) amdtopo.pcie.device, (unsigned) amdtopo.pcie.function);
+	if (hwloc_linux_read_path_as_cpumask(path, set) < 0
+	    || hwloc_bitmap_iszero(set))
+		hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
+#else
+	/* Non-Linux + AMD OpenCL systems simply get a full cpuset */
+	hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
+#endif
+  return 0;
+}
+
+/** \brief Get the hwloc OS device object corresponding to the
+ * OpenCL device for the given indexes.
+ *
+ * Return the OS device object describing the OpenCL device
+ * whose platform index is \p platform_index,
+ * and whose device index within this platform is \p device_index.
+ * Return NULL if there is none.
+ *
+ * The topology \p topology does not necessarily have to match the current
+ * machine. For instance the topology may be an XML import of a remote host.
+ * I/O devices detection and the OpenCL component must be enabled in the topology.
+ *
+ * \note The corresponding PCI device object can be obtained by looking
+ * at the OS device parent object (unless PCI devices are filtered out).
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_opencl_get_device_osdev_by_index(hwloc_topology_t topology,
+				       unsigned platform_index, unsigned device_index)
+{
+	unsigned x = (unsigned) -1, y = (unsigned) -1;
+	hwloc_obj_t osdev = NULL;
+	while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
+		if (HWLOC_OBJ_OSDEV_COPROC == osdev->attr->osdev.type
+                    && osdev->name
+		    && sscanf(osdev->name, "opencl%ud%u", &x, &y) == 2
+		    && platform_index == x && device_index == y)
+                        return osdev;
+        }
+        return NULL;
+}
+
+/** \brief Get the hwloc OS device object corresponding to OpenCL device \p device.
+ *
+ * Return the hwloc OS device object that describes the given
+ * OpenCL device \p device. Return NULL if there is none.
+ *
+ * Topology \p topology and device \p device must match the local machine.
+ * I/O devices detection and the OpenCL component must be enabled in the topology.
+ * If not, the locality of the object may still be found using
+ * hwloc_opencl_get_device_cpuset().
+ *
+ * \note This function cannot work if PCI devices are filtered out.
+ *
+ * \note The corresponding hwloc PCI device may be found by looking
+ * at the result parent pointer (unless PCI devices are filtered out).
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_opencl_get_device_osdev(hwloc_topology_t topology __hwloc_attribute_unused,
+			      cl_device_id device __hwloc_attribute_unused)
+{
+#ifdef CL_DEVICE_TOPOLOGY_AMD
+	hwloc_obj_t osdev;
+	cl_device_topology_amd amdtopo;
+	cl_int clret;
+
+	clret = clGetDeviceInfo(device, CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL);
+	if (CL_SUCCESS != clret) {
+		errno = EINVAL;
+		return NULL;
+	}
+	if (CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD != amdtopo.raw.type) {
+		errno = EINVAL;
+		return NULL;
+	}
+
+	osdev = NULL;
+	while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
+		hwloc_obj_t pcidev = osdev->parent;
+		if (strncmp(osdev->name, "opencl", 6))
+			continue;
+		if (pcidev
+		    && pcidev->type == HWLOC_OBJ_PCI_DEVICE
+		    && pcidev->attr->pcidev.domain == 0
+		    && pcidev->attr->pcidev.bus == amdtopo.pcie.bus
+		    && pcidev->attr->pcidev.dev == amdtopo.pcie.device
+		    && pcidev->attr->pcidev.func == amdtopo.pcie.function)
+			return osdev;
+		/* if PCI are filtered out, we need a info attr to match on */
+	}
+
+	return NULL;
+#else
+	return NULL;
+#endif
+}
+
+/** @} */
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_OPENCL_H */
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/openfabrics-verbs.h b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/openfabrics-verbs.h
new file mode 100644
index 0000000000..174ab4a57d
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/openfabrics-verbs.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2016 Inria.  All rights reserved.
+ * Copyright © 2009-2010 Université Bordeaux
+ * Copyright © 2009-2011 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief Macros to help interaction between hwloc and OpenFabrics
+ * verbs.
+ *
+ * Applications that use both hwloc and OpenFabrics verbs may want to
+ * include this file so as to get topology information for OpenFabrics
+ * hardware (InfiniBand, etc).
+ *
+ */
+
+#ifndef HWLOC_OPENFABRICS_VERBS_H
+#define HWLOC_OPENFABRICS_VERBS_H
+
+#include <hwloc.h>
+#include <hwloc/autogen/config.h>
+#ifdef HWLOC_LINUX_SYS
+#include <hwloc/linux.h>
+#endif
+
+#include <infiniband/verbs.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \defgroup hwlocality_openfabrics Interoperability with OpenFabrics
+ *
+ * This interface offers ways to retrieve topology information about
+ * OpenFabrics devices (InfiniBand, Omni-Path, usNIC, etc).
+ *
+ * @{
+ */
+
+/** \brief Get the CPU set of logical processors that are physically
+ * close to device \p ibdev.
+ *
+ * Return the CPU set describing the locality of the OpenFabrics
+ * device \p ibdev (InfiniBand, etc).
+ *
+ * Topology \p topology and device \p ibdev must match the local machine.
+ * I/O devices detection is not needed in the topology.
+ *
+ * The function only returns the locality of the device.
+ * If more information about the device is needed, OS objects should
+ * be used instead, see hwloc_ibv_get_device_osdev()
+ * and hwloc_ibv_get_device_osdev_by_name().
+ *
+ * This function is currently only implemented in a meaningful way for
+ * Linux; other systems will simply get a full cpuset.
+ */
+static __hwloc_inline int
+hwloc_ibv_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
+			    struct ibv_device *ibdev, hwloc_cpuset_t set)
+{
+#ifdef HWLOC_LINUX_SYS
+  /* If we're on Linux, use the verbs-provided sysfs mechanism to
+     get the local cpus */
+#define HWLOC_OPENFABRICS_VERBS_SYSFS_PATH_MAX 128
+  char path[HWLOC_OPENFABRICS_VERBS_SYSFS_PATH_MAX];
+
+  if (!hwloc_topology_is_thissystem(topology)) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  sprintf(path, "/sys/class/infiniband/%s/device/local_cpus",
+	  ibv_get_device_name(ibdev));
+  if (hwloc_linux_read_path_as_cpumask(path, set) < 0
+      || hwloc_bitmap_iszero(set))
+    hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
+#else
+  /* Non-Linux systems simply get a full cpuset */
+  hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
+#endif
+  return 0;
+}
+
+/** \brief Get the hwloc OS device object corresponding to the OpenFabrics
+ * device named \p ibname.
+ *
+ * Return the OS device object describing the OpenFabrics device
+ * (InfiniBand, Omni-Path, usNIC, etc) whose name is \p ibname
+ * (mlx5_0, hfi1_0, usnic_0, qib0, etc).
+ * Returns NULL if there is none.
+ * The name \p ibname is usually obtained from ibv_get_device_name().
+ *
+ * The topology \p topology does not necessarily have to match the current
+ * machine. For instance the topology may be an XML import of a remote host.
+ * I/O devices detection must be enabled in the topology.
+ *
+ * \note The corresponding PCI device object can be obtained by looking
+ * at the OS device parent object.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_ibv_get_device_osdev_by_name(hwloc_topology_t topology,
+				   const char *ibname)
+{
+	hwloc_obj_t osdev = NULL;
+	while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
+		if (HWLOC_OBJ_OSDEV_OPENFABRICS == osdev->attr->osdev.type
+		    && osdev->name && !strcmp(ibname, osdev->name))
+			return osdev;
+	}
+	return NULL;
+}
+
+/** \brief Get the hwloc OS device object corresponding to the OpenFabrics
+ * device \p ibdev.
+ *
+ * Return the OS device object describing the OpenFabrics device \p ibdev
+ * (InfiniBand, etc). Returns NULL if there is none.
+ *
+ * Topology \p topology and device \p ibdev must match the local machine.
+ * I/O devices detection must be enabled in the topology.
+ * If not, the locality of the object may still be found using
+ * hwloc_ibv_get_device_cpuset().
+ *
+ * \note The corresponding PCI device object can be obtained by looking
+ * at the OS device parent object.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_ibv_get_device_osdev(hwloc_topology_t topology,
+			   struct ibv_device *ibdev)
+{
+	if (!hwloc_topology_is_thissystem(topology)) {
+		errno = EINVAL;
+		return NULL;
+	}
+	return hwloc_ibv_get_device_osdev_by_name(topology, ibv_get_device_name(ibdev));
+}
+
+/** @} */
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_OPENFABRICS_VERBS_H */
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/plugins.h b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/plugins.h
new file mode 100644
index 0000000000..881bb54935
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/plugins.h
@@ -0,0 +1,522 @@
+/*
+ * Copyright © 2013-2016 Inria.  All rights reserved.
+ * Copyright © 2016 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+#ifndef HWLOC_PLUGINS_H
+#define HWLOC_PLUGINS_H
+
+/** \file
+ * \brief Public interface for building hwloc plugins.
+ */
+
+struct hwloc_backend;
+
+#include <hwloc.h>
+#ifdef HWLOC_INSIDE_PLUGIN
+/* needed for hwloc_plugin_check_namespace() */
+#include <ltdl.h>
+#endif
+
+
+
+/** \defgroup hwlocality_disc_components Components and Plugins: Discovery components
+ * @{
+ */
+
+/** \brief Discovery component type */
+typedef enum hwloc_disc_component_type_e {
+  /** \brief CPU-only discovery through the OS, or generic no-OS support.
+   * \hideinitializer */
+  HWLOC_DISC_COMPONENT_TYPE_CPU = (1<<0),
+
+  /** \brief xml or synthetic,
+   * platform-specific components such as bgq.
+   * Anything that discovers CPU and everything else.
+   * No misc backend is expected to complement a global component.
+   * \hideinitializer */
+  HWLOC_DISC_COMPONENT_TYPE_GLOBAL = (1<<1),
+
+  /** \brief OpenCL, Cuda, etc.
+   * \hideinitializer */
+  HWLOC_DISC_COMPONENT_TYPE_MISC = (1<<2)
+} hwloc_disc_component_type_t;
+
+/** \brief Discovery component structure
+ *
+ * This is the major kind of components, taking care of the discovery.
+ * They are registered by generic components, either statically-built or as plugins.
+ */
+struct hwloc_disc_component {
+  /** \brief Discovery component type */
+  hwloc_disc_component_type_t type;
+
+  /** \brief Name.
+   * If this component is built as a plugin, this name does not have to match the plugin filename.
+   */
+  const char *name;
+
+  /** \brief Component types to exclude, as an OR'ed set of ::hwloc_disc_component_type_e.
+   *
+   * For a GLOBAL component, this usually includes all other types (~0).
+   *
+   * Other components only exclude types that may bring conflicting
+   * topology information. MISC components should likely not be excluded
+   * since they usually bring non-primary additional information.
+   */
+  unsigned excludes;
+
+  /** \brief Instantiate callback to create a backend from the component.
+   * Parameters data1, data2, data3 are NULL except for components
+   * that have special enabling routines such as hwloc_topology_set_xml(). */
+  struct hwloc_backend * (*instantiate)(struct hwloc_disc_component *component, const void *data1, const void *data2, const void *data3);
+
+  /** \brief Component priority.
+   * Used to sort topology->components, higher priority first.
+   * Also used to decide between two components with the same name.
+   *
+   * Usual values are
+   * 50 for native OS (or platform) components,
+   * 45 for x86,
+   * 40 for no-OS fallback,
+   * 30 for global components (xml, synthetic),
+   * 20 for pci,
+   * 10 for other misc components (opencl etc.).
+   */
+  unsigned priority;
+
+  /** \private Used internally to list components by priority on topology->components
+   * (the component structure is usually read-only,
+   *  the core copies it before using this field for queueing)
+   */
+  struct hwloc_disc_component * next;
+};
+
+/** @} */
+
+
+
+
+/** \defgroup hwlocality_disc_backends Components and Plugins: Discovery backends
+ * @{
+ */
+
+/** \brief Discovery backend structure
+ *
+ * A backend is the instantiation of a discovery component.
+ * When a component gets enabled for a topology,
+ * its instantiate() callback creates a backend.
+ *
+ * hwloc_backend_alloc() initializes all fields to default values
+ * that the component may change (except "component" and "next")
+ * before enabling the backend with hwloc_backend_enable().
+ */
+struct hwloc_backend {
+  /** \private Reserved for the core, set by hwloc_backend_alloc() */
+  struct hwloc_disc_component * component;
+  /** \private Reserved for the core, set by hwloc_backend_enable() */
+  struct hwloc_topology * topology;
+  /** \private Reserved for the core. Set to 1 if forced through envvar, 0 otherwise. */
+  int envvar_forced;
+  /** \private Reserved for the core. Used internally to list backends topology->backends. */
+  struct hwloc_backend * next;
+
+  /** \brief Backend flags, currently always 0. */
+  unsigned long flags;
+
+  /** \brief Backend-specific 'is_thissystem' property.
+   * Set to 0 or 1 if the backend should enforce the thissystem flag when it gets enabled.
+   * Set to -1 if the backend doesn't care (default). */
+  int is_thissystem;
+
+  /** \brief Backend private data, or NULL if none. */
+  void * private_data;
+  /** \brief Callback for freeing the private_data.
+   * May be NULL.
+   */
+  void (*disable)(struct hwloc_backend *backend);
+
+  /** \brief Main discovery callback.
+   * returns -1 on error, either because it couldn't add its objects to the existing topology,
+   * or because of an actual discovery/gathering failure.
+   * May be NULL if type is ::HWLOC_DISC_COMPONENT_TYPE_MISC. */
+  int (*discover)(struct hwloc_backend *backend);
+
+  /** \brief Callback used by the PCI backend to retrieve the locality of a PCI object from the OS/cpu backend.
+   * May be NULL. */
+  int (*get_pci_busid_cpuset)(struct hwloc_backend *backend, struct hwloc_pcidev_attr_s *busid, hwloc_bitmap_t cpuset);
+};
+
+/** \brief Allocate a backend structure, set good default values, initialize backend->component and topology, etc.
+ * The caller will then modify whatever needed, and call hwloc_backend_enable().
+ */
+HWLOC_DECLSPEC struct hwloc_backend * hwloc_backend_alloc(struct hwloc_disc_component *component);
+
+/** \brief Enable a previously allocated and setup backend. */
+HWLOC_DECLSPEC int hwloc_backend_enable(struct hwloc_topology *topology, struct hwloc_backend *backend);
+
+/** @} */
+
+
+
+
+/** \defgroup hwlocality_generic_components Components and Plugins: Generic components
+ * @{
+ */
+
+/** \brief Generic component type */
+typedef enum hwloc_component_type_e {
+  /** \brief The data field must point to a struct hwloc_disc_component. */
+  HWLOC_COMPONENT_TYPE_DISC,
+
+  /** \brief The data field must point to a struct hwloc_xml_component. */
+  HWLOC_COMPONENT_TYPE_XML
+} hwloc_component_type_t;
+
+/** \brief Generic component structure
+ *
+ * Generic components structure, either statically listed by configure in static-components.h
+ * or dynamically loaded as a plugin.
+ */
+struct hwloc_component {
+  /** \brief Component ABI version, set to ::HWLOC_COMPONENT_ABI */
+  unsigned abi;
+
+  /** \brief Process-wide component initialization callback.
+   *
+   * This optional callback is called when the component is registered
+   * to the hwloc core (after loading the plugin).
+   *
+   * When the component is built as a plugin, this callback
+   * should call hwloc_plugin_check_namespace()
+   * and return a negative error code on error.
+   *
+   * \p flags is always 0 for now.
+   *
+   * \return 0 on success, or a negative code on error.
+   *
+   * \note If the component uses ltdl for loading its own plugins,
+   * it should load/unload them only in init() and finalize(),
+   * to avoid race conditions with hwloc's use of ltdl.
+   */
+  int (*init)(unsigned long flags);
+
+  /** \brief Process-wide component termination callback.
+   *
+   * This optional callback is called after unregistering the component
+   * from the hwloc core (before unloading the plugin).
+   *
+   * \p flags is always 0 for now.
+   *
+   * \note If the component uses ltdl for loading its own plugins,
+   * it should load/unload them only in init() and finalize(),
+   * to avoid race conditions with hwloc's use of ltdl.
+   */
+  void (*finalize)(unsigned long flags);
+
+  /** \brief Component type */
+  hwloc_component_type_t type;
+
+  /** \brief Component flags, unused for now */
+  unsigned long flags;
+
+  /** \brief Component data, pointing to a struct hwloc_disc_component or struct hwloc_xml_component. */
+  void * data;
+};
+
+/** @} */
+
+
+
+
+/** \defgroup hwlocality_components_core_funcs Components and Plugins: Core functions to be used by components
+ * @{
+ */
+
+/** \brief Add an object to the topology.
+ *
+ * It is sorted along the tree of other objects according to the inclusion of
+ * cpusets, to eventually be added as a child of the smallest object including
+ * this object.
+ *
+ * If the cpuset is empty, the type of the object (and maybe some attributes)
+ * must be enough to find where to insert the object. This is especially true
+ * for NUMA nodes with memory and no CPUs.
+ *
+ * The given object should not have children.
+ *
+ * This shall only be called before levels are built.
+ *
+ * In case of error, hwloc_report_os_error() is called.
+ *
+ * The caller should check whether the object type is filtered-out before calling this function.
+ *
+ * The topology cpuset/nodesets will be enlarged to include the object sets.
+ *
+ * Returns the object on success.
+ * Returns NULL and frees obj on error.
+ * Returns another object and frees obj if it was merged with an identical pre-existing object.
+ */
+HWLOC_DECLSPEC struct hwloc_obj *hwloc_insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t obj);
+
+/** \brief Type of error callbacks during object insertion */
+typedef void (*hwloc_report_error_t)(const char * msg, int line);
+/** \brief Report an insertion error from a backend */
+HWLOC_DECLSPEC void hwloc_report_os_error(const char * msg, int line);
+/** \brief Check whether insertion errors are hidden */
+HWLOC_DECLSPEC int hwloc_hide_errors(void);
+
+/** \brief Add an object to the topology and specify which error callback to use.
+ *
+ * Aside from the error callback selection, this function is identical to hwloc_insert_object_by_cpuset()
+ */
+HWLOC_DECLSPEC struct hwloc_obj *hwloc__insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t obj, hwloc_report_error_t report_error);
+
+/** \brief Insert an object somewhere in the topology.
+ *
+ * It is added as the last child of the given parent.
+ * The cpuset is completely ignored, so strange objects such as I/O devices should
+ * preferably be inserted with this.
+ *
+ * When used for "normal" children with cpusets (when importing from XML
+ * or when duplicating a topology), the caller should make sure that:
+ * - children are inserted in order,
+ * - children cpusets do not intersect.
+ *
+ * The given object may have normal, I/O or Misc children, as long as they are in order as well.
+ * These children must have valid parent and next_sibling pointers.
+ *
+ * The caller should check whether the object type is filtered-out before calling this function.
+ */
+HWLOC_DECLSPEC void hwloc_insert_object_by_parent(struct hwloc_topology *topology, hwloc_obj_t parent, hwloc_obj_t obj);
+
+/** \brief Allocate and initialize an object of the given type and physical index */
+HWLOC_DECLSPEC hwloc_obj_t hwloc_alloc_setup_object(hwloc_topology_t topology, hwloc_obj_type_t type, signed os_index);
+
+/** \brief Setup object cpusets/nodesets by OR'ing its children.
+ *
+ * Used when adding an object late in the topology.
+ * Will update the new object by OR'ing all its new children sets.
+ *
+ * Used when PCI backend adds a hostbridge parent, when distances
+ * add a new Group, etc.
+ */
+HWLOC_DECLSPEC int hwloc_obj_add_children_sets(hwloc_obj_t obj);
+
+/** \brief Request a reconnection of children and levels in the topology.
+ *
+ * May be used by backends during discovery if they need arrays or lists
+ * of objects within levels or children to be fully connected.
+ *
+ * \p flags is currently unused, must be 0.
+ */
+HWLOC_DECLSPEC int hwloc_topology_reconnect(hwloc_topology_t topology, unsigned long flags __hwloc_attribute_unused);
+
+/** \brief Make sure that plugins can lookup core symbols.
+ *
+ * This is a sanity check to avoid lazy-lookup failures when libhwloc
+ * is loaded within a plugin, and later tries to load its own plugins.
+ * This may fail (and abort the program) if libhwloc symbols are in a
+ * private namespace.
+ *
+ * \return 0 on success.
+ * \return -1 if the plugin cannot be successfully loaded. The caller
+ * plugin init() callback should return a negative error code as well.
+ *
+ * Plugins should call this function in their init() callback to avoid
+ * later crashes if lazy symbol resolution is used by the upper layer that
+ * loaded hwloc (e.g. OpenCL implementations using dlopen with RTLD_LAZY).
+ *
+ * \note The build system must define HWLOC_INSIDE_PLUGIN if and only if
+ * building the caller as a plugin.
+ *
+ * \note This function should remain inline so plugins can call it even
+ * when they cannot find libhwloc symbols.
+ */
+static __hwloc_inline int
+hwloc_plugin_check_namespace(const char *pluginname __hwloc_attribute_unused, const char *symbol __hwloc_attribute_unused)
+{
+#ifdef HWLOC_INSIDE_PLUGIN
+  lt_dlhandle handle;
+  void *sym;
+  handle = lt_dlopen(NULL);
+  if (!handle)
+    /* cannot check, assume things will work */
+    return 0;
+  sym = lt_dlsym(handle, symbol);
+  lt_dlclose(handle);
+  if (!sym) {
+    static int verboseenv_checked = 0;
+    static int verboseenv_value = 0;
+    if (!verboseenv_checked) {
+      const char *verboseenv = getenv("HWLOC_PLUGINS_VERBOSE");
+      verboseenv_value = verboseenv ? atoi(verboseenv) : 0;
+      verboseenv_checked = 1;
+    }
+    if (verboseenv_value)
+      fprintf(stderr, "Plugin `%s' disabling itself because it cannot find the `%s' core symbol.\n",
+	      pluginname, symbol);
+    return -1;
+  }
+#endif /* HWLOC_INSIDE_PLUGIN */
+  return 0;
+}
+
+/** @} */
+
+
+
+
+/** \defgroup hwlocality_components_filtering Components and Plugins: Filtering objects
+ * @{
+ */
+
+/** \brief Check whether the given PCI device classid is important.
+ *
+ * \return 1 if important, 0 otherwise.
+ */
+static __hwloc_inline int
+hwloc_filter_check_pcidev_subtype_important(unsigned classid)
+{
+  unsigned baseclass = classid >> 8;
+  return (baseclass == 0x03 /* PCI_BASE_CLASS_DISPLAY */
+	  || baseclass == 0x02 /* PCI_BASE_CLASS_NETWORK */
+	  || baseclass == 0x01 /* PCI_BASE_CLASS_STORAGE */
+	  || baseclass == 0x0b /* PCI_BASE_CLASS_PROCESSOR */
+	  || classid == 0x0c04 /* PCI_CLASS_SERIAL_FIBER */
+	  || classid == 0x0c06 /* PCI_CLASS_SERIAL_INFINIBAND */
+	  || baseclass == 0x12 /* Processing Accelerators */);
+}
+
+/** \brief Check whether the given OS device subtype is important.
+ *
+ * \return 1 if important, 0 otherwise.
+ */
+static __hwloc_inline int
+hwloc_filter_check_osdev_subtype_important(hwloc_obj_osdev_type_t subtype)
+{
+  return (subtype != HWLOC_OBJ_OSDEV_DMA);
+}
+
+/** \brief Check whether a non-I/O object type should be filtered-out.
+ *
+ * Cannot be used for I/O objects.
+ *
+ * \return 1 if the object type should be kept, 0 otherwise.
+ */
+static __hwloc_inline int
+hwloc_filter_check_keep_object_type(hwloc_topology_t topology, hwloc_obj_type_t type)
+{
+  enum hwloc_type_filter_e filter = HWLOC_TYPE_FILTER_KEEP_NONE;
+  hwloc_topology_get_type_filter(topology, type, &filter);
+  assert(filter != HWLOC_TYPE_FILTER_KEEP_IMPORTANT); /* IMPORTANT only used for I/O */
+  return filter == HWLOC_TYPE_FILTER_KEEP_NONE ? 0 : 1;
+}
+
+/** \brief Check whether the given object should be filtered-out.
+ *
+ * \return 1 if the object type should be kept, 0 otherwise.
+ */
+static __hwloc_inline int
+hwloc_filter_check_keep_object(hwloc_topology_t topology, hwloc_obj_t obj)
+{
+  hwloc_obj_type_t type = obj->type;
+  enum hwloc_type_filter_e filter = HWLOC_TYPE_FILTER_KEEP_NONE;
+  hwloc_topology_get_type_filter(topology, type, &filter);
+  if (filter == HWLOC_TYPE_FILTER_KEEP_NONE)
+    return 0;
+  if (filter == HWLOC_TYPE_FILTER_KEEP_IMPORTANT) {
+    if (type == HWLOC_OBJ_PCI_DEVICE)
+      return hwloc_filter_check_pcidev_subtype_important(obj->attr->pcidev.class_id);
+    if (type == HWLOC_OBJ_OS_DEVICE)
+      return hwloc_filter_check_osdev_subtype_important(obj->attr->osdev.type);
+  }
+  return 1;
+}
+
+/** @} */
+
+
+
+
+/** \defgroup hwlocality_components_pci_funcs Components and Plugins: PCI functions to be used by components
+ * @{
+ */
+
+/** \brief Return the offset of the given capability in the PCI config space buffer
+ *
+ * This function requires a 256-bytes config space. Unknown/unavailable bytes should be set to 0xff.
+ */
+HWLOC_DECLSPEC unsigned hwloc_pci_find_cap(const unsigned char *config, unsigned cap);
+
+/** \brief Fill linkspeed by reading the PCI config space where PCI_CAP_ID_EXP is at position offset.
+ *
+ * Needs 20 bytes of EXP capability block starting at offset in the config space
+ * for registers up to link status.
+ */
+HWLOC_DECLSPEC int hwloc_pci_find_linkspeed(const unsigned char *config, unsigned offset, float *linkspeed);
+
+/** \brief Return the hwloc object type (PCI device or Bridge) for the given class and configuration space.
+ *
+ * This function requires 16 bytes of common configuration header at the beginning of config.
+ */
+HWLOC_DECLSPEC hwloc_obj_type_t hwloc_pci_check_bridge_type(unsigned device_class, const unsigned char *config);
+
+/** \brief Fills the attributes of the given PCI bridge using the given PCI config space.
+ *
+ * This function requires 32 bytes of common configuration header at the beginning of config.
+ *
+ * Returns -1 and destroys \p obj if bridge fields are invalid.
+ */
+HWLOC_DECLSPEC int hwloc_pci_setup_bridge_attr(hwloc_obj_t obj, const unsigned char *config);
+
+/** \brief Insert a PCI object in the given PCI tree by looking at PCI bus IDs.
+ *
+ * If \p treep points to \c NULL, the new object is inserted there.
+ */
+HWLOC_DECLSPEC void hwloc_pci_tree_insert_by_busid(struct hwloc_obj **treep, struct hwloc_obj *obj);
+
+/** \brief Add some hostbridges on top of the given tree of PCI objects and attach them to the root of the topology.
+ *
+ * The core will move them to their actual PCI locality using hwloc_pci_belowroot_apply_locality()
+ * at the end of the discovery.
+ * In the meantime, other backends will easily lookup PCI objects (for instance to attach OS devices)
+ * by using hwloc_pci_belowroot_find_by_busid() or by manually looking at the topology root object
+ * io_first_child pointer.
+ */
+HWLOC_DECLSPEC int hwloc_pci_tree_attach_belowroot(struct hwloc_topology *topology, struct hwloc_obj *tree);
+
+/** \brief Find the PCI object that matches the bus ID.
+ *
+ * To be used after a PCI backend added PCI devices with hwloc_pci_tree_attach_belowroot()
+ * and before the core moves them to their actual location with hwloc_pci_belowroot_apply_locality().
+ *
+ * If no exactly matching object is found, return the container bridge if any, or NULL.
+ *
+ * On failure, it may be possible to find the PCI locality (instead of the PCI device)
+ * by calling hwloc_pci_find_busid_parent().
+ *
+ * \note This is semantically identical to hwloc_get_pcidev_by_busid() which only works
+ * after the topology is fully loaded.
+ */
+HWLOC_DECLSPEC struct hwloc_obj * hwloc_pci_belowroot_find_by_busid(struct hwloc_topology *topology, unsigned domain, unsigned bus, unsigned dev, unsigned func);
+
+/** \brief Find the normal parent of a PCI bus ID.
+ *
+ * Look at PCI affinity to find out where the given PCI bus ID should be attached.
+ *
+ * This function should be used to attach an I/O device directly under a normal
+ * (non-I/O) object, instead of below a PCI object.
+ * It is usually used by backends when hwloc_pci_belowroot_find_by_busid() failed
+ * to find the hwloc object corresponding to this bus ID, for instance because
+ * PCI discovery is not supported on this platform.
+ */
+HWLOC_DECLSPEC struct hwloc_obj * hwloc_pci_find_busid_parent(struct hwloc_topology *topology, unsigned domain, unsigned bus, unsigned dev, unsigned func);
+
+/** @} */
+
+
+
+
+#endif /* HWLOC_PLUGINS_H */
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/rename.h b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/rename.h
new file mode 100644
index 0000000000..09795a7a65
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/hwloc/rename.h
@@ -0,0 +1,707 @@
+/*
+ * Copyright © 2009-2011 Cisco Systems, Inc.  All rights reserved.
+ * Copyright © 2010-2017 Inria.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+#ifndef HWLOC_RENAME_H
+#define HWLOC_RENAME_H
+
+#include <hwloc/autogen/config.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* Only enact these defines if we're actually renaming the symbols
+   (i.e., avoid trying to have no-op defines if we're *not*
+   renaming). */
+
+#if HWLOC_SYM_TRANSFORM
+
+/* Use a preprocessor two-step in order to get the prefixing right.
+   Make 2 macros: HWLOC_NAME and HWLOC_NAME_CAPS for renaming
+   things. */
+
+#define HWLOC_MUNGE_NAME(a, b) HWLOC_MUNGE_NAME2(a, b)
+#define HWLOC_MUNGE_NAME2(a, b) a ## b
+#define HWLOC_NAME(name) HWLOC_MUNGE_NAME(HWLOC_SYM_PREFIX, hwloc_ ## name)
+#define HWLOC_NAME_CAPS(name) HWLOC_MUNGE_NAME(HWLOC_SYM_PREFIX_CAPS, hwloc_ ## name)
+
+/* Now define all the "real" names to be the prefixed names.  This
+   allows us to use the real names throughout the code base (i.e.,
+   "hwloc_<foo>"); the preprocessor will adjust to have the prefixed
+   name under the covers. */
+
+/* Names from hwloc.h */
+
+#define hwloc_get_api_version HWLOC_NAME(get_api_version)
+
+#define hwloc_topology HWLOC_NAME(topology)
+#define hwloc_topology_t HWLOC_NAME(topology_t)
+
+#define hwloc_cpuset_t HWLOC_NAME(cpuset_t)
+#define hwloc_const_cpuset_t HWLOC_NAME(const_cpuset_t)
+#define hwloc_nodeset_t HWLOC_NAME(nodeset_t)
+#define hwloc_const_nodeset_t HWLOC_NAME(const_nodeset_t)
+
+#define HWLOC_OBJ_SYSTEM HWLOC_NAME_CAPS(OBJ_SYSTEM)
+#define HWLOC_OBJ_MACHINE HWLOC_NAME_CAPS(OBJ_MACHINE)
+#define HWLOC_OBJ_NUMANODE HWLOC_NAME_CAPS(OBJ_NUMANODE)
+#define HWLOC_OBJ_PACKAGE HWLOC_NAME_CAPS(OBJ_PACKAGE)
+#define HWLOC_OBJ_CORE HWLOC_NAME_CAPS(OBJ_CORE)
+#define HWLOC_OBJ_PU HWLOC_NAME_CAPS(OBJ_PU)
+#define HWLOC_OBJ_L1CACHE HWLOC_NAME_CAPS(OBJ_L1CACHE)
+#define HWLOC_OBJ_L2CACHE HWLOC_NAME_CAPS(OBJ_L2CACHE)
+#define HWLOC_OBJ_L3CACHE HWLOC_NAME_CAPS(OBJ_L3CACHE)
+#define HWLOC_OBJ_L4CACHE HWLOC_NAME_CAPS(OBJ_L4CACHE)
+#define HWLOC_OBJ_L5CACHE HWLOC_NAME_CAPS(OBJ_L5CACHE)
+#define HWLOC_OBJ_L1ICACHE HWLOC_NAME_CAPS(OBJ_L1ICACHE)
+#define HWLOC_OBJ_L2ICACHE HWLOC_NAME_CAPS(OBJ_L2ICACHE)
+#define HWLOC_OBJ_L3ICACHE HWLOC_NAME_CAPS(OBJ_L3ICACHE)
+#define HWLOC_OBJ_MISC HWLOC_NAME_CAPS(OBJ_MISC)
+#define HWLOC_OBJ_GROUP HWLOC_NAME_CAPS(OBJ_GROUP)
+#define HWLOC_OBJ_BRIDGE HWLOC_NAME_CAPS(OBJ_BRIDGE)
+#define HWLOC_OBJ_PCI_DEVICE HWLOC_NAME_CAPS(OBJ_PCI_DEVICE)
+#define HWLOC_OBJ_OS_DEVICE HWLOC_NAME_CAPS(OBJ_OS_DEVICE)
+#define HWLOC_OBJ_TYPE_MAX HWLOC_NAME_CAPS(OBJ_TYPE_MAX)
+#define hwloc_obj_type_t HWLOC_NAME(obj_type_t)
+
+#define hwloc_obj_cache_type_e HWLOC_NAME(obj_cache_type_e)
+#define hwloc_obj_cache_type_t HWLOC_NAME(obj_cache_type_t)
+#define HWLOC_OBJ_CACHE_UNIFIED HWLOC_NAME_CAPS(OBJ_CACHE_UNIFIED)
+#define HWLOC_OBJ_CACHE_DATA HWLOC_NAME_CAPS(OBJ_CACHE_DATA)
+#define HWLOC_OBJ_CACHE_INSTRUCTION HWLOC_NAME_CAPS(OBJ_CACHE_INSTRUCTION)
+
+#define hwloc_obj_bridge_type_e HWLOC_NAME(obj_bridge_type_e)
+#define hwloc_obj_bridge_type_t HWLOC_NAME(obj_bridge_type_t)
+#define HWLOC_OBJ_BRIDGE_HOST HWLOC_NAME_CAPS(OBJ_BRIDGE_HOST)
+#define HWLOC_OBJ_BRIDGE_PCI HWLOC_NAME_CAPS(OBJ_BRIDGE_PCI)
+
+#define hwloc_obj_osdev_type_e HWLOC_NAME(obj_osdev_type_e)
+#define hwloc_obj_osdev_type_t HWLOC_NAME(obj_osdev_type_t)
+#define HWLOC_OBJ_OSDEV_BLOCK HWLOC_NAME_CAPS(OBJ_OSDEV_BLOCK)
+#define HWLOC_OBJ_OSDEV_GPU HWLOC_NAME_CAPS(OBJ_OSDEV_GPU)
+#define HWLOC_OBJ_OSDEV_NETWORK HWLOC_NAME_CAPS(OBJ_OSDEV_NETWORK)
+#define HWLOC_OBJ_OSDEV_OPENFABRICS HWLOC_NAME_CAPS(OBJ_OSDEV_OPENFABRICS)
+#define HWLOC_OBJ_OSDEV_DMA HWLOC_NAME_CAPS(OBJ_OSDEV_DMA)
+#define HWLOC_OBJ_OSDEV_COPROC HWLOC_NAME_CAPS(OBJ_OSDEV_COPROC)
+
+#define hwloc_compare_types HWLOC_NAME(compare_types)
+
+#define hwloc_compare_types_e HWLOC_NAME(compare_types_e)
+#define HWLOC_TYPE_UNORDERED HWLOC_NAME_CAPS(TYPE_UNORDERED)
+
+#define hwloc_obj_memory_s HWLOC_NAME(obj_memory_s)
+#define hwloc_obj_memory_page_type_s HWLOC_NAME(obj_memory_page_type_s)
+
+#define hwloc_obj HWLOC_NAME(obj)
+#define hwloc_obj_t HWLOC_NAME(obj_t)
+
+#define hwloc_obj_info_s HWLOC_NAME(obj_info_s)
+
+#define hwloc_obj_attr_u HWLOC_NAME(obj_attr_u)
+#define hwloc_cache_attr_s HWLOC_NAME(cache_attr_s)
+#define hwloc_group_attr_s HWLOC_NAME(group_attr_s)
+#define hwloc_pcidev_attr_s HWLOC_NAME(pcidev_attr_s)
+#define hwloc_bridge_attr_s HWLOC_NAME(bridge_attr_s)
+#define hwloc_osdev_attr_s HWLOC_NAME(osdev_attr_s)
+
+#define hwloc_topology_init HWLOC_NAME(topology_init)
+#define hwloc_topology_load HWLOC_NAME(topology_load)
+#define hwloc_topology_destroy HWLOC_NAME(topology_destroy)
+#define hwloc_topology_dup HWLOC_NAME(topology_dup)
+#define hwloc_topology_check HWLOC_NAME(topology_check)
+
+#define hwloc_topology_flags_e HWLOC_NAME(topology_flags_e)
+
+#define HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM HWLOC_NAME_CAPS(TOPOLOGY_FLAG_WHOLE_SYSTEM)
+#define HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IS_THISSYSTEM)
+#define HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES)
+
+#define hwloc_topology_set_pid HWLOC_NAME(topology_set_pid)
+#define hwloc_topology_set_synthetic HWLOC_NAME(topology_set_synthetic)
+#define hwloc_topology_set_xml HWLOC_NAME(topology_set_xml)
+#define hwloc_topology_set_xmlbuffer HWLOC_NAME(topology_set_xmlbuffer)
+
+#define hwloc_topology_set_flags HWLOC_NAME(topology_set_flags)
+#define hwloc_topology_is_thissystem HWLOC_NAME(topology_is_thissystem)
+#define hwloc_topology_get_flags HWLOC_NAME(topology_get_flags)
+#define hwloc_topology_discovery_support HWLOC_NAME(topology_discovery_support)
+#define hwloc_topology_cpubind_support HWLOC_NAME(topology_cpubind_support)
+#define hwloc_topology_membind_support HWLOC_NAME(topology_membind_support)
+#define hwloc_topology_support HWLOC_NAME(topology_support)
+#define hwloc_topology_get_support HWLOC_NAME(topology_get_support)
+
+#define hwloc_type_filter_e HWLOC_NAME(type_filter_e)
+#define HWLOC_TYPE_FILTER_KEEP_ALL HWLOC_NAME_CAPS(TYPE_FILTER_KEEP_ALL)
+#define HWLOC_TYPE_FILTER_KEEP_NONE HWLOC_NAME_CAPS(TYPE_FILTER_KEEP_NONE)
+#define HWLOC_TYPE_FILTER_KEEP_STRUCTURE HWLOC_NAME_CAPS(TYPE_FILTER_KEEP_STRUCTURE)
+#define HWLOC_TYPE_FILTER_KEEP_IMPORTANT HWLOC_NAME_CAPS(TYPE_FILTER_KEEP_IMPORTANT)
+#define hwloc_topology_set_type_filter HWLOC_NAME(topology_set_type_filter)
+#define hwloc_topology_get_type_filter HWLOC_NAME(topology_get_type_filter)
+#define hwloc_topology_set_all_types_filter HWLOC_NAME(topology_set_all_types_filter)
+#define hwloc_topology_set_cache_types_filter HWLOC_NAME(topology_set_cache_types_filter)
+#define hwloc_topology_set_icache_types_filter HWLOC_NAME(topology_set_icache_types_filter)
+#define hwloc_topology_set_io_types_filter HWLOC_NAME(topology_set_io_types_filter)
+
+#define hwloc_topology_set_userdata HWLOC_NAME(topology_set_userdata)
+#define hwloc_topology_get_userdata HWLOC_NAME(topology_get_userdata)
+
+#define hwloc_restrict_flags_e HWLOC_NAME(restrict_flags_e)
+#define HWLOC_RESTRICT_FLAG_REMOVE_CPULESS HWLOC_NAME_CAPS(RESTRICT_FLAG_REMOVE_CPULESS)
+#define HWLOC_RESTRICT_FLAG_ADAPT_MISC HWLOC_NAME_CAPS(RESTRICT_FLAG_ADAPT_MISC)
+#define HWLOC_RESTRICT_FLAG_ADAPT_IO HWLOC_NAME_CAPS(RESTRICT_FLAG_ADAPT_IO)
+#define hwloc_topology_restrict HWLOC_NAME(topology_restrict)
+
+#define hwloc_topology_insert_misc_object HWLOC_NAME(topology_insert_misc_object)
+#define hwloc_topology_alloc_group_object HWLOC_NAME(topology_alloc_group_object)
+#define hwloc_topology_insert_group_object HWLOC_NAME(topology_insert_group_object)
+#define hwloc_obj_add_other_obj_sets HWLOC_NAME(obj_add_other_obj_sets)
+
+#define hwloc_topology_get_depth HWLOC_NAME(topology_get_depth)
+#define hwloc_get_type_depth HWLOC_NAME(get_type_depth)
+
+#define hwloc_get_type_depth_e HWLOC_NAME(get_type_depth_e)
+#define HWLOC_TYPE_DEPTH_UNKNOWN HWLOC_NAME_CAPS(TYPE_DEPTH_UNKNOWN)
+#define HWLOC_TYPE_DEPTH_MULTIPLE HWLOC_NAME_CAPS(TYPE_DEPTH_MULTIPLE)
+#define HWLOC_TYPE_DEPTH_BRIDGE HWLOC_NAME_CAPS(TYPE_DEPTH_BRIDGE)
+#define HWLOC_TYPE_DEPTH_PCI_DEVICE HWLOC_NAME_CAPS(TYPE_DEPTH_PCI_DEVICE)
+#define HWLOC_TYPE_DEPTH_OS_DEVICE HWLOC_NAME_CAPS(TYPE_DEPTH_OS_DEVICE)
+#define HWLOC_TYPE_DEPTH_MISC HWLOC_NAME_CAPS(TYPE_DEPTH_MISC)
+
+#define hwloc_get_depth_type HWLOC_NAME(get_depth_type)
+#define hwloc_get_nbobjs_by_depth HWLOC_NAME(get_nbobjs_by_depth)
+#define hwloc_get_nbobjs_by_type HWLOC_NAME(get_nbobjs_by_type)
+
+#define hwloc_get_obj_by_depth HWLOC_NAME(get_obj_by_depth )
+#define hwloc_get_obj_by_type HWLOC_NAME(get_obj_by_type )
+
+#define hwloc_type_name HWLOC_NAME(type_name)
+#define hwloc_obj_type_snprintf HWLOC_NAME(obj_type_snprintf )
+#define hwloc_obj_attr_snprintf HWLOC_NAME(obj_attr_snprintf )
+#define hwloc_type_sscanf HWLOC_NAME(type_sscanf)
+#define hwloc_type_sscanf_as_depth HWLOC_NAME(type_sscanf_as_depth)
+
+#define hwloc_obj_get_info_by_name HWLOC_NAME(obj_get_info_by_name)
+#define hwloc_obj_add_info HWLOC_NAME(obj_add_info)
+
+#define HWLOC_CPUBIND_PROCESS HWLOC_NAME_CAPS(CPUBIND_PROCESS)
+#define HWLOC_CPUBIND_THREAD HWLOC_NAME_CAPS(CPUBIND_THREAD)
+#define HWLOC_CPUBIND_STRICT HWLOC_NAME_CAPS(CPUBIND_STRICT)
+#define HWLOC_CPUBIND_NOMEMBIND HWLOC_NAME_CAPS(CPUBIND_NOMEMBIND)
+
+#define hwloc_cpubind_flags_t HWLOC_NAME(cpubind_flags_t)
+
+#define hwloc_set_cpubind HWLOC_NAME(set_cpubind)
+#define hwloc_get_cpubind HWLOC_NAME(get_cpubind)
+#define hwloc_set_proc_cpubind HWLOC_NAME(set_proc_cpubind)
+#define hwloc_get_proc_cpubind HWLOC_NAME(get_proc_cpubind)
+#define hwloc_set_thread_cpubind HWLOC_NAME(set_thread_cpubind)
+#define hwloc_get_thread_cpubind HWLOC_NAME(get_thread_cpubind)
+
+#define hwloc_get_last_cpu_location HWLOC_NAME(get_last_cpu_location)
+#define hwloc_get_proc_last_cpu_location HWLOC_NAME(get_proc_last_cpu_location)
+
+#define HWLOC_MEMBIND_DEFAULT HWLOC_NAME_CAPS(MEMBIND_DEFAULT)
+#define HWLOC_MEMBIND_FIRSTTOUCH HWLOC_NAME_CAPS(MEMBIND_FIRSTTOUCH)
+#define HWLOC_MEMBIND_BIND HWLOC_NAME_CAPS(MEMBIND_BIND)
+#define HWLOC_MEMBIND_INTERLEAVE HWLOC_NAME_CAPS(MEMBIND_INTERLEAVE)
+#define HWLOC_MEMBIND_NEXTTOUCH HWLOC_NAME_CAPS(MEMBIND_NEXTTOUCH)
+#define HWLOC_MEMBIND_MIXED HWLOC_NAME_CAPS(MEMBIND_MIXED)
+
+#define hwloc_membind_policy_t HWLOC_NAME(membind_policy_t)
+
+#define HWLOC_MEMBIND_PROCESS HWLOC_NAME_CAPS(MEMBIND_PROCESS)
+#define HWLOC_MEMBIND_THREAD HWLOC_NAME_CAPS(MEMBIND_THREAD)
+#define HWLOC_MEMBIND_STRICT HWLOC_NAME_CAPS(MEMBIND_STRICT)
+#define HWLOC_MEMBIND_MIGRATE HWLOC_NAME_CAPS(MEMBIND_MIGRATE)
+#define HWLOC_MEMBIND_NOCPUBIND HWLOC_NAME_CAPS(MEMBIND_NOCPUBIND)
+#define HWLOC_MEMBIND_BYNODESET HWLOC_NAME_CAPS(MEMBIND_BYNODESET)
+
+#define hwloc_membind_flags_t HWLOC_NAME(membind_flags_t)
+
+#define hwloc_set_membind HWLOC_NAME(set_membind)
+#define hwloc_get_membind HWLOC_NAME(get_membind)
+#define hwloc_set_proc_membind HWLOC_NAME(set_proc_membind)
+#define hwloc_get_proc_membind HWLOC_NAME(get_proc_membind)
+#define hwloc_set_area_membind HWLOC_NAME(set_area_membind)
+#define hwloc_get_area_membind HWLOC_NAME(get_area_membind)
+#define hwloc_get_area_memlocation HWLOC_NAME(get_area_memlocation)
+#define hwloc_alloc_membind HWLOC_NAME(alloc_membind)
+#define hwloc_alloc HWLOC_NAME(alloc)
+#define hwloc_free HWLOC_NAME(free)
+
+#define hwloc_get_non_io_ancestor_obj HWLOC_NAME(get_non_io_ancestor_obj)
+#define hwloc_get_next_pcidev HWLOC_NAME(get_next_pcidev)
+#define hwloc_get_pcidev_by_busid HWLOC_NAME(get_pcidev_by_busid)
+#define hwloc_get_pcidev_by_busidstring HWLOC_NAME(get_pcidev_by_busidstring)
+#define hwloc_get_next_osdev HWLOC_NAME(get_next_osdev)
+#define hwloc_get_next_bridge HWLOC_NAME(get_next_bridge)
+#define hwloc_bridge_covers_pcibus HWLOC_NAME(bridge_covers_pcibus)
+
+/* hwloc/bitmap.h */
+
+#define hwloc_bitmap_s HWLOC_NAME(bitmap_s)
+#define hwloc_bitmap_t HWLOC_NAME(bitmap_t)
+#define hwloc_const_bitmap_t HWLOC_NAME(const_bitmap_t)
+
+#define hwloc_bitmap_alloc HWLOC_NAME(bitmap_alloc)
+#define hwloc_bitmap_alloc_full HWLOC_NAME(bitmap_alloc_full)
+#define hwloc_bitmap_free HWLOC_NAME(bitmap_free)
+#define hwloc_bitmap_dup HWLOC_NAME(bitmap_dup)
+#define hwloc_bitmap_copy HWLOC_NAME(bitmap_copy)
+#define hwloc_bitmap_snprintf HWLOC_NAME(bitmap_snprintf)
+#define hwloc_bitmap_asprintf HWLOC_NAME(bitmap_asprintf)
+#define hwloc_bitmap_sscanf HWLOC_NAME(bitmap_sscanf)
+#define hwloc_bitmap_list_snprintf HWLOC_NAME(bitmap_list_snprintf)
+#define hwloc_bitmap_list_asprintf HWLOC_NAME(bitmap_list_asprintf)
+#define hwloc_bitmap_list_sscanf HWLOC_NAME(bitmap_list_sscanf)
+#define hwloc_bitmap_taskset_snprintf HWLOC_NAME(bitmap_taskset_snprintf)
+#define hwloc_bitmap_taskset_asprintf HWLOC_NAME(bitmap_taskset_asprintf)
+#define hwloc_bitmap_taskset_sscanf HWLOC_NAME(bitmap_taskset_sscanf)
+#define hwloc_bitmap_zero HWLOC_NAME(bitmap_zero)
+#define hwloc_bitmap_fill HWLOC_NAME(bitmap_fill)
+#define hwloc_bitmap_from_ulong HWLOC_NAME(bitmap_from_ulong)
+
+#define hwloc_bitmap_from_ith_ulong HWLOC_NAME(bitmap_from_ith_ulong)
+#define hwloc_bitmap_to_ulong HWLOC_NAME(bitmap_to_ulong)
+#define hwloc_bitmap_to_ith_ulong HWLOC_NAME(bitmap_to_ith_ulong)
+#define hwloc_bitmap_only HWLOC_NAME(bitmap_only)
+#define hwloc_bitmap_allbut HWLOC_NAME(bitmap_allbut)
+#define hwloc_bitmap_set HWLOC_NAME(bitmap_set)
+#define hwloc_bitmap_set_range HWLOC_NAME(bitmap_set_range)
+#define hwloc_bitmap_set_ith_ulong HWLOC_NAME(bitmap_set_ith_ulong)
+#define hwloc_bitmap_clr HWLOC_NAME(bitmap_clr)
+#define hwloc_bitmap_clr_range HWLOC_NAME(bitmap_clr_range)
+#define hwloc_bitmap_isset HWLOC_NAME(bitmap_isset)
+#define hwloc_bitmap_iszero HWLOC_NAME(bitmap_iszero)
+#define hwloc_bitmap_isfull HWLOC_NAME(bitmap_isfull)
+#define hwloc_bitmap_isequal HWLOC_NAME(bitmap_isequal)
+#define hwloc_bitmap_intersects HWLOC_NAME(bitmap_intersects)
+#define hwloc_bitmap_isincluded HWLOC_NAME(bitmap_isincluded)
+#define hwloc_bitmap_or HWLOC_NAME(bitmap_or)
+#define hwloc_bitmap_and HWLOC_NAME(bitmap_and)
+#define hwloc_bitmap_andnot HWLOC_NAME(bitmap_andnot)
+#define hwloc_bitmap_xor HWLOC_NAME(bitmap_xor)
+#define hwloc_bitmap_not HWLOC_NAME(bitmap_not)
+#define hwloc_bitmap_first HWLOC_NAME(bitmap_first)
+#define hwloc_bitmap_last HWLOC_NAME(bitmap_last)
+#define hwloc_bitmap_next HWLOC_NAME(bitmap_next)
+#define hwloc_bitmap_singlify HWLOC_NAME(bitmap_singlify)
+#define hwloc_bitmap_compare_first HWLOC_NAME(bitmap_compare_first)
+#define hwloc_bitmap_compare HWLOC_NAME(bitmap_compare)
+#define hwloc_bitmap_weight HWLOC_NAME(bitmap_weight)
+
+/* hwloc/helper.h */
+
+#define hwloc_get_type_or_below_depth HWLOC_NAME(get_type_or_below_depth)
+#define hwloc_get_type_or_above_depth HWLOC_NAME(get_type_or_above_depth)
+#define hwloc_get_root_obj HWLOC_NAME(get_root_obj)
+#define hwloc_get_ancestor_obj_by_depth HWLOC_NAME(get_ancestor_obj_by_depth)
+#define hwloc_get_ancestor_obj_by_type HWLOC_NAME(get_ancestor_obj_by_type)
+#define hwloc_get_next_obj_by_depth HWLOC_NAME(get_next_obj_by_depth)
+#define hwloc_get_next_obj_by_type HWLOC_NAME(get_next_obj_by_type)
+#define hwloc_get_pu_obj_by_os_index HWLOC_NAME(get_pu_obj_by_os_index)
+#define hwloc_get_numanode_obj_by_os_index HWLOC_NAME(get_numanode_obj_by_os_index)
+#define hwloc_get_next_child HWLOC_NAME(get_next_child)
+#define hwloc_get_common_ancestor_obj HWLOC_NAME(get_common_ancestor_obj)
+#define hwloc_obj_is_in_subtree HWLOC_NAME(obj_is_in_subtree)
+#define hwloc_get_first_largest_obj_inside_cpuset HWLOC_NAME(get_first_largest_obj_inside_cpuset)
+#define hwloc_get_largest_objs_inside_cpuset HWLOC_NAME(get_largest_objs_inside_cpuset)
+#define hwloc_get_next_obj_inside_cpuset_by_depth HWLOC_NAME(get_next_obj_inside_cpuset_by_depth)
+#define hwloc_get_next_obj_inside_cpuset_by_type HWLOC_NAME(get_next_obj_inside_cpuset_by_type)
+#define hwloc_get_obj_inside_cpuset_by_depth HWLOC_NAME(get_obj_inside_cpuset_by_depth)
+#define hwloc_get_obj_inside_cpuset_by_type HWLOC_NAME(get_obj_inside_cpuset_by_type)
+#define hwloc_get_nbobjs_inside_cpuset_by_depth HWLOC_NAME(get_nbobjs_inside_cpuset_by_depth)
+#define hwloc_get_nbobjs_inside_cpuset_by_type HWLOC_NAME(get_nbobjs_inside_cpuset_by_type)
+#define hwloc_get_obj_index_inside_cpuset HWLOC_NAME(get_obj_index_inside_cpuset)
+#define hwloc_get_child_covering_cpuset HWLOC_NAME(get_child_covering_cpuset)
+#define hwloc_get_obj_covering_cpuset HWLOC_NAME(get_obj_covering_cpuset)
+#define hwloc_get_next_obj_covering_cpuset_by_depth HWLOC_NAME(get_next_obj_covering_cpuset_by_depth)
+#define hwloc_get_next_obj_covering_cpuset_by_type HWLOC_NAME(get_next_obj_covering_cpuset_by_type)
+#define hwloc_obj_type_is_cache HWLOC_NAME(obj_type_is_cache)
+#define hwloc_obj_type_is_dcache HWLOC_NAME(obj_type_is_dcache)
+#define hwloc_obj_type_is_icache HWLOC_NAME(obj_type_is_icache)
+#define hwloc_get_cache_type_depth HWLOC_NAME(get_cache_type_depth)
+#define hwloc_get_cache_covering_cpuset HWLOC_NAME(get_cache_covering_cpuset)
+#define hwloc_get_shared_cache_covering_obj HWLOC_NAME(get_shared_cache_covering_obj)
+#define hwloc_get_closest_objs HWLOC_NAME(get_closest_objs)
+#define hwloc_get_obj_below_by_type HWLOC_NAME(get_obj_below_by_type)
+#define hwloc_get_obj_below_array_by_type HWLOC_NAME(get_obj_below_array_by_type)
+#define hwloc_distrib_flags_e HWLOC_NAME(distrib_flags_e)
+#define HWLOC_DISTRIB_FLAG_REVERSE HWLOC_NAME_CAPS(DISTRIB_FLAG_REVERSE)
+#define hwloc_distrib HWLOC_NAME(distrib)
+#define hwloc_alloc_membind_policy HWLOC_NAME(alloc_membind_policy)
+#define hwloc_alloc_membind_policy_nodeset HWLOC_NAME(alloc_membind_policy_nodeset)
+#define hwloc_topology_get_complete_cpuset HWLOC_NAME(topology_get_complete_cpuset)
+#define hwloc_topology_get_topology_cpuset HWLOC_NAME(topology_get_topology_cpuset)
+#define hwloc_topology_get_allowed_cpuset HWLOC_NAME(topology_get_allowed_cpuset)
+#define hwloc_topology_get_complete_nodeset HWLOC_NAME(topology_get_complete_nodeset)
+#define hwloc_topology_get_topology_nodeset HWLOC_NAME(topology_get_topology_nodeset)
+#define hwloc_topology_get_allowed_nodeset HWLOC_NAME(topology_get_allowed_nodeset)
+#define hwloc_cpuset_to_nodeset HWLOC_NAME(cpuset_to_nodeset)
+#define hwloc_cpuset_from_nodeset HWLOC_NAME(cpuset_from_nodeset)
+
+/* export.h */
+
+#define hwloc_topology_export_xml_flags_e HWLOC_NAME(topology_export_xml_flags_e)
+#define HWLOC_TOPOLOGY_EXPORT_XML_FLAG_V1 HWLOC_NAME_CAPS(TOPOLOGY_EXPORT_XML_FLAG_V1)
+#define hwloc_topology_export_xml HWLOC_NAME(topology_export_xml)
+#define hwloc_topology_export_xmlbuffer HWLOC_NAME(topology_export_xmlbuffer)
+#define hwloc_free_xmlbuffer HWLOC_NAME(free_xmlbuffer)
+#define hwloc_topology_set_userdata_export_callback HWLOC_NAME(topology_set_userdata_export_callback)
+#define hwloc_export_obj_userdata HWLOC_NAME(export_obj_userdata)
+#define hwloc_export_obj_userdata_base64 HWLOC_NAME(export_obj_userdata_base64)
+#define hwloc_topology_set_userdata_import_callback HWLOC_NAME(topology_set_userdata_import_callback)
+
+#define hwloc_topology_export_synthetic_flags_e HWLOC_NAME(topology_export_synthetic_flags_e)
+#define HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES HWLOC_NAME_CAPS(TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES)
+#define HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS HWLOC_NAME_CAPS(TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS)
+#define hwloc_topology_export_synthetic HWLOC_NAME(topology_export_synthetic)
+
+/* distances.h */
+
+#define hwloc_distances_s HWLOC_NAME(distances_s)
+
+#define hwloc_distances_kind_e HWLOC_NAME(distances_kind_e)
+#define HWLOC_DISTANCES_KIND_FROM_OS HWLOC_NAME_CAPS(DISTANCES_KIND_FROM_OS)
+#define HWLOC_DISTANCES_KIND_FROM_USER HWLOC_NAME_CAPS(DISTANCES_KIND_FROM_USER)
+#define HWLOC_DISTANCES_KIND_MEANS_LATENCY HWLOC_NAME_CAPS(DISTANCES_KIND_MEANS_LATENCY)
+#define HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH HWLOC_NAME_CAPS(DISTANCES_KIND_MEANS_BANDWIDTH)
+
+#define hwloc_distances_get HWLOC_NAME(distances_get)
+#define hwloc_distances_get_by_depth HWLOC_NAME(distances_get_by_depth)
+#define hwloc_distances_get_by_type HWLOC_NAME(distances_get_by_type)
+#define hwloc_distances_release HWLOC_NAME(distances_release)
+
+#define hwloc_distances_flag_e HWLOC_NAME(distances_flag_e)
+#define HWLOC_DISTANCES_FLAG_GROUP HWLOC_NAME_CAPS(DISTANCES_FLAG_GROUP)
+#define HWLOC_DISTANCES_FLAG_GROUP_INACCURATE HWLOC_NAME_CAPS(DISTANCES_FLAG_GROUP_INACCURATE)
+
+#define hwloc_distances_add HWLOC_NAME(distances_add)
+#define hwloc_distances_remove HWLOC_NAME(distances_remove)
+#define hwloc_distances_remove_by_depth HWLOC_NAME(distances_remove_by_depth)
+#define hwloc_distances_remove_by_type HWLOC_NAME(distances_remove_by_type)
+
+/* diff.h */
+
+#define hwloc_topology_diff_obj_attr_type_e HWLOC_NAME(topology_diff_obj_attr_type_e)
+#define hwloc_topology_diff_obj_attr_type_t HWLOC_NAME(topology_diff_obj_attr_type_t)
+#define HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_SIZE HWLOC_NAME_CAPS(TOPOLOGY_DIFF_OBJ_ATTR_SIZE)
+#define HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_NAME HWLOC_NAME_CAPS(TOPOLOGY_DIFF_OBJ_ATTR_NAME)
+#define HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO HWLOC_NAME_CAPS(TOPOLOGY_DIFF_OBJ_ATTR_INFO)
+#define hwloc_topology_diff_obj_attr_u HWLOC_NAME(topology_diff_obj_attr_u)
+#define hwloc_topology_diff_obj_attr_generic_s HWLOC_NAME(topology_diff_obj_attr_generic_s)
+#define hwloc_topology_diff_obj_attr_uint64_s HWLOC_NAME(topology_diff_obj_attr_uint64_s)
+#define hwloc_topology_diff_obj_attr_string_s HWLOC_NAME(topology_diff_obj_attr_string_s)
+#define hwloc_topology_diff_type_e HWLOC_NAME(topology_diff_type_e)
+#define hwloc_topology_diff_type_t HWLOC_NAME(topology_diff_type_t)
+#define HWLOC_TOPOLOGY_DIFF_OBJ_ATTR HWLOC_NAME_CAPS(TOPOLOGY_DIFF_OBJ_ATTR)
+#define HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX HWLOC_NAME_CAPS(TOPOLOGY_DIFF_TOO_COMPLEX)
+#define hwloc_topology_diff_u HWLOC_NAME(topology_diff_u)
+#define hwloc_topology_diff_t HWLOC_NAME(topology_diff_t)
+#define hwloc_topology_diff_generic_s HWLOC_NAME(topology_diff_generic_s)
+#define hwloc_topology_diff_obj_attr_s HWLOC_NAME(topology_diff_obj_attr_s)
+#define hwloc_topology_diff_too_complex_s HWLOC_NAME(topology_diff_too_complex_s)
+#define hwloc_topology_diff_build HWLOC_NAME(topology_diff_build)
+#define hwloc_topology_diff_apply_flags_e HWLOC_NAME(topology_diff_apply_flags_e)
+#define HWLOC_TOPOLOGY_DIFF_APPLY_REVERSE HWLOC_NAME_CAPS(TOPOLOGY_DIFF_APPLY_REVERSE)
+#define hwloc_topology_diff_apply HWLOC_NAME(topology_diff_apply)
+#define hwloc_topology_diff_destroy HWLOC_NAME(topology_diff_destroy)
+#define hwloc_topology_diff_load_xml HWLOC_NAME(topology_diff_load_xml)
+#define hwloc_topology_diff_export_xml HWLOC_NAME(topology_diff_export_xml)
+#define hwloc_topology_diff_load_xmlbuffer HWLOC_NAME(topology_diff_load_xmlbuffer)
+#define hwloc_topology_diff_export_xmlbuffer HWLOC_NAME(topology_diff_export_xmlbuffer)
+
+/* glibc-sched.h */
+
+#define hwloc_cpuset_to_glibc_sched_affinity HWLOC_NAME(cpuset_to_glibc_sched_affinity)
+#define hwloc_cpuset_from_glibc_sched_affinity HWLOC_NAME(cpuset_from_glibc_sched_affinity)
+
+/* linux-libnuma.h */
+
+#define hwloc_cpuset_to_linux_libnuma_ulongs HWLOC_NAME(cpuset_to_linux_libnuma_ulongs)
+#define hwloc_nodeset_to_linux_libnuma_ulongs HWLOC_NAME(nodeset_to_linux_libnuma_ulongs)
+#define hwloc_cpuset_from_linux_libnuma_ulongs HWLOC_NAME(cpuset_from_linux_libnuma_ulongs)
+#define hwloc_nodeset_from_linux_libnuma_ulongs HWLOC_NAME(nodeset_from_linux_libnuma_ulongs)
+#define hwloc_cpuset_to_linux_libnuma_bitmask HWLOC_NAME(cpuset_to_linux_libnuma_bitmask)
+#define hwloc_nodeset_to_linux_libnuma_bitmask HWLOC_NAME(nodeset_to_linux_libnuma_bitmask)
+#define hwloc_cpuset_from_linux_libnuma_bitmask HWLOC_NAME(cpuset_from_linux_libnuma_bitmask)
+#define hwloc_nodeset_from_linux_libnuma_bitmask HWLOC_NAME(nodeset_from_linux_libnuma_bitmask)
+
+/* linux.h */
+
+#define hwloc_linux_set_tid_cpubind HWLOC_NAME(linux_set_tid_cpubind)
+#define hwloc_linux_get_tid_cpubind HWLOC_NAME(linux_get_tid_cpubind)
+#define hwloc_linux_get_tid_last_cpu_location HWLOC_NAME(linux_get_tid_last_cpu_location)
+#define hwloc_linux_read_path_as_cpumask HWLOC_NAME(linux_read_file_cpumask)
+
+/* openfabrics-verbs.h */
+
+#define hwloc_ibv_get_device_cpuset HWLOC_NAME(ibv_get_device_cpuset)
+#define hwloc_ibv_get_device_osdev HWLOC_NAME(ibv_get_device_osdev)
+#define hwloc_ibv_get_device_osdev_by_name HWLOC_NAME(ibv_get_device_osdev_by_name)
+
+/* myriexpress.h */
+
+#define hwloc_mx_board_get_device_cpuset HWLOC_NAME(mx_board_get_device_cpuset)
+#define hwloc_mx_endpoint_get_device_cpuset HWLOC_NAME(mx_endpoint_get_device_cpuset)
+
+/* intel-mic.h */
+
+#define hwloc_intel_mic_get_device_cpuset HWLOC_NAME(intel_mic_get_device_cpuset)
+#define hwloc_intel_mic_get_device_osdev_by_index HWLOC_NAME(intel_mic_get_device_osdev_by_index)
+
+/* opencl.h */
+
+#define hwloc_opencl_get_device_cpuset HWLOC_NAME(opencl_get_device_cpuset)
+#define hwloc_opencl_get_device_osdev HWLOC_NAME(opencl_get_device_osdev)
+#define hwloc_opencl_get_device_osdev_by_index HWLOC_NAME(opencl_get_device_osdev_by_index)
+
+/* cuda.h */
+
+#define hwloc_cuda_get_device_pci_ids HWLOC_NAME(cuda_get_device_pci_ids)
+#define hwloc_cuda_get_device_cpuset HWLOC_NAME(cuda_get_device_cpuset)
+#define hwloc_cuda_get_device_pcidev HWLOC_NAME(cuda_get_device_pcidev)
+#define hwloc_cuda_get_device_osdev HWLOC_NAME(cuda_get_device_osdev)
+#define hwloc_cuda_get_device_osdev_by_index HWLOC_NAME(cuda_get_device_osdev_by_index)
+
+/* cudart.h */
+
+#define hwloc_cudart_get_device_pci_ids HWLOC_NAME(cudart_get_device_pci_ids)
+#define hwloc_cudart_get_device_cpuset HWLOC_NAME(cudart_get_device_cpuset)
+#define hwloc_cudart_get_device_pcidev HWLOC_NAME(cudart_get_device_pcidev)
+#define hwloc_cudart_get_device_osdev_by_index HWLOC_NAME(cudart_get_device_osdev_by_index)
+
+/* nvml.h */
+
+#define hwloc_nvml_get_device_cpuset HWLOC_NAME(nvml_get_device_cpuset)
+#define hwloc_nvml_get_device_osdev HWLOC_NAME(nvml_get_device_osdev)
+#define hwloc_nvml_get_device_osdev_by_index HWLOC_NAME(nvml_get_device_osdev_by_index)
+
+/* gl.h */
+
+#define hwloc_gl_get_display_osdev_by_port_device HWLOC_NAME(gl_get_display_osdev_by_port_device)
+#define hwloc_gl_get_display_osdev_by_name HWLOC_NAME(gl_get_display_osdev_by_name)
+#define hwloc_gl_get_display_by_osdev HWLOC_NAME(gl_get_display_by_osdev)
+
+/* hwloc/plugins.h */
+
+#define hwloc_disc_component_type_e HWLOC_NAME(disc_component_type_e)
+#define HWLOC_DISC_COMPONENT_TYPE_CPU HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_CPU)
+#define HWLOC_DISC_COMPONENT_TYPE_GLOBAL HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_GLOBAL)
+#define HWLOC_DISC_COMPONENT_TYPE_MISC HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_MISC)
+#define hwloc_disc_component_type_t HWLOC_NAME(disc_component_type_t)
+#define hwloc_disc_component HWLOC_NAME(disc_component)
+
+#define hwloc_backend HWLOC_NAME(backend)
+
+#define hwloc_backend_alloc HWLOC_NAME(backend_alloc)
+#define hwloc_backend_enable HWLOC_NAME(backend_enable)
+
+#define hwloc_component_type_e HWLOC_NAME(component_type_e)
+#define HWLOC_COMPONENT_TYPE_DISC HWLOC_NAME_CAPS(COMPONENT_TYPE_DISC)
+#define HWLOC_COMPONENT_TYPE_XML HWLOC_NAME_CAPS(COMPONENT_TYPE_XML)
+#define hwloc_component_type_t HWLOC_NAME(component_type_t)
+#define hwloc_component HWLOC_NAME(component)
+
+#define hwloc_plugin_check_namespace HWLOC_NAME(plugin_check_namespace)
+
+#define hwloc_insert_object_by_cpuset HWLOC_NAME(insert_object_by_cpuset)
+#define hwloc_report_error_t HWLOC_NAME(report_error_t)
+#define hwloc_report_os_error HWLOC_NAME(report_os_error)
+#define hwloc_hide_errors HWLOC_NAME(hide_errors)
+#define hwloc__insert_object_by_cpuset HWLOC_NAME(_insert_object_by_cpuset)
+#define hwloc_insert_object_by_parent HWLOC_NAME(insert_object_by_parent)
+#define hwloc_alloc_setup_object HWLOC_NAME(alloc_setup_object)
+#define hwloc_obj_add_children_sets HWLOC_NAME(add_children_sets)
+#define hwloc_topology_reconnect HWLOC_NAME(topology_reconnect)
+
+#define hwloc_filter_check_pcidev_subtype_important HWLOC_NAME(filter_check_pcidev_subtype_important)
+#define hwloc_filter_check_osdev_subtype_important HWLOC_NAME(filter_check_osdev_subtype_important)
+#define hwloc_filter_check_keep_object_type HWLOC_NAME(filter_check_keep_object_type)
+#define hwloc_filter_check_keep_object HWLOC_NAME(filter_check_keep_object)
+
+#define hwloc_pci_find_cap HWLOC_NAME(pci_find_cap)
+#define hwloc_pci_find_linkspeed HWLOC_NAME(pci_find_linkspeed)
+#define hwloc_pci_check_bridge_type HWLOC_NAME(pci_check_bridge_type)
+#define hwloc_pci_setup_bridge_attr HWLOC_NAME(pci_setup_bridge_attr)
+#define hwloc_pci_tree_insert_by_busid HWLOC_NAME(pci_tree_insert_by_busid)
+#define hwloc_pci_tree_attach_belowroot HWLOC_NAME(pci_tree_attach_belowroot)
+#define hwloc_pci_belowroot_find_by_busid HWLOC_NAME(pci_belowroot_find_by_busid)
+#define hwloc_pci_find_busid_parent HWLOC_NAME(pci_find_busid_parent)
+
+/* hwloc/deprecated.h */
+
+#define hwloc_topology_insert_misc_object_by_parent HWLOC_NAME(topology_insert_misc_object_by_parent)
+#define hwloc_obj_cpuset_snprintf HWLOC_NAME(obj_cpuset_snprintf)
+#define hwloc_obj_type_string HWLOC_NAME(obj_type_string)
+#define hwloc_obj_type_sscanf HWLOC_NAME(obj_type_sscanf)
+
+#define hwloc_set_membind_nodeset HWLOC_NAME(set_membind_nodeset)
+#define hwloc_get_membind_nodeset HWLOC_NAME(get_membind_nodeset)
+#define hwloc_set_proc_membind_nodeset HWLOC_NAME(set_proc_membind_nodeset)
+#define hwloc_get_proc_membind_nodeset HWLOC_NAME(get_proc_membind_nodeset)
+#define hwloc_set_area_membind_nodeset HWLOC_NAME(set_area_membind_nodeset)
+#define hwloc_get_area_membind_nodeset HWLOC_NAME(get_area_membind_nodeset)
+#define hwloc_alloc_membind_nodeset HWLOC_NAME(alloc_membind_nodeset)
+
+#define hwloc_cpuset_to_nodeset_strict HWLOC_NAME(cpuset_to_nodeset_strict)
+#define hwloc_cpuset_from_nodeset_strict HWLOC_NAME(cpuset_from_nodeset_strict)
+
+/* private/debug.h */
+
+#define hwloc_debug_enabled HWLOC_NAME(debug_enabled)
+#define hwloc_debug HWLOC_NAME(debug)
+
+/* private/misc.h */
+
+#define hwloc_snprintf HWLOC_NAME(snprintf)
+#define hwloc_namecoloncmp HWLOC_NAME(namecoloncmp)
+#define hwloc_ffsl_manual HWLOC_NAME(ffsl_manual)
+#define hwloc_ffs32 HWLOC_NAME(ffs32)
+#define hwloc_ffsl_from_ffs32 HWLOC_NAME(ffsl_from_ffs32)
+#define hwloc_flsl_manual HWLOC_NAME(flsl_manual)
+#define hwloc_fls32 HWLOC_NAME(fls32)
+#define hwloc_flsl_from_fls32 HWLOC_NAME(flsl_from_fls32)
+#define hwloc_weight_long HWLOC_NAME(weight_long)
+#define hwloc_strncasecmp HWLOC_NAME(strncasecmp)
+#define hwloc_cache_type_by_depth_type HWLOC_NAME(cache_type_by_depth_type)
+#define hwloc_obj_type_is_io HWLOC_NAME(obj_type_is_io)
+#define hwloc_obj_type_is_special HWLOC_NAME(obj_type_is_special)
+
+/* private/cpuid-x86.h */
+
+#define hwloc_have_x86_cpuid HWLOC_NAME(have_x86_cpuid)
+#define hwloc_x86_cpuid HWLOC_NAME(x86_cpuid)
+
+/* private/xml.h */
+
+#define hwloc__xml_verbose HWLOC_NAME(_xml_verbose)
+
+#define hwloc__xml_import_state_s HWLOC_NAME(_xml_import_state_s)
+#define hwloc__xml_import_state_t HWLOC_NAME(_xml_import_state_t)
+#define hwloc__xml_import_diff HWLOC_NAME(_xml_import_diff)
+#define hwloc_xml_backend_data_s HWLOC_NAME(xml_backend_data_s)
+#define hwloc__xml_export_state_s HWLOC_NAME(_xml_export_state_s)
+#define hwloc__xml_export_state_t HWLOC_NAME(_xml_export_state_t)
+#define hwloc__xml_export_topology HWLOC_NAME(_xml_export_topology)
+#define hwloc__xml_export_diff HWLOC_NAME(_xml_export_diff)
+
+#define hwloc_xml_callbacks HWLOC_NAME(xml_callbacks)
+#define hwloc_xml_component HWLOC_NAME(xml_component)
+#define hwloc_xml_callbacks_register HWLOC_NAME(xml_callbacks_register)
+#define hwloc_xml_callbacks_reset HWLOC_NAME(xml_callbacks_reset)
+
+#define hwloc__xml_imported_v1distances_s HWLOC_NAME(_xml_imported_v1distances_s)
+
+/* private/components.h */
+
+#define hwloc_disc_component_force_enable HWLOC_NAME(disc_component_force_enable)
+#define hwloc_disc_components_enable_others HWLOC_NAME(disc_components_instantiate_others)
+
+#define hwloc_backends_is_thissystem HWLOC_NAME(backends_is_thissystem)
+#define hwloc_backends_find_callbacks HWLOC_NAME(backends_find_callbacks)
+
+#define hwloc_backends_init HWLOC_NAME(backends_init)
+#define hwloc_backends_disable_all HWLOC_NAME(backends_disable_all)
+
+#define hwloc_components_init HWLOC_NAME(components_init)
+#define hwloc_components_fini HWLOC_NAME(components_fini)
+
+/* private/private.h */
+
+#define hwloc_special_level_s HWLOC_NAME(special_level_s)
+
+#define hwloc_pci_forced_locality_s HWLOC_NAME(pci_forced_locality_s)
+
+#define hwloc_alloc_obj_cpusets HWLOC_NAME(alloc_obj_cpusets)
+#define hwloc_setup_pu_level HWLOC_NAME(setup_pu_level)
+#define hwloc_get_sysctlbyname HWLOC_NAME(get_sysctlbyname)
+#define hwloc_get_sysctl HWLOC_NAME(get_sysctl)
+#define hwloc_fallback_nbprocessors HWLOC_NAME(fallback_nbprocessors)
+
+#define hwloc__object_cpusets_compare_first HWLOC_NAME(_object_cpusets_compare_first)
+#define hwloc__reorder_children HWLOC_NAME(_reorder_children)
+
+#define hwloc_topology_setup_defaults HWLOC_NAME(topology_setup_defaults)
+#define hwloc_topology_clear HWLOC_NAME(topology_clear)
+
+#define hwloc_pci_discovery_init HWLOC_NAME(pci_discovery_init)
+#define hwloc_pci_discovery_prepare HWLOC_NAME(pci_discovery_prepare)
+#define hwloc_pci_discovery_exit HWLOC_NAME(pci_discovery_exit)
+#define hwloc_find_insert_io_parent_by_complete_cpuset HWLOC_NAME(find_insert_io_parent_by_complete_cpuset)
+#define hwloc_pci_belowroot_apply_locality HWLOC_NAME(pci_belowroot_apply_locality)
+#define hwloc_pci_class_string HWLOC_NAME(pci_class_string)
+
+#define hwloc__add_info HWLOC_NAME(_add_info)
+#define hwloc__find_info_slot HWLOC_NAME(_find_info_slot)
+#define hwloc__move_infos HWLOC_NAME(_move_infos)
+#define hwloc__free_infos HWLOC_NAME(_free_infos)
+
+#define hwloc_binding_hooks HWLOC_NAME(binding_hooks)
+#define hwloc_set_native_binding_hooks HWLOC_NAME(set_native_binding_hooks)
+#define hwloc_set_binding_hooks HWLOC_NAME(set_binding_hooks)
+
+#define hwloc_set_linuxfs_hooks HWLOC_NAME(set_linuxfs_hooks)
+#define hwloc_set_bgq_hooks HWLOC_NAME(set_bgq_hooks)
+#define hwloc_set_solaris_hooks HWLOC_NAME(set_solaris_hooks)
+#define hwloc_set_aix_hooks HWLOC_NAME(set_aix_hooks)
+#define hwloc_set_windows_hooks HWLOC_NAME(set_windows_hooks)
+#define hwloc_set_darwin_hooks HWLOC_NAME(set_darwin_hooks)
+#define hwloc_set_freebsd_hooks HWLOC_NAME(set_freebsd_hooks)
+#define hwloc_set_netbsd_hooks HWLOC_NAME(set_netbsd_hooks)
+#define hwloc_set_hpux_hooks HWLOC_NAME(set_hpux_hooks)
+
+#define hwloc_look_hardwired_fujitsu_k HWLOC_NAME(look_hardwired_fujitsu_k)
+#define hwloc_look_hardwired_fujitsu_fx10 HWLOC_NAME(look_hardwired_fujitsu_fx10)
+#define hwloc_look_hardwired_fujitsu_fx100 HWLOC_NAME(look_hardwired_fujitsu_fx100)
+
+#define hwloc_add_uname_info HWLOC_NAME(add_uname_info)
+#define hwloc_free_unlinked_object HWLOC_NAME(free_unlinked_object)
+#define hwloc_free_object_and_children HWLOC_NAME(free_object_and_children)
+#define hwloc_free_object_siblings_and_children HWLOC_NAME(free_object_siblings_and_children)
+
+#define hwloc_alloc_heap HWLOC_NAME(alloc_heap)
+#define hwloc_alloc_mmap HWLOC_NAME(alloc_mmap)
+#define hwloc_free_heap HWLOC_NAME(free_heap)
+#define hwloc_free_mmap HWLOC_NAME(free_mmap)
+#define hwloc_alloc_or_fail HWLOC_NAME(alloc_or_fail)
+
+#define hwloc_internal_distances_s HWLOC_NAME(internal_distances_s)
+#define hwloc_internal_distances_init HWLOC_NAME(internal_distances_init)
+#define hwloc_internal_distances_prepare HWLOC_NAME(internal_distances_prepare)
+#define hwloc_internal_distances_dup HWLOC_NAME(internal_distances_dup)
+#define hwloc_internal_distances_refresh HWLOC_NAME(internal_distances_refresh)
+#define hwloc_internal_distances_destroy HWLOC_NAME(internal_distances_destroy)
+
+#define hwloc_internal_distances_add HWLOC_NAME(internal_distances_add)
+#define hwloc_internal_distances_add_by_index HWLOC_NAME(internal_distances_add_by_index)
+#define hwloc_internal_distances_invalidate_cached_objs HWLOC_NAME(internal_distances_invalidate_cached_objs)
+
+#define hwloc_encode_to_base64 HWLOC_NAME(encode_to_base64)
+#define hwloc_decode_from_base64 HWLOC_NAME(decode_from_base64)
+
+#define hwloc_obj_add_info_nodup HWLOC_NAME(obj_add_info_nodup)
+
+#define hwloc_progname HWLOC_NAME(progname)
+
+#define hwloc_bitmap_compare_inclusion HWLOC_NAME(bitmap_compare_inclusion)
+
+/* private/solaris-chiptype.h */
+
+#define hwloc_solaris_get_chip_type HWLOC_NAME(solaris_get_chip_type)
+#define hwloc_solaris_get_chip_model HWLOC_NAME(solaris_get_chip_model)
+
+#endif /* HWLOC_SYM_TRANSFORM */
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_RENAME_H */
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/netloc.h b/opal/mca/hwloc/hwloc2x/hwloc/include/netloc.h
new file mode 100644
index 0000000000..1eacbca18b
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/netloc.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright © 2013-2014 Cisco Systems, Inc.  All rights reserved.
+ * Copyright © 2013-2014 University of Wisconsin-La Crosse.
+ *                         All rights reserved.
+ * Copyright © 2015-2016 Inria.  All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ * See COPYING in top-level directory.
+ *
+ * $HEADER$
+ */
+
+#ifndef _NETLOC_H_
+#define _NETLOC_H_
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE // for asprintf
+#endif
+
+#include <hwloc/autogen/config.h>
+
+#include <hwloc.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \defgroup netloc_api Netloc API
+ * @{
+ */
+/**
+ * Return codes
+ */
+enum {
+    NETLOC_SUCCESS         =  0, /**< Success */
+    NETLOC_ERROR           = -1, /**< Error: General condition */
+    NETLOC_ERROR_NOTDIR    = -2, /**< Error: URI is not a directory */
+    NETLOC_ERROR_NOENT     = -3, /**< Error: URI is invalid, no such entry */
+    NETLOC_ERROR_EMPTY     = -4, /**< Error: No networks found */
+    NETLOC_ERROR_MULTIPLE  = -5, /**< Error: Multiple matching networks found */
+    NETLOC_ERROR_NOT_IMPL  = -6, /**< Error: Interface not implemented */
+    NETLOC_ERROR_EXISTS    = -7, /**< Error: If the entry already exists when trying to add to a lookup table */
+    NETLOC_ERROR_NOT_FOUND = -8, /**< Error: No path found */
+    NETLOC_ERROR_MAX       = -9  /**< Error: Enum upper bound marker. No errors less than this number Will not be returned externally. */
+};
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+/** @} */
+
+#endif // _NETLOC_H_
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/netloc/utarray.h b/opal/mca/hwloc/hwloc2x/hwloc/include/netloc/utarray.h
new file mode 100644
index 0000000000..f950973303
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/netloc/utarray.h
@@ -0,0 +1,237 @@
+/*
+Copyright (c) 2008-2014, Troy D. Hanson   http://troydhanson.github.com/uthash/
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* a dynamic array implementation using macros
+ */
+#ifndef UTARRAY_H
+#define UTARRAY_H
+
+#define UTARRAY_VERSION 1.9.9
+
+#ifdef __GNUC__
+#define _UNUSED_ __attribute__ ((__unused__))
+#else
+#define _UNUSED_
+#endif
+
+#include <stddef.h>  /* size_t */
+#include <string.h>  /* memset, etc */
+#include <stdlib.h>  /* exit */
+
+#ifndef oom
+#define oom() exit(-1)
+#endif
+
+typedef void (ctor_f)(void *dst, const void *src);
+typedef void (dtor_f)(void *elt);
+typedef void (init_f)(void *elt);
+typedef struct {
+    size_t sz;
+    init_f *init;
+    ctor_f *copy;
+    dtor_f *dtor;
+} UT_icd;
+
+typedef struct {
+    unsigned i,n;/* i: index of next available slot, n: num slots */
+    UT_icd icd;  /* initializer, copy and destructor functions */
+    char *d;     /* n slots of size icd->sz*/
+} UT_array;
+
+#define utarray_init(a,_icd) do {                                             \
+  memset(a,0,sizeof(UT_array));                                               \
+  (a)->icd=*_icd;                                                             \
+} while(0)
+
+#define utarray_done(a) do {                                                  \
+  if ((a)->n) {                                                               \
+    if ((a)->icd.dtor) {                                                      \
+      size_t _ut_i;                                                           \
+      for(_ut_i=0; _ut_i < (a)->i; _ut_i++) {                                 \
+        (a)->icd.dtor(utarray_eltptr(a,_ut_i));                               \
+      }                                                                       \
+    }                                                                         \
+    free((a)->d);                                                             \
+  }                                                                           \
+  (a)->n=0;                                                                   \
+} while(0)
+
+#define utarray_new(a,_icd) do {                                              \
+  a=(UT_array*)malloc(sizeof(UT_array));                                      \
+  utarray_init(a,_icd);                                                       \
+} while(0)
+
+#define utarray_free(a) do {                                                  \
+  utarray_done(a);                                                            \
+  free(a);                                                                    \
+} while(0)
+
+#define utarray_reserve(a,by) do {                                            \
+  if (((a)->i+(by)) > ((a)->n)) {                                             \
+    char *utarray_tmp;                                                        \
+    while(((a)->i+(by)) > ((a)->n)) { (a)->n = ((a)->n ? (2*(a)->n) : 8); }   \
+    utarray_tmp=(char*)realloc((a)->d, (a)->n*(a)->icd.sz);                   \
+    if (utarray_tmp == NULL) oom();                                           \
+    (a)->d=utarray_tmp;                                                       \
+  }                                                                           \
+} while(0)
+
+#define utarray_push_back(a,p) do {                                           \
+  utarray_reserve(a,1);                                                       \
+  if ((a)->icd.copy) { (a)->icd.copy( _utarray_eltptr(a,(a)->i++), p); }      \
+  else { memcpy(_utarray_eltptr(a,(a)->i++), p, (a)->icd.sz); };              \
+} while(0)
+
+#define utarray_pop_back(a) do {                                              \
+  if ((a)->icd.dtor) { (a)->icd.dtor( _utarray_eltptr(a,--((a)->i))); }       \
+  else { (a)->i--; }                                                          \
+} while(0)
+
+#define utarray_extend_back(a) do {                                           \
+  utarray_reserve(a,1);                                                       \
+  if ((a)->icd.init) { (a)->icd.init(_utarray_eltptr(a,(a)->i)); }            \
+  else { memset(_utarray_eltptr(a,(a)->i),0,(a)->icd.sz); }                   \
+  (a)->i++;                                                                   \
+} while(0)
+
+#define utarray_len(a) ((a)->i)
+
+#define utarray_eltptr(a,j) (((j) < (a)->i) ? _utarray_eltptr(a,j) : NULL)
+#define _utarray_eltptr(a,j) ((char*)((a)->d + ((a)->icd.sz*(j) )))
+
+#define utarray_insert(a,p,j) do {                                            \
+  if (j > (a)->i) utarray_resize(a,j);                                        \
+  utarray_reserve(a,1);                                                       \
+  if ((j) < (a)->i) {                                                         \
+    memmove( _utarray_eltptr(a,(j)+1), _utarray_eltptr(a,j),                  \
+             ((a)->i - (j))*((a)->icd.sz));                                   \
+  }                                                                           \
+  if ((a)->icd.copy) { (a)->icd.copy( _utarray_eltptr(a,j), p); }             \
+  else { memcpy(_utarray_eltptr(a,j), p, (a)->icd.sz); };                     \
+  (a)->i++;                                                                   \
+} while(0)
+
+#define utarray_inserta(a,w,j) do {                                           \
+  if (utarray_len(w) == 0) break;                                             \
+  if (j > (a)->i) utarray_resize(a,j);                                        \
+  utarray_reserve(a,utarray_len(w));                                          \
+  if ((j) < (a)->i) {                                                         \
+    memmove(_utarray_eltptr(a,(j)+utarray_len(w)),                            \
+            _utarray_eltptr(a,j),                                             \
+            ((a)->i - (j))*((a)->icd.sz));                                    \
+  }                                                                           \
+  if ((a)->icd.copy) {                                                        \
+    size_t _ut_i;                                                             \
+    for(_ut_i=0;_ut_i<(w)->i;_ut_i++) {                                       \
+      (a)->icd.copy(_utarray_eltptr(a,j+_ut_i), _utarray_eltptr(w,_ut_i));    \
+    }                                                                         \
+  } else {                                                                    \
+    memcpy(_utarray_eltptr(a,j), _utarray_eltptr(w,0),                        \
+           utarray_len(w)*((a)->icd.sz));                                     \
+  }                                                                           \
+  (a)->i += utarray_len(w);                                                   \
+} while(0)
+
+#define utarray_resize(dst,num) do {                                          \
+  size_t _ut_i;                                                               \
+  if (dst->i > (size_t)(num)) {                                               \
+    if ((dst)->icd.dtor) {                                                    \
+      for(_ut_i=num; _ut_i < dst->i; _ut_i++) {                               \
+        (dst)->icd.dtor(utarray_eltptr(dst,_ut_i));                           \
+      }                                                                       \
+    }                                                                         \
+  } else if (dst->i < (size_t)(num)) {                                        \
+    utarray_reserve(dst,num-dst->i);                                          \
+    if ((dst)->icd.init) {                                                    \
+      for(_ut_i=dst->i; _ut_i < num; _ut_i++) {                               \
+        (dst)->icd.init(utarray_eltptr(dst,_ut_i));                           \
+      }                                                                       \
+    } else {                                                                  \
+      memset(_utarray_eltptr(dst,dst->i),0,(dst)->icd.sz*(num-dst->i));       \
+    }                                                                         \
+  }                                                                           \
+  dst->i = num;                                                               \
+} while(0)
+
+#define utarray_concat(dst,src) do {                                          \
+  utarray_inserta((dst),(src),utarray_len(dst));                              \
+} while(0)
+
+#define utarray_erase(a,pos,len) do {                                         \
+  if ((a)->icd.dtor) {                                                        \
+    size_t _ut_i;                                                             \
+    for(_ut_i=0; _ut_i < len; _ut_i++) {                                      \
+      (a)->icd.dtor(utarray_eltptr((a),pos+_ut_i));                           \
+    }                                                                         \
+  }                                                                           \
+  if ((a)->i > (pos+len)) {                                                   \
+    memmove( _utarray_eltptr((a),pos), _utarray_eltptr((a),pos+len),          \
+            (((a)->i)-(pos+len))*((a)->icd.sz));                              \
+  }                                                                           \
+  (a)->i -= (len);                                                            \
+} while(0)
+
+#define utarray_renew(a,u) do {                                               \
+  if (a) utarray_clear(a); \
+  else utarray_new((a),(u));   \
+} while(0)
+
+#define utarray_clear(a) do {                                                 \
+  if ((a)->i > 0) {                                                           \
+    if ((a)->icd.dtor) {                                                      \
+      size_t _ut_i;                                                           \
+      for(_ut_i=0; _ut_i < (a)->i; _ut_i++) {                                 \
+        (a)->icd.dtor(utarray_eltptr(a,_ut_i));                               \
+      }                                                                       \
+    }                                                                         \
+    (a)->i = 0;                                                               \
+  }                                                                           \
+} while(0)
+
+#define utarray_sort(a,cmp) do {                                              \
+  qsort((a)->d, (a)->i, (a)->icd.sz, cmp);                                    \
+} while(0)
+
+#define utarray_find(a,v,cmp) bsearch((v),(a)->d,(a)->i,(a)->icd.sz,cmp)
+
+#define utarray_front(a) (((a)->i) ? (_utarray_eltptr(a,0)) : NULL)
+#define utarray_next(a,e) (((e)==NULL) ? utarray_front(a) : ((((a)->i) > (utarray_eltidx(a,e)+1)) ? _utarray_eltptr(a,utarray_eltidx(a,e)+1) : NULL))
+#define utarray_prev(a,e) (((e)==NULL) ? utarray_back(a) : ((utarray_eltidx(a,e) > 0) ? _utarray_eltptr(a,utarray_eltidx(a,e)-1) : NULL))
+#define utarray_back(a) (((a)->i) ? (_utarray_eltptr(a,(a)->i-1)) : NULL)
+#define utarray_eltidx(a,e) (((char*)(e) >= (char*)((a)->d)) ? (((char*)(e) - (char*)((a)->d))/(size_t)(a)->icd.sz) : (unsigned int)-1)
+
+/* last we pre-define a few icd for common utarrays of ints and strings */
+static void utarray_str_cpy(void *dst, const void *src) {
+  char **_src = (char**)src, **_dst = (char**)dst;
+  *_dst = (*_src == NULL) ? NULL : strdup(*_src);
+}
+static void utarray_str_dtor(void *elt) {
+  char **eltc = (char**)elt;
+  if (*eltc) free(*eltc);
+}
+static const UT_icd ut_str_icd _UNUSED_ = {sizeof(char*),NULL,utarray_str_cpy,utarray_str_dtor};
+static const UT_icd ut_int_icd _UNUSED_ = {sizeof(int),NULL,NULL,NULL};
+static const UT_icd ut_ptr_icd _UNUSED_ = {sizeof(void*),NULL,NULL,NULL};
+
+
+#endif /* UTARRAY_H */
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/netloc/uthash.h b/opal/mca/hwloc/hwloc2x/hwloc/include/netloc/uthash.h
new file mode 100644
index 0000000000..ed69c0c52e
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/netloc/uthash.h
@@ -0,0 +1,966 @@
+/*
+Copyright (c) 2003-2014, Troy D. Hanson     http://troydhanson.github.com/uthash/
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef UTHASH_H
+#define UTHASH_H
+
+#include <string.h>   /* memcmp,strlen */
+#include <stddef.h>   /* ptrdiff_t */
+#include <stdlib.h>   /* exit() */
+
+/* These macros use decltype or the earlier __typeof GNU extension.
+   As decltype is only available in newer compilers (VS2010 or gcc 4.3+
+   when compiling c++ source) this code uses whatever method is needed
+   or, for VS2008 where neither is available, uses casting workarounds. */
+#if defined(_MSC_VER)   /* MS compiler */
+#if _MSC_VER >= 1600 && defined(__cplusplus)  /* VS2010 or newer in C++ mode */
+#define DECLTYPE(x) (decltype(x))
+#else                   /* VS2008 or older (or VS2010 in C mode) */
+#define NO_DECLTYPE
+#define DECLTYPE(x)
+#endif
+#elif defined(__BORLANDC__) || defined(__LCC__) || defined(__WATCOMC__)
+#define NO_DECLTYPE
+#define DECLTYPE(x)
+#else                   /* GNU, Sun and other compilers */
+#define DECLTYPE(x) (__typeof(x))
+#endif
+
+#ifdef NO_DECLTYPE
+#define DECLTYPE_ASSIGN(dst,src)                                                 \
+do {                                                                             \
+  char **_da_dst = (char**)(&(dst));                                             \
+  *_da_dst = (char*)(src);                                                       \
+} while(0)
+#else
+#define DECLTYPE_ASSIGN(dst,src)                                                 \
+do {                                                                             \
+  (dst) = DECLTYPE(dst)(src);                                                    \
+} while(0)
+#endif
+
+/* a number of the hash functions use uint32_t which isn't defined on Pre VS2010 */
+#if defined(_WIN32)
+#if defined(_MSC_VER) && _MSC_VER >= 1600
+#include <stdint.h>
+#elif defined(__WATCOMC__) || defined(__MINGW32__) || defined(__CYGWIN__)
+#include <stdint.h>
+#else
+typedef unsigned int uint32_t;
+typedef unsigned char uint8_t;
+#endif
+#elif defined(__GNUC__) && !defined(__VXWORKS__)
+#include <stdint.h>
+#else
+typedef unsigned int uint32_t;
+typedef unsigned char uint8_t;
+#endif
+
+#define UTHASH_VERSION 1.9.9
+
+#ifndef uthash_fatal
+#define uthash_fatal(msg) exit(-1)        /* fatal error (out of memory,etc) */
+#endif
+#ifndef uthash_malloc
+#define uthash_malloc(sz) malloc(sz)      /* malloc fcn                      */
+#endif
+#ifndef uthash_free
+#define uthash_free(ptr,sz) free(ptr)     /* free fcn                        */
+#endif
+
+#ifndef uthash_noexpand_fyi
+#define uthash_noexpand_fyi(tbl)          /* can be defined to log noexpand  */
+#endif
+#ifndef uthash_expand_fyi
+#define uthash_expand_fyi(tbl)            /* can be defined to log expands   */
+#endif
+
+/* initial number of buckets */
+#define HASH_INITIAL_NUM_BUCKETS 32U     /* initial number of buckets        */
+#define HASH_INITIAL_NUM_BUCKETS_LOG2 5U /* lg2 of initial number of buckets */
+#define HASH_BKT_CAPACITY_THRESH 10U     /* expand when bucket count reaches */
+
+/* calculate the element whose hash handle address is hhe */
+#define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho)))
+
+#define HASH_FIND(hh,head,keyptr,keylen,out)                                     \
+do {                                                                             \
+  out=NULL;                                                                      \
+  if (head != NULL) {                                                            \
+     unsigned _hf_bkt,_hf_hashv;                                                 \
+     HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt);   \
+     if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv) != 0) {                      \
+       HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ],  \
+                        keyptr,keylen,out);                                      \
+     }                                                                           \
+  }                                                                              \
+} while (0)
+
+#ifdef HASH_BLOOM
+#define HASH_BLOOM_BITLEN (1UL << HASH_BLOOM)
+#define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8UL) + (((HASH_BLOOM_BITLEN%8UL)!=0UL) ? 1UL : 0UL)
+#define HASH_BLOOM_MAKE(tbl)                                                     \
+do {                                                                             \
+  (tbl)->bloom_nbits = HASH_BLOOM;                                               \
+  (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN);                 \
+  if (!((tbl)->bloom_bv))  { uthash_fatal( "out of memory"); }                   \
+  memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN);                                \
+  (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE;                                       \
+} while (0)
+
+#define HASH_BLOOM_FREE(tbl)                                                     \
+do {                                                                             \
+  uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN);                              \
+} while (0)
+
+#define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8U] |= (1U << ((idx)%8U)))
+#define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8U] & (1U << ((idx)%8U)))
+
+#define HASH_BLOOM_ADD(tbl,hashv)                                                \
+  HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1U)))
+
+#define HASH_BLOOM_TEST(tbl,hashv)                                               \
+  HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1U)))
+
+#else
+#define HASH_BLOOM_MAKE(tbl)
+#define HASH_BLOOM_FREE(tbl)
+#define HASH_BLOOM_ADD(tbl,hashv)
+#define HASH_BLOOM_TEST(tbl,hashv) (1)
+#define HASH_BLOOM_BYTELEN 0U
+#endif
+
+#define HASH_MAKE_TABLE(hh,head)                                                 \
+do {                                                                             \
+  (head)->hh.tbl = (UT_hash_table*)uthash_malloc(                                \
+                  sizeof(UT_hash_table));                                        \
+  if (!((head)->hh.tbl))  { uthash_fatal( "out of memory"); }                    \
+  memset((head)->hh.tbl, 0, sizeof(UT_hash_table));                              \
+  (head)->hh.tbl->tail = &((head)->hh);                                          \
+  (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS;                        \
+  (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2;              \
+  (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head);                    \
+  (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc(                      \
+          HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket));               \
+  if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); }             \
+  memset((head)->hh.tbl->buckets, 0,                                             \
+          HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket));               \
+  HASH_BLOOM_MAKE((head)->hh.tbl);                                               \
+  (head)->hh.tbl->signature = HASH_SIGNATURE;                                    \
+} while(0)
+
+#define HASH_ADD(hh,head,fieldname,keylen_in,add)                                \
+        HASH_ADD_KEYPTR(hh,head,&((add)->fieldname),keylen_in,add)
+
+#define HASH_REPLACE(hh,head,fieldname,keylen_in,add,replaced)                   \
+do {                                                                             \
+  replaced=NULL;                                                                 \
+  HASH_FIND(hh,head,&((add)->fieldname),keylen_in,replaced);                     \
+  if (replaced!=NULL) {                                                          \
+     HASH_DELETE(hh,head,replaced);                                              \
+  }                                                                              \
+  HASH_ADD(hh,head,fieldname,keylen_in,add);                                     \
+} while(0)
+
+#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add)                            \
+do {                                                                             \
+ unsigned _ha_bkt;                                                               \
+ (add)->hh.next = NULL;                                                          \
+ (add)->hh.key = (char*)(keyptr);                                                \
+ (add)->hh.keylen = (unsigned)(keylen_in);                                       \
+ if (!(head)) {                                                                  \
+    head = (add);                                                                \
+    (head)->hh.prev = NULL;                                                      \
+    HASH_MAKE_TABLE(hh,head);                                                    \
+ } else {                                                                        \
+    (head)->hh.tbl->tail->next = (add);                                          \
+    (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail);         \
+    (head)->hh.tbl->tail = &((add)->hh);                                         \
+ }                                                                               \
+ (head)->hh.tbl->num_items++;                                                    \
+ (add)->hh.tbl = (head)->hh.tbl;                                                 \
+ HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets,                         \
+         (add)->hh.hashv, _ha_bkt);                                              \
+ HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh);                   \
+ HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv);                                 \
+ HASH_EMIT_KEY(hh,head,keyptr,keylen_in);                                        \
+ HASH_FSCK(hh,head);                                                             \
+} while(0)
+
+#define HASH_TO_BKT( hashv, num_bkts, bkt )                                      \
+do {                                                                             \
+  bkt = ((hashv) & ((num_bkts) - 1U));                                           \
+} while(0)
+
+/* delete "delptr" from the hash table.
+ * "the usual" patch-up process for the app-order doubly-linked-list.
+ * The use of _hd_hh_del below deserves special explanation.
+ * These used to be expressed using (delptr) but that led to a bug
+ * if someone used the same symbol for the head and deletee, like
+ *  HASH_DELETE(hh,users,users);
+ * We want that to work, but by changing the head (users) below
+ * we were forfeiting our ability to further refer to the deletee (users)
+ * in the patch-up process. Solution: use scratch space to
+ * copy the deletee pointer, then the latter references are via that
+ * scratch pointer rather than through the repointed (users) symbol.
+ */
+#define HASH_DELETE(hh,head,delptr)                                              \
+do {                                                                             \
+    struct UT_hash_handle *_hd_hh_del;                                           \
+    if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) )  {         \
+        uthash_free((head)->hh.tbl->buckets,                                     \
+                    (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
+        HASH_BLOOM_FREE((head)->hh.tbl);                                         \
+        uthash_free((head)->hh.tbl, sizeof(UT_hash_table));                      \
+        head = NULL;                                                             \
+    } else {                                                                     \
+        unsigned _hd_bkt;                                                        \
+        _hd_hh_del = &((delptr)->hh);                                            \
+        if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) {     \
+            (head)->hh.tbl->tail =                                               \
+                (UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) +               \
+                (head)->hh.tbl->hho);                                            \
+        }                                                                        \
+        if ((delptr)->hh.prev != NULL) {                                         \
+            ((UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) +                  \
+                    (head)->hh.tbl->hho))->next = (delptr)->hh.next;             \
+        } else {                                                                 \
+            DECLTYPE_ASSIGN(head,(delptr)->hh.next);                             \
+        }                                                                        \
+        if (_hd_hh_del->next != NULL) {                                          \
+            ((UT_hash_handle*)((ptrdiff_t)_hd_hh_del->next +                     \
+                    (head)->hh.tbl->hho))->prev =                                \
+                    _hd_hh_del->prev;                                            \
+        }                                                                        \
+        HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt);   \
+        HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del);        \
+        (head)->hh.tbl->num_items--;                                             \
+    }                                                                            \
+    HASH_FSCK(hh,head);                                                          \
+} while (0)
+
+
+/* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */
+#define HASH_FIND_STR(head,findstr,out)                                          \
+    HASH_FIND(hh,head,findstr,(unsigned)strlen(findstr),out)
+#define HASH_ADD_STR(head,strfield,add)                                          \
+    HASH_ADD(hh,head,strfield[0],(unsigned int)strlen(add->strfield),add)
+#define HASH_REPLACE_STR(head,strfield,add,replaced)                             \
+    HASH_REPLACE(hh,head,strfield[0],(unsigned)strlen(add->strfield),add,replaced)
+#define HASH_FIND_INT(head,findint,out)                                          \
+    HASH_FIND(hh,head,findint,sizeof(int),out)
+#define HASH_ADD_INT(head,intfield,add)                                          \
+    HASH_ADD(hh,head,intfield,sizeof(int),add)
+#define HASH_REPLACE_INT(head,intfield,add,replaced)                             \
+    HASH_REPLACE(hh,head,intfield,sizeof(int),add,replaced)
+#define HASH_FIND_PTR(head,findptr,out)                                          \
+    HASH_FIND(hh,head,findptr,sizeof(void *),out)
+#define HASH_ADD_PTR(head,ptrfield,add)                                          \
+    HASH_ADD(hh,head,ptrfield,sizeof(void *),add)
+#define HASH_REPLACE_PTR(head,ptrfield,add,replaced)                             \
+    HASH_REPLACE(hh,head,ptrfield,sizeof(void *),add,replaced)
+#define HASH_DEL(head,delptr)                                                    \
+    HASH_DELETE(hh,head,delptr)
+
+/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined.
+ * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined.
+ */
+#ifdef HASH_DEBUG
+#define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0)
+/* HASH_FSCK: structural consistency check, compiled in only when HASH_DEBUG
+ * is defined (the matching #ifdef opens above this definition).  Pass 1 walks
+ * every bucket chain, verifying each hh_prev back-link and each bucket's
+ * stored count; pass 2 walks the application-order list verifying prev
+ * links.  Item totals from both walks must equal tbl->num_items; any
+ * violation is reported through HASH_OOPS. */
+#define HASH_FSCK(hh,head)                                                       \
+do {                                                                             \
+    struct UT_hash_handle *_thh;                                                 \
+    if (head) {                                                                  \
+        unsigned _bkt_i;                                                         \
+        unsigned _count;                                                         \
+        char *_prev;                                                             \
+        _count = 0;                                                              \
+        for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) {       \
+            unsigned _bkt_count = 0;                                             \
+            _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head;                      \
+            _prev = NULL;                                                        \
+            while (_thh) {                                                       \
+               if (_prev != (char*)(_thh->hh_prev)) {                            \
+                   HASH_OOPS("invalid hh_prev %p, actual %p\n",                  \
+                    _thh->hh_prev, _prev );                                      \
+               }                                                                 \
+               _bkt_count++;                                                     \
+               _prev = (char*)(_thh);                                            \
+               _thh = _thh->hh_next;                                             \
+            }                                                                    \
+            _count += _bkt_count;                                                \
+            if ((head)->hh.tbl->buckets[_bkt_i].count !=  _bkt_count) {          \
+               HASH_OOPS("invalid bucket count %u, actual %u\n",                 \
+                (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count);              \
+            }                                                                    \
+        }                                                                        \
+        if (_count != (head)->hh.tbl->num_items) {                               \
+            HASH_OOPS("invalid hh item count %u, actual %u\n",                   \
+                (head)->hh.tbl->num_items, _count );                             \
+        }                                                                        \
+        /* traverse hh in app order; check next/prev integrity, count */         \
+        _count = 0;                                                              \
+        _prev = NULL;                                                            \
+        _thh =  &(head)->hh;                                                     \
+        while (_thh) {                                                           \
+           _count++;                                                             \
+           if (_prev !=(char*)(_thh->prev)) {                                    \
+              HASH_OOPS("invalid prev %p, actual %p\n",                          \
+                    _thh->prev, _prev );                                         \
+           }                                                                     \
+           _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh);                    \
+           _thh = ( _thh->next ?  (UT_hash_handle*)((char*)(_thh->next) +        \
+                                  (head)->hh.tbl->hho) : NULL );                 \
+        }                                                                        \
+        if (_count != (head)->hh.tbl->num_items) {                               \
+            HASH_OOPS("invalid app item count %u, actual %u\n",                  \
+                (head)->hh.tbl->num_items, _count );                             \
+        }                                                                        \
+    }                                                                            \
+} while (0)
+#else
+#define HASH_FSCK(hh,head)
+#endif
+
+/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to
+ * the descriptor to which this macro is defined for tuning the hash function.
+ * The app can #include <unistd.h> to get the prototype for write(2).
+ * The write(2) return values are not checked: this is a development-time
+ * instrumentation path, not production code. */
+#ifdef HASH_EMIT_KEYS
+#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)                                   \
+do {                                                                             \
+    unsigned _klen = fieldlen;                                                   \
+    write(HASH_EMIT_KEYS, &_klen, sizeof(_klen));                                \
+    write(HASH_EMIT_KEYS, keyptr, (unsigned long)fieldlen);                      \
+} while (0)
+#else
+#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
+#endif
+
+/* Default to Bob Jenkins' hash (HASH_JEN, defined below) unless overridden
+ * at compile time, e.g. -DHASH_FUNCTION=HASH_SAX selects the shift-add-xor
+ * variant instead. */
+#ifdef HASH_FUNCTION
+#define HASH_FCN HASH_FUNCTION
+#else
+#define HASH_FCN HASH_JEN
+#endif
+
+/* The Bernstein hash function, used in Perl prior to v5.6.  Each byte is
+ * folded in as hashv = hashv*33 + byte, since ((x<<5)+(x)) == x*33.  The
+ * bucket index is hashv masked by num_bkts-1 (bucket counts are kept a
+ * power of two by HASH_EXPAND_BUCKETS). */
+#define HASH_BER(key,keylen,num_bkts,hashv,bkt)                                  \
+do {                                                                             \
+  unsigned _hb_keylen=(unsigned)keylen;                                          \
+  const unsigned char *_hb_key=(const unsigned char*)(key);                      \
+  (hashv) = 0;                                                                   \
+  while (_hb_keylen-- != 0U) {                                                   \
+      (hashv) = (((hashv) << 5) + (hashv)) + *_hb_key++;                         \
+  }                                                                              \
+  bkt = (hashv) & (num_bkts-1U);                                                 \
+} while (0)
+
+
+/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at
+ * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */
+/* Shift-add-xor hash: mixes one key byte per iteration. */
+#define HASH_SAX(key,keylen,num_bkts,hashv,bkt)                                  \
+do {                                                                             \
+  unsigned _sx_i;                                                                \
+  const unsigned char *_hs_key=(const unsigned char*)(key);                      \
+  hashv = 0;                                                                     \
+  for(_sx_i=0; _sx_i < keylen; _sx_i++) {                                        \
+      hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i];                     \
+  }                                                                              \
+  bkt = hashv & (num_bkts-1U);                                                   \
+} while (0)
+/* FNV-1a variation: xor each byte in, then multiply.  2166136261 and
+ * 16777619 are the standard 32-bit FNV offset basis and prime. */
+#define HASH_FNV(key,keylen,num_bkts,hashv,bkt)                                  \
+do {                                                                             \
+  unsigned _fn_i;                                                                \
+  const unsigned char *_hf_key=(const unsigned char*)(key);                      \
+  hashv = 2166136261U;                                                           \
+  for(_fn_i=0; _fn_i < keylen; _fn_i++) {                                        \
+      hashv = hashv ^ _hf_key[_fn_i];                                            \
+      hashv = hashv * 16777619U;                                                 \
+  }                                                                              \
+  bkt = hashv & (num_bkts-1U);                                                   \
+} while(0)
+
+/* One-at-a-time style hash: mixes one byte per iteration, then applies a
+ * final three-step avalanche before selecting the bucket. */
+#define HASH_OAT(key,keylen,num_bkts,hashv,bkt)                                  \
+do {                                                                             \
+  unsigned _ho_i;                                                                \
+  const unsigned char *_ho_key=(const unsigned char*)(key);                      \
+  hashv = 0;                                                                     \
+  for(_ho_i=0; _ho_i < keylen; _ho_i++) {                                        \
+      hashv += _ho_key[_ho_i];                                                   \
+      hashv += (hashv << 10);                                                    \
+      hashv ^= (hashv >> 6);                                                     \
+  }                                                                              \
+  hashv += (hashv << 3);                                                         \
+  hashv ^= (hashv >> 11);                                                        \
+  hashv += (hashv << 15);                                                        \
+  bkt = hashv & (num_bkts-1U);                                                   \
+} while(0)
+
+/* Core mixing round of Bob Jenkins' hash: scrambles the three 32-bit
+ * accumulators a, b, c together through nine subtract/xor/shift steps. */
+#define HASH_JEN_MIX(a,b,c)                                                      \
+do {                                                                             \
+  a -= b; a -= c; a ^= ( c >> 13 );                                              \
+  b -= c; b -= a; b ^= ( a << 8 );                                               \
+  c -= a; c -= b; c ^= ( b >> 13 );                                              \
+  a -= b; a -= c; a ^= ( c >> 12 );                                              \
+  b -= c; b -= a; b ^= ( a << 16 );                                              \
+  c -= a; c -= b; c ^= ( b >> 5 );                                               \
+  a -= b; a -= c; a ^= ( c >> 3 );                                               \
+  b -= c; b -= a; b ^= ( a << 10 );                                              \
+  c -= a; c -= b; c ^= ( b >> 15 );                                              \
+} while (0)
+
+/* Bob Jenkins' hash (the default HASH_FCN).  Consumes the key twelve bytes
+ * at a time, assembling little-endian 32-bit words into the _hj_i/_hj_j/hashv
+ * accumulators and mixing; the remaining 0-11 tail bytes are folded in by
+ * the fallthrough switch before a final mix. */
+#define HASH_JEN(key,keylen,num_bkts,hashv,bkt)                                  \
+do {                                                                             \
+  unsigned _hj_i,_hj_j,_hj_k;                                                    \
+  unsigned const char *_hj_key=(unsigned const char*)(key);                      \
+  hashv = 0xfeedbeefu;                                                           \
+  _hj_i = _hj_j = 0x9e3779b9u;                                                   \
+  _hj_k = (unsigned)(keylen);                                                    \
+  while (_hj_k >= 12U) {                                                         \
+    _hj_i +=    (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 )                      \
+        + ( (unsigned)_hj_key[2] << 16 )                                         \
+        + ( (unsigned)_hj_key[3] << 24 ) );                                      \
+    _hj_j +=    (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 )                      \
+        + ( (unsigned)_hj_key[6] << 16 )                                         \
+        + ( (unsigned)_hj_key[7] << 24 ) );                                      \
+    hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 )                         \
+        + ( (unsigned)_hj_key[10] << 16 )                                        \
+        + ( (unsigned)_hj_key[11] << 24 ) );                                     \
+                                                                                 \
+     HASH_JEN_MIX(_hj_i, _hj_j, hashv);                                          \
+                                                                                 \
+     _hj_key += 12;                                                              \
+     _hj_k -= 12U;                                                               \
+  }                                                                              \
+  hashv += (unsigned)(keylen);                                                   \
+  switch ( _hj_k ) {                                                             \
+     case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); /* FALLTHROUGH */        \
+     case 10: hashv += ( (unsigned)_hj_key[9] << 16 );  /* FALLTHROUGH */        \
+     case 9:  hashv += ( (unsigned)_hj_key[8] << 8 );   /* FALLTHROUGH */        \
+     case 8:  _hj_j += ( (unsigned)_hj_key[7] << 24 );  /* FALLTHROUGH */        \
+     case 7:  _hj_j += ( (unsigned)_hj_key[6] << 16 );  /* FALLTHROUGH */        \
+     case 6:  _hj_j += ( (unsigned)_hj_key[5] << 8 );   /* FALLTHROUGH */        \
+     case 5:  _hj_j += _hj_key[4];                      /* FALLTHROUGH */        \
+     case 4:  _hj_i += ( (unsigned)_hj_key[3] << 24 );  /* FALLTHROUGH */        \
+     case 3:  _hj_i += ( (unsigned)_hj_key[2] << 16 );  /* FALLTHROUGH */        \
+     case 2:  _hj_i += ( (unsigned)_hj_key[1] << 8 );   /* FALLTHROUGH */        \
+     case 1:  _hj_i += _hj_key[0];                                               \
+  }                                                                              \
+  HASH_JEN_MIX(_hj_i, _hj_j, hashv);                                             \
+  bkt = hashv & (num_bkts-1U);                                                   \
+} while(0)
+
+/* The Paul Hsieh hash function */
+#undef get16bits
+/* On compilers/targets assumed to tolerate unaligned 16-bit loads, read the
+ * halfword directly; otherwise assemble it from two bytes, low byte first
+ * (little-endian interpretation). */
+#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__)             \
+  || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
+#define get16bits(d) (*((const uint16_t *) (d)))
+#endif
+
+#if !defined (get16bits)
+#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)             \
+                       +(uint32_t)(((const uint8_t *)(d))[0]) )
+#endif
+/* Paul Hsieh's SuperFastHash: the main loop consumes 32 bits (two 16-bit
+ * halves via get16bits) per iteration, the switch folds in the 1-3 tail
+ * bytes, and a final avalanche sequence is applied before the bucket mask. */
+#define HASH_SFH(key,keylen,num_bkts,hashv,bkt)                                  \
+do {                                                                             \
+  unsigned const char *_sfh_key=(unsigned const char*)(key);                     \
+  uint32_t _sfh_tmp, _sfh_len = (uint32_t)keylen;                                \
+                                                                                 \
+  unsigned _sfh_rem = _sfh_len & 3U;                                             \
+  _sfh_len >>= 2;                                                                \
+  hashv = 0xcafebabeu;                                                           \
+                                                                                 \
+  /* Main loop */                                                                \
+  for (;_sfh_len > 0U; _sfh_len--) {                                             \
+    hashv    += get16bits (_sfh_key);                                            \
+    _sfh_tmp  = ((uint32_t)(get16bits (_sfh_key+2)) << 11) ^ hashv;              \
+    hashv     = (hashv << 16) ^ _sfh_tmp;                                        \
+    _sfh_key += 2U*sizeof (uint16_t);                                            \
+    hashv    += hashv >> 11;                                                     \
+  }                                                                              \
+                                                                                 \
+  /* Handle end cases */                                                         \
+  switch (_sfh_rem) {                                                            \
+    case 3: hashv += get16bits (_sfh_key);                                       \
+            hashv ^= hashv << 16;                                                \
+            hashv ^= (uint32_t)(_sfh_key[sizeof (uint16_t)]) << 18;              \
+            hashv += hashv >> 11;                                                \
+            break;                                                               \
+    case 2: hashv += get16bits (_sfh_key);                                       \
+            hashv ^= hashv << 11;                                                \
+            hashv += hashv >> 17;                                                \
+            break;                                                               \
+    case 1: hashv += *_sfh_key;                                                  \
+            hashv ^= hashv << 10;                                                \
+            hashv += hashv >> 1;                                                 \
+  }                                                                              \
+                                                                                 \
+    /* Force "avalanching" of final 127 bits */                                  \
+    hashv ^= hashv << 3;                                                         \
+    hashv += hashv >> 5;                                                         \
+    hashv ^= hashv << 4;                                                         \
+    hashv += hashv >> 17;                                                        \
+    hashv ^= hashv << 25;                                                        \
+    hashv += hashv >> 6;                                                         \
+    bkt = hashv & (num_bkts-1U);                                                 \
+} while(0)
+
+#ifdef HASH_USING_NO_STRICT_ALIASING
+/* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads.
+ * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error.
+ * MurmurHash uses the faster approach only on CPU's where we know it's safe.
+ *
+ * Note the preprocessor built-in defines can be emitted using:
+ *
+ *   gcc -m64 -dM -E - < /dev/null                  (on gcc)
+ *   cc -## a.c (where a.c is a simple test file)   (Sun Studio)
+ */
+#if (defined(__i386__) || defined(__x86_64__)  || defined(_M_IX86))
+#define MUR_GETBLOCK(p,i) p[i]
+#else /* non intel */
+/* On alignment-sensitive CPUs, classify the pointer's offset within a
+ * 32-bit word and reconstruct each block from two aligned word reads. */
+#define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 3UL) == 0UL)
+#define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 3UL) == 1UL)
+#define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 3UL) == 2UL)
+#define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 3UL) == 3UL)
+#define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL))
+#if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__))
+#define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24))
+#define MUR_TWO_TWO(p)   ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16))
+#define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >>  8))
+#else /* assume little endian non-intel */
+#define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24))
+#define MUR_TWO_TWO(p)   ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16))
+#define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) <<  8))
+#endif
+#define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) :           \
+                            (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \
+                             (MUR_PLUS2_ALIGNED(p) ? MUR_TWO_TWO(p) :  \
+                                                      MUR_ONE_THREE(p))))
+#endif
+#define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
+/* Final avalanche (MurmurHash3-style fmix32). */
+#define MUR_FMIX(_h) \
+do {                 \
+  _h ^= _h >> 16;    \
+  _h *= 0x85ebca6bu; \
+  _h ^= _h >> 13;    \
+  _h *= 0xc2b2ae35u; \
+  _h ^= _h >> 16;    \
+} while(0)
+
+/* MurmurHash3-style 32-bit core: 4-byte blocks are mixed with the c1/c2
+ * constants and rotates, the 1-3 tail bytes are folded in by the
+ * fallthrough switch, and MUR_FMIX applies the final avalanche. */
+#define HASH_MUR(key,keylen,num_bkts,hashv,bkt)                        \
+do {                                                                   \
+  const uint8_t *_mur_data = (const uint8_t*)(key);                    \
+  const int _mur_nblocks = (int)(keylen) / 4;                          \
+  uint32_t _mur_h1 = 0xf88D5353u;                                      \
+  uint32_t _mur_c1 = 0xcc9e2d51u;                                      \
+  uint32_t _mur_c2 = 0x1b873593u;                                      \
+  uint32_t _mur_k1 = 0;                                                \
+  const uint8_t *_mur_tail;                                            \
+  const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+(_mur_nblocks*4)); \
+  int _mur_i;                                                          \
+  for(_mur_i = -_mur_nblocks; _mur_i!=0; _mur_i++) {                   \
+    _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i);                        \
+    _mur_k1 *= _mur_c1;                                                \
+    _mur_k1 = MUR_ROTL32(_mur_k1,15);                                  \
+    _mur_k1 *= _mur_c2;                                                \
+                                                                       \
+    _mur_h1 ^= _mur_k1;                                                \
+    _mur_h1 = MUR_ROTL32(_mur_h1,13);                                  \
+    _mur_h1 = (_mur_h1*5U) + 0xe6546b64u;                              \
+  }                                                                    \
+  _mur_tail = (const uint8_t*)(_mur_data + (_mur_nblocks*4));          \
+  _mur_k1=0;                                                           \
+  switch((keylen) & 3U) {                                              \
+    case 3: _mur_k1 ^= (uint32_t)_mur_tail[2] << 16; /* FALLTHROUGH */ \
+    case 2: _mur_k1 ^= (uint32_t)_mur_tail[1] << 8;  /* FALLTHROUGH */ \
+    case 1: _mur_k1 ^= (uint32_t)_mur_tail[0];                         \
+    _mur_k1 *= _mur_c1;                                                \
+    _mur_k1 = MUR_ROTL32(_mur_k1,15);                                  \
+    _mur_k1 *= _mur_c2;                                                \
+    _mur_h1 ^= _mur_k1;                                                \
+  }                                                                    \
+  _mur_h1 ^= (uint32_t)(keylen);                                       \
+  MUR_FMIX(_mur_h1);                                                   \
+  hashv = _mur_h1;                                                     \
+  bkt = hashv & (num_bkts-1U);                                         \
+} while(0)
+#endif  /* HASH_USING_NO_STRICT_ALIASING */
+
+/* Key comparison: byte-wise memcmp over len bytes; evaluates to 0 when the
+ * keys are equal.  len is cast to unsigned long for memcmp's size argument. */
+#define HASH_KEYCMP(a,b,len) memcmp(a,b,(unsigned long)(len))
+
+/* Iterate over the items in a known bucket's chain to find the desired item.
+ * The cached keylen is compared first (cheap) and the key bytes only on a
+ * length match; out is left NULL when no item in the chain matches. */
+#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out)                       \
+do {                                                                             \
+ if (head.hh_head != NULL) { DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,head.hh_head)); } \
+ else { out=NULL; }                                                              \
+ while (out != NULL) {                                                           \
+    if ((out)->hh.keylen == (keylen_in)) {                                       \
+        if ((HASH_KEYCMP((out)->hh.key,keyptr,keylen_in)) == 0) { break; }         \
+    }                                                                            \
+    if ((out)->hh.hh_next != NULL) { DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,(out)->hh.hh_next)); } \
+    else { out = NULL; }                                                         \
+ }                                                                               \
+} while(0)
+
+/* Add an item to a bucket: push the handle onto the front of the chain and
+ * bump the bucket count.  If the chain length reaches
+ * (expand_mult+1)*HASH_BKT_CAPACITY_THRESH and expansion has not been
+ * disabled (tbl->noexpand), the bucket array is doubled. */
+#define HASH_ADD_TO_BKT(head,addhh)                                              \
+do {                                                                             \
+ head.count++;                                                                   \
+ (addhh)->hh_next = head.hh_head;                                                \
+ (addhh)->hh_prev = NULL;                                                        \
+ if (head.hh_head != NULL) { (head).hh_head->hh_prev = (addhh); }                \
+ (head).hh_head=addhh;                                                           \
+ if ((head.count >= ((head.expand_mult+1U) * HASH_BKT_CAPACITY_THRESH))          \
+     && ((addhh)->tbl->noexpand != 1U)) {                                        \
+       HASH_EXPAND_BUCKETS((addhh)->tbl);                                        \
+ }                                                                               \
+} while(0)
+
+/* Remove an item from a given bucket: unlink hh_del from the doubly-linked
+ * chain and decrement the bucket count.
+ * NOTE(review): unlike the neighboring helpers this macro is NOT wrapped in
+ * do { } while (0), so it expands to multiple statements; it is only safe
+ * where a statement sequence is acceptable (never as the lone body of an
+ * unbraced if/else). */
+#define HASH_DEL_IN_BKT(hh,head,hh_del)                                          \
+    (head).count--;                                                              \
+    if ((head).hh_head == hh_del) {                                              \
+      (head).hh_head = hh_del->hh_next;                                          \
+    }                                                                            \
+    if (hh_del->hh_prev) {                                                       \
+        hh_del->hh_prev->hh_next = hh_del->hh_next;                              \
+    }                                                                            \
+    if (hh_del->hh_next) {                                                       \
+        hh_del->hh_next->hh_prev = hh_del->hh_prev;                              \
+    }
+
+/* Bucket expansion has the effect of doubling the number of buckets
+ * and redistributing the items into the new buckets. Ideally the
+ * items will distribute more or less evenly into the new buckets
+ * (the extent to which this is true is a measure of the quality of
+ * the hash function as it applies to the key domain).
+ *
+ * With the items distributed into more buckets, the chain length
+ * (item count) in each bucket is reduced. Thus by expanding buckets
+ * the hash keeps a bound on the chain length. This bounded chain
+ * length is the essence of how a hash provides constant time lookup.
+ *
+ * The calculation of tbl->ideal_chain_maxlen below deserves some
+ * explanation. First, keep in mind that we're calculating the ideal
+ * maximum chain length based on the *new* (doubled) bucket count.
+ * In fractions this is just n/b (n=number of items,b=new num buckets).
+ * Since the ideal chain length is an integer, we want to calculate
+ * ceil(n/b). We don't depend on floating point arithmetic in this
+ * hash, so to calculate ceil(n/b) with integers we could write
+ *
+ *      ceil(n/b) = (n/b) + ((n%b)?1:0)
+ *
+ * and in fact a previous version of this hash did just that.
+ * But now we have improved things a bit by recognizing that b is
+ * always a power of two. We keep its base 2 log handy (call it lb),
+ * so now we can write this with a bit shift and logical AND:
+ *
+ *      ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
+ *
+ * Allocation failure for the new bucket array invokes uthash_fatal.
+ * After redistribution, if more than half the items sit in chains longer
+ * than ideal_chain_maxlen the expansion is counted as ineffective; two
+ * consecutive ineffective expansions set tbl->noexpand, disabling any
+ * further expansion (uthash_noexpand_fyi is notified).
+ */
+#define HASH_EXPAND_BUCKETS(tbl)                                                 \
+do {                                                                             \
+    unsigned _he_bkt;                                                            \
+    unsigned _he_bkt_i;                                                          \
+    struct UT_hash_handle *_he_thh, *_he_hh_nxt;                                 \
+    UT_hash_bucket *_he_new_buckets, *_he_newbkt;                                \
+    _he_new_buckets = (UT_hash_bucket*)uthash_malloc(                            \
+             2UL * tbl->num_buckets * sizeof(struct UT_hash_bucket));            \
+    if (!_he_new_buckets) { uthash_fatal( "out of memory"); }                    \
+    memset(_he_new_buckets, 0,                                                   \
+            2UL * tbl->num_buckets * sizeof(struct UT_hash_bucket));             \
+    tbl->ideal_chain_maxlen =                                                    \
+       (tbl->num_items >> (tbl->log2_num_buckets+1U)) +                          \
+       (((tbl->num_items & ((tbl->num_buckets*2U)-1U)) != 0U) ? 1U : 0U);        \
+    tbl->nonideal_items = 0;                                                     \
+    for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++)                \
+    {                                                                            \
+        _he_thh = tbl->buckets[ _he_bkt_i ].hh_head;                             \
+        while (_he_thh != NULL) {                                                \
+           _he_hh_nxt = _he_thh->hh_next;                                        \
+           HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2U, _he_bkt);           \
+           _he_newbkt = &(_he_new_buckets[ _he_bkt ]);                           \
+           if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) {                \
+             tbl->nonideal_items++;                                              \
+             _he_newbkt->expand_mult = _he_newbkt->count /                       \
+                                        tbl->ideal_chain_maxlen;                 \
+           }                                                                     \
+           _he_thh->hh_prev = NULL;                                              \
+           _he_thh->hh_next = _he_newbkt->hh_head;                               \
+           if (_he_newbkt->hh_head != NULL) { _he_newbkt->hh_head->hh_prev =     \
+                _he_thh; }                                                       \
+           _he_newbkt->hh_head = _he_thh;                                        \
+           _he_thh = _he_hh_nxt;                                                 \
+        }                                                                        \
+    }                                                                            \
+    uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
+    tbl->num_buckets *= 2U;                                                      \
+    tbl->log2_num_buckets++;                                                     \
+    tbl->buckets = _he_new_buckets;                                              \
+    tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ?         \
+        (tbl->ineff_expands+1U) : 0U;                                            \
+    if (tbl->ineff_expands > 1U) {                                               \
+        tbl->noexpand=1;                                                         \
+        uthash_noexpand_fyi(tbl);                                                \
+    }                                                                            \
+    uthash_expand_fyi(tbl);                                                      \
+} while(0)
+
+
+/* HASH_SORT/HASH_SRT: an adaptation of Simon Tatham's O(n log n) list
+ * mergesort over the application-order chain.  HASH_SORT assumes the hash
+ * handle field is named hh; HASH_SRT (below) takes the handle name
+ * explicitly as its first argument. */
+#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn)
+#define HASH_SRT(hh,head,cmpfcn)                                                 \
+do {                                                                             \
+  unsigned _hs_i;                                                                \
+  unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize;               \
+  struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail;            \
+  if (head != NULL) {                                                            \
+      _hs_insize = 1;                                                            \
+      _hs_looping = 1;                                                           \
+      _hs_list = &((head)->hh);                                                  \
+      while (_hs_looping != 0U) {                                                \
+          _hs_p = _hs_list;                                                      \
+          _hs_list = NULL;                                                       \
+          _hs_tail = NULL;                                                       \
+          _hs_nmerges = 0;                                                       \
+          while (_hs_p != NULL) {                                                \
+              _hs_nmerges++;                                                     \
+              _hs_q = _hs_p;                                                     \
+              _hs_psize = 0;                                                     \
+              for ( _hs_i = 0; _hs_i  < _hs_insize; _hs_i++ ) {                  \
+                  _hs_psize++;                                                   \
+                  _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ?              \
+                          ((void*)((char*)(_hs_q->next) +                        \
+                          (head)->hh.tbl->hho)) : NULL);                         \
+                  if (! (_hs_q) ) { break; }                                     \
+              }                                                                  \
+              _hs_qsize = _hs_insize;                                            \
+              while ((_hs_psize > 0U) || ((_hs_qsize > 0U) && (_hs_q != NULL))) {\
+                  if (_hs_psize == 0U) {                                         \
+                      _hs_e = _hs_q;                                             \
+                      _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ?          \
+                              ((void*)((char*)(_hs_q->next) +                    \
+                              (head)->hh.tbl->hho)) : NULL);                     \
+                      _hs_qsize--;                                               \
+                  } else if ( (_hs_qsize == 0U) || (_hs_q == NULL) ) {           \
+                      _hs_e = _hs_p;                                             \
+                      if (_hs_p != NULL){                                        \
+                        _hs_p = (UT_hash_handle*)((_hs_p->next != NULL) ?        \
+                                ((void*)((char*)(_hs_p->next) +                  \
+                                (head)->hh.tbl->hho)) : NULL);                   \
+                       }                                                         \
+                      _hs_psize--;                                               \
+                  } else if ((                                                   \
+                      cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \
+                             DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \
+                             ) <= 0) {                                           \
+                      _hs_e = _hs_p;                                             \
+                      if (_hs_p != NULL){                                        \
+                        _hs_p = (UT_hash_handle*)((_hs_p->next != NULL) ?        \
+                               ((void*)((char*)(_hs_p->next) +                   \
+                               (head)->hh.tbl->hho)) : NULL);                    \
+                       }                                                         \
+                      _hs_psize--;                                               \
+                  } else {                                                       \
+                      _hs_e = _hs_q;                                             \
+                      _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ?          \
+                              ((void*)((char*)(_hs_q->next) +                    \
+                              (head)->hh.tbl->hho)) : NULL);                     \
+                      _hs_qsize--;                                               \
+                  }                                                              \
+                  if ( _hs_tail != NULL ) {                                      \
+                      _hs_tail->next = ((_hs_e != NULL) ?                        \
+                            ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL);          \
+                  } else {                                                       \
+                      _hs_list = _hs_e;                                          \
+                  }                                                              \
+                  if (_hs_e != NULL) {                                           \
+                  _hs_e->prev = ((_hs_tail != NULL) ?                            \
+                     ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL);              \
+                  }                                                              \
+                  _hs_tail = _hs_e;                                              \
+              }                                                                  \
+              _hs_p = _hs_q;                                                     \
+          }                                                                      \
+          if (_hs_tail != NULL){                                                 \
+            _hs_tail->next = NULL;                                               \
+          }                                                                      \
+          if ( _hs_nmerges <= 1U ) {                                             \
+              _hs_looping=0;                                                     \
+              (head)->hh.tbl->tail = _hs_tail;                                   \
+              DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list));      \
+          }                                                                      \
+          _hs_insize *= 2U;                                                      \
+      }                                                                          \
+      HASH_FSCK(hh,head);                                                        \
+ }                                                                               \
+} while (0)
+
+/* This function selects items from one hash into another hash.
+ * The end result is that the selected items have dual presence
+ * in both hashes. There is no copy of the items made; rather
+ * they are added into the new hash through a secondary
+ * hash handle that must be present in the structure. */
+#define HASH_SELECT(hh_dst, dst, hh_src, src, cond)                              \
+do {                                                                             \
+  unsigned _src_bkt, _dst_bkt;                                                   \
+  void *_last_elt=NULL, *_elt;                                                   \
+  UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL;                         \
+  ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst));                 \
+  if (src != NULL) {                                                             \
+    for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) {     \
+      for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head;                \
+          _src_hh != NULL;                                                       \
+          _src_hh = _src_hh->hh_next) {                                          \
+          _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh);                       \
+          if (cond(_elt)) {                                                      \
+            _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho);               \
+            _dst_hh->key = _src_hh->key;                                         \
+            _dst_hh->keylen = _src_hh->keylen;                                   \
+            _dst_hh->hashv = _src_hh->hashv;                                     \
+            _dst_hh->prev = _last_elt;                                           \
+            _dst_hh->next = NULL;                                                \
+            if (_last_elt_hh != NULL) { _last_elt_hh->next = _elt; }             \
+            if (dst == NULL) {                                                   \
+              DECLTYPE_ASSIGN(dst,_elt);                                         \
+              HASH_MAKE_TABLE(hh_dst,dst);                                       \
+            } else {                                                             \
+              _dst_hh->tbl = (dst)->hh_dst.tbl;                                  \
+            }                                                                    \
+            HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt);    \
+            HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh);            \
+            (dst)->hh_dst.tbl->num_items++;                                      \
+            _last_elt = _elt;                                                    \
+            _last_elt_hh = _dst_hh;                                              \
+          }                                                                      \
+      }                                                                          \
+    }                                                                            \
+  }                                                                              \
+  HASH_FSCK(hh_dst,dst);                                                         \
+} while (0)
+
+#define HASH_CLEAR(hh,head)                                                      \
+do {                                                                             \
+  if (head != NULL) {                                                            \
+    uthash_free((head)->hh.tbl->buckets,                                         \
+                (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket));      \
+    HASH_BLOOM_FREE((head)->hh.tbl);                                             \
+    uthash_free((head)->hh.tbl, sizeof(UT_hash_table));                          \
+    (head)=NULL;                                                                 \
+  }                                                                              \
+} while(0)
+
+#define HASH_OVERHEAD(hh,head)                                                   \
+ ((head != NULL) ? (                                                             \
+ (size_t)(((head)->hh.tbl->num_items   * sizeof(UT_hash_handle))   +             \
+          ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket))   +             \
+           sizeof(UT_hash_table)                                   +             \
+           (HASH_BLOOM_BYTELEN))) : 0U)
+
+#ifdef NO_DECLTYPE
+#define HASH_ITER(hh,head,el,tmp)                                                \
+for(((el)=(head)), ((*(char**)(&(tmp)))=(char*)((head!=NULL)?(head)->hh.next:NULL)); \
+  (el) != NULL; ((el)=(tmp)), ((*(char**)(&(tmp)))=(char*)((tmp!=NULL)?(tmp)->hh.next:NULL)))
+#else
+#define HASH_ITER(hh,head,el,tmp)                                                \
+for(((el)=(head)), ((tmp)=DECLTYPE(el)((head!=NULL)?(head)->hh.next:NULL));      \
+  (el) != NULL; ((el)=(tmp)), ((tmp)=DECLTYPE(el)((tmp!=NULL)?(tmp)->hh.next:NULL)))
+#endif
+
+/* obtain a count of items in the hash */
+#define HASH_COUNT(head) HASH_CNT(hh,head)
+#define HASH_CNT(hh,head) ((head != NULL)?((head)->hh.tbl->num_items):0U)
+
+typedef struct UT_hash_bucket {
+   struct UT_hash_handle *hh_head;
+   unsigned count;
+
+   /* expand_mult is normally set to 0. In this situation, the max chain length
+    * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If
+    * the bucket's chain exceeds this length, bucket expansion is triggered).
+    * However, setting expand_mult to a non-zero value delays bucket expansion
+    * (that would be triggered by additions to this particular bucket)
+    * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH.
+    * (The multiplier is simply expand_mult+1). The whole idea of this
+    * multiplier is to reduce bucket expansions, since they are expensive, in
+    * situations where we know that a particular bucket tends to be overused.
+    * It is better to let its chain length grow to a longer yet-still-bounded
+    * value, than to do an O(n) bucket expansion too often.
+    */
+   unsigned expand_mult;
+
+} UT_hash_bucket;
+
+/* random signature used only to find hash tables in external analysis */
+#define HASH_SIGNATURE 0xa0111fe1u
+#define HASH_BLOOM_SIGNATURE 0xb12220f2u
+
+typedef struct UT_hash_table {
+   UT_hash_bucket *buckets;
+   unsigned num_buckets, log2_num_buckets;
+   unsigned num_items;
+   struct UT_hash_handle *tail; /* tail hh in app order, for fast append    */
+   ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element */
+
+   /* in an ideal situation (all buckets used equally), no bucket would have
+    * more than ceil(#items/#buckets) items. that's the ideal chain length. */
+   unsigned ideal_chain_maxlen;
+
+   /* nonideal_items is the number of items in the hash whose chain position
+    * exceeds the ideal chain maxlen. these items pay the penalty for an uneven
+    * hash distribution; reaching them in a chain traversal takes >ideal steps */
+   unsigned nonideal_items;
+
+   /* ineffective expands occur when a bucket doubling was performed, but
+    * afterward, more than half the items in the hash had nonideal chain
+    * positions. If this happens on two consecutive expansions we inhibit any
+    * further expansion, as it's not helping; this happens when the hash
+    * function isn't a good fit for the key domain. When expansion is inhibited
+    * the hash will still work, albeit no longer in constant time. */
+   unsigned ineff_expands, noexpand;
+
+   uint32_t signature; /* used only to find hash tables in external analysis */
+#ifdef HASH_BLOOM
+   uint32_t bloom_sig; /* used only to test bloom exists in external analysis */
+   uint8_t *bloom_bv;
+   uint8_t bloom_nbits;
+#endif
+
+} UT_hash_table;
+
+typedef struct UT_hash_handle {
+   struct UT_hash_table *tbl;
+   void *prev;                       /* prev element in app order      */
+   void *next;                       /* next element in app order      */
+   struct UT_hash_handle *hh_prev;   /* previous hh in bucket order    */
+   struct UT_hash_handle *hh_next;   /* next hh in bucket order        */
+   void *key;                        /* ptr to enclosing struct's key  */
+   unsigned keylen;                  /* enclosing struct's key len     */
+   unsigned hashv;                   /* result of hash-fcn(key)        */
+} UT_hash_handle;
+
+#endif /* UTHASH_H */
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/netlocscotch.h b/opal/mca/hwloc/hwloc2x/hwloc/include/netlocscotch.h
new file mode 100644
index 0000000000..1381a223cc
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/netlocscotch.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright © 2016 Inria.  All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ * See COPYING in top-level directory.
+ *
+ * $HEADER$
+ */
+
+#ifndef _NETLOCSCOTCH_H_
+#define _NETLOCSCOTCH_H_
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE // for asprintf
+#endif
+
+#include <hwloc/autogen/config.h>
+#include <netloc.h>
+
+/* Includes for Scotch */
+#include <stdio.h>
+#include <scotch.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * A structure to represent process mapping
+ */
+typedef struct {
+    int rank; /**< Rank of the process */
+    char *nodename; /**< Name of the node */
+    int core; /**< Physical slot number of the core */
+} netlocscotch_core_t;
+
+/**
+ * \brief Build the Scotch architecture representing the whole machine
+ *
+ * \param arch Pointer to the Scotch arch that will be built.
+ *
+ * \returns 0 on success
+ * \returns NETLOC_ERROR on error
+ */
+int netlocscotch_build_global_arch(SCOTCH_Arch *arch);
+
+/**
+ * \brief Build the Scotch architecture representing the available resources
+ *
+ * This function reads the file about available resources, found by reading the
+ * environment variable NETLOC_CURRENTSLOTS. The file must be generated before
+ * calling the program running this function with: mpirun -np <nprocs>
+ * netloc_mpi_find_hosts <outputfile>
+ * The complete architecture is needed since the sub architecture uses data from it.
+ *
+ * \param arch Pointer to the Scotch arch that will be built.
+ * \param subarch Pointer to the Scotch sub arch that will be built.
+ *
+ * \returns 0 on success
+ * \returns NETLOC_ERROR on error
+ */
+int netlocscotch_build_current_arch(SCOTCH_Arch *arch, SCOTCH_Arch *subarch);
+
+/**
+ * \brief Give a good mapping with Scotch from a file containing a
+ * communication matrix
+ *
+ * This function reads the file about available resources, found by reading the
+ * environment variable NETLOC_CURRENTSLOTS. The file must be generated before
+ * calling the program running this function with: mpirun -np <nprocs>
+ * netloc_mpi_find_hosts <outputfile>
+ *
+ * An application graph is built from the communication matrix and is mapped to
+ * the architecture graph built from the resource file.
+ *
+ * \param[in] filename Filename of the matrix file, where the matrix is stored line
+ * by line with spaces between values.
+ *
+ * \param[out] pnum_processes Pointer to the integer where the number of processes
+ * will be written.
+ *
+ * \param[out] pcores Array of pnum_processes elements.
+ *
+ * \returns 0 on success
+ * \returns NETLOC_ERROR on error
+ */
+int netlocscotch_get_mapping_from_comm_file(char *filename, int *pnum_processes,
+        netlocscotch_core_t **pcores);
+
+/**
+ * \brief Give a good mapping with Scotch from a communication matrix
+ *
+ * This function reads the file about available resources, found by reading the
+ * environment variable NETLOC_CURRENTSLOTS. The file must be generated before
+ * calling the program running this function with: mpirun -np <nprocs>
+ * netloc_mpi_find_hosts <outputfile>
+ *
+ * An application graph is built from the communication matrix and is mapped to
+ * the architecture graph built from the resource file.
+ *
+ * \param[in] comm pointer to the lines of the matrix of communications.
+ *
+ * \param[in] num_vertices number of processes, that corresponds to the size of
+ * the matrix.
+ *
+ * \param[out] pcores Array of num_vertices elements.
+ *
+ * \returns 0 on success
+ * \returns NETLOC_ERROR on error
+ */
+int netlocscotch_get_mapping_from_comm_matrix(double **comm, int num_vertices,
+        netlocscotch_core_t **pcores);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+/** @} */
+
+#endif // _NETLOCSCOTCH_H_
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/private/autogen/config.h.in b/opal/mca/hwloc/hwloc2x/hwloc/include/private/autogen/config.h.in
new file mode 100644
index 0000000000..f65a8be473
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/private/autogen/config.h.in
@@ -0,0 +1,728 @@
+/* include/private/autogen/config.h.in.  Generated from configure.ac by autoheader.  */
+
+/* -*- c -*-
+ *
+ * Copyright © 2009, 2011, 2012 CNRS, inria., Université Bordeaux  All rights reserved.
+ * Copyright © 2009-2014 Cisco Systems, Inc.  All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ *
+ * This file is automatically generated by configure.  Edits will be lost
+ * the next time you run configure!
+ */
+
+#ifndef HWLOC_CONFIGURE_H
+#define HWLOC_CONFIGURE_H
+
+
+/* Define to 1 if the system has the type `CACHE_DESCRIPTOR'. */
+#undef HAVE_CACHE_DESCRIPTOR
+
+/* Define to 1 if the system has the type `CACHE_RELATIONSHIP'. */
+#undef HAVE_CACHE_RELATIONSHIP
+
+/* Define to 1 if you have the `clock_gettime' function. */
+#undef HAVE_CLOCK_GETTIME
+
+/* Define to 1 if you have the `clz' function. */
+#undef HAVE_CLZ
+
+/* Define to 1 if you have the `clzl' function. */
+#undef HAVE_CLZL
+
+/* Define to 1 if you have the <CL/cl_ext.h> header file. */
+#undef HAVE_CL_CL_EXT_H
+
+/* Define to 1 if you have the `cpuset_setaffinity' function. */
+#undef HAVE_CPUSET_SETAFFINITY
+
+/* Define to 1 if you have the `cpuset_setid' function. */
+#undef HAVE_CPUSET_SETID
+
+/* Define to 1 if you have the <ctype.h> header file. */
+#undef HAVE_CTYPE_H
+
+/* Define to 1 if we have -lcuda */
+#undef HAVE_CUDA
+
+/* Define to 1 if you have the <cuda.h> header file. */
+#undef HAVE_CUDA_H
+
+/* Define to 1 if you have the <cuda_runtime_api.h> header file. */
+#undef HAVE_CUDA_RUNTIME_API_H
+
+/* Define to 1 if you have the declaration of `CL_DEVICE_TOPOLOGY_AMD', and to
+   0 if you don't. */
+#undef HAVE_DECL_CL_DEVICE_TOPOLOGY_AMD
+
+/* Define to 1 if you have the declaration of `CTL_HW', and to 0 if you don't.
+   */
+#undef HAVE_DECL_CTL_HW
+
+/* Define to 1 if you have the declaration of `fabsf', and to 0 if you don't.
+   */
+#undef HAVE_DECL_FABSF
+
+/* Define to 1 if you have the declaration of `getexecname', and to 0 if you
+   don't. */
+#undef HAVE_DECL_GETEXECNAME
+
+/* Define to 1 if you have the declaration of `GetModuleFileName', and to 0 if
+   you don't. */
+#undef HAVE_DECL_GETMODULEFILENAME
+
+/* Define to 1 if you have the declaration of `getprogname', and to 0 if you
+   don't. */
+#undef HAVE_DECL_GETPROGNAME
+
+/* Define to 1 if you have the declaration of `HW_NCPU', and to 0 if you
+   don't. */
+#undef HAVE_DECL_HW_NCPU
+
+/* Define to 1 if you have the declaration of `lgrp_latency_cookie', and to 0
+   if you don't. */
+#undef HAVE_DECL_LGRP_LATENCY_COOKIE
+
+/* Define to 1 if you have the declaration of
+   `nvmlDeviceGetMaxPcieLinkGeneration', and to 0 if you don't. */
+#undef HAVE_DECL_NVMLDEVICEGETMAXPCIELINKGENERATION
+
+/* Define to 1 if you have the declaration of `pthread_getaffinity_np', and to
+   0 if you don't. */
+#undef HAVE_DECL_PTHREAD_GETAFFINITY_NP
+
+/* Define to 1 if you have the declaration of `pthread_setaffinity_np', and to
+   0 if you don't. */
+#undef HAVE_DECL_PTHREAD_SETAFFINITY_NP
+
+/* Embedded mode; just assume we do not have Valgrind support */
+#undef HAVE_DECL_RUNNING_ON_VALGRIND
+
+/* Define to 1 if you have the declaration of `snprintf', and to 0 if you
+   don't. */
+#undef HAVE_DECL_SNPRINTF
+
+/* Define to 1 if you have the declaration of `strcasecmp', and to 0 if you
+   don't. */
+#undef HAVE_DECL_STRCASECMP
+
+/* Define to 1 if you have the declaration of `strtoull', and to 0 if you
+   don't. */
+#undef HAVE_DECL_STRTOULL
+
+/* Define to 1 if you have the declaration of `_putenv', and to 0 if you
+   don't. */
+#undef HAVE_DECL__PUTENV
+
+/* Define to 1 if you have the declaration of `_SC_LARGE_PAGESIZE', and to 0
+   if you don't. */
+#undef HAVE_DECL__SC_LARGE_PAGESIZE
+
+/* Define to 1 if you have the declaration of `_SC_NPROCESSORS_CONF', and to 0
+   if you don't. */
+#undef HAVE_DECL__SC_NPROCESSORS_CONF
+
+/* Define to 1 if you have the declaration of `_SC_NPROCESSORS_ONLN', and to 0
+   if you don't. */
+#undef HAVE_DECL__SC_NPROCESSORS_ONLN
+
+/* Define to 1 if you have the declaration of `_SC_NPROC_CONF', and to 0 if
+   you don't. */
+#undef HAVE_DECL__SC_NPROC_CONF
+
+/* Define to 1 if you have the declaration of `_SC_NPROC_ONLN', and to 0 if
+   you don't. */
+#undef HAVE_DECL__SC_NPROC_ONLN
+
+/* Define to 1 if you have the declaration of `_SC_PAGESIZE', and to 0 if you
+   don't. */
+#undef HAVE_DECL__SC_PAGESIZE
+
+/* Define to 1 if you have the declaration of `_SC_PAGE_SIZE', and to 0 if you
+   don't. */
+#undef HAVE_DECL__SC_PAGE_SIZE
+
+/* Define to 1 if you have the declaration of `_strdup', and to 0 if you
+   don't. */
+#undef HAVE_DECL__STRDUP
+
+/* Define to 1 if you have the <dirent.h> header file. */
+#undef HAVE_DIRENT_H
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#undef HAVE_DLFCN_H
+
+/* Define to 1 if you have the `ffs' function. */
+#undef HAVE_FFS
+
+/* Define to 1 if you have the `ffsl' function. */
+#undef HAVE_FFSL
+
+/* Define to 1 if you have the `fls' function. */
+#undef HAVE_FLS
+
+/* Define to 1 if you have the `flsl' function. */
+#undef HAVE_FLSL
+
+/* Define to 1 if you have the `getpagesize' function. */
+#undef HAVE_GETPAGESIZE
+
+/* Define to 1 if the system has the type `GROUP_AFFINITY'. */
+#undef HAVE_GROUP_AFFINITY
+
+/* Define to 1 if the system has the type `GROUP_RELATIONSHIP'. */
+#undef HAVE_GROUP_RELATIONSHIP
+
+/* Define to 1 if you have the `host_info' function. */
+#undef HAVE_HOST_INFO
+
+/* Define to 1 if you have the <infiniband/verbs.h> header file. */
+#undef HAVE_INFINIBAND_VERBS_H
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#undef HAVE_INTTYPES_H
+
+/* Define to 1 if the system has the type `KAFFINITY'. */
+#undef HAVE_KAFFINITY
+
+/* Define to 1 if you have the <kstat.h> header file. */
+#undef HAVE_KSTAT_H
+
+/* Define to 1 if you have the <langinfo.h> header file. */
+#undef HAVE_LANGINFO_H
+
+/* Define to 1 if we have -lgdi32 */
+#undef HAVE_LIBGDI32
+
+/* Define to 1 if we have -libverbs */
+#undef HAVE_LIBIBVERBS
+
+/* Define to 1 if we have -lkstat */
+#undef HAVE_LIBKSTAT
+
+/* Define to 1 if we have -llgrp */
+#undef HAVE_LIBLGRP
+
+/* Define to 1 if you have the <libudev.h> header file. */
+#undef HAVE_LIBUDEV_H
+
+/* Define to 1 if you have the <locale.h> header file. */
+#undef HAVE_LOCALE_H
+
+/* Define to 1 if the system has the type `LOGICAL_PROCESSOR_RELATIONSHIP'. */
+#undef HAVE_LOGICAL_PROCESSOR_RELATIONSHIP
+
+/* Define to 1 if you have the <mach/mach_host.h> header file. */
+#undef HAVE_MACH_MACH_HOST_H
+
+/* Define to 1 if you have the <mach/mach_init.h> header file. */
+#undef HAVE_MACH_MACH_INIT_H
+
+/* Define to 1 if you have the <malloc.h> header file. */
+#undef HAVE_MALLOC_H
+
+/* Define to 1 if you have the `memalign' function. */
+#undef HAVE_MEMALIGN
+
+/* Define to 1 if you have the <memory.h> header file. */
+#undef HAVE_MEMORY_H
+
+/* Define to 1 if you have the <mpi.h> header file. */
+#undef HAVE_MPI_H
+
+/* Define to 1 if we have -lmyriexpress */
+#undef HAVE_MYRIEXPRESS
+
+/* Define to 1 if you have the <myriexpress.h> header file. */
+#undef HAVE_MYRIEXPRESS_H
+
+/* Define to 1 if you have the `nl_langinfo' function. */
+#undef HAVE_NL_LANGINFO
+
+/* Define to 1 if the system has the type `NUMA_NODE_RELATIONSHIP'. */
+#undef HAVE_NUMA_NODE_RELATIONSHIP
+
+/* Define to 1 if you have the <NVCtrl/NVCtrl.h> header file. */
+#undef HAVE_NVCTRL_NVCTRL_H
+
+/* Define to 1 if you have the <nvml.h> header file. */
+#undef HAVE_NVML_H
+
+/* Define to 1 if you have the `openat' function. */
+#undef HAVE_OPENAT
+
+/* Define to 1 if you have the <picl.h> header file. */
+#undef HAVE_PICL_H
+
+/* Define to 1 if you have the `posix_memalign' function. */
+#undef HAVE_POSIX_MEMALIGN
+
+/* Define to 1 if the system has the type `PROCESSOR_CACHE_TYPE'. */
+#undef HAVE_PROCESSOR_CACHE_TYPE
+
+/* Define to 1 if the system has the type `PROCESSOR_GROUP_INFO'. */
+#undef HAVE_PROCESSOR_GROUP_INFO
+
+/* Define to 1 if the system has the type `PROCESSOR_NUMBER'. */
+#undef HAVE_PROCESSOR_NUMBER
+
+/* Define to 1 if the system has the type `PROCESSOR_RELATIONSHIP'. */
+#undef HAVE_PROCESSOR_RELATIONSHIP
+
+/* Define to '1' if program_invocation_name is present and usable */
+#undef HAVE_PROGRAM_INVOCATION_NAME
+
+/* Define to 1 if the system has the type `PSAPI_WORKING_SET_EX_BLOCK'. */
+#undef HAVE_PSAPI_WORKING_SET_EX_BLOCK
+
+/* Define to 1 if the system has the type `PSAPI_WORKING_SET_EX_INFORMATION'.
+   */
+#undef HAVE_PSAPI_WORKING_SET_EX_INFORMATION
+
+/* Define to 1 if you have the <pthread_np.h> header file. */
+#undef HAVE_PTHREAD_NP_H
+
+/* Define to 1 if the system has the type `pthread_t'. */
+#undef HAVE_PTHREAD_T
+
+/* Define to 1 if you have the `putwc' function. */
+#undef HAVE_PUTWC
+
+/* Define to 1 if the system has the type `RelationProcessorPackage'. */
+#undef HAVE_RELATIONPROCESSORPACKAGE
+
+/* Define to 1 if you have the `setlocale' function. */
+#undef HAVE_SETLOCALE
+
+/* Define to 1 if the system has the type `ssize_t'. */
+#undef HAVE_SSIZE_T
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#undef HAVE_STDINT_H
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#undef HAVE_STDLIB_H
+
+/* Define to 1 if you have the `strftime' function. */
+#undef HAVE_STRFTIME
+
+/* Define to 1 if you have the <strings.h> header file. */
+#undef HAVE_STRINGS_H
+
+/* Define to 1 if you have the <string.h> header file. */
+#undef HAVE_STRING_H
+
+/* Define to 1 if you have the `strncasecmp' function. */
+#undef HAVE_STRNCASECMP
+
+/* Define to 1 if you have the `strtoull' function. */
+#undef HAVE_STRTOULL
+
+/* Define to '1' if sysctl is present and usable */
+#undef HAVE_SYSCTL
+
+/* Define to '1' if sysctlbyname is present and usable */
+#undef HAVE_SYSCTLBYNAME
+
+/* Define to 1 if the system has the type
+   `SYSTEM_LOGICAL_PROCESSOR_INFORMATION'. */
+#undef HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION
+
+/* Define to 1 if the system has the type
+   `SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX'. */
+#undef HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX
+
+/* Define to 1 if you have the <sys/cpuset.h> header file. */
+#undef HAVE_SYS_CPUSET_H
+
+/* Define to 1 if you have the <sys/lgrp_user.h> header file. */
+#undef HAVE_SYS_LGRP_USER_H
+
+/* Define to 1 if you have the <sys/mman.h> header file. */
+#undef HAVE_SYS_MMAN_H
+
+/* Define to 1 if you have the <sys/param.h> header file. */
+#undef HAVE_SYS_PARAM_H
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#undef HAVE_SYS_STAT_H
+
+/* Define to 1 if you have the <sys/sysctl.h> header file. */
+#undef HAVE_SYS_SYSCTL_H
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#undef HAVE_SYS_TYPES_H
+
+/* Define to 1 if you have the <sys/utsname.h> header file. */
+#undef HAVE_SYS_UTSNAME_H
+
+/* Define to 1 if you have the <time.h> header file. */
+#undef HAVE_TIME_H
+
+/* Define to 1 if you have the `uname' function. */
+#undef HAVE_UNAME
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#undef HAVE_UNISTD_H
+
+/* Define to 1 if you have the `uselocale' function. */
+#undef HAVE_USELOCALE
+
+/* Define to 1 if you have the <valgrind/valgrind.h> header file. */
+#undef HAVE_VALGRIND_VALGRIND_H
+
+/* Define to 1 if the system has the type `wchar_t'. */
+#undef HAVE_WCHAR_T
+
+/* Define to 1 if you have the <X11/keysym.h> header file. */
+#undef HAVE_X11_KEYSYM_H
+
+/* Define to 1 if you have the <X11/Xlib.h> header file. */
+#undef HAVE_X11_XLIB_H
+
+/* Define to 1 if you have the <X11/Xutil.h> header file. */
+#undef HAVE_X11_XUTIL_H
+
+/* Define to 1 if you have the <xlocale.h> header file. */
+#undef HAVE_XLOCALE_H
+
+/* Define to '1' if __progname is present and usable */
+#undef HAVE___PROGNAME
+
+/* Define to 1 on AIX */
+#undef HWLOC_AIX_SYS
+
+/* Define to 1 on BlueGene/Q */
+#undef HWLOC_BGQ_SYS
+
+/* Whether C compiler supports symbol visibility or not */
+#undef HWLOC_C_HAVE_VISIBILITY
+
+/* Define to 1 on Darwin */
+#undef HWLOC_DARWIN_SYS
+
+/* Whether we are in debugging mode or not */
+#undef HWLOC_DEBUG
+
+/* Define to 1 on *FREEBSD */
+#undef HWLOC_FREEBSD_SYS
+
+/* Whether your compiler has __attribute__ or not */
+#undef HWLOC_HAVE_ATTRIBUTE
+
+/* Whether your compiler has __attribute__ aligned or not */
+#undef HWLOC_HAVE_ATTRIBUTE_ALIGNED
+
+/* Whether your compiler has __attribute__ always_inline or not */
+#undef HWLOC_HAVE_ATTRIBUTE_ALWAYS_INLINE
+
+/* Whether your compiler has __attribute__ cold or not */
+#undef HWLOC_HAVE_ATTRIBUTE_COLD
+
+/* Whether your compiler has __attribute__ const or not */
+#undef HWLOC_HAVE_ATTRIBUTE_CONST
+
+/* Whether your compiler has __attribute__ deprecated or not */
+#undef HWLOC_HAVE_ATTRIBUTE_DEPRECATED
+
+/* Whether your compiler has __attribute__ format or not */
+#undef HWLOC_HAVE_ATTRIBUTE_FORMAT
+
+/* Whether your compiler has __attribute__ hot or not */
+#undef HWLOC_HAVE_ATTRIBUTE_HOT
+
+/* Whether your compiler has __attribute__ malloc or not */
+#undef HWLOC_HAVE_ATTRIBUTE_MALLOC
+
+/* Whether your compiler has __attribute__ may_alias or not */
+#undef HWLOC_HAVE_ATTRIBUTE_MAY_ALIAS
+
+/* Whether your compiler has __attribute__ nonnull or not */
+#undef HWLOC_HAVE_ATTRIBUTE_NONNULL
+
+/* Whether your compiler has __attribute__ noreturn or not */
+#undef HWLOC_HAVE_ATTRIBUTE_NORETURN
+
+/* Whether your compiler has __attribute__ no_instrument_function or not */
+#undef HWLOC_HAVE_ATTRIBUTE_NO_INSTRUMENT_FUNCTION
+
+/* Whether your compiler has __attribute__ packed or not */
+#undef HWLOC_HAVE_ATTRIBUTE_PACKED
+
+/* Whether your compiler has __attribute__ pure or not */
+#undef HWLOC_HAVE_ATTRIBUTE_PURE
+
+/* Whether your compiler has __attribute__ sentinel or not */
+#undef HWLOC_HAVE_ATTRIBUTE_SENTINEL
+
+/* Whether your compiler has __attribute__ unused or not */
+#undef HWLOC_HAVE_ATTRIBUTE_UNUSED
+
+/* Whether your compiler has __attribute__ warn unused result or not */
+#undef HWLOC_HAVE_ATTRIBUTE_WARN_UNUSED_RESULT
+
+/* Whether your compiler has __attribute__ weak alias or not */
+#undef HWLOC_HAVE_ATTRIBUTE_WEAK_ALIAS
+
+/* Define to 1 if your `ffs' function is known to be broken. */
+#undef HWLOC_HAVE_BROKEN_FFS
+
+/* Define to 1 if you have the `cairo' library. */
+#undef HWLOC_HAVE_CAIRO
+
+/* Define to 1 if you have the `clz' function. */
+#undef HWLOC_HAVE_CLZ
+
+/* Define to 1 if you have the `clzl' function. */
+#undef HWLOC_HAVE_CLZL
+
+/* Define to 1 if the CPU_SET macro works */
+#undef HWLOC_HAVE_CPU_SET
+
+/* Define to 1 if the CPU_SET_S macro works */
+#undef HWLOC_HAVE_CPU_SET_S
+
+/* Define to 1 if you have the `cudart' SDK. */
+#undef HWLOC_HAVE_CUDART
+
+/* Define to 1 if function `clz' is declared by system headers */
+#undef HWLOC_HAVE_DECL_CLZ
+
+/* Define to 1 if function `clzl' is declared by system headers */
+#undef HWLOC_HAVE_DECL_CLZL
+
+/* Define to 1 if function `ffs' is declared by system headers */
+#undef HWLOC_HAVE_DECL_FFS
+
+/* Define to 1 if function `ffsl' is declared by system headers */
+#undef HWLOC_HAVE_DECL_FFSL
+
+/* Define to 1 if function `fls' is declared by system headers */
+#undef HWLOC_HAVE_DECL_FLS
+
+/* Define to 1 if function `flsl' is declared by system headers */
+#undef HWLOC_HAVE_DECL_FLSL
+
+/* Define to 1 if function `strncasecmp' is declared by system headers */
+#undef HWLOC_HAVE_DECL_STRNCASECMP
+
+/* Define to 1 if you have the `ffs' function. */
+#undef HWLOC_HAVE_FFS
+
+/* Define to 1 if you have the `ffsl' function. */
+#undef HWLOC_HAVE_FFSL
+
+/* Define to 1 if you have the `fls' function. */
+#undef HWLOC_HAVE_FLS
+
+/* Define to 1 if you have the `flsl' function. */
+#undef HWLOC_HAVE_FLSL
+
+/* Define to 1 if you have the GL module components. */
+#undef HWLOC_HAVE_GL
+
+/* Define to 1 if you have a library providing the termcap interface */
+#undef HWLOC_HAVE_LIBTERMCAP
+
+/* Define to 1 if you have libudev. */
+#undef HWLOC_HAVE_LIBUDEV
+
+/* Define to 1 if you have the `libxml2' library. */
+#undef HWLOC_HAVE_LIBXML2
+
+/* Define to 1 if building the Linux I/O component */
+#undef HWLOC_HAVE_LINUXIO
+
+/* Define to 1 if enabling Linux-specific PCI discovery in the Linux I/O
+   component */
+#undef HWLOC_HAVE_LINUXPCI
+
+/* Define to 1 if you have the `NVML' library. */
+#undef HWLOC_HAVE_NVML
+
+/* Define to 1 if glibc provides the old prototype (without length) of
+   sched_setaffinity() */
+#undef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
+
+/* Define to 1 if you have the `OpenCL' library. */
+#undef HWLOC_HAVE_OPENCL
+
+/* Define to 1 if the hwloc library should support dynamically-loaded plugins
+   */
+#undef HWLOC_HAVE_PLUGINS
+
+/* Define to 1 if you have `pthread_getthrds_np' */
+#undef HWLOC_HAVE_PTHREAD_GETTHRDS_NP
+
+/* Define to 1 if pthread mutexes are available */
+#undef HWLOC_HAVE_PTHREAD_MUTEX
+
+/* Define to 1 if glibc provides a prototype of sched_setaffinity() */
+#undef HWLOC_HAVE_SCHED_SETAFFINITY
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#undef HWLOC_HAVE_STDINT_H
+
+/* Define to 1 if function `syscall' is available with 6 parameters */
+#undef HWLOC_HAVE_SYSCALL
+
+/* Define to 1 if you have the `windows.h' header. */
+#undef HWLOC_HAVE_WINDOWS_H
+
+/* Define to 1 if X11 headers including Xutil.h and keysym.h are available. */
+#undef HWLOC_HAVE_X11_KEYSYM
+
+/* Define to 1 if you have x86 cpuid */
+#undef HWLOC_HAVE_X86_CPUID
+
+/* Define to 1 on HP-UX */
+#undef HWLOC_HPUX_SYS
+
+/* Define to 1 on Irix */
+#undef HWLOC_IRIX_SYS
+
+/* Define to 1 on Linux */
+#undef HWLOC_LINUX_SYS
+
+/* Define to 1 on *NETBSD */
+#undef HWLOC_NETBSD_SYS
+
+/* The size of `unsigned int', as computed by sizeof */
+#undef HWLOC_SIZEOF_UNSIGNED_INT
+
+/* The size of `unsigned long', as computed by sizeof */
+#undef HWLOC_SIZEOF_UNSIGNED_LONG
+
+/* Define to 1 on Solaris */
+#undef HWLOC_SOLARIS_SYS
+
+/* The hwloc symbol prefix */
+#undef HWLOC_SYM_PREFIX
+
+/* The hwloc symbol prefix in all caps */
+#undef HWLOC_SYM_PREFIX_CAPS
+
+/* Whether we need to re-define all the hwloc public symbols or not */
+#undef HWLOC_SYM_TRANSFORM
+
+/* Define to 1 on unsupported systems */
+#undef HWLOC_UNSUPPORTED_SYS
+
+/* Define to 1 if ncurses works, preferred over curses */
+#undef HWLOC_USE_NCURSES
+
+/* The library version, always available, even in embedded mode, contrary to
+   VERSION */
+#undef HWLOC_VERSION
+
+/* Define to 1 on WINDOWS */
+#undef HWLOC_WIN_SYS
+
+/* Define to 1 on x86_32 */
+#undef HWLOC_X86_32_ARCH
+
+/* Define to 1 on x86_64 */
+#undef HWLOC_X86_64_ARCH
+
+/* Define to the sub-directory in which libtool stores uninstalled libraries.
+   */
+#undef LT_OBJDIR
+
+/* Define to 1 if netlocscotch (scotch support in netloc) is enabled */
+#undef NETLOC_SCOTCH
+
+/* Define to 1 if your C compiler doesn't accept -c and -o together. */
+#undef NO_MINUS_C_MINUS_O
+
+/* Name of package */
+#undef PACKAGE
+
+/* Define to the address where bug reports for this package should be sent. */
+#undef PACKAGE_BUGREPORT
+
+/* Define to the full name of this package. */
+#undef PACKAGE_NAME
+
+/* Define to the full name and version of this package. */
+#undef PACKAGE_STRING
+
+/* Define to the one symbol short name of this package. */
+#undef PACKAGE_TARNAME
+
+/* Define to the home page for this package. */
+#undef PACKAGE_URL
+
+/* Define to the version of this package. */
+#undef PACKAGE_VERSION
+
+/* The size of `unsigned int', as computed by sizeof. */
+#undef SIZEOF_UNSIGNED_INT
+
+/* The size of `unsigned long', as computed by sizeof. */
+#undef SIZEOF_UNSIGNED_LONG
+
+/* The size of `void *', as computed by sizeof. */
+#undef SIZEOF_VOID_P
+
+/* Define to 1 if you have the ANSI C header files. */
+#undef STDC_HEADERS
+
+/* Enable extensions on HP-UX. */
+#ifndef _HPUX_SOURCE
+# undef _HPUX_SOURCE
+#endif
+
+
+/* Enable extensions on AIX 3, Interix.  */
+#ifndef _ALL_SOURCE
+# undef _ALL_SOURCE
+#endif
+/* Enable GNU extensions on systems that have them.  */
+#ifndef _GNU_SOURCE
+# undef _GNU_SOURCE
+#endif
+/* Enable threading extensions on Solaris.  */
+#ifndef _POSIX_PTHREAD_SEMANTICS
+# undef _POSIX_PTHREAD_SEMANTICS
+#endif
+/* Enable extensions on HP NonStop.  */
+#ifndef _TANDEM_SOURCE
+# undef _TANDEM_SOURCE
+#endif
+/* Enable general extensions on Solaris.  */
+#ifndef __EXTENSIONS__
+# undef __EXTENSIONS__
+#endif
+
+
+/* Version number of package */
+#undef VERSION
+
+/* Define to 1 if the X Window System is missing or not being used. */
+#undef X_DISPLAY_MISSING
+
+/* Are we building for HP-UX? */
+#undef _HPUX_SOURCE
+
+/* Define to 1 if on MINIX. */
+#undef _MINIX
+
+/* Define to 2 if the system does not provide POSIX.1 features except with
+   this defined. */
+#undef _POSIX_1_SOURCE
+
+/* Define to 1 if you need to in order for `stat' and other things to work. */
+#undef _POSIX_SOURCE
+
+/* Define this to the process ID type */
+#undef hwloc_pid_t
+
+/* Define this to the thread ID type */
+#undef hwloc_thread_t
+
+
+#endif /* HWLOC_CONFIGURE_H */
+
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/private/components.h b/opal/mca/hwloc/hwloc2x/hwloc/include/private/components.h
new file mode 100644
index 0000000000..8525bbe462
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/private/components.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright © 2012-2015 Inria.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+
+#ifdef HWLOC_INSIDE_PLUGIN
+/*
+ * these declarations are internal only, they are not available to plugins
+ * (many functions below are internal static symbols).
+ */
+#error This file should not be used in plugins
+#endif
+
+
+#ifndef PRIVATE_COMPONENTS_H
+#define PRIVATE_COMPONENTS_H 1
+
+#include <hwloc/plugins.h>
+
+struct hwloc_topology;
+
+extern int hwloc_disc_component_force_enable(struct hwloc_topology *topology,
+					     int envvar_forced, /* 1 if forced through envvar, 0 if forced through API */
+					     int type, const char *name,
+					     const void *data1, const void *data2, const void *data3);
+extern void hwloc_disc_components_enable_others(struct hwloc_topology *topology);
+
+/* Compute the topology is_thissystem flag and find some callbacks based on enabled backends */
+extern void hwloc_backends_is_thissystem(struct hwloc_topology *topology);
+extern void hwloc_backends_find_callbacks(struct hwloc_topology *topology);
+
+/* Initialize the list of backends used by a topology */
+extern void hwloc_backends_init(struct hwloc_topology *topology);
+/* Disable and destroy all backends used by a topology */
+extern void hwloc_backends_disable_all(struct hwloc_topology *topology);
+
+/* Used by the core to setup/destroy the list of components */
+extern void hwloc_components_init(void); /* increases components refcount, should be called exactly once per topology (during init) */
+extern void hwloc_components_fini(void); /* decreases components refcount, should be called exactly once per topology (during destroy) */
+
+#endif /* PRIVATE_COMPONENTS_H */
+
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/private/cpuid-x86.h b/opal/mca/hwloc/hwloc2x/hwloc/include/private/cpuid-x86.h
new file mode 100644
index 0000000000..2758afe049
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/private/cpuid-x86.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright © 2010-2012, 2014 Université Bordeaux
+ * Copyright © 2010 Cisco Systems, Inc.  All rights reserved.
+ * Copyright © 2014 Inria.  All rights reserved.
+ *
+ * See COPYING in top-level directory.
+ */
+
+/* Internals for x86's cpuid.  */
+
+#ifndef HWLOC_PRIVATE_CPUID_X86_H
+#define HWLOC_PRIVATE_CPUID_X86_H
+
+#if (defined HWLOC_X86_32_ARCH) && (!defined HWLOC_HAVE_MSVC_CPUIDEX)
+static __hwloc_inline int hwloc_have_x86_cpuid(void)
+{
+  int ret;
+  unsigned tmp, tmp2;
+  __asm__(
+      "mov $0,%0\n\t"   /* Not supported a priori */
+
+      "pushfl   \n\t"   /* Save flags */
+
+      "pushfl   \n\t"                                           \
+      "pop %1   \n\t"   /* Get flags */                         \
+
+#define TRY_TOGGLE                                              \
+      "xor $0x00200000,%1\n\t"        /* Try to toggle ID */    \
+      "mov %1,%2\n\t"   /* Save expected value */               \
+      "push %1  \n\t"                                           \
+      "popfl    \n\t"   /* Try to toggle */                     \
+      "pushfl   \n\t"                                           \
+      "pop %1   \n\t"                                           \
+      "cmp %1,%2\n\t"   /* Compare with expected value */       \
+      "jnz 0f\n\t"   /* Unexpected, failure */               \
+
+      TRY_TOGGLE        /* Try to set/clear */
+      TRY_TOGGLE        /* Try to clear/set */
+
+      "mov $1,%0\n\t"   /* Passed the test! */
+
+      "0: \n\t"
+      "popfl    \n\t"   /* Restore flags */
+
+      : "=r" (ret), "=&r" (tmp), "=&r" (tmp2));
+  return ret;
+}
+#endif /* defined HWLOC_X86_32_ARCH && !defined HWLOC_HAVE_MSVC_CPUIDEX */
+#if (defined HWLOC_X86_64_ARCH) || (defined HWLOC_HAVE_MSVC_CPUIDEX)
+static __hwloc_inline int hwloc_have_x86_cpuid(void) { return 1; }
+#endif /* HWLOC_X86_64_ARCH || HWLOC_HAVE_MSVC_CPUIDEX */
+
+static __hwloc_inline void hwloc_x86_cpuid(unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx)
+{
+#ifdef HWLOC_HAVE_MSVC_CPUIDEX
+  int regs[4];
+  __cpuidex(regs, *eax, *ecx);
+  *eax = regs[0];
+  *ebx = regs[1];
+  *ecx = regs[2];
+  *edx = regs[3];
+#else /* HWLOC_HAVE_MSVC_CPUIDEX */
+  /* Note: gcc might want to use bx or the stack for %1 addressing, so we can't
+   * use them :/ */
+#ifdef HWLOC_X86_64_ARCH
+  hwloc_uint64_t sav_rbx;
+  __asm__(
+  "mov %%rbx,%2\n\t"
+  "cpuid\n\t"
+  "xchg %2,%%rbx\n\t"
+  "movl %k2,%1\n\t"
+  : "+a" (*eax), "=m" (*ebx), "=&r"(sav_rbx),
+    "+c" (*ecx), "=&d" (*edx));
+#elif defined(HWLOC_X86_32_ARCH)
+  __asm__(
+  "mov %%ebx,%1\n\t"
+  "cpuid\n\t"
+  "xchg %%ebx,%1\n\t"
+  : "+a" (*eax), "=&SD" (*ebx), "+c" (*ecx), "=&d" (*edx));
+#else
+#error unknown architecture
+#endif
+#endif /* HWLOC_HAVE_MSVC_CPUIDEX */
+}
+
+#endif /* HWLOC_PRIVATE_CPUID_X86_H */
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/private/debug.h b/opal/mca/hwloc/hwloc2x/hwloc/include/private/debug.h
new file mode 100644
index 0000000000..a4492c17b3
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/private/debug.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2017 Inria.  All rights reserved.
+ * Copyright © 2009, 2011 Université Bordeaux
+ * Copyright © 2011 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/* The configuration file */
+
+#ifndef HWLOC_DEBUG_H
+#define HWLOC_DEBUG_H
+
+#include <private/autogen/config.h>
+
+#ifdef HWLOC_DEBUG
+#include <stdarg.h>
+#include <stdio.h>
+#endif
+
+#ifdef HWLOC_DEBUG
+static __hwloc_inline int hwloc_debug_enabled(void)
+{
+  static int checked = 0;
+  static int enabled = 1;
+  if (!checked) {
+    const char *env = getenv("HWLOC_DEBUG_VERBOSE");
+    if (env)
+      enabled = atoi(env);
+    if (enabled)
+      fprintf(stderr, "hwloc verbose debug enabled, may be disabled with HWLOC_DEBUG_VERBOSE=0 in the environment.\n");
+    checked = 1;
+  }
+  return enabled;
+}
+#endif
+
+#ifdef HWLOC_HAVE_ATTRIBUTE_FORMAT
+static __hwloc_inline void hwloc_debug(const char *s __hwloc_attribute_unused, ...) __attribute__ ((__format__ (__printf__, 1, 2)));
+#endif
+
+static __hwloc_inline void hwloc_debug(const char *s __hwloc_attribute_unused, ...)
+{
+#ifdef HWLOC_DEBUG
+  if (hwloc_debug_enabled()) {
+    va_list ap;
+    va_start(ap, s);
+    vfprintf(stderr, s, ap);
+    va_end(ap);
+  }
+#endif
+}
+
+#ifdef HWLOC_DEBUG
+#define hwloc_debug_bitmap(fmt, bitmap) do { \
+if (hwloc_debug_enabled()) { \
+  char *s; \
+  hwloc_bitmap_asprintf(&s, bitmap); \
+  fprintf(stderr, fmt, s); \
+  free(s); \
+} } while (0)
+#define hwloc_debug_1arg_bitmap(fmt, arg1, bitmap) do { \
+if (hwloc_debug_enabled()) { \
+  char *s; \
+  hwloc_bitmap_asprintf(&s, bitmap); \
+  fprintf(stderr, fmt, arg1, s); \
+  free(s); \
+} } while (0)
+#define hwloc_debug_2args_bitmap(fmt, arg1, arg2, bitmap) do { \
+if (hwloc_debug_enabled()) { \
+  char *s; \
+  hwloc_bitmap_asprintf(&s, bitmap); \
+  fprintf(stderr, fmt, arg1, arg2, s); \
+  free(s); \
+} } while (0)
+#else
+#define hwloc_debug_bitmap(s, bitmap) do { } while(0)
+#define hwloc_debug_1arg_bitmap(s, arg1, bitmap) do { } while(0)
+#define hwloc_debug_2args_bitmap(s, arg1, arg2, bitmap) do { } while(0)
+#endif
+
+#endif /* HWLOC_DEBUG_H */
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/private/misc.h b/opal/mca/hwloc/hwloc2x/hwloc/include/private/misc.h
new file mode 100644
index 0000000000..4ce7132ddd
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/private/misc.h
@@ -0,0 +1,439 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2017 Inria.  All rights reserved.
+ * Copyright © 2009-2012 Université Bordeaux
+ * Copyright © 2011 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/* Misc macros and inlines.  */
+
+#ifndef HWLOC_PRIVATE_MISC_H
+#define HWLOC_PRIVATE_MISC_H
+
+#include <hwloc/autogen/config.h>
+#include <private/autogen/config.h>
+
+#ifdef HWLOC_HAVE_DECL_STRNCASECMP
+#ifdef HAVE_STRINGS_H
+#include <strings.h>
+#endif
+#else
+#ifdef HAVE_CTYPE_H
+#include <ctype.h>
+#endif
+#endif
+
+/* Compile-time assertion */
+#define HWLOC_BUILD_ASSERT(condition) ((void)sizeof(char[1 - 2*!(condition)]))
+
+#define HWLOC_BITS_PER_LONG (HWLOC_SIZEOF_UNSIGNED_LONG * 8)
+#define HWLOC_BITS_PER_INT (HWLOC_SIZEOF_UNSIGNED_INT * 8)
+
+#if (HWLOC_BITS_PER_LONG != 32) && (HWLOC_BITS_PER_LONG != 64)
+#error "unknown size for unsigned long."
+#endif
+
+#if (HWLOC_BITS_PER_INT != 16) && (HWLOC_BITS_PER_INT != 32) && (HWLOC_BITS_PER_INT != 64)
+#error "unknown size for unsigned int."
+#endif
+
+/* internal-use-only value for when we don't know the type or don't have any value */
+#define HWLOC_OBJ_TYPE_NONE ((hwloc_obj_type_t) -1)
+
+/**
+ * ffsl helpers.
+ */
+
+#if defined(HWLOC_HAVE_BROKEN_FFS)
+
+/* System has a broken ffs().
+ * We must check this before the __GNUC__ or HWLOC_HAVE_FFSL cases.
+ */
+#    define HWLOC_NO_FFS
+
+#elif defined(__GNUC__)
+
+#  if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))
+     /* Starting from 3.4, gcc has a long variant.  */
+#    define hwloc_ffsl(x) __builtin_ffsl(x)
+#  else
+#    define hwloc_ffs(x) __builtin_ffs(x)
+#    define HWLOC_NEED_FFSL
+#  endif
+
+#elif defined(HWLOC_HAVE_FFSL)
+
+#  ifndef HWLOC_HAVE_DECL_FFSL
+extern int ffsl(long) __hwloc_attribute_const;
+#  endif
+
+#  define hwloc_ffsl(x) ffsl(x)
+
+#elif defined(HWLOC_HAVE_FFS)
+
+#  ifndef HWLOC_HAVE_DECL_FFS
+extern int ffs(int) __hwloc_attribute_const;
+#  endif
+
+#  define hwloc_ffs(x) ffs(x)
+#  define HWLOC_NEED_FFSL
+
+#else /* no ffs implementation */
+
+#    define HWLOC_NO_FFS
+
+#endif
+
+#ifdef HWLOC_NO_FFS
+
+/* no ffs or it is known to be broken */
+static __hwloc_inline int
+hwloc_ffsl_manual(unsigned long x) __hwloc_attribute_const;
+static __hwloc_inline int
+hwloc_ffsl_manual(unsigned long x)
+{
+	int i;
+
+	if (!x)
+		return 0;
+
+	i = 1;
+#if HWLOC_BITS_PER_LONG >= 64
+	if (!(x & 0xfffffffful)) {
+		x >>= 32;
+		i += 32;
+	}
+#endif
+	if (!(x & 0xffffu)) {
+		x >>= 16;
+		i += 16;
+	}
+	if (!(x & 0xff)) {
+		x >>= 8;
+		i += 8;
+	}
+	if (!(x & 0xf)) {
+		x >>= 4;
+		i += 4;
+	}
+	if (!(x & 0x3)) {
+		x >>= 2;
+		i += 2;
+	}
+	if (!(x & 0x1)) {
+		x >>= 1;
+		i += 1;
+	}
+
+	return i;
+}
+/* always define hwloc_ffsl as a macro, to avoid renaming breakage */
+#define hwloc_ffsl hwloc_ffsl_manual
+
+#elif defined(HWLOC_NEED_FFSL)
+
+/* We only have an int ffs(int) implementation, build a long one.  */
+
+/* First make it 32 bits if it was only 16.  */
+static __hwloc_inline int
+hwloc_ffs32(unsigned long x) __hwloc_attribute_const;
+static __hwloc_inline int
+hwloc_ffs32(unsigned long x)
+{
+#if HWLOC_BITS_PER_INT == 16
+	int low_ffs, hi_ffs;
+
+	low_ffs = hwloc_ffs(x & 0xfffful);
+	if (low_ffs)
+		return low_ffs;
+
+	hi_ffs = hwloc_ffs(x >> 16);
+	if (hi_ffs)
+		return hi_ffs + 16;
+
+	return 0;
+#else
+	return hwloc_ffs(x);
+#endif
+}
+
+/* Then make it 64 bit if longs are.  */
+static __hwloc_inline int
+hwloc_ffsl_from_ffs32(unsigned long x) __hwloc_attribute_const;
+static __hwloc_inline int
+hwloc_ffsl_from_ffs32(unsigned long x)
+{
+#if HWLOC_BITS_PER_LONG == 64
+	int low_ffs, hi_ffs;
+
+	low_ffs = hwloc_ffs32(x & 0xfffffffful);
+	if (low_ffs)
+		return low_ffs;
+
+	hi_ffs = hwloc_ffs32(x >> 32);
+	if (hi_ffs)
+		return hi_ffs + 32;
+
+	return 0;
+#else
+	return hwloc_ffs32(x);
+#endif
+}
+/* always define hwloc_ffsl as a macro, to avoid renaming breakage */
+#define hwloc_ffsl hwloc_ffsl_from_ffs32
+
+#endif
+
+/**
+ * flsl helpers.
+ */
+#ifdef __GNUC_____
+
+#  if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))
+#    define hwloc_flsl(x) (x ? (8*sizeof(long) - __builtin_clzl(x)) : 0)
+#  else
+#    define hwloc_fls(x) (x ? (8*sizeof(int) - __builtin_clz(x)) : 0)
+#    define HWLOC_NEED_FLSL
+#  endif
+
+#elif defined(HWLOC_HAVE_FLSL)
+
+#  ifndef HWLOC_HAVE_DECL_FLSL
+extern int flsl(long) __hwloc_attribute_const;
+#  endif
+
+#  define hwloc_flsl(x) flsl(x)
+
+#elif defined(HWLOC_HAVE_CLZL)
+
+#  ifndef HWLOC_HAVE_DECL_CLZL
+extern int clzl(long) __hwloc_attribute_const;
+#  endif
+
+#  define hwloc_flsl(x) (x ? (8*sizeof(long) - clzl(x)) : 0)
+
+#elif defined(HWLOC_HAVE_FLS)
+
+#  ifndef HWLOC_HAVE_DECL_FLS
+extern int fls(int) __hwloc_attribute_const;
+#  endif
+
+#  define hwloc_fls(x) fls(x)
+#  define HWLOC_NEED_FLSL
+
+#elif defined(HWLOC_HAVE_CLZ)
+
+#  ifndef HWLOC_HAVE_DECL_CLZ
+extern int clz(int) __hwloc_attribute_const;
+#  endif
+
+#  define hwloc_fls(x) (x ? (8*sizeof(int) - clz(x)) : 0)
+#  define HWLOC_NEED_FLSL
+
+#else /* no fls implementation */
+
+static __hwloc_inline int
+hwloc_flsl_manual(unsigned long x) __hwloc_attribute_const;
+static __hwloc_inline int
+hwloc_flsl_manual(unsigned long x)
+{
+	int i = 0;
+
+	if (!x)
+		return 0;
+
+	i = 1;
+#if HWLOC_BITS_PER_LONG >= 64
+	if ((x & 0xffffffff00000000ul)) {
+		x >>= 32;
+		i += 32;
+	}
+#endif
+	if ((x & 0xffff0000u)) {
+		x >>= 16;
+		i += 16;
+	}
+	if ((x & 0xff00)) {
+		x >>= 8;
+		i += 8;
+	}
+	if ((x & 0xf0)) {
+		x >>= 4;
+		i += 4;
+	}
+	if ((x & 0xc)) {
+		x >>= 2;
+		i += 2;
+	}
+	if ((x & 0x2)) {
+		x >>= 1;
+		i += 1;
+	}
+
+	return i;
+}
+/* always define hwloc_flsl as a macro, to avoid renaming breakage */
+#define hwloc_flsl hwloc_flsl_manual
+
+#endif
+
+#ifdef HWLOC_NEED_FLSL
+
+/* We only have an int fls(int) implementation, build a long one.  */
+
+/* First make it 32 bits if it was only 16.  */
+static __hwloc_inline int
+hwloc_fls32(unsigned long x) __hwloc_attribute_const;
+static __hwloc_inline int
+hwloc_fls32(unsigned long x)
+{
+#if HWLOC_BITS_PER_INT == 16
+	int low_fls, hi_fls;
+
+	hi_fls = hwloc_fls(x >> 16);
+	if (hi_fls)
+		return hi_fls + 16;
+
+	low_fls = hwloc_fls(x & 0xfffful);
+	if (low_fls)
+		return low_fls;
+
+	return 0;
+#else
+	return hwloc_fls(x);
+#endif
+}
+
+/* Then make it 64 bit if longs are.  */
+static __hwloc_inline int
+hwloc_flsl_from_fls32(unsigned long x) __hwloc_attribute_const;
+static __hwloc_inline int
+hwloc_flsl_from_fls32(unsigned long x)
+{
+#if HWLOC_BITS_PER_LONG == 64
+	int low_fls, hi_fls;
+
+	hi_fls = hwloc_fls32(x >> 32);
+	if (hi_fls)
+		return hi_fls + 32;
+
+	low_fls = hwloc_fls32(x & 0xfffffffful);
+	if (low_fls)
+		return low_fls;
+
+	return 0;
+#else
+	return hwloc_fls32(x);
+#endif
+}
+/* always define hwloc_flsl as a macro, to avoid renaming breakage */
+#define hwloc_flsl hwloc_flsl_from_fls32
+
+#endif
+
+static __hwloc_inline int
+hwloc_weight_long(unsigned long w) __hwloc_attribute_const;
+static __hwloc_inline int
+hwloc_weight_long(unsigned long w)
+{
+#if HWLOC_BITS_PER_LONG == 32
+#if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__) >= 4)
+	return __builtin_popcount(w);
+#else
+	unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555);
+	res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
+	res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F);
+	res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF);
+	return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF);
+#endif
+#else /* HWLOC_BITS_PER_LONG == 32 */
+#if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__) >= 4)
+	return __builtin_popcountll(w);
+#else
+	unsigned long res;
+	res = (w & 0x5555555555555555ul) + ((w >> 1) & 0x5555555555555555ul);
+	res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul);
+	res = (res & 0x0F0F0F0F0F0F0F0Ful) + ((res >> 4) & 0x0F0F0F0F0F0F0F0Ful);
+	res = (res & 0x00FF00FF00FF00FFul) + ((res >> 8) & 0x00FF00FF00FF00FFul);
+	res = (res & 0x0000FFFF0000FFFFul) + ((res >> 16) & 0x0000FFFF0000FFFFul);
+	return (res & 0x00000000FFFFFFFFul) + ((res >> 32) & 0x00000000FFFFFFFFul);
+#endif
+#endif /* HWLOC_BITS_PER_LONG == 64 */
+}
+
+#if !HAVE_DECL_STRTOULL && defined(HAVE_STRTOULL)
+unsigned long long int strtoull(const char *nptr, char **endptr, int base);
+#endif
+
+static __hwloc_inline int hwloc_strncasecmp(const char *s1, const char *s2, size_t n)
+{
+#ifdef HWLOC_HAVE_DECL_STRNCASECMP
+  return strncasecmp(s1, s2, n);
+#else
+  while (n) {
+    char c1 = tolower(*s1), c2 = tolower(*s2);
+    if (!c1 || !c2 || c1 != c2)
+      return c1-c2;
+    n--; s1++; s2++;
+  }
+  return 0;
+#endif
+}
+
+static __hwloc_inline hwloc_obj_type_t hwloc_cache_type_by_depth_type(unsigned depth, hwloc_obj_cache_type_t type)
+{
+  if (type == HWLOC_OBJ_CACHE_INSTRUCTION) {
+    if (depth >= 1 && depth <= 3)
+      return HWLOC_OBJ_L1ICACHE + depth-1;
+    else
+      return HWLOC_OBJ_TYPE_NONE;
+  } else {
+    if (depth >= 1 && depth <= 5)
+      return HWLOC_OBJ_L1CACHE + depth-1;
+    else
+      return HWLOC_OBJ_TYPE_NONE;
+  }
+}
+
+/* I/O or Misc object, without cpusets or nodesets. */
+static __hwloc_inline int hwloc_obj_type_is_special (hwloc_obj_type_t type)
+{
+  /* type contiguity is asserted in topology_check() */
+  return type >= HWLOC_OBJ_MISC && type <= HWLOC_OBJ_OS_DEVICE;
+}
+
+static __hwloc_inline int hwloc_obj_type_is_io (hwloc_obj_type_t type)
+{
+  /* type contiguity is asserted in topology_check() */
+  return type >= HWLOC_OBJ_BRIDGE && type <= HWLOC_OBJ_OS_DEVICE;
+}
+
+#ifdef HWLOC_WIN_SYS
+#  ifndef HAVE_SSIZE_T
+typedef SSIZE_T ssize_t;
+#  endif
+#  if !HAVE_DECL_STRTOULL && !defined(HAVE_STRTOULL)
+#    define strtoull _strtoui64
+#  endif
+#  ifndef S_ISREG
+#    define S_ISREG(m) ((m) & S_IFREG)
+#  endif
+#  ifndef S_ISDIR
+#    define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
+#  endif
+#  if !HAVE_DECL_STRCASECMP
+#    define strcasecmp _stricmp
+#  endif
+#  if !HAVE_DECL_SNPRINTF
+#    define snprintf _snprintf
+#  endif
+#  if HAVE_DECL__STRDUP
+#    define strdup _strdup
+#  endif
+#  if HAVE_DECL__PUTENV
+#    define putenv _putenv
+#  endif
+#endif
+
+#endif /* HWLOC_PRIVATE_MISC_H */
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/private/netloc.h b/opal/mca/hwloc/hwloc2x/hwloc/include/private/netloc.h
new file mode 100644
index 0000000000..c070c54cce
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/private/netloc.h
@@ -0,0 +1,578 @@
+/*
+ * Copyright © 2014 Cisco Systems, Inc.  All rights reserved.
+ * Copyright © 2013-2014 University of Wisconsin-La Crosse.
+ *                         All rights reserved.
+ * Copyright © 2015-2017 Inria.  All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ * See COPYING in top-level directory.
+ *
+ * $HEADER$
+ */
+
+#ifndef _NETLOC_PRIVATE_H_
+#define _NETLOC_PRIVATE_H_
+
+#include <hwloc.h>
+#include <netloc.h>
+#include <netloc/uthash.h>
+#include <netloc/utarray.h>
+#include <private/autogen/config.h>
+
+#define NETLOCFILE_VERSION 1
+
+#ifdef NETLOC_SCOTCH
+#include <stdint.h>
+#include <scotch.h>
+#define NETLOC_int SCOTCH_Num
+#else
+#define NETLOC_int int
+#endif
+
+/*
+ * "Import" a few things from hwloc
+ */
+#define __netloc_attribute_unused __hwloc_attribute_unused
+#define __netloc_attribute_malloc __hwloc_attribute_malloc
+#define __netloc_attribute_const __hwloc_attribute_const
+#define __netloc_attribute_pure __hwloc_attribute_pure
+#define __netloc_attribute_deprecated __hwloc_attribute_deprecated
+#define __netloc_attribute_may_alias __hwloc_attribute_may_alias
+#define NETLOC_DECLSPEC HWLOC_DECLSPEC
+
+
+/**********************************************************************
+ * Types
+ **********************************************************************/
+
+/**
+ * Definitions for Comparators
+ * \sa These are the return values from the following functions:
+ *     netloc_network_compare, netloc_dt_edge_t_compare, netloc_dt_node_t_compare
+ */
+typedef enum {
+    NETLOC_CMP_SAME    =  0,  /**< Compared as the Same */
+    NETLOC_CMP_SIMILAR = -1,  /**< Compared as Similar, but not the Same */
+    NETLOC_CMP_DIFF    = -2   /**< Compared as Different */
+} netloc_compare_type_t;
+
+/**
+ * Enumerated type for the various types of supported networks
+ */
+typedef enum {
+    NETLOC_NETWORK_TYPE_ETHERNET    = 1, /**< Ethernet network */
+    NETLOC_NETWORK_TYPE_INFINIBAND  = 2, /**< InfiniBand network */
+    NETLOC_NETWORK_TYPE_INVALID     = 3  /**< Invalid network */
+} netloc_network_type_t;
+
+/**
+ * Enumerated type for the various types of supported topologies
+ */
+typedef enum {
+    NETLOC_TOPOLOGY_TYPE_INVALID = -1, /**< Invalid */
+    NETLOC_TOPOLOGY_TYPE_TREE    = 1,  /**< Tree */
+} netloc_topology_type_t;
+
+/**
+ * Enumerated type for the various types of nodes
+ */
+typedef enum {
+    NETLOC_NODE_TYPE_HOST    = 0, /**< Host (a.k.a., network addressable endpoint - e.g., MAC Address) node */
+    NETLOC_NODE_TYPE_SWITCH  = 1, /**< Switch node */
+    NETLOC_NODE_TYPE_INVALID = 2  /**< Invalid node */
+} netloc_node_type_t;
+
+typedef enum {
+    NETLOC_ARCH_TREE    =  0,  /* Fat tree */
+} netloc_arch_type_t;
+
+
+/* Pre declarations to avoid inter dependency problems */
+/** \cond IGNORE */
+struct netloc_topology_t;
+typedef struct netloc_topology_t netloc_topology_t;
+struct netloc_node_t;
+typedef struct netloc_node_t netloc_node_t;
+struct netloc_edge_t;
+typedef struct netloc_edge_t netloc_edge_t;
+struct netloc_physical_link_t;
+typedef struct netloc_physical_link_t netloc_physical_link_t;
+struct netloc_path_t;
+typedef struct netloc_path_t netloc_path_t;
+
+struct netloc_arch_tree_t;
+typedef struct netloc_arch_tree_t netloc_arch_tree_t;
+struct netloc_arch_node_t;
+typedef struct netloc_arch_node_t netloc_arch_node_t;
+struct netloc_arch_node_slot_t;
+typedef struct netloc_arch_node_slot_t netloc_arch_node_slot_t;
+struct netloc_arch_t;
+typedef struct netloc_arch_t netloc_arch_t;
+/** \endcond */
+
+/**
+ * \struct netloc_topology_t
+ * \brief Netloc Topology Context
+ *
+ * An opaque data structure used to reference a network topology.
+ *
+ * \note Must be initialized with \ref netloc_topology_construct()
+ */
+struct netloc_topology_t {
+    /** Topology path */
+    char *topopath;
+    /** Subnet ID */
+    char *subnet_id;
+
+    /** Node List */
+    netloc_node_t *nodes; /* Hash table of nodes by physical_id */
+    netloc_node_t *nodesByHostname; /* Hash table of nodes by hostname */
+
+    netloc_physical_link_t *physical_links; /* Hash table with physcial links */
+
+    /** Partition List */
+    UT_array *partitions;
+
+    /** Hwloc topology List */
+    char *hwlocpath;
+    UT_array *topos;
+    hwloc_topology_t *hwloc_topos;
+
+    /** Type of the graph */
+    netloc_topology_type_t type;
+};
+
+/**
+ * \brief Netloc Node Type
+ *
+ * Represents the concept of a node (a.k.a., vertex, endpoint) within a network
+ * graph. This could be a server or a network switch. The \ref node_type parameter
+ * will distinguish the exact type of node this represents in the graph.
+ */
+struct netloc_node_t {
+    UT_hash_handle hh;       /* makes this structure hashable with physical_id */
+    UT_hash_handle hh2;      /* makes this structure hashable with hostname */
+
+    /** Physical ID of the node */
+    char physical_id[20];
+
+    /** Logical ID of the node (if any) */
+    int logical_id;
+
+    /** Type of the node */
+    netloc_node_type_t type;
+
+    /* Pointer to physical_links */
+    UT_array *physical_links;
+
+    /** Description information from discovery (if any) */
+    char *description;
+
+    /**
+     * Application-given private data pointer.
+     * Initialized to NULL, and not used by the netloc library.
+     */
+    void * userdata;
+
+    /** Outgoing edges from this node */
+    netloc_edge_t *edges;
+
+    UT_array *subnodes; /* the group of nodes for the virtual nodes */
+
+    netloc_path_t *paths;
+
+    char *hostname;
+
+    UT_array *partitions; /* index in the list from the topology */
+
+    hwloc_topology_t hwlocTopo;
+    int hwlocTopoIdx;
+};
+
+/**
+ * \brief Netloc Edge Type
+ *
+ * Represents the concept of a directed edge within a network graph.
+ *
+ * \note We do not point to the netloc_node_t structure directly to
+ * simplify the representation, and allow the information to more easily
+ * be entered into the data store without circular references.
+ * \todo JJH Is the note above still true?
+ */
+struct netloc_edge_t {
+    UT_hash_handle hh;       /* makes this structure hashable */
+
+    netloc_node_t *dest;
+
+    int id;
+
+    /** Pointers to the parent node */
+    netloc_node_t *node;
+
+    /* Pointer to physical_links */
+    UT_array *physical_links;
+
+    /** total gbits of the links */
+    float total_gbits;
+
+    UT_array *partitions; /* index in the list from the topology */
+
+    UT_array *subnode_edges; /* for edges going to virtual nodes */
+
+    struct netloc_edge_t *other_way;
+
+    /**
+     * Application-given private data pointer.
+     * Initialized to NULL, and not used by the netloc library.
+     */
+    void * userdata;
+};
+
+
+struct netloc_physical_link_t {
+    UT_hash_handle hh;       /* makes this structure hashable */
+
+    int id; // TODO long long
+    netloc_node_t *src;
+    netloc_node_t *dest;
+    int ports[2];
+    char *width;
+    char *speed;
+
+    netloc_edge_t *edge;
+
+    int other_way_id;
+    struct netloc_physical_link_t *other_way;
+
+    UT_array *partitions; /* index in the list from the topology */
+
+    /** gbits of the link from speed and width */
+    float gbits;
+
+    /** Description information from discovery (if any) */
+    char *description;
+};
+
+struct netloc_path_t {
+    UT_hash_handle hh;       /* makes this structure hashable */
+    char dest_id[20];
+    UT_array *links;
+};
+
+
+/**********************************************************************
+ *        Architecture structures
+ **********************************************************************/
+struct netloc_arch_tree_t {
+    NETLOC_int num_levels;
+    NETLOC_int *degrees;
+    NETLOC_int *cost;
+};
+
+struct netloc_arch_node_t {
+    UT_hash_handle hh;       /* makes this structure hashable */
+    char *name; /* Hash key */
+    netloc_node_t *node; /* Corresponding node */
+    int idx_in_topo; /* idx with ghost hosts to have complete topo */
+    int num_slots; /* it is not the real number of slots but the maximum slot idx */
+    int *slot_idx; /* corresponding idx in slot_tree */
+    int *slot_os_idx; /* corresponding os index for each leaf in tree */
+    netloc_arch_tree_t *slot_tree; /* Tree built from hwloc */
+    int num_current_slots; /* Number of PUs */
+    NETLOC_int *current_slots; /* indices in the complete tree */
+    int *slot_ranks; /* corresponding MPI rank for each leaf in tree */
+};
+
+struct netloc_arch_node_slot_t {
+    netloc_arch_node_t *node;
+    int slot;
+};
+
+struct netloc_arch_t {
+    netloc_topology_t *topology;
+    int has_slots; /* if slots are included in the architecture */
+    netloc_arch_type_t type;
+    union {
+        netloc_arch_tree_t *node_tree;
+        netloc_arch_tree_t *global_tree;
+    } arch;
+    netloc_arch_node_t *nodes_by_name;
+    netloc_arch_node_slot_t *node_slot_by_idx; /* node_slot by index in complete topo */
+    NETLOC_int num_current_hosts; /* if has_slots, host is a slot, else host is a node */
+    NETLOC_int *current_hosts; /* indices in the complete topology */
+};
+
+/**********************************************************************
+ * Topology Functions
+ **********************************************************************/
+/**
+ * Allocate a topology handle.
+ *
+ * User is responsible for calling \ref netloc_topology_destruct on the
+ * topology handle. The path parameter is deep copied into the topology
+ * handle, so the user may free or reuse the path string after calling
+ * this function.
+ *
+ * \returns A newly allocated pointer to the topology handle on success
+ * \returns NULL upon an error.
+ */
+netloc_topology_t *netloc_topology_construct(char *path);
+
+/**
+ * Destruct a topology handle
+ *
+ * \param topology A valid pointer to a \ref netloc_topology_t handle created
+ * from a prior call to \ref netloc_topology_construct.
+ *
+ * \returns NETLOC_SUCCESS on success
+ * \returns NETLOC_ERROR upon an error.
+ */
+int netloc_topology_destruct(netloc_topology_t *topology);
+
+int netloc_topology_find_partition_idx(netloc_topology_t *topology, char *partition_name);
+
+int netloc_topology_read_hwloc(netloc_topology_t *topology, int num_nodes,
+        netloc_node_t **node_list);
+
+#define netloc_topology_iter_partitions(topology,partition) \
+    for ((partition) = (char **)utarray_front(topology->partitions); \
+            (partition) != NULL; \
+            (partition) = (char **)utarray_next(topology->partitions, partition))
+
+#define netloc_topology_iter_hwloctopos(topology,hwloctopo) \
+    for ((hwloctopo) = (char **)utarray_front(topology->topos); \
+            (hwloctopo) != NULL; \
+            (hwloctopo) = (char **)utarray_next(topology->topos, hwloctopo))
+
+#define netloc_topology_find_node(topology,node_id,node) \
+    HASH_FIND_STR(topology->nodes, node_id, node)
+
+#define netloc_topology_iter_nodes(topology,node,_tmp) \
+    HASH_ITER(hh, topology->nodes, node, _tmp)
+
+#define netloc_topology_num_nodes(topology) \
+    HASH_COUNT(topology->nodes)
+
+/*************************************************/
+
+
+/**
+ * Constructor for netloc_node_t
+ *
+ * User is responsible for calling the destructor on the handle.
+ *
+ * Returns
+ *   A newly allocated pointer to the network information.
+ */
+netloc_node_t *netloc_node_construct(void);
+
+/**
+ * Destructor for netloc_node_t
+ *
+ * \param node A valid node handle
+ *
+ * Returns
+ *   NETLOC_SUCCESS on success
+ *   NETLOC_ERROR on error
+ */
+int netloc_node_destruct(netloc_node_t *node);
+
+char *netloc_node_pretty_print(netloc_node_t* node);
+
+#define netloc_node_get_num_subnodes(node) \
+    utarray_len((node)->subnodes)
+
+#define netloc_node_get_subnode(node,i) \
+    (*(netloc_node_t **)utarray_eltptr((node)->subnodes, (i)))
+
+#define netloc_node_get_num_edges(node) \
+    utarray_len((node)->edges)
+
+#define netloc_node_get_edge(node,i) \
+    (*(netloc_edge_t **)utarray_eltptr((node)->edges, (i)))
+
+#define netloc_node_iter_edges(node,edge,_tmp) \
+    HASH_ITER(hh, node->edges, edge, _tmp)
+
+#define netloc_node_iter_paths(node,path,_tmp) \
+    HASH_ITER(hh, node->paths, path, _tmp)
+
+#define netloc_node_is_host(node) \
+    (node->type == NETLOC_NODE_TYPE_HOST)
+
+#define netloc_node_is_switch(node) \
+    (node->type == NETLOC_NODE_TYPE_SWITCH)
+
+#define netloc_node_iter_paths(node, path,_tmp) \
+    HASH_ITER(hh, node->paths, path, _tmp)
+
+int netloc_node_is_in_partition(netloc_node_t *node, int partition);
+
+/*************************************************/
+
+
+/**
+ * Constructor for netloc_edge_t
+ *
+ * User is responsible for calling the destructor on the handle.
+ *
+ * Returns
+ *   A newly allocated pointer to the edge information.
+ */
+netloc_edge_t *netloc_edge_construct(void);
+
+/**
+ * Destructor for netloc_edge_t
+ *
+ * \param edge A valid edge handle
+ *
+ * Returns
+ *   NETLOC_SUCCESS on success
+ *   NETLOC_ERROR on error
+ */
+int netloc_edge_destruct(netloc_edge_t *edge);
+
+char * netloc_edge_pretty_print(netloc_edge_t* edge);
+
+void netloc_edge_reset_uid(void);
+
+int netloc_edge_is_in_partition(netloc_edge_t *edge, int partition);
+
+#define netloc_edge_get_num_links(edge) \
+    utarray_len((edge)->physical_links)
+
+#define netloc_edge_get_link(edge,i) \
+    (*(netloc_physical_link_t **)utarray_eltptr((edge)->physical_links, (i)))
+
+#define netloc_edge_get_num_subedges(edge) \
+    utarray_len((edge)->subnode_edges)
+
+#define netloc_edge_get_subedge(edge,i) \
+    (*(netloc_edge_t **)utarray_eltptr((edge)->subnode_edges, (i)))
+
+/*************************************************/
+
+
+/**
+ * Constructor for netloc_physical_link_t
+ *
+ * User is responsible for calling the destructor on the handle.
+ *
+ * Returns
+ *   A newly allocated pointer to the physical link information.
+ */
+netloc_physical_link_t * netloc_physical_link_construct(void);
+
+/**
+ * Destructor for netloc_physical_link_t
+ *
+ * Returns
+ *   NETLOC_SUCCESS on success
+ *   NETLOC_ERROR on error
+ */
+int netloc_physical_link_destruct(netloc_physical_link_t *link);
+
+char * netloc_link_pretty_print(netloc_physical_link_t* link);
+
+/*************************************************/
+
+
+netloc_path_t *netloc_path_construct(void);
+int netloc_path_destruct(netloc_path_t *path);
+
+
+/**********************************************************************
+ *        Architecture functions
+ **********************************************************************/
+
+netloc_arch_t * netloc_arch_construct(void);
+
+int netloc_arch_destruct(netloc_arch_t *arch);
+
+int netloc_arch_build(netloc_arch_t *arch, int add_slots);
+
+int netloc_arch_set_current_resources(netloc_arch_t *arch);
+
+int netloc_arch_set_global_resources(netloc_arch_t *arch);
+
+int netloc_arch_node_get_hwloc_info(netloc_arch_node_t *arch);
+
+void netloc_arch_tree_complete(netloc_arch_tree_t *tree, UT_array **down_degrees_by_level,
+        int num_hosts, int **parch_idx);
+
+NETLOC_int netloc_arch_tree_num_leaves(netloc_arch_tree_t *tree);
+
+
+/**********************************************************************
+ *        Access functions of various elements of the topology
+ **********************************************************************/
+
+#define netloc_get_num_partitions(object) \
+    utarray_len((object)->partitions)
+
+#define netloc_get_partition(object,i) \
+    (*(int *)utarray_eltptr((object)->partitions, (i)))
+
+
+#define netloc_path_iter_links(path,link) \
+    for ((link) = (netloc_physical_link_t **)utarray_front(path->links); \
+            (link) != NULL; \
+            (link) = (netloc_physical_link_t **)utarray_next(path->links, link))
+
+/**********************************************************************
+ *        Misc functions
+ **********************************************************************/
+
+/**
+ * Decode the network type
+ *
+ * \param net_type A valid member of the \ref netloc_network_type_t type
+ *
+ * \returns NULL if the type is invalid
+ * \returns A string for that \ref netloc_network_type_t type
+ */
+static inline const char * netloc_network_type_decode(netloc_network_type_t net_type) {
+    if( NETLOC_NETWORK_TYPE_ETHERNET == net_type ) {
+        return "ETH";
+    }
+    else if( NETLOC_NETWORK_TYPE_INFINIBAND == net_type ) {
+        return "IB";
+    }
+    else {
+        return NULL;
+    }
+}
+
+/**
+ * Decode the node type
+ *
+ * \param node_type A valid member of the \ref netloc_node_type_t type
+ *
+ * \returns NULL if the type is invalid
+ * \returns A string for that \ref netloc_node_type_t type
+ */
+static inline const char * netloc_node_type_decode(netloc_node_type_t node_type) {
+    if( NETLOC_NODE_TYPE_SWITCH == node_type ) {
+        return "SW";
+    }
+    else if( NETLOC_NODE_TYPE_HOST == node_type ) {
+        return "CA";
+    }
+    else {
+        return NULL;
+    }
+}
+
+ssize_t netloc_line_get(char **lineptr, size_t *n, FILE *stream);
+
+char *netloc_line_get_next_token(char **string, char c);
+
+int netloc_build_comm_mat(char *filename, int *pn, double ***pmat);
+
+#define STRDUP_IF_NOT_NULL(str) (NULL == str ? NULL : strdup(str))
+#define STR_EMPTY_IF_NULL(str) (NULL == str ? "" : str)
+
+
+#endif // _NETLOC_PRIVATE_H_
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/private/private.h b/opal/mca/hwloc/hwloc2x/hwloc/include/private/private.h
new file mode 100644
index 0000000000..893fae3d47
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/private/private.h
@@ -0,0 +1,388 @@
+/*
+ * Copyright © 2009      CNRS
+ * Copyright © 2009-2017 Inria.  All rights reserved.
+ * Copyright © 2009-2012 Université Bordeaux
+ * Copyright © 2009-2011 Cisco Systems, Inc.  All rights reserved.
+ *
+ * See COPYING in top-level directory.
+ */
+
+/* Internal types and helpers. */
+
+
+#ifdef HWLOC_INSIDE_PLUGIN
+/*
+ * these declarations are internal only, they are not available to plugins
+ * (many functions below are internal static symbols).
+ */
+#error This file should not be used in plugins
+#endif
+
+
+#ifndef HWLOC_PRIVATE_H
+#define HWLOC_PRIVATE_H
+
+#include <private/autogen/config.h>
+#include <hwloc.h>
+#include <hwloc/bitmap.h>
+#include <private/components.h>
+#include <private/debug.h>
+#include <sys/types.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef HAVE_STDINT_H
+#include <stdint.h>
+#endif
+#ifdef HAVE_SYS_UTSNAME_H
+#include <sys/utsname.h>
+#endif
+#include <string.h>
+
+struct hwloc_topology {
+  unsigned nb_levels;					/* Number of horizontal levels */
+  unsigned nb_levels_allocated;				/* Number of levels allocated and zeroed in level_nbobjects and levels below */
+  unsigned *level_nbobjects; 				/* Number of objects on each horizontal level */
+  struct hwloc_obj ***levels;				/* Direct access to levels, levels[l = 0 .. nblevels-1][0..level_nbobjects[l]] */
+  unsigned long flags;
+  int type_depth[HWLOC_OBJ_TYPE_MAX];
+  enum hwloc_type_filter_e type_filter[HWLOC_OBJ_TYPE_MAX];
+  int is_thissystem;
+  int is_loaded;
+  int modified;                                         /* >0 if objects were added/removed recently, which means a reconnect is needed */
+  hwloc_pid_t pid;                                      /* Process ID the topology is view from, 0 for self */
+  void *userdata;
+  uint64_t next_gp_index;
+
+#define HWLOC_NR_SLEVELS 4
+#define HWLOC_SLEVEL_BRIDGE 0
+#define HWLOC_SLEVEL_PCIDEV 1
+#define HWLOC_SLEVEL_OSDEV 2
+#define HWLOC_SLEVEL_MISC 3
+  /* order must match negative depth, it's asserted in setup_defaults() */
+#define HWLOC_SLEVEL_FROM_DEPTH(x) (HWLOC_TYPE_DEPTH_BRIDGE-(x))
+#define HWLOC_SLEVEL_TO_DEPTH(x) (HWLOC_TYPE_DEPTH_BRIDGE-(x))
+  struct hwloc_special_level_s {
+    unsigned nbobjs;
+    struct hwloc_obj **objs;
+    struct hwloc_obj *first, *last; /* Temporarily used while listing object before building the objs array */
+  } slevels[HWLOC_NR_SLEVELS];
+
+  int pci_nonzero_domains;
+  int need_pci_belowroot_apply_locality;
+  struct hwloc_backend *get_pci_busid_cpuset_backend;
+
+  int pci_has_forced_locality;
+  unsigned pci_forced_locality_nr;
+  struct hwloc_pci_forced_locality_s {
+    unsigned domain;
+    unsigned bus_first, bus_last;
+    hwloc_bitmap_t cpuset;
+  } * pci_forced_locality;
+
+  struct hwloc_binding_hooks {
+    int (*set_thisproc_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags);
+    int (*get_thisproc_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags);
+    int (*set_thisthread_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags);
+    int (*get_thisthread_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags);
+    int (*set_proc_cpubind)(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_cpuset_t set, int flags);
+    int (*get_proc_cpubind)(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_cpuset_t set, int flags);
+#ifdef hwloc_thread_t
+    int (*set_thread_cpubind)(hwloc_topology_t topology, hwloc_thread_t tid, hwloc_const_cpuset_t set, int flags);
+    int (*get_thread_cpubind)(hwloc_topology_t topology, hwloc_thread_t tid, hwloc_cpuset_t set, int flags);
+#endif
+
+    int (*get_thisproc_last_cpu_location)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags);
+    int (*get_thisthread_last_cpu_location)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags);
+    int (*get_proc_last_cpu_location)(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_cpuset_t set, int flags);
+
+    int (*set_thisproc_membind)(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags);
+    int (*get_thisproc_membind)(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags);
+    int (*set_thisthread_membind)(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags);
+    int (*get_thisthread_membind)(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags);
+    int (*set_proc_membind)(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags);
+    int (*get_proc_membind)(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags);
+    int (*set_area_membind)(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags);
+    int (*get_area_membind)(hwloc_topology_t topology, const void *addr, size_t len, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags);
+    int (*get_area_memlocation)(hwloc_topology_t topology, const void *addr, size_t len, hwloc_nodeset_t nodeset, int flags);
+    /* This has to return the same kind of pointer as alloc_membind, so that free_membind can be used on it */
+    void *(*alloc)(hwloc_topology_t topology, size_t len);
+    /* alloc_membind has to always succeed if !(flags & HWLOC_MEMBIND_STRICT).
+     * see hwloc_alloc_or_fail which is convenient for that.  */
+    void *(*alloc_membind)(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags);
+    int (*free_membind)(hwloc_topology_t topology, void *addr, size_t len);
+
+    int (*get_allowed_resources)(hwloc_topology_t topology);
+  } binding_hooks;
+
+  struct hwloc_topology_support support;
+
+  void (*userdata_export_cb)(void *reserved, struct hwloc_topology *topology, struct hwloc_obj *obj);
+  void (*userdata_import_cb)(struct hwloc_topology *topology, struct hwloc_obj *obj, const char *name, const void *buffer, size_t length);
+  int userdata_not_decoded;
+
+  struct hwloc_internal_distances_s {
+    hwloc_obj_type_t type;
+    /* add union hwloc_obj_attr_u if we ever support groups */
+    unsigned nbobjs;
+    uint64_t *indexes; /* array of OS or GP indexes before we can convert them into objs. */
+    uint64_t *values; /* distance matrices, ordered according to the above indexes/objs array.
+		       * distance from i to j is stored in slot i*nbnodes+j.
+		       */
+    unsigned long kind;
+
+    hwloc_obj_t *objs; /* array of objects */
+    int objs_are_valid; /* set to 1 if the array objs is still valid, 0 if needs refresh */
+
+    struct hwloc_internal_distances_s *prev, *next;
+  } *first_dist, *last_dist;
+
+  int grouping;
+  int grouping_verbose;
+  unsigned grouping_nbaccuracies;
+  float grouping_accuracies[5];
+  int grouping_next_subkind;
+
+  /* list of enabled backends. */
+  struct hwloc_backend * backends;
+  unsigned backend_excludes;
+};
+
+extern void hwloc_alloc_obj_cpusets(hwloc_obj_t obj);
+extern void hwloc_setup_pu_level(struct hwloc_topology *topology, unsigned nb_pus);
+extern int hwloc_get_sysctlbyname(const char *name, int64_t *n);
+extern int hwloc_get_sysctl(int name[], unsigned namelen, int *n);
+extern unsigned hwloc_fallback_nbprocessors(struct hwloc_topology *topology);
+
+extern int hwloc__object_cpusets_compare_first(hwloc_obj_t obj1, hwloc_obj_t obj2);
+extern void hwloc__reorder_children(hwloc_obj_t parent);
+
+extern void hwloc_topology_setup_defaults(struct hwloc_topology *topology);
+extern void hwloc_topology_clear(struct hwloc_topology *topology);
+
+extern void hwloc_pci_discovery_init(struct hwloc_topology *topology);
+extern void hwloc_pci_discovery_prepare(struct hwloc_topology *topology);
+extern void hwloc_pci_discovery_exit(struct hwloc_topology *topology);
+
+/* Look for an object matching complete cpuset exactly, or insert one.
+ * Return NULL on failure.
+ * Return a good fallback (object above) on failure to insert.
+ */
+extern hwloc_obj_t hwloc_find_insert_io_parent_by_complete_cpuset(struct hwloc_topology *topology, hwloc_cpuset_t cpuset);
+
+/* Move PCI objects currently attached to the root object to their actual location.
+ * Called by the core at the end of hwloc_topology_load().
+ * Prior to this call, all PCI objects may be found below the root object.
+ * After this call and a reconnect of levels, all PCI objects are available through levels.
+ */
+extern int hwloc_pci_belowroot_apply_locality(struct hwloc_topology *topology);
+
+HWLOC_DECLSPEC extern const char * hwloc_pci_class_string(unsigned short class_id);
+
+extern void hwloc__add_info(struct hwloc_obj_info_s **infosp, unsigned *countp, const char *name, const char *value);
+extern char ** hwloc__find_info_slot(struct hwloc_obj_info_s **infosp, unsigned *countp, const char *name);
+extern void hwloc__move_infos(struct hwloc_obj_info_s **dst_infosp, unsigned *dst_countp, struct hwloc_obj_info_s **src_infosp, unsigned *src_countp);
+extern void hwloc__free_infos(struct hwloc_obj_info_s *infos, unsigned count);
+
+/* set native OS binding hooks */
+extern void hwloc_set_native_binding_hooks(struct hwloc_binding_hooks *hooks, struct hwloc_topology_support *support);
+/* set either native OS binding hooks (if thissystem), or dummy ones */
+extern void hwloc_set_binding_hooks(struct hwloc_topology *topology);
+
+#if defined(HWLOC_LINUX_SYS)
+extern void hwloc_set_linuxfs_hooks(struct hwloc_binding_hooks *binding_hooks, struct hwloc_topology_support *support);
+#endif /* HWLOC_LINUX_SYS */
+
+#if defined(HWLOC_BGQ_SYS)
+extern void hwloc_set_bgq_hooks(struct hwloc_binding_hooks *binding_hooks, struct hwloc_topology_support *support);
+#endif /* HWLOC_BGQ_SYS */
+
+#ifdef HWLOC_SOLARIS_SYS
+extern void hwloc_set_solaris_hooks(struct hwloc_binding_hooks *binding_hooks, struct hwloc_topology_support *support);
+#endif /* HWLOC_SOLARIS_SYS */
+
+#ifdef HWLOC_AIX_SYS
+extern void hwloc_set_aix_hooks(struct hwloc_binding_hooks *binding_hooks, struct hwloc_topology_support *support);
+#endif /* HWLOC_AIX_SYS */
+
+#ifdef HWLOC_WIN_SYS
+extern void hwloc_set_windows_hooks(struct hwloc_binding_hooks *binding_hooks, struct hwloc_topology_support *support);
+#endif /* HWLOC_WIN_SYS */
+
+#ifdef HWLOC_DARWIN_SYS
+extern void hwloc_set_darwin_hooks(struct hwloc_binding_hooks *binding_hooks, struct hwloc_topology_support *support);
+#endif /* HWLOC_DARWIN_SYS */
+
+#ifdef HWLOC_FREEBSD_SYS
+extern void hwloc_set_freebsd_hooks(struct hwloc_binding_hooks *binding_hooks, struct hwloc_topology_support *support);
+#endif /* HWLOC_FREEBSD_SYS */
+
+#ifdef HWLOC_NETBSD_SYS
+extern void hwloc_set_netbsd_hooks(struct hwloc_binding_hooks *binding_hooks, struct hwloc_topology_support *support);
+#endif /* HWLOC_NETBSD_SYS */
+
+#ifdef HWLOC_HPUX_SYS
+extern void hwloc_set_hpux_hooks(struct hwloc_binding_hooks *binding_hooks, struct hwloc_topology_support *support);
+#endif /* HWLOC_HPUX_SYS */
+
+extern int hwloc_look_hardwired_fujitsu_k(struct hwloc_topology *topology);
+extern int hwloc_look_hardwired_fujitsu_fx10(struct hwloc_topology *topology);
+extern int hwloc_look_hardwired_fujitsu_fx100(struct hwloc_topology *topology);
+
+/* Insert uname-specific names/values in the object infos array.
+ * If cached_uname isn't NULL, it is used as a struct utsname instead of recalling uname.
+ * Any field that starts with \0 is ignored.
+ */
+extern void hwloc_add_uname_info(struct hwloc_topology *topology, void *cached_uname);
+
+/* Free obj and its attributes assuming it's not linked to a parent and doesn't have any child */
+extern void hwloc_free_unlinked_object(hwloc_obj_t obj);
+
+/* Free obj and its children, assuming it's not linked to a parent */
+extern void hwloc_free_object_and_children(hwloc_obj_t obj);
+
+/* Free obj, its next siblings, and their children, assuming they're not linked to a parent */
+extern void hwloc_free_object_siblings_and_children(hwloc_obj_t obj);
+
+/* This can be used for the alloc field to get allocated data that can be freed by free() */
+void *hwloc_alloc_heap(hwloc_topology_t topology, size_t len);
+
+/* This can be used for the alloc field to get allocated data that can be freed by munmap() */
+void *hwloc_alloc_mmap(hwloc_topology_t topology, size_t len);
+
+/* This can be used for the free_membind field to free data using free() */
+int hwloc_free_heap(hwloc_topology_t topology, void *addr, size_t len);
+
+/* This can be used for the free_membind field to free data using munmap() */
+int hwloc_free_mmap(hwloc_topology_t topology, void *addr, size_t len);
+
+/* Allocates unbound memory or fail, depending on whether STRICT is requested
+ * or not */
+static __hwloc_inline void *
+hwloc_alloc_or_fail(hwloc_topology_t topology, size_t len, int flags)
+{
+  if (flags & HWLOC_MEMBIND_STRICT)
+    return NULL;
+  return hwloc_alloc(topology, len);
+}
+
+extern void hwloc_internal_distances_init(hwloc_topology_t topology);
+extern void hwloc_internal_distances_prepare(hwloc_topology_t topology);
+extern void hwloc_internal_distances_destroy(hwloc_topology_t topology);
+extern int hwloc_internal_distances_dup(hwloc_topology_t new, hwloc_topology_t old);
+extern void hwloc_internal_distances_refresh(hwloc_topology_t topology);
+extern int hwloc_internal_distances_add(hwloc_topology_t topology, unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values, unsigned long kind, unsigned long flags);
+extern int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, hwloc_obj_type_t type, unsigned nbobjs, uint64_t *indexes, uint64_t *values, unsigned long kind, unsigned long flags);
+extern void hwloc_internal_distances_invalidate_cached_objs(hwloc_topology_t topology);
+
+#ifdef HAVE_USELOCALE
+#include "locale.h"
+#ifdef HAVE_XLOCALE_H
+#include "xlocale.h"
+#endif
+#define hwloc_localeswitch_declare locale_t __old_locale = (locale_t)0, __new_locale
+#define hwloc_localeswitch_init() do {                     \
+  __new_locale = newlocale(LC_ALL_MASK, "C", (locale_t)0); \
+  if (__new_locale != (locale_t)0)                         \
+    __old_locale = uselocale(__new_locale);                \
+} while (0)
+#define hwloc_localeswitch_fini() do { \
+  if (__new_locale != (locale_t)0) {   \
+    uselocale(__old_locale);           \
+    freelocale(__new_locale);          \
+  }                                    \
+} while(0)
+#else /* HAVE_USELOCALE */
+#if __HWLOC_HAVE_ATTRIBUTE_UNUSED
+#define hwloc_localeswitch_declare int __dummy_nolocale __hwloc_attribute_unused
+#define hwloc_localeswitch_init()
+#else
+#define hwloc_localeswitch_declare int __dummy_nolocale
+#define hwloc_localeswitch_init() (void)__dummy_nolocale
+#endif
+#define hwloc_localeswitch_fini()
+#endif /* HAVE_USELOCALE */
+
+#if !HAVE_DECL_FABSF
+#define fabsf(f) fabs((double)(f))
+#endif
+
+#if HAVE_DECL__SC_PAGE_SIZE
+#define hwloc_getpagesize() sysconf(_SC_PAGE_SIZE)
+#elif HAVE_DECL__SC_PAGESIZE
+#define hwloc_getpagesize() sysconf(_SC_PAGESIZE)
+#elif defined HAVE_GETPAGESIZE
+#define hwloc_getpagesize() getpagesize()
+#else
+#undef hwloc_getpagesize
+#endif
+
+/* encode src buffer into target buffer.
+ * targsize must be at least 4*((srclength+2)/3)+1.
+ * target will be 0-terminated.
+ */
+extern int hwloc_encode_to_base64(const char *src, size_t srclength, char *target, size_t targsize);
+/* decode src buffer into target buffer.
+ * src is 0-terminated.
+ * targsize must be at least srclength*3/4+1 (srclength not including \0)
+ * but only srclength*3/4 characters will be meaningful
+ * (the next one may be partially written during decoding, but it should be ignored).
+ */
+extern int hwloc_decode_from_base64(char const *src, char *target, size_t targsize);
+
+/* Check whether needle matches the beginning of haystack, at least n, and up
+ * to a colon or \0 */
+extern int hwloc_namecoloncmp(const char *haystack, const char *needle, size_t n);
+
+#ifdef HWLOC_HAVE_ATTRIBUTE_FORMAT
+# if HWLOC_HAVE_ATTRIBUTE_FORMAT
+#  define __hwloc_attribute_format(type, str, arg)  __attribute__((__format__(type, str, arg)))
+# else
+#  define __hwloc_attribute_format(type, str, arg)
+# endif
+#else
+# define __hwloc_attribute_format(type, str, arg)
+#endif
+
+#define hwloc_memory_size_printf_value(_size, _verbose) \
+  ((_size) < (10ULL<<20) || _verbose ? (((_size)>>9)+1)>>1 : (_size) < (10ULL<<30) ? (((_size)>>19)+1)>>1 : (_size) < (10ULL<<40) ? (((_size)>>29)+1)>>1 : (((_size)>>39)+1)>>1)
+#define hwloc_memory_size_printf_unit(_size, _verbose) \
+  ((_size) < (10ULL<<20) || _verbose ? "KB" : (_size) < (10ULL<<30) ? "MB" : (_size) < (10ULL<<40) ? "GB" : "TB")
+
+/* On some systems, snprintf returns the size of written data, not the actually
+ * required size.  hwloc_snprintf always reports the actually required size. */
+extern int hwloc_snprintf(char *str, size_t size, const char *format, ...) __hwloc_attribute_format(printf, 3, 4);
+
+extern void hwloc_obj_add_info_nodup(hwloc_obj_t obj, const char *name, const char *value, int nodup);
+
+/* Return the name of the currently running program, if supported.
+ * If not NULL, must be freed by the caller.
+ */
+extern char * hwloc_progname(struct hwloc_topology *topology);
+
+#define HWLOC_BITMAP_EQUAL 0       /* Bitmaps are equal */
+#define HWLOC_BITMAP_INCLUDED 1    /* First bitmap included in second */
+#define HWLOC_BITMAP_CONTAINS 2    /* First bitmap contains second */
+#define HWLOC_BITMAP_INTERSECTS 3  /* Bitmaps intersect without any inclusion */
+#define HWLOC_BITMAP_DIFFERENT  4  /* Bitmaps do not intersect */
+
+/** \brief Compare bitmaps \p bitmap1 and \p bitmap2 from an inclusion point of view.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_compare_inclusion(hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) __hwloc_attribute_pure;
+
+/* obj->attr->group.kind internal values.
+ * the core will keep the highest ones when merging two groups.
+ */
+#define HWLOC_GROUP_KIND_NONE				0 /* user can use subkind */
+#define HWLOC_GROUP_KIND_DISTANCE			1 /* subkind is round of adding these groups during distance based grouping */
+#define HWLOC_GROUP_KIND_IO				2 /* no subkind */
+#define HWLOC_GROUP_KIND_WINDOWS_RELATIONSHIP_UNKNOWN	3 /* no subkind */
+#define HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP	4 /* no subkind */
+#define HWLOC_GROUP_KIND_AIX_SDL_UNKNOWN		5 /* subkind is SDL level */
+#define HWLOC_GROUP_KIND_INTEL_X2APIC_UNKNOWN		6 /* subkind is x2APIC unknown level */
+#define HWLOC_GROUP_KIND_S390_BOOK			7 /* no subkind */
+#define HWLOC_GROUP_KIND_INTEL_SUBNUMA_CLUSTER		8 /* no subkind */
+#define HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT		9 /* no subkind */
+#define HWLOC_GROUP_KIND_SYNTHETIC			10 /* subkind is group depth within synthetic description */
+
+#endif /* HWLOC_PRIVATE_H */
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/private/solaris-chiptype.h b/opal/mca/hwloc/hwloc2x/hwloc/include/private/solaris-chiptype.h
new file mode 100644
index 0000000000..4af80d88f3
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/private/solaris-chiptype.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright © 2009-2010 Oracle and/or its affiliates.  All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+
+#ifdef HWLOC_INSIDE_PLUGIN
+/*
+ * these declarations are internal only, they are not available to plugins
+ * (functions below are internal static symbols).
+ */
+#error This file should not be used in plugins
+#endif
+
+
+#ifndef HWLOC_PRIVATE_SOLARIS_CHIPTYPE_H
+#define HWLOC_PRIVATE_SOLARIS_CHIPTYPE_H
+
+/* SPARC Chip Modes. */
+#define MODE_UNKNOWN            0
+#define MODE_SPITFIRE           1
+#define MODE_BLACKBIRD          2
+#define MODE_CHEETAH            3
+#define MODE_SPARC64_VI         4
+#define MODE_T1                 5
+#define MODE_T2                 6
+#define MODE_SPARC64_VII        7
+#define MODE_ROCK               8
+
+/* SPARC Chip Implementations. */
+#define IMPL_SPARC64_VI         0x6
+#define IMPL_SPARC64_VII        0x7
+#define IMPL_SPITFIRE           0x10
+#define IMPL_BLACKBIRD          0x11
+#define IMPL_SABRE              0x12
+#define IMPL_HUMMINGBIRD        0x13
+#define IMPL_CHEETAH            0x14
+#define IMPL_CHEETAHPLUS        0x15
+#define IMPL_JALAPENO           0x16
+#define IMPL_JAGUAR             0x18
+#define IMPL_PANTHER            0x19
+#define IMPL_NIAGARA            0x23
+#define IMPL_NIAGARA_2          0x24
+#define IMPL_ROCK               0x25
+
+/* Default Mfg, Cache, Speed settings */
+#define TI_MANUFACTURER         0x17
+#define TWO_MEG_CACHE           2097152
+#define SPITFIRE_SPEED          142943750
+
+char* hwloc_solaris_get_chip_type(void);
+char* hwloc_solaris_get_chip_model(void);
+
+#endif /* HWLOC_PRIVATE_SOLARIS_CHIPTYPE_H */
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/include/private/xml.h b/opal/mca/hwloc/hwloc2x/hwloc/include/private/xml.h
new file mode 100644
index 0000000000..b2eeac256a
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/include/private/xml.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright © 2009-2016 Inria.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+#ifndef PRIVATE_XML_H
+#define PRIVATE_XML_H 1
+
+#include <hwloc.h>
+
+#include <sys/types.h>
+
+HWLOC_DECLSPEC int hwloc__xml_verbose(void);
+
+/**************
+ * XML import *
+ **************/
+
+typedef struct hwloc__xml_import_state_s {
+  struct hwloc__xml_import_state_s *parent;
+
+  /* globals shared across the entire stack of states during import */
+  struct hwloc_xml_backend_data_s *global;
+
+  /* opaque data used to store backend-specific data.
+   * statically allocated to allow stack-allocation by the common code without knowing actual backend needs.
+   */
+  char data[32];
+} * hwloc__xml_import_state_t;
+
+struct hwloc__xml_imported_v1distances_s {
+  unsigned long kind;
+  unsigned nbobjs;
+  float *floats;
+  struct hwloc__xml_imported_v1distances_s *prev, *next;
+};
+
+HWLOC_DECLSPEC int hwloc__xml_import_diff(hwloc__xml_import_state_t state, hwloc_topology_diff_t *firstdiffp);
+
+struct hwloc_xml_backend_data_s {
+  /* xml backend parameters */
+  int (*look_init)(struct hwloc_xml_backend_data_s *bdata, struct hwloc__xml_import_state_s *state);
+  void (*look_failed)(struct hwloc_xml_backend_data_s *bdata);
+  void (*backend_exit)(struct hwloc_xml_backend_data_s *bdata);
+  int (*next_attr)(struct hwloc__xml_import_state_s * state, char **namep, char **valuep);
+  int (*find_child)(struct hwloc__xml_import_state_s * state, struct hwloc__xml_import_state_s * childstate, char **tagp);
+  int (*close_tag)(struct hwloc__xml_import_state_s * state); /* look for an explicit closing tag </name> */
+  void (*close_child)(struct hwloc__xml_import_state_s * state);
+  int (*get_content)(struct hwloc__xml_import_state_s * state, char **beginp, size_t expected_length); /* return 0 on empty content (and sets beginp to empty string), 1 on actual content, -1 on error or unexpected content length */
+  void (*close_content)(struct hwloc__xml_import_state_s * state);
+  char * msgprefix;
+  void *data; /* libxml2 doc, or nolibxml buffer */
+  unsigned nbnumanodes;
+  hwloc_obj_t first_numanode, last_numanode; /* temporary cousin-list for handling v1distances */
+  struct hwloc__xml_imported_v1distances_s *first_v1dist, *last_v1dist;
+};
+
+/**************
+ * XML export *
+ **************/
+
+typedef struct hwloc__xml_export_state_s {
+  struct hwloc__xml_export_state_s *parent;
+
+  void (*new_child)(struct hwloc__xml_export_state_s *parentstate, struct hwloc__xml_export_state_s *state, const char *name);
+  void (*new_prop)(struct hwloc__xml_export_state_s *state, const char *name, const char *value);
+  void (*add_content)(struct hwloc__xml_export_state_s *state, const char *buffer, size_t length);
+  void (*end_object)(struct hwloc__xml_export_state_s *state, const char *name);
+
+  /* opaque data used to store backend-specific data.
+   * statically allocated to allow stack-allocation by the common code without knowing actual backend needs.
+   */
+  char data[40];
+} * hwloc__xml_export_state_t;
+
+HWLOC_DECLSPEC void hwloc__xml_export_topology(hwloc__xml_export_state_t parentstate, hwloc_topology_t topology, unsigned long flags);
+
+HWLOC_DECLSPEC void hwloc__xml_export_diff(hwloc__xml_export_state_t parentstate, hwloc_topology_diff_t diff);
+
+/******************
+ * XML components *
+ ******************/
+
+struct hwloc_xml_callbacks {
+  int (*backend_init)(struct hwloc_xml_backend_data_s *bdata, const char *xmlpath, const char *xmlbuffer, int xmlbuflen);
+  int (*export_file)(struct hwloc_topology *topology, const char *filename, unsigned long flags);
+  int (*export_buffer)(struct hwloc_topology *topology, char **xmlbuffer, int *buflen, unsigned long flags);
+  void (*free_buffer)(void *xmlbuffer);
+  int (*import_diff)(struct hwloc__xml_import_state_s *state, const char *xmlpath, const char *xmlbuffer, int xmlbuflen, hwloc_topology_diff_t *diff, char **refnamep);
+  int (*export_diff_file)(union hwloc_topology_diff_u *diff, const char *refname, const char *filename);
+  int (*export_diff_buffer)(union hwloc_topology_diff_u *diff, const char *refname, char **xmlbuffer, int *buflen);
+};
+
+struct hwloc_xml_component {
+  struct hwloc_xml_callbacks *nolibxml_callbacks;
+  struct hwloc_xml_callbacks *libxml_callbacks;
+};
+
+HWLOC_DECLSPEC void hwloc_xml_callbacks_register(struct hwloc_xml_component *component);
+HWLOC_DECLSPEC void hwloc_xml_callbacks_reset(void);
+
+#endif /* PRIVATE_XML_H */
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/netloc.pc.in b/opal/mca/hwloc/hwloc2x/hwloc/netloc.pc.in
new file mode 100644
index 0000000000..ce9c43ef70
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/netloc.pc.in
@@ -0,0 +1,11 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: netloc
+Description: Network locality detection and management library
+Version: @HWLOC_VERSION@
+Cflags: -I${includedir}
+Libs: -L${libdir} -lnetloc -lhwloc
+Libs.private: @LIBS@
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/netloc/Makefile.am b/opal/mca/hwloc/hwloc2x/hwloc/netloc/Makefile.am
new file mode 100644
index 0000000000..eb5dd58ac0
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/netloc/Makefile.am
@@ -0,0 +1,87 @@
+# Copyright © 2014 Cisco Systems, Inc.  All rights reserved.
+# Copyright © 2014      University of Wisconsin-La Crosse.
+#                         All rights reserved.
+#
+# Copyright © 2016 Inria.  All rights reserved.
+# See COPYING in top-level directory.
+#
+# $HEADER$
+#
+
+if BUILD_NETLOC
+AM_CPPFLAGS = \
+        $(NETLOC_CPPFLAGS) \
+        $(HWLOC_CPPFLAGS)
+
+# If we're in standalone mode, build the installable library.
+# Otherwise, build the embedded library.
+
+if HWLOC_BUILD_STANDALONE
+lib_LTLIBRARIES = libnetloc.la
+else
+noinst_LTLIBRARIES = libnetloc_embedded.la
+endif
+
+sources = \
+        support.c \
+        topology.c \
+        edge.c \
+        node.c \
+        physical_link.c \
+        path.c \
+        architecture.c \
+        hwloc.c \
+        mpicomm.c
+
+
+# Installable library
+
+libnetloc_la_SOURCES = $(sources)
+libnetloc_la_LDFLAGS = $(ldflags) -version-info $(libnetloc_so_version)
+libnetloc_la_LIBADD = \
+        $(top_builddir)/hwloc/libhwloc.la
+
+# Embedded library (note the lack of a .so version number -- that
+# intentionally only appears in the installable library).  Also note
+# the lack of _LDFLAGS -- all libs are added by the upper layer (via
+# HWLOC_EMBEDDED_LIBS).
+
+libnetloc_embedded_la_SOURCES = $(sources)
+libnetloc_embedded_la_LDFLAGS = $(ldflags)
+libnetloc_embedded_la_LIBADD = \
+        $(HWLOC_top_builddir)/hwloc/libhwloc_embedded.la
+
+if BUILD_NETLOCSCOTCH
+# If we're in standalone mode, build the installable library.
+# Otherwise, build the embedded library.
+
+if HWLOC_BUILD_STANDALONE
+lib_LTLIBRARIES += libnetlocscotch.la
+else
+noinst_LTLIBRARIES += libnetlocscotch_embedded.la
+endif
+
+scotchsources = \
+        scotch.c
+
+# Installable library
+
+libnetlocscotch_la_SOURCES = $(scotchsources)
+libnetlocscotch_la_LDFLAGS = -version-info $(libnetloc_so_version)
+libnetlocscotch_la_LIBADD = \
+        $(top_builddir)/hwloc/libhwloc.la \
+        libnetloc.la
+
+# Embedded library (note the lack of a .so version number -- that
+# intentionally only appears in the installable library).  Also note
+# the lack of _LDFLAGS -- all libs are added by the upper layer (via
+# HWLOC_EMBEDDED_LIBS).
+
+libnetlocscotch_embedded_la_SOURCES = $(scotchsources)
+libnetlocscotch_embedded_la_LDFLAGS =
+libnetlocscotch_embedded_la_LIBADD = \
+        $(HWLOC_top_builddir)/hwloc/libhwloc_embedded.la \
+        $(HWLOC_top_builddir)/hwloc/libnetloc_embedded.la
+
+endif BUILD_NETLOCSCOTCH
+endif BUILD_NETLOC
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/netloc/architecture.c b/opal/mca/hwloc/hwloc2x/hwloc/netloc/architecture.c
new file mode 100644
index 0000000000..a02136a44e
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/netloc/architecture.c
@@ -0,0 +1,852 @@
+/*
+ * Copyright © 2016-2017 Inria.  All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ * See COPYING in top-level directory.
+ *
+ * $HEADER$
+ */
+
+#define _GNU_SOURCE         /* See feature_test_macros(7) */
+#include <private/netloc.h>
+#include <netloc.h>
+
+typedef struct netloc_analysis_data_t {
+    int level;
+    void *userdata;
+} netloc_analysis_data;
+
+
+static int partition_topology_to_tleaf(netloc_topology_t *topology,
+        int partition, int num_cores, netloc_arch_t *arch);
+static netloc_arch_tree_t *tree_merge(netloc_arch_tree_t *main,
+        netloc_arch_tree_t *sub);
+static int netloc_arch_tree_destruct(netloc_arch_tree_t *tree);
+static int netloc_arch_node_destruct(netloc_arch_node_t *arch_node);
+static netloc_arch_node_t *netloc_arch_node_construct(void);
+
+#define checked_fscanf(f, w, str, failed) \
+    if (fscanf(f, " %1023s", w) != 1) { \
+        fprintf(stderr, "Cannot read %s\n", str); \
+        perror("fscanf"); \
+        goto ERROR; \
+    }
+
+
+/* Complete the topology to have a complete balanced tree  */
+void netloc_arch_tree_complete(netloc_arch_tree_t *tree, UT_array **down_degrees_by_level,
+        int num_hosts, int **parch_idx)
+{
+    int num_levels = tree->num_levels;
+    NETLOC_int *max_degrees = tree->degrees;
+
+    /* Complete the tree by inserting nodes */
+    for (int l = 0; l < num_levels-1; l++) { // from the root to the leaves
+        int num_degrees = utarray_len(down_degrees_by_level[l]);
+        int *degrees = (int *)down_degrees_by_level[l]->d;
+        NETLOC_int max_degree = max_degrees[l];
+
+        unsigned int down_level_idx = 0;
+        UT_array *down_level_degrees = down_degrees_by_level[l+1];
+        NETLOC_int down_level_max_degree = max_degrees[l+1];
+        for (int d = 0; d < num_degrees; d++) {
+            int degree = degrees[d];
+            if (degree > 0) {
+                down_level_idx += degree;
+                if (degree < max_degree) {
+                    int missing_degree = (degree-max_degree)*down_level_max_degree;
+                    utarray_insert(down_level_degrees, &missing_degree, down_level_idx);
+                    down_level_idx++;
+                }
+            } else {
+                int missing_degree = degree*down_level_max_degree;
+                utarray_insert(down_level_degrees, &missing_degree, down_level_idx);
+                down_level_idx++;
+            }
+        }
+    }
+
+    /* Indices for the list of hosts, in the complete architecture */
+    int num_degrees = utarray_len(down_degrees_by_level[num_levels-1]);
+    int *degrees = (int *)down_degrees_by_level[num_levels-1]->d;
+    NETLOC_int max_degree = max_degrees[num_levels-1];
+    int ghost_idx = 0;
+    int idx = 0;
+    int *arch_idx = (int *)malloc(sizeof(int[num_hosts]));
+    for (int d = 0; d < num_degrees; d++) {
+        int degree = degrees[d];
+        int diff;
+
+        if (degree > 0) {
+            diff = max_degree-degree;
+        } else {
+            diff = -degree;
+        }
+
+        for (int i = 0; i < degree; i++) {
+            arch_idx[idx++] = ghost_idx++;
+        }
+        ghost_idx += diff;
+    }
+    *parch_idx = arch_idx;
+}
+
+NETLOC_int netloc_arch_tree_num_leaves(netloc_arch_tree_t *tree)
+{
+    NETLOC_int num_leaves = 1;
+    for (int l = 0; l < tree->num_levels; l++) {
+        num_leaves *= tree->degrees[l];
+    }
+    return num_leaves;
+}
+
+static int get_current_resources(int *pnum_nodes, char ***pnodes, int **pslot_idx,
+        int **pslot_list, int **prank_list)
+{
+    char *filename = getenv("NETLOC_CURRENTSLOTS");
+    char word[1024];
+    char *end_word;
+    int *slot_list = NULL;
+    int *rank_list = NULL;
+    int *slot_idx = NULL;
+    char **nodes = NULL;
+
+    if (!filename) {
+        fprintf(stderr, "You need to set NETLOC_CURRENTSLOTS\n");
+        return NETLOC_ERROR;
+    }
+
+    FILE *file = fopen(filename, "r");
+    if (!file) {
+        perror("fopen");
+        return NETLOC_ERROR;
+    }
+
+    checked_fscanf(file, word, "num_nodes", failed);
+
+    int num_nodes;
+    num_nodes = strtol(word, &end_word, 10);
+    if (*word == '\0' || *end_word != '\0' || num_nodes <= 0) {
+        fprintf(stderr, "Oups: incorrect number of nodes (%d) in \"%s\"\n",
+                num_nodes, word);
+        goto ERROR;
+    }
+
+    nodes = (char **)malloc(sizeof(char *[num_nodes]));
+    for (int n = 0; n < num_nodes; n++) {
+        checked_fscanf(file, word, "node", failed);
+        nodes[n] = strdup(word);
+    }
+
+    slot_idx = (int *)malloc(sizeof(int[num_nodes+1]));
+    slot_idx[0] = 0;
+    for (int n = 0; n < num_nodes; n++) {
+        checked_fscanf(file, word, "slot index", failed);
+
+        int slot_index = strtol(word, &end_word, 10);
+        if (*word == '\0' || *end_word != '\0' || slot_index < 0) {
+            fprintf(stderr, "Oups: incorrect slot index (%d) in \"%s\"\n",
+                    slot_index, word);
+            goto ERROR;
+        }
+        slot_idx[n+1] = slot_idx[n]+slot_index;
+    }
+
+    slot_list = (int *)malloc(sizeof(int[slot_idx[num_nodes]]));
+    rank_list = (int *)malloc(sizeof(int[slot_idx[num_nodes]]));
+    for (int s = 0; s < slot_idx[num_nodes]; s++) {
+        checked_fscanf(file, word, "slot number", failed);
+        slot_list[s] = strtol(word, &end_word, 10);
+        if (*word == '\0' || *end_word != '\0' || slot_list[s] < 0) {
+            fprintf(stderr, "Oups: incorrect slot number (%d) in \"%s\"\n",
+                    slot_list[s], word);
+            goto ERROR;
+        }
+
+        checked_fscanf(file, word, "rank number", failed);
+        rank_list[s] = strtol(word, &end_word, 10);
+        if (*word == '\0' || *end_word != '\0' || rank_list[s] < 0) {
+            fprintf(stderr, "Oups: incorrect rank number (%d) in \"%s\"\n",
+                    rank_list[s], word);
+            goto ERROR;
+        }
+    }
+
+    *pnum_nodes = num_nodes;
+    *pnodes = nodes;
+    *pslot_idx = slot_idx;
+    *pslot_list = slot_list;
+    *prank_list = rank_list;
+
+    fclose(file);
+
+    return NETLOC_SUCCESS;
+
+ERROR:
+    fclose(file);
+    free(nodes);
+    free(slot_idx);
+    free(slot_list);
+    free(rank_list);
+    return NETLOC_ERROR;
+}
+
+int netloc_arch_set_current_resources(netloc_arch_t *arch)
+{
+    int ret;
+    int num_nodes;
+    char **nodenames;
+    int *slot_idx;
+    int *slot_list;
+    int *rank_list;
+
+    ret = get_current_resources(&num_nodes, &nodenames, &slot_idx, &slot_list,
+            &rank_list);
+
+    if (ret != NETLOC_SUCCESS || num_nodes <= 0)
+        assert(0); // XXX
+
+    NETLOC_int *current_nodes = NULL;
+
+    if (!arch->has_slots) {
+        current_nodes = (NETLOC_int *) malloc(sizeof(NETLOC_int[num_nodes]));
+    }
+
+    netloc_arch_node_t **arch_node_list = (netloc_arch_node_t **)
+        malloc(sizeof(netloc_arch_node_t *[num_nodes]));
+    netloc_node_t **node_list = (netloc_node_t **)
+        malloc(sizeof(netloc_node_t *[num_nodes]));
+    for (int n = 0; n < num_nodes; n++) {
+        netloc_arch_node_t *arch_node;
+        HASH_FIND_STR(arch->nodes_by_name, nodenames[n], arch_node);
+        if (!arch_node) {
+            ret = NETLOC_ERROR;
+            goto ERROR;
+        }
+        arch_node_list[n] = arch_node;
+        node_list[n] = arch_node->node;
+    }
+
+    ret = netloc_topology_read_hwloc(arch->topology, num_nodes, node_list);
+    if( NETLOC_SUCCESS != ret ) {
+        goto ERROR;
+    }
+
+    int constant_num_slots = 0;
+    for (int n = 0; n < num_nodes; n++) {
+        netloc_arch_node_t *node = arch_node_list[n];
+
+        ret = netloc_arch_node_get_hwloc_info(node);
+        if (ret != NETLOC_SUCCESS)
+            goto ERROR;
+
+
+        if (!arch->has_slots) {
+            current_nodes[n] = node->idx_in_topo;
+        }
+
+        int num_slots = slot_idx[n+1]-slot_idx[n];
+        node->num_current_slots = num_slots;
+
+        /* Nodes with different number of slots are not handled yet, because we
+         * build the scotch architecture without taking account of the
+         * available cores inside nodes, and Scotch is not able to weight the
+         * nodes */
+        if (!arch->has_slots) {
+            if (constant_num_slots) {
+                if (constant_num_slots != num_slots) {
+                    fprintf(stderr, "Oups: the same number of cores by node is needed!\n");
+                    assert(constant_num_slots == num_slots);
+                }
+            } else {
+                constant_num_slots = num_slots;
+            }
+        }
+
+        node->current_slots = (NETLOC_int *)
+            malloc(sizeof(NETLOC_int[num_slots]));
+        NETLOC_int num_leaves = netloc_arch_tree_num_leaves(node->slot_tree);
+        node->slot_ranks = (int *)
+            malloc(sizeof(int[num_leaves]));
+
+        for (int s = slot_idx[n]; s < slot_idx[n+1]; s++) {
+            int slot = slot_list[s];
+            node->current_slots[s-slot_idx[n]] = node->slot_idx[slot];
+            node->slot_ranks[node->slot_idx[slot]] = rank_list[s];
+        }
+    }
+
+    if (!arch->has_slots) {
+        arch->num_current_hosts = num_nodes;
+        arch->current_hosts = current_nodes;
+        arch->arch.global_tree = arch->arch.node_tree;
+
+        /* Build nodes_by_idx */
+        NETLOC_int tree_size = netloc_arch_tree_num_leaves(arch->arch.node_tree);
+        netloc_arch_node_slot_t *nodes_by_idx = (netloc_arch_node_slot_t *)
+            malloc(sizeof(netloc_arch_node_slot_t[tree_size]));
+        for (int n = 0; n < num_nodes; n++) {
+            netloc_arch_node_t *node = arch_node_list[n];
+            nodes_by_idx[node->idx_in_topo].node = node;
+            nodes_by_idx[node->idx_in_topo].slot = -1;
+        }
+        arch->node_slot_by_idx = nodes_by_idx;
+
+
+    } else {
+        int num_hosts = slot_idx[num_nodes];
+        NETLOC_int *current_hosts = (NETLOC_int *)
+            malloc(sizeof(NETLOC_int[num_hosts]));
+        /* Add the slot trees to the node tree */
+
+        /* Check that each slot tree has the same size */
+        int slot_tree_size = 0;
+        for (int n = 0; n < num_nodes; n++) {
+            netloc_arch_node_t *node = arch_node_list[n];
+            int current_size = netloc_arch_tree_num_leaves(node->slot_tree);
+            if (!slot_tree_size) {
+                slot_tree_size = current_size;
+            } else {
+                if (slot_tree_size != current_size) {
+                    assert(0);
+                }
+            }
+        }
+
+        int current_host_idx = 0;
+        int node_tree_size = netloc_arch_tree_num_leaves(arch->arch.node_tree);
+        int global_tree_size = node_tree_size*slot_tree_size;
+        netloc_arch_node_slot_t *nodes_by_idx = (netloc_arch_node_slot_t *)
+            malloc(sizeof(netloc_arch_node_slot_t[global_tree_size]));
+        for (int n = 0; n < num_nodes; n++) {
+            netloc_arch_node_t *node = arch_node_list[n];
+            for (int s = slot_idx[n]; s < slot_idx[n+1]; s++) {
+                int slot_rank = s-slot_idx[n];
+                int topo_idx = node->idx_in_topo*slot_tree_size +
+                    node->slot_idx[slot_rank];
+                nodes_by_idx[topo_idx].node = node;
+                nodes_by_idx[topo_idx].slot = slot_rank;
+                current_hosts[current_host_idx++] = topo_idx;
+            }
+        }
+        arch->num_current_hosts = current_host_idx;
+        arch->current_hosts = current_hosts;
+        arch->node_slot_by_idx = nodes_by_idx;
+
+        netloc_arch_tree_t *new_tree =
+            tree_merge(arch->arch.node_tree, arch_node_list[0]->slot_tree);
+        netloc_arch_tree_destruct(arch->arch.node_tree);
+        arch->arch.global_tree = new_tree;
+    }
+
+ERROR:
+    for (int n = 0; n < num_nodes; n++) {
+        free(nodenames[n]);
+    }
+    free(nodenames);
+    free(slot_idx);
+    free(slot_list);
+    free(rank_list);
+    free(arch_node_list);
+    free(node_list);
+
+    if (ret == NETLOC_SUCCESS)
+        return ret;
+
+    free(current_nodes);
+    return ret;
+}
+
+int netloc_arch_set_global_resources(netloc_arch_t *arch)
+{
+    int ret;
+    NETLOC_int *current_nodes = NULL;
+    int *slot_idx = NULL;
+
+    int num_nodes =  HASH_COUNT(arch->nodes_by_name);
+    if (!arch->has_slots) {
+        current_nodes = (NETLOC_int *) malloc(sizeof(NETLOC_int[num_nodes]));
+    }
+
+    ret = netloc_topology_read_hwloc(arch->topology, 0, NULL);
+    if( NETLOC_SUCCESS != ret ) {
+        goto ERROR;
+    }
+
+    int constant_num_slots = 0;
+    slot_idx = (int *)malloc(sizeof(int[num_nodes+1]));
+    slot_idx[0] = 0;
+    int current_idx = 0;
+    netloc_arch_node_t *node, *node_tmp;
+    HASH_ITER(hh, arch->nodes_by_name, node, node_tmp) {
+        ret = netloc_arch_node_get_hwloc_info(node);
+        if (ret != NETLOC_SUCCESS)
+            goto ERROR;
+
+        if (!arch->has_slots) {
+            current_nodes[current_idx] = node->idx_in_topo;
+        }
+        current_idx++;
+
+        int num_slots = node->num_slots;
+        node->num_current_slots = num_slots;
+
+        slot_idx[current_idx] = slot_idx[current_idx-1]+num_slots;
+
+        /* Nodes with different number of slots are not handled yet, because we
+         * build the scotch architecture without taking account of the
+         * available cores inside nodes, and Scotch is not able to weight the
+         * nodes */
+        if (!arch->has_slots) {
+            if (constant_num_slots) {
+                if (constant_num_slots != num_slots) {
+                    fprintf(stderr, "Oups: the same number of cores by node is needed!\n");
+                    assert(constant_num_slots == num_slots);
+                }
+            } else {
+                constant_num_slots = num_slots;
+            }
+        }
+    }
+
+    if (!arch->has_slots) {
+        arch->num_current_hosts = num_nodes;
+        arch->current_hosts = current_nodes;
+        arch->arch.global_tree = arch->arch.node_tree;
+
+        /* Build nodes_by_idx */
+        int tree_size = netloc_arch_tree_num_leaves(arch->arch.node_tree);
+        netloc_arch_node_slot_t *nodes_by_idx = (netloc_arch_node_slot_t *)
+            malloc(sizeof(netloc_arch_node_slot_t[tree_size]));
+        netloc_arch_node_t *node, *node_tmp;
+        HASH_ITER(hh, arch->nodes_by_name, node, node_tmp) {
+            nodes_by_idx[node->idx_in_topo].node = node;
+            nodes_by_idx[node->idx_in_topo].slot = -1;
+        }
+        arch->node_slot_by_idx = nodes_by_idx;
+
+
+    } else {
+        int num_hosts = slot_idx[num_nodes];
+        NETLOC_int *current_hosts = (NETLOC_int *)
+            malloc(sizeof(NETLOC_int[num_hosts]));
+        netloc_arch_node_t *node, *node_tmp;
+        /* Add the slot trees to the node tree */
+
+        /* Check that each slot tree has the same size */
+        int slot_tree_size = 0;
+        HASH_ITER(hh, arch->nodes_by_name, node, node_tmp) {
+            int current_size = netloc_arch_tree_num_leaves(node->slot_tree);
+            if (!slot_tree_size) {
+                slot_tree_size = current_size;
+            } else {
+                if (slot_tree_size != current_size) {
+                    assert(0);
+                }
+            }
+        }
+
+        int current_host_idx = 0;
+        int node_tree_size = netloc_arch_tree_num_leaves(arch->arch.node_tree);
+        int global_tree_size = node_tree_size*slot_tree_size;
+        netloc_arch_node_slot_t *nodes_by_idx = (netloc_arch_node_slot_t *)
+            malloc(sizeof(netloc_arch_node_slot_t[global_tree_size]));
+        int n = 0;
+        HASH_ITER(hh, arch->nodes_by_name, node, node_tmp) {
+            for (int s = slot_idx[n]; s < slot_idx[n+1]; s++) {
+                int slot_rank = s-slot_idx[n];
+                int topo_idx = node->idx_in_topo*slot_tree_size +
+                    node->slot_idx[slot_rank];
+                nodes_by_idx[topo_idx].node = node;
+                nodes_by_idx[topo_idx].slot = slot_rank;
+                current_hosts[current_host_idx++] = topo_idx;
+            }
+            n++;
+        }
+        arch->num_current_hosts = current_host_idx;
+        arch->current_hosts = current_hosts;
+        arch->node_slot_by_idx = nodes_by_idx;
+
+        netloc_arch_tree_t *new_tree =
+            tree_merge(arch->arch.node_tree, arch->nodes_by_name->slot_tree);
+        netloc_arch_tree_destruct(arch->arch.node_tree);
+        arch->arch.global_tree = new_tree;
+    }
+
+ERROR:
+    free(slot_idx);
+
+    if (ret == NETLOC_SUCCESS)
+        return ret;
+
+    free(current_nodes);
+    return ret;
+}
+
+netloc_arch_tree_t *tree_merge(netloc_arch_tree_t *main, netloc_arch_tree_t *sub)
+{
+    netloc_arch_tree_t *new_tree = (netloc_arch_tree_t *)
+        malloc(sizeof(netloc_arch_tree_t));
+
+    int num_levels = main->num_levels+sub->num_levels;
+    new_tree->num_levels = num_levels;
+    new_tree->degrees = (NETLOC_int *)malloc(sizeof(NETLOC_int[num_levels]));
+    new_tree->cost = (NETLOC_int *)malloc(sizeof(NETLOC_int[num_levels]));
+
+    memcpy(new_tree->degrees, main->degrees,
+            main->num_levels*sizeof(*new_tree->degrees));
+    memcpy(new_tree->degrees+main->num_levels, sub->degrees,
+            sub->num_levels*sizeof(*new_tree->degrees));
+
+    int out_coeff = 10;
+    for (int l = 0; l < main->num_levels; l++) {
+        new_tree->cost[l] = main->cost[l]*sub->cost[0]*out_coeff;
+    }
+    memcpy(new_tree->cost+main->num_levels, sub->cost,
+            sub->num_levels*sizeof(*new_tree->cost));
+
+    return new_tree;
+}
+
+static int netloc_arch_tree_destruct(netloc_arch_tree_t *tree)
+{
+    free(tree->cost);
+    free(tree->degrees);
+    free(tree);
+
+    return NETLOC_SUCCESS;
+}
+
+
+int partition_topology_to_tleaf(netloc_topology_t *topology,
+        int partition, int num_cores, netloc_arch_t *arch)
+{
+    int ret = 0;
+    UT_array *nodes;
+    utarray_new(nodes, &ut_ptr_icd);
+
+    netloc_arch_tree_t *tree = (netloc_arch_tree_t *)
+        malloc(sizeof(netloc_arch_tree_t));
+    arch->arch.node_tree = tree;
+    arch->type = NETLOC_ARCH_TREE;
+
+    /* we build nodes from host list in the given partition
+     * and we init all the analysis data */
+    netloc_node_t *node, *node_tmp;
+    netloc_topology_iter_nodes(topology, node, node_tmp) {
+        if (!netloc_node_is_in_partition(node, partition))
+            continue;
+        void *userdata = node->userdata;
+        node->userdata = (void *)malloc(sizeof(netloc_analysis_data));
+        netloc_analysis_data *analysis_data = (netloc_analysis_data *)node->userdata;
+        analysis_data->level = -1;
+        analysis_data->userdata = userdata;
+
+        netloc_edge_t *edge, *edge_tmp;
+        netloc_node_iter_edges(node, edge, edge_tmp) {
+            void *userdata = edge->userdata;
+            edge->userdata = (void *)malloc(sizeof(netloc_analysis_data));
+            netloc_analysis_data *analysis_data = (netloc_analysis_data *)edge->userdata;
+            analysis_data->level = -1;
+            analysis_data->userdata = userdata;
+        }
+
+        if (netloc_node_is_host(node)) {
+            utarray_push_back(nodes, &node);
+        }
+    }
+
+    /* We set the levels in the analysis data */
+    /* Upward edges will have the level of the source node and downward edges
+     * will have -1 as level */
+    int num_levels = 0;
+    netloc_node_t *current_node = /* pointer to one host node */
+        *(void **)utarray_eltptr(nodes, 0);
+    while (utarray_len(nodes)) {
+        UT_array *new_nodes;
+        utarray_new(new_nodes, &ut_ptr_icd);
+
+        for (unsigned int n = 0; n < utarray_len(nodes); n++) {
+            netloc_node_t *node = *(void **)utarray_eltptr(nodes, n);
+            netloc_analysis_data *node_data = (netloc_analysis_data *)node->userdata;
+            /* There is a problem, this is not a tree */
+            if (node_data->level != -1 && node_data->level != num_levels) {
+                utarray_free(new_nodes);
+                ret = -1;
+                goto end;
+            }
+            else {
+                node_data->level = num_levels;
+                netloc_edge_t *edge, *edge_tmp;
+                netloc_node_iter_edges(node, edge, edge_tmp) {
+                    if (!netloc_edge_is_in_partition(edge, partition))
+                        continue;
+                    netloc_analysis_data *edge_data = (netloc_analysis_data *)edge->userdata;
+
+                    netloc_node_t *dest = edge->dest;
+                    netloc_analysis_data *dest_data = (netloc_analysis_data *)dest->userdata;
+                    /* If we are going back */
+                    if (dest_data->level != -1 && dest_data->level < num_levels) {
+                        continue;
+                    }
+                    else {
+                        if (dest_data->level != num_levels) {
+                            edge_data->level = num_levels;
+                            utarray_push_back(new_nodes, &dest);
+                        }
+                    }
+                }
+            }
+        }
+        num_levels++;
+        utarray_free(nodes);
+        nodes = new_nodes;
+    }
+
+    /* We go though the tree to order the leaves  and find the tree
+     * structure */
+    UT_array *ordered_name_array = NULL;
+    UT_array **down_degrees_by_level = NULL;
+    NETLOC_int *max_down_degrees_by_level;
+
+    utarray_new(ordered_name_array, &ut_ptr_icd);
+
+    down_degrees_by_level = (UT_array **)malloc(num_levels*sizeof(UT_array *));
+    for (int l = 0; l < num_levels; l++) {
+        utarray_new(down_degrees_by_level[l], &ut_int_icd);
+    }
+    max_down_degrees_by_level = (NETLOC_int *)
+        calloc(num_levels-1, sizeof(NETLOC_int));
+
+    UT_array *down_edges = NULL;
+    utarray_new(down_edges, &ut_ptr_icd);
+    netloc_edge_t *up_edge = current_node->edges;
+    utarray_push_back(ordered_name_array, &current_node);
+    while (1) {
+        if (utarray_len(down_edges)) {
+            netloc_edge_t *down_edge = *(void **)utarray_back(down_edges);
+            utarray_pop_back(down_edges);
+            netloc_node_t *dest_node = down_edge->dest;
+            if (netloc_node_is_host(dest_node)) {
+                utarray_push_back(ordered_name_array, &dest_node);
+            }
+            else {
+                netloc_edge_t *edge, *edge_tmp;
+                int num_edges = 0;
+                netloc_node_iter_edges(dest_node, edge, edge_tmp) {
+                    if (!netloc_edge_is_in_partition(edge, partition))
+                        continue;
+                    netloc_analysis_data *edge_data = (netloc_analysis_data *)edge->userdata;
+                    int edge_level = edge_data->level;
+                    if (edge_level == -1) {
+                        utarray_push_back(down_edges, &edge);
+                        num_edges++;
+                    }
+                }
+                int level = ((netloc_analysis_data *)dest_node->userdata)->level;
+                utarray_push_back(down_degrees_by_level[num_levels-1-level], &num_edges);
+                max_down_degrees_by_level[num_levels-1-level] =
+                    max_down_degrees_by_level[num_levels-1-level] > num_edges ?
+                    max_down_degrees_by_level[num_levels-1-level]: num_edges;
+            }
+        }
+        else {
+            netloc_edge_t *new_up_edge = NULL;
+            if (!up_edge)
+                break;
+
+            netloc_node_t *up_node = up_edge->dest;
+            netloc_edge_t *edge, *edge_tmp;
+            int num_edges = 0;
+            netloc_node_iter_edges(up_node, edge, edge_tmp) {
+                if (!netloc_edge_is_in_partition(edge, partition))
+                    continue;
+                netloc_analysis_data *edge_data = (netloc_analysis_data *)edge->userdata;
+                int edge_level = edge_data->level;
+
+                netloc_node_t *dest_node = edge->dest;
+
+                /* If this is the node we came from */
+                if (dest_node == up_edge->node) {
+                    num_edges++;
+                    continue;
+                }
+
+                /* Downward edge */
+                if (edge_level == -1) {
+                    utarray_push_back(down_edges, &edge);
+                    num_edges++;
+                }
+                /* Upward edge */
+                else {
+                    new_up_edge = edge;
+                }
+
+            }
+            int level = ((netloc_analysis_data *)up_node->userdata)->level;
+            utarray_push_back(down_degrees_by_level[num_levels-1-level], &num_edges);
+            max_down_degrees_by_level[num_levels-1-level] =
+                max_down_degrees_by_level[num_levels-1-level] > num_edges ?
+                max_down_degrees_by_level[num_levels-1-level]: num_edges;
+            up_edge = new_up_edge;
+        }
+    }
+
+    tree->num_levels = num_levels-1;
+    tree->degrees = max_down_degrees_by_level;
+
+    int network_coeff = 2;
+    tree->cost = (NETLOC_int *)malloc(sizeof(NETLOC_int[tree->num_levels]));
+    tree->cost[tree->num_levels-1] = 1;
+    for (int i = tree->num_levels-2; i >= 0 ; i--) {
+        tree->cost[i] = tree->cost[i+1]*network_coeff;
+    }
+
+    /* Now we have the degree of each node, so we can complete the topology to
+     * have a complete balanced tree as requested by the tleaf structure */
+    int *arch_idx;
+    int num_nodes = utarray_len(ordered_name_array);
+    netloc_arch_tree_complete(tree, down_degrees_by_level, num_nodes, &arch_idx);
+
+    netloc_node_t **ordered_nodes = (netloc_node_t **)ordered_name_array->d;
+    netloc_arch_node_t *named_nodes = NULL;
+    for (int i = 0; i < num_nodes; i++) {
+        netloc_arch_node_t *node = netloc_arch_node_construct();
+        node->node = ordered_nodes[i];
+        node->name = ordered_nodes[i]->hostname;
+        node->idx_in_topo = arch_idx[i];
+        HASH_ADD_KEYPTR(hh, named_nodes, node->name, strlen(node->name), node);
+    }
+    free(arch_idx);
+
+    arch->nodes_by_name = named_nodes;
+
+end:
+    if (nodes)
+        utarray_free(nodes);
+
+    if (ordered_name_array)
+        utarray_free(ordered_name_array);
+
+    if (down_degrees_by_level) {
+        for (int l = 0; l < num_levels; l++) {
+            utarray_free(down_degrees_by_level[l]);
+        }
+        free(down_degrees_by_level);
+    }
+
+    if (down_edges)
+        utarray_free(down_edges);
+
+    /* We copy back all userdata */
+    netloc_topology_iter_nodes(topology, node, node_tmp) {
+        if (!netloc_node_is_in_partition(node, partition))
+            continue;
+        netloc_analysis_data *analysis_data = (netloc_analysis_data *)node->userdata;
+        if (analysis_data->level == -1 && ret != -1) {
+            ret = -1;
+            printf("The node %s was not browsed\n", node->description);
+        }
+        free(analysis_data);
+
+        netloc_edge_t *edge, *edge_tmp;
+        netloc_node_iter_edges(node, edge, edge_tmp) {
+            netloc_analysis_data *analysis_data = (netloc_analysis_data *)edge->userdata;
+            node->userdata = analysis_data->userdata;
+            free(analysis_data);
+        }
+    }
+
+    return ret;
+}
+
+/* Build the architecture description of the fabric from the environment.
+ *
+ * Reads NETLOC_TOPOFILE (path to the netloc topology file) and
+ * NETLOC_PARTITION (name of the partition to use), loads the topology
+ * and converts the selected partition into a tleaf stored in arch.
+ *
+ * arch:      architecture to fill (constructed by the caller)
+ * add_slots: non-zero to also take intra-node slots into account
+ * Returns NETLOC_SUCCESS on success, NETLOC_ERROR otherwise.
+ */
+int netloc_arch_build(netloc_arch_t *arch, int add_slots)
+{
+    char *partition_name = getenv("NETLOC_PARTITION");
+    char *topopath = getenv("NETLOC_TOPOFILE");
+
+    if (!topopath) {
+        fprintf(stderr, "Error: you need to set NETLOC_TOPOFILE in your environment.\n");
+        return NETLOC_ERROR;
+    }
+    /* Duplicated because the topology keeps a reference to the path
+     * (it is released by netloc_topology_destruct) */
+    topopath = strdup(topopath);
+
+    netloc_topology_t *topology = netloc_topology_construct(topopath);
+    if (topology == NULL) {
+        fprintf(stderr, "Error: netloc_topology_construct failed\n");
+        return NETLOC_ERROR;
+    }
+
+    arch->topology = topology;
+    arch->has_slots = add_slots;
+
+    if (!partition_name) {
+        fprintf(stderr, "Error: you need to set NETLOC_PARTITION in your environment.\n");
+        fprintf(stderr, "\tIt can be: ");
+        unsigned int num_partitions = utarray_len(topology->partitions);
+        for (unsigned int p = 0; p < num_partitions; p++) {
+            char *partition = *(char **)utarray_eltptr(topology->partitions, p);
+            fprintf(stderr, "%s%s", partition, p != num_partitions-1 ? ", ": "\n");
+        }
+        /* Do not leak the topology we just loaded, and do not leave a
+         * dangling pointer in the caller's arch */
+        netloc_topology_destruct(topology);
+        arch->topology = NULL;
+        return NETLOC_ERROR;
+    }
+
+    int partition =
+        netloc_topology_find_partition_idx(topology, partition_name);
+
+    /* Propagate conversion failures instead of silently ignoring them */
+    if (NETLOC_SUCCESS != partition_topology_to_tleaf(topology, partition, 1, arch))
+        return NETLOC_ERROR;
+
+    return NETLOC_SUCCESS;
+}
+
+/* Allocate and zero-initialize a new architecture structure.
+ * Returns NULL when allocation fails. */
+netloc_arch_t * netloc_arch_construct(void)
+{
+    return (netloc_arch_t *)calloc(1, sizeof(netloc_arch_t));
+}
+
+/* Free an architecture and everything it owns: the underlying netloc
+ * topology, the per-node hash table, and the tleaf description.
+ *
+ * Safe on a partially built architecture (e.g. when netloc_arch_build
+ * failed early): the topology and the node tree are only released when
+ * they were actually set — the original code dereferenced node_tree
+ * unconditionally and would crash.
+ */
+int netloc_arch_destruct(netloc_arch_t *arch)
+{
+    if (arch->topology)
+        netloc_topology_destruct(arch->topology);
+
+    netloc_arch_node_t *node, *node_tmp;
+    HASH_ITER(hh, arch->nodes_by_name, node, node_tmp) {
+        HASH_DEL(arch->nodes_by_name, node);
+        netloc_arch_node_destruct(node);
+    }
+
+    if (arch->arch.node_tree) {
+        free(arch->arch.node_tree->degrees);
+        free(arch->arch.node_tree->cost);
+        free(arch->arch.node_tree);
+    }
+    /* free(NULL) is a no-op, so these need no guard */
+    free(arch->current_hosts);
+    free(arch->node_slot_by_idx);
+
+    free(arch);
+
+    return NETLOC_SUCCESS;
+}
+
+/* Allocate a new per-node architecture descriptor.
+ * All fields are zeroed; num_slots is set to -1, meaning "slots not
+ * discovered yet".  Returns NULL when allocation fails (the original
+ * dereferenced the calloc result without checking it). */
+static netloc_arch_node_t *netloc_arch_node_construct(void)
+{
+    netloc_arch_node_t *arch_node = (netloc_arch_node_t *)
+        calloc(1, sizeof(netloc_arch_node_t));
+    if (NULL == arch_node)
+        return NULL;
+    arch_node->num_slots = -1;
+
+    return arch_node;
+}
+
+/* Release a per-node architecture descriptor and the arrays it owns. */
+static int netloc_arch_node_destruct(netloc_arch_node_t *arch_node)
+{
+    /* The slot tree is optional; the flat arrays are free(NULL)-safe */
+    if (arch_node->slot_tree)
+        netloc_arch_tree_destruct(arch_node->slot_tree);
+    free(arch_node->slot_idx);
+    free(arch_node->slot_os_idx);
+    free(arch_node->current_slots);
+    free(arch_node->slot_ranks);
+    free(arch_node);
+
+    return NETLOC_SUCCESS;
+}
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/netloc/edge.c b/opal/mca/hwloc/hwloc2x/hwloc/netloc/edge.c
new file mode 100644
index 0000000000..56e07a5073
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/netloc/edge.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright © 2013-2014 University of Wisconsin-La Crosse.
+ *                         All rights reserved.
+ * Copyright © 2013 Cisco Systems, Inc.  All rights reserved.
+ * Copyright © 2015-2016 Inria.  All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ * See COPYING in top-level directory.
+ *
+ * $HEADER$
+ */
+
+#define _GNU_SOURCE         /* See feature_test_macros(7) */
+#include <stdlib.h>
+
+#include <private/autogen/config.h>
+#include <private/netloc.h>
+
+static int cur_uid = 0;
+
+/* Allocate a new edge with a unique sequential id, NULL endpoints and
+ * empty containers.  Returns NULL when allocation fails. */
+netloc_edge_t * netloc_edge_construct(void)
+{
+    netloc_edge_t *edge = (netloc_edge_t*)malloc(sizeof(netloc_edge_t));
+    if( NULL == edge ) {
+        return NULL;
+    }
+
+    /* Hand out sequential identifiers */
+    edge->id = cur_uid++;
+
+    edge->dest = NULL;
+    edge->node = NULL;
+    edge->total_gbits = 0;
+    edge->userdata = NULL;
+
+    utarray_new(edge->physical_links, &ut_ptr_icd);
+    utarray_new(edge->partitions, &ut_int_icd);
+    utarray_new(edge->subnode_edges, &ut_ptr_icd);
+
+    return edge;
+}
+
+/* Return a human-readable description of an edge.
+ * NOTE(review): placeholder — returns a string literal, unlike
+ * netloc_node_pretty_print() whose result is heap-allocated; callers
+ * must NOT free() this return value until it is implemented. */
+char * netloc_edge_pretty_print(netloc_edge_t* edge)
+{
+    // TODO
+    return "TODO";
+}
+
+/* Free an edge, its containers, and all of its sub-edges (recursively). */
+int netloc_edge_destruct(netloc_edge_t * edge)
+{
+    /* Release the sub-edges first, then the containers */
+    unsigned int num_subedges = netloc_edge_get_num_subedges(edge);
+    for (unsigned int e = 0; e < num_subedges; e++) {
+        netloc_edge_destruct(netloc_edge_get_subedge(edge, e));
+    }
+    utarray_free(edge->subnode_edges);
+
+    utarray_free(edge->physical_links);
+    utarray_free(edge->partitions);
+    free(edge);
+    return NETLOC_SUCCESS;
+}
+
+/* Reset the global edge id counter so that edges constructed afterwards
+ * are numbered from 0 again. */
+void netloc_edge_reset_uid(void)
+{
+    cur_uid = 0;
+}
+
+/* Return 1 if the edge belongs to the given partition, 0 otherwise.
+ * This is a boolean predicate, not a status-returning function. */
+int netloc_edge_is_in_partition(netloc_edge_t *edge, int partition)
+{
+    for (unsigned int i = 0; i < netloc_get_num_partitions(edge); i++) {
+        if (netloc_get_partition(edge, i) == partition)
+            return 1;
+    }
+    /* Not found: return the boolean 0, not the NETLOC_SUCCESS status
+     * code the original returned by mistake */
+    return 0;
+}
+
+
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/netloc/hwloc.c b/opal/mca/hwloc/hwloc2x/hwloc/netloc/hwloc.c
new file mode 100644
index 0000000000..0292127540
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/netloc/hwloc.c
@@ -0,0 +1,288 @@
+/*
+ * Copyright © 2016-2017 Inria.  All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ * See COPYING in top-level directory.
+ *
+ * $HEADER$
+ */
+
+#define _GNU_SOURCE         /* See feature_test_macros(7) */
+
+#include <sys/types.h>
+#include <dirent.h>
+#include <libgen.h>
+
+#include <private/netloc.h>
+#include <netloc.h>
+#include <hwloc.h>
+
+static UT_icd topos_icd = {sizeof(hwloc_topology_t), NULL, NULL, NULL};
+
+/*
+ * Read the hwloc topologies of the given nodes and attach them.
+ *
+ * topology:  netloc topology; its hwlocpath field must be set.
+ * num_nodes: number of entries in node_list, or 0 to process all the
+ *            nodes of the topology.
+ * node_list: nodes to handle; ignored (and rebuilt locally) when
+ *            num_nodes is 0.
+ *
+ * For every non-switch node, "<hostname>.diff.xml" is tried first and
+ * "<hostname>.xml" is used as a fallback.  Nodes sharing the same
+ * reference topology share a single loaded hwloc topology: each node
+ * stores the pointer (hwlocTopo) and its index (hwlocTopoIdx) into
+ * topology->hwloc_topos.
+ *
+ * Returns NETLOC_SUCCESS or NETLOC_ERROR.
+ */
+int netloc_topology_read_hwloc(netloc_topology_t *topology, int num_nodes,
+        netloc_node_t **node_list)
+{
+    int ret = 0;
+    int all = 0;   /* set when we allocated node_list ourselves */
+
+    char *hwloc_path;
+
+    if (!topology->hwlocpath) {
+        printf("No hwloc directory recorded in the topology\n");
+        return NETLOC_ERROR;
+    }
+
+    /* A relative hwloc path is resolved against the directory that
+     * contains the topology file */
+    if (topology->hwlocpath[0] != '/') {
+        char *path_tmp = strdup(topology->topopath);
+        asprintf(&hwloc_path, "%s/%s", dirname(path_tmp), topology->hwlocpath);
+        free(path_tmp);
+    } else {
+        hwloc_path = strdup(topology->hwlocpath);
+    }
+
+    /* Opened only to check that the directory exists */
+    DIR* dir = opendir(hwloc_path);
+    /* Directory does not exist */
+    if (!dir) {
+        printf("Directory (%s) to hwloc does not exist\n", hwloc_path);
+        free(hwloc_path);
+        return NETLOC_ERROR;
+    }
+    else {
+        closedir(dir);
+    }
+
+    UT_array *hwloc_topo_names = topology->topos;
+    UT_array *hwloc_topos;
+    utarray_new(hwloc_topos, &topos_icd);
+
+    int num_diffs = 0;
+
+    /* No explicit node list: build one containing every node */
+    if (!num_nodes) {
+        netloc_node_t *node, *node_tmp;
+        num_nodes = HASH_COUNT(topology->nodes);
+        node_list = (netloc_node_t **)malloc(sizeof(netloc_node_t *[num_nodes]));
+        int n = 0;
+        netloc_topology_iter_nodes(topology, node, node_tmp) {
+            node_list[n++] = node;
+        }
+        all = 1;
+    }
+
+    for (int n  = 0; n < num_nodes; n++) {
+        netloc_node_t *node = node_list[n];
+        char *hwloc_file;
+        char *refname;
+
+        /* Switches have no hwloc topology */
+        if (netloc_node_is_switch(node))
+            continue;
+
+        /* We try to find a diff file */
+        asprintf(&hwloc_file, "%s/%s.diff.xml", hwloc_path, node->hostname);
+        hwloc_topology_diff_t diff;
+        int err;
+        if ((err = hwloc_topology_diff_load_xml(hwloc_file, &diff, &refname)) >= 0) {
+            /* Strip the ".xml" suffix from the reference name */
+            refname[strlen(refname)-4] = '\0';
+            hwloc_topology_diff_destroy(diff);
+            num_diffs++;
+        }
+        else {
+            free(hwloc_file);
+            /* We try to find a regular file */
+            asprintf(&hwloc_file, "%s/%s.xml", hwloc_path, node->hostname);
+            FILE *fxml;
+            /* NOTE(review): a missing file is only reported here; the
+             * code proceeds and the XML load below will fail for it */
+            if (!(fxml = fopen(hwloc_file, "r"))) {
+                printf("Hwloc file absent: %s\n", hwloc_file);
+            }
+            else
+                fclose(fxml);
+            asprintf(&refname, "%s", node->hostname);
+        }
+
+        /* Add the hwloc topology */
+        unsigned int t = 0;
+        while (t < utarray_len(hwloc_topo_names) &&
+                strcmp(*(char **)utarray_eltptr(hwloc_topo_names, t), refname)) {
+            t++;
+        }
+        /* Topology not found */
+        if (t == utarray_len(hwloc_topo_names)) {
+            utarray_push_back(hwloc_topo_names, &refname);
+
+            /* Read the hwloc topology */
+            hwloc_topology_t topology;
+            hwloc_topology_init(&topology);
+            hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM);
+
+            char *hwloc_ref_path;
+            asprintf(&hwloc_ref_path, "%s/%s.xml", hwloc_path, refname);
+            ret = hwloc_topology_set_xml(topology, hwloc_ref_path);
+            free(hwloc_ref_path);
+            if (ret == -1) {
+                /* Keep a NULL placeholder so indices stay aligned with
+                 * hwloc_topo_names */
+                void *null = NULL;
+                utarray_push_back(hwloc_topos, &null);
+                fprintf(stdout, "Warning: no topology for %s\n", refname);
+                hwloc_topology_destroy(topology);
+                free(refname); free(hwloc_file);
+                continue;
+            }
+
+            /* Keep only the objects that bring structure */
+            ret = hwloc_topology_set_all_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_STRUCTURE);
+            if (ret == -1) {
+                fprintf(stderr, "hwloc_topology_set_all_types_filter failed\n");
+                free(refname); free(hwloc_file);
+                goto ERROR;
+            }
+
+            ret = hwloc_topology_set_io_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_NONE);
+            if (ret == -1) {
+                /* Fixed: the message previously named the wrong function */
+                fprintf(stderr, "hwloc_topology_set_io_types_filter failed\n");
+                free(refname); free(hwloc_file);
+                goto ERROR;
+            }
+
+            ret = hwloc_topology_load(topology);
+            if (ret == -1) {
+                fprintf(stderr, "hwloc_topology_load failed\n");
+                free(refname); free(hwloc_file);
+                goto ERROR;
+            }
+            utarray_push_back(hwloc_topos, &topology);
+        }
+        free(refname);
+        free(hwloc_file);
+        node->hwlocTopo = *(hwloc_topology_t *)utarray_eltptr(hwloc_topos, t);
+        node->hwlocTopoIdx = t;
+    }
+
+    if (!num_diffs) {
+        printf("Warning: no hwloc diff file found!\n");
+    }
+
+    topology->topos = hwloc_topo_names;
+    /* Steal the utarray's data buffer; only the container is released
+     * below on the success path */
+    topology->hwloc_topos = (hwloc_topology_t *)hwloc_topos->d;
+
+    printf("%d hwloc topologies found:\n", utarray_len(topology->topos));
+    for (unsigned int p = 0; p < utarray_len(topology->topos); p++) {
+        printf("\t'%s'\n", *(char **)utarray_eltptr(topology->topos, p));
+    }
+
+    ret = NETLOC_SUCCESS;
+
+ERROR:
+    if (all) {
+        free(node_list);
+    }
+    free(hwloc_path);
+    /* On success the element buffer now belongs to the topology, so
+     * only the UT_array container itself is freed */
+    if (ret == NETLOC_SUCCESS)
+        free(hwloc_topos);
+    else
+        utarray_free(hwloc_topos);
+    return ret;
+}
+
+/* Set the info from hwloc of the node in the corresponding arch */
+/* Build the intra-node architecture (slot tree) of arch_node from the
+ * hwloc topology attached to its netloc node.
+ *
+ * The hwloc tree is walked level by level to record per-level downward
+ * degrees, converted into a complete balanced tleaf via
+ * netloc_arch_tree_complete(), and the mappings between tleaf leaf
+ * indices and PU os_index values are stored in slot_idx / slot_os_idx.
+ *
+ * Returns NETLOC_SUCCESS.
+ */
+int netloc_arch_node_get_hwloc_info(netloc_arch_node_t *arch_node)
+{
+    hwloc_topology_t topology = arch_node->node->hwlocTopo;
+
+    hwloc_obj_t root = hwloc_get_root_obj(topology);
+
+    int depth = hwloc_topology_get_depth(topology);
+    hwloc_obj_t first_object = root->first_child;
+
+    UT_array **down_degrees_by_level;
+    NETLOC_int *max_down_degrees_by_level;
+
+    /* One degree list per level; the deepest level has no children so
+     * its list stays empty */
+    down_degrees_by_level = (UT_array **)malloc(depth*sizeof(UT_array *));
+    for (int l = 0; l < depth; l++) {
+        utarray_new(down_degrees_by_level[l], &ut_int_icd);
+    }
+    max_down_degrees_by_level = (NETLOC_int *)
+        calloc(depth-1, sizeof(NETLOC_int));
+
+    /* Walk the tree breadth-first using sibling and cousin links,
+     * recording the arity of each group of siblings */
+    int level = depth-1;
+    hwloc_obj_t current_object = first_object;
+    while (level >= 1) {
+        int degree = 1;
+        /* we go through the siblings */
+        while (current_object->next_sibling) {
+            current_object = current_object->next_sibling;
+            degree++;
+        }
+        /* Add the degree to the list of degrees */
+        utarray_push_back(down_degrees_by_level[depth-1-level], &degree);
+        max_down_degrees_by_level[depth-1-level] =
+            max_down_degrees_by_level[depth-1-level] > degree ?
+            max_down_degrees_by_level[depth-1-level] : degree;
+
+        current_object = current_object->next_cousin;
+
+        /* End of a level: descend to the first child of the first
+         * object of the next level */
+        if (!current_object) {
+            level--;
+            if (!first_object->first_child)
+                break;
+            first_object = first_object->first_child;
+            current_object = first_object;
+        }
+    }
+
+    /* List of PUs */
+    unsigned int max_os_index = 0;
+    UT_array *ordered_host_array;
+    int *ordered_hosts;
+    utarray_new(ordered_host_array, &ut_int_icd);
+    current_object = first_object;
+    while (current_object) {
+        max_os_index = (max_os_index >= current_object->os_index)?
+            max_os_index: current_object->os_index;
+        utarray_push_back(ordered_host_array, &current_object->os_index);
+        current_object = current_object->next_cousin;
+    }
+    ordered_hosts = (int *)ordered_host_array->d;;
+
+    /* Weight for the edges in the tree */
+    NETLOC_int *cost = (NETLOC_int *)malloc((depth-1)*sizeof(NETLOC_int));
+    int level_coeff = 3;
+    cost[depth-2] = 1;
+    for (int l = depth-3; l >= 0; l--) {
+        cost[l] = cost[l+1]*level_coeff;
+    }
+
+    netloc_arch_tree_t *tree = (netloc_arch_tree_t *)
+        malloc(sizeof(netloc_arch_tree_t));
+    tree->num_levels = depth-1;
+    tree->degrees = max_down_degrees_by_level;
+    tree->cost = cost;
+
+    /* Complete the tree into a balanced one and get, for each PU, its
+     * leaf index in the completed tree */
+    int *arch_idx;
+    int num_cores = utarray_len(ordered_host_array);
+    netloc_arch_tree_complete(tree, down_degrees_by_level, num_cores, &arch_idx);
+
+    /* os_index -> leaf index.  NOTE(review): entries for os indexes
+     * with no corresponding PU are left uninitialized — confirm callers
+     * only index this array with valid os indexes */
+    int *slot_idx = (int *)malloc(sizeof(int[max_os_index+1]));
+    for (int i = 0; i < num_cores; i++) {
+        slot_idx[ordered_hosts[i]] = arch_idx[i];
+    }
+
+    /* leaf index -> os_index (reverse mapping) */
+    int num_leaves = netloc_arch_tree_num_leaves(tree);
+    int *slot_os_idx = (int *)malloc(sizeof(int[num_leaves]));
+    for (int i = 0; i < num_cores; i++) {
+        slot_os_idx[arch_idx[i]] = ordered_hosts[i];
+    }
+    free(arch_idx);
+
+    arch_node->slot_tree = tree;
+    arch_node->slot_idx = slot_idx;
+    arch_node->slot_os_idx = slot_os_idx;
+    arch_node->num_slots = max_os_index+1;
+
+    for (int l = 0; l < depth; l++) {
+        utarray_free(down_degrees_by_level[l]);
+    }
+    free(down_degrees_by_level);
+
+    utarray_free(ordered_host_array);
+
+    return NETLOC_SUCCESS;
+}
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/netloc/mpicomm.c b/opal/mca/hwloc/hwloc2x/hwloc/netloc/mpicomm.c
new file mode 100644
index 0000000000..dff8d7548f
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/netloc/mpicomm.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright © 2016-2017 Inria.  All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ * See COPYING in top-level directory.
+ *
+ * $HEADER$
+ */
+
+
+#define _GNU_SOURCE         /* See feature_test_macros(7) */
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <netloc.h>
+#include <private/netloc.h>
+
+/* Read a communication matrix from a text file.
+ *
+ * The file must contain n rows of n space-separated numbers.  On
+ * success, *pn receives n and *pmat a newly allocated n x n matrix
+ * stored as row pointers into one contiguous block (free with
+ * free(mat[0]); free(mat);).
+ *
+ * Returns NETLOC_SUCCESS, or NETLOC_ERROR on I/O or format errors.
+ */
+int netloc_build_comm_mat(char *filename, int *pn, double ***pmat)
+{
+    FILE *input = fopen(filename, "r");
+
+    if (!input ) {
+        perror("fopen");
+        return NETLOC_ERROR;
+    }
+    char *line = NULL;
+    size_t linesize = 0;
+
+    char *ptr= NULL;
+    int i,j;
+    /* Initialized to NULL so the error path can free() them safely:
+     * the original code jumped to the error label before these were
+     * assigned, freeing uninitialized pointers (undefined behavior) */
+    double *mat_values = NULL;
+    double **mat = NULL;
+
+    j = -1;
+    i = 0;
+
+    /* Get the number of elements in a line to find the size of the matrix */
+    netloc_line_get(&line, &linesize, input);
+    int n = 0;
+    char *remain_line = line;
+    while ((ptr = netloc_line_get_next_token(&remain_line, ' '))) {
+        if (!strlen(ptr))
+            break;
+        n++;
+    }
+    rewind(input);
+
+    if (!n) {
+        goto error;
+    }
+
+    mat_values = (double *)malloc(sizeof(double[n*n]));
+    mat = (double **)malloc(sizeof(double *[n]));
+    if (!mat_values || !mat) {
+        goto error;
+    }
+    for (int r = 0; r < n; r++) {
+        mat[r] = &mat_values[r*n];
+    }
+
+    while (netloc_line_get(&line, &linesize, input) != -1) {
+        /* Bound check before writing the row: the original wrote past
+         * the matrix when the file had more than n rows */
+        if (i >= n) {
+            fprintf(stderr, "Error: too many rows for %s\n", filename);
+            goto error;
+        }
+        char *remain_line = line;
+        j = 0;
+        while ((ptr = netloc_line_get_next_token(&remain_line, ' '))){
+            if (!strlen(ptr))
+                break;
+            if (j >= n) {
+                break;
+            }
+            mat[i][j] = atof(ptr);
+            if (mat[i][j] < 0) {
+                fprintf(stderr, "Warning: negative value in comm matrix "
+                        "(mat[%d][%d] = %f)\n", i, j, mat[i][j]);
+            }
+            j++;
+        }
+        if (j != n) {
+            fprintf(stderr, "Error at %d %d (%d!=%d). "
+                    "Too many columns for %s\n", i, j, j, n, filename);
+            goto error;
+        }
+        i++;
+    }
+
+    if (i != n) {
+        fprintf(stderr,"Error at %d %d. Too many rows for %s\n",
+                i, j, filename);
+        goto error;
+    }
+
+    fclose (input);
+
+    *pn = n;
+    *pmat = mat;
+
+    free(line);
+    return NETLOC_SUCCESS;
+
+error:
+    free(line);
+    free(mat_values);
+    free(mat);
+    *pmat = NULL;
+    *pn = 0;
+    fclose (input);
+    return NETLOC_ERROR;
+}
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/netloc/node.c b/opal/mca/hwloc/hwloc2x/hwloc/netloc/node.c
new file mode 100644
index 0000000000..e1adb3a63d
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/netloc/node.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright © 2013-2014 University of Wisconsin-La Crosse.
+ *                         All rights reserved.
+ * Copyright © 2013 Cisco Systems, Inc.  All rights reserved.
+ * Copyright © 2015-2017 Inria.  All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ * See COPYING in top-level directory.
+ *
+ * $HEADER$
+ */
+
+#define _GNU_SOURCE         /* See feature_test_macros(7) */
+#include <stdlib.h>
+
+#include <private/autogen/config.h>
+#include <private/netloc.h>
+#include <netloc.h>
+
+static UT_icd node_physical_links_icd = {
+    sizeof(netloc_physical_link_t *), NULL, NULL, NULL
+};
+
+static UT_icd node_physical_nodes_icd = {
+    sizeof(netloc_node_t *), NULL, NULL, NULL
+};
+
+static UT_icd node_partitions_icd = { sizeof(int), NULL, NULL, NULL };
+
+static int node_or_subnode_destruct(netloc_node_t * node, int is_node);
+
+/* Allocate a new node with empty containers and default field values.
+ * Returns NULL when allocation fails. */
+netloc_node_t * netloc_node_construct(void)
+{
+    netloc_node_t *node = (netloc_node_t*)malloc(sizeof(netloc_node_t));
+    if (NULL == node) {
+        return NULL;
+    }
+
+    /* Scalar defaults */
+    node->physical_id[0] = '\0';
+    node->logical_id = -1;
+    node->type = NETLOC_NODE_TYPE_INVALID;
+    node->description = NULL;
+    node->userdata = NULL;
+    node->edges = NULL;
+    node->paths = NULL;
+    node->hostname = NULL;
+    node->hwlocTopo = NULL;
+    node->hwlocTopoIdx = -1;
+
+    /* Empty containers */
+    utarray_new(node->physical_links, &node_physical_links_icd);
+    utarray_new(node->subnodes, &node_physical_nodes_icd);
+    utarray_new(node->partitions, &node_partitions_icd);
+
+    return node;
+}
+
+/* Destroy a top-level node and everything it owns; subnodes are
+ * released recursively by node_or_subnode_destruct(). */
+int netloc_node_destruct(netloc_node_t * node)
+{
+    return node_or_subnode_destruct(node, 1);
+}
+
+/* Free a node (is_node != 0) or one of its subnodes (is_node == 0)
+ * together with everything it owns.
+ * NOTE(review): is_node is currently unused in the body; subnodes are
+ * released exactly like top-level nodes. */
+static int node_or_subnode_destruct(netloc_node_t * node, int is_node)
+{
+    utarray_free(node->physical_links);
+
+    /* Description */
+    free(node->description);
+
+    /* Edges */
+    netloc_edge_t *edge, *edge_tmp;
+    HASH_ITER(hh, node->edges, edge, edge_tmp) {
+        HASH_DEL(node->edges, edge);  /* delete; edge advances to next */
+        netloc_edge_destruct(edge);
+    }
+
+    /* Subnodes (destroyed recursively) */
+    for (unsigned int n = 0; n < utarray_len(node->subnodes); n++) {
+        netloc_node_t *subnode = *(netloc_node_t **)
+            utarray_eltptr(node->subnodes, n);
+        node_or_subnode_destruct(subnode, 0);
+    }
+    utarray_free(node->subnodes);
+
+    /* Paths */
+    netloc_path_t *path, *path_tmp;
+    HASH_ITER(hh, node->paths, path, path_tmp) {
+        HASH_DEL(node->paths, path);  /* delete; path advances to next */
+        netloc_path_destruct(path);
+    }
+
+    /* Hostname */
+    free(node->hostname);
+
+    /* Partitions */
+    utarray_free(node->partitions);
+
+    /* hwlocTopo: nothing to do because the pointer is also stored in the
+     * topology, which owns it */
+
+    free(node);
+
+    return NETLOC_SUCCESS;
+}
+
+/* Build a heap-allocated one-line description of a node.
+ * The caller is responsible for free()ing the returned string. */
+char *netloc_node_pretty_print(netloc_node_t* node)
+{
+    char *str = NULL;
+
+    asprintf(&str, " [%23s]/[%d] -- %s (%d links)", node->physical_id,
+             node->logical_id, node->description,
+             utarray_len(node->physical_links));
+
+    return str;
+}
+
+/* Return 1 if the node belongs to the given partition, 0 otherwise.
+ * This is a boolean predicate, not a status-returning function. */
+int netloc_node_is_in_partition(netloc_node_t *node, int partition)
+{
+    for (unsigned int i = 0; i < netloc_get_num_partitions(node); i++) {
+        if (netloc_get_partition(node, i) == partition)
+            return 1;
+    }
+    /* Not found: return the boolean 0, not the NETLOC_SUCCESS status
+     * code the original returned by mistake */
+    return 0;
+}
+
+
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/netloc/path.c b/opal/mca/hwloc/hwloc2x/hwloc/netloc/path.c
new file mode 100644
index 0000000000..80d3f17253
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/netloc/path.c
@@ -0,0 +1,31 @@
+/*
+ * Copyright © 2016 Inria.  All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ * See COPYING in top-level directory.
+ *
+ * $HEADER$
+ */
+
+#define _GNU_SOURCE         /* See feature_test_macros(7) */
+#include <stdlib.h>
+
+#include <private/netloc.h>
+
+/* Allocate a new path with an empty list of links.
+ * Returns NULL when allocation fails (the original called utarray_new
+ * on the unchecked malloc result). */
+netloc_path_t *netloc_path_construct(void)
+{
+    netloc_path_t *path = (netloc_path_t *)
+        malloc(sizeof(netloc_path_t ));
+    if (NULL == path)
+        return NULL;
+    utarray_new(path->links, &ut_ptr_icd);
+
+    return path;
+}
+
+/* Free a path together with its array of links. */
+int netloc_path_destruct(netloc_path_t *path)
+{
+    utarray_free(path->links);
+    free(path);
+
+    return NETLOC_SUCCESS;
+}
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/netloc/physical_link.c b/opal/mca/hwloc/hwloc2x/hwloc/netloc/physical_link.c
new file mode 100644
index 0000000000..7047abfc52
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/netloc/physical_link.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright © 2016 Inria.  All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ * See COPYING in top-level directory.
+ *
+ * $HEADER$
+ */
+
+#define _GNU_SOURCE         /* See feature_test_macros(7) */
+#include <stdlib.h>
+
+#include <private/autogen/config.h>
+#include <private/netloc.h>
+
+/* Allocate a new physical link with a unique sequential id and default
+ * (NULL / -1) field values.  Returns NULL when allocation fails. */
+netloc_physical_link_t * netloc_physical_link_construct(void)
+{
+    static int cur_uid = 0;
+
+    netloc_physical_link_t *link = (netloc_physical_link_t*)
+        malloc(sizeof(netloc_physical_link_t));
+    if( NULL == link ) {
+        return NULL;
+    }
+
+    /* Hand out sequential identifiers */
+    link->id = cur_uid++;
+
+    link->src = NULL;
+    link->dest = NULL;
+    link->ports[0] = -1;
+    link->ports[1] = -1;
+    link->width = NULL;
+    link->speed = NULL;
+    link->edge = NULL;
+    link->other_way = NULL;
+    link->gbits = -1;
+    link->description = NULL;
+
+    utarray_new(link->partitions, &ut_int_icd);
+
+    return link;
+}
+
+/* Free a physical link and the strings/containers it owns. */
+int netloc_physical_link_destruct(netloc_physical_link_t *link)
+{
+    utarray_free(link->partitions);
+    free(link->description);
+    free(link->speed);
+    free(link->width);
+    free(link);
+    return NETLOC_SUCCESS;
+}
+
+/* Build a heap-allocated one-line description of a physical link:
+ * id, source/destination types and ids, ports, speed, width and gbits.
+ * The caller must free() the returned string.
+ * NOTE(review): the asprintf return value is not checked; on allocation
+ * failure the returned pointer may be undefined on some platforms. */
+char * netloc_link_pretty_print(netloc_physical_link_t* link)
+{
+    char * str = NULL;
+    const char * tmp_src_str = NULL;
+    const char * tmp_dest_str = NULL;
+
+    tmp_src_str = netloc_node_type_decode(link->src->type);
+    tmp_dest_str = netloc_node_type_decode(link->dest->type);
+
+    asprintf(&str, "%3d (%s) [%23s] %d [<- %s / %s (%f) ->] (%s) [%23s] %d",
+             link->id,
+             tmp_src_str,
+             link->src->physical_id,
+             link->ports[0],
+             link->speed,
+             link->width,
+             link->gbits,
+             tmp_dest_str,
+             link->dest->physical_id,
+             link->ports[1]);
+
+    return str;
+}
+
+
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/netloc/scotch.c b/opal/mca/hwloc/hwloc2x/hwloc/netloc/scotch.c
new file mode 100644
index 0000000000..24926f512b
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/netloc/scotch.c
@@ -0,0 +1,469 @@
+/*
+ * Copyright © 2016-2017 Inria.  All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ * See COPYING in top-level directory.
+ *
+ * $HEADER$
+ */
+
+#define _GNU_SOURCE         /* See feature_test_macros(7) */
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <scotch.h>
+
+#include <netloc.h>
+#include <netlocscotch.h>
+#include <private/netloc.h>
+#include <hwloc.h>
+
+static int arch_tree_to_scotch_arch(netloc_arch_tree_t *tree, SCOTCH_Arch *scotch);
+static int comm_matrix_to_scotch_graph(double **matrix, int n, SCOTCH_Graph *graph);
+static int netlocscotch_get_mapping_from_graph(SCOTCH_Graph *graph,
+        netlocscotch_core_t **pcores);
+
/* qsort() comparator for ints: <0, 0, >0 as *a is less than, equal to,
 * or greater than *b. */
static int compareint(void const *a, void const *b)
{
    int lhs = *(const int *)a;
    int rhs = *(const int *)b;
    return lhs - rhs;
}
+
+static int build_subarch(SCOTCH_Arch *scotch, NETLOC_int num_nodes, NETLOC_int *node_list,
+        SCOTCH_Arch *subarch)
+{
+    int ret;
+
+    /* Hack to avoid problem with unsorted node list in the subarch and scotch
+     * FIXME TODO */
+    qsort(node_list, num_nodes, sizeof(*node_list), compareint);
+
+    ret = SCOTCH_archSub(subarch, scotch, num_nodes, node_list);
+    if (ret != 0) {
+        fprintf(stderr, "Error: SCOTCH_archSub failed\n");
+    }
+
+    return ret;
+}
+
+/* Convert a netloc tree to a scotch tleaf architecture */
+int arch_tree_to_scotch_arch(netloc_arch_tree_t *tree, SCOTCH_Arch *scotch)
+{
+    int ret;
+
+    ret = SCOTCH_archTleaf(scotch, tree->num_levels, tree->degrees, tree->cost);
+    if (ret != 0) {
+        fprintf(stderr, "Error: SCOTCH_archTleaf failed\n");
+        return NETLOC_ERROR;
+    }
+
+    return NETLOC_SUCCESS;
+}
+
+static int build_subgraph(SCOTCH_Graph *graph, int *vertices, int num_vertices,
+        SCOTCH_Graph *nodegraph)
+{
+    int ret;
+
+    SCOTCH_Num base;       /* Base value               */
+    SCOTCH_Num vert;       /* Number of vertices       */
+    SCOTCH_Num *verttab;   /* Vertex array [vertnbr+1] */
+    SCOTCH_Num *vendtab;   /* Vertex array [vertnbr]   */
+    SCOTCH_Num *velotab;   /* Vertex load array        */
+    SCOTCH_Num *vlbltab;   /* Vertex label array       */
+    SCOTCH_Num edge;       /* Number of edges (arcs)   */
+    SCOTCH_Num *edgetab;   /* Edge array [edgenbr]     */
+    SCOTCH_Num *edlotab;   /* Edge load array          */
+
+    SCOTCH_graphData(graph, &base, &vert, &verttab, &vendtab, &velotab,
+            &vlbltab, &edge, &edgetab, &edlotab);
+
+    int *vertex_is_present = (int *)malloc(vert*sizeof(int));
+    for (int v = 0; v < vert; v++) {
+        vertex_is_present[v] = -1;
+    }
+    for (int v = 0; v < num_vertices; v++) {
+        vertex_is_present[vertices[v]] = v;
+    }
+
+    // TODO handle other cases 
+    if (vendtab) {
+        for (int i = 0; i < vert; i++) {
+            assert(vendtab[i] == verttab[i+1]);
+        }
+    }
+
+    SCOTCH_Num *new_verttab;   /* Vertex array [vertnbr+1] */
+    SCOTCH_Num *new_vendtab;   /* Vertex array [vertnbr]   */
+    SCOTCH_Num *new_velotab;   /* Vertex load array        */
+    SCOTCH_Num *new_vlbltab;   /* Vertex label array       */
+    SCOTCH_Num new_edge;       /* Number of edges (arcs)   */
+    SCOTCH_Num *new_edgetab;   /* Edge array [edgenbr]     */
+    SCOTCH_Num *new_edlotab;   /* Edge load array          */
+
+    new_verttab = (SCOTCH_Num *)malloc((num_vertices+1)*sizeof(SCOTCH_Num));
+    new_vendtab = NULL;
+    if (velotab)
+        new_velotab = (SCOTCH_Num *)malloc(num_vertices*sizeof(SCOTCH_Num));
+    else
+        new_velotab = NULL;
+    if (vlbltab)
+        new_vlbltab = (SCOTCH_Num *)malloc(num_vertices*sizeof(SCOTCH_Num));
+    else
+        new_vlbltab = NULL;
+
+    new_edgetab = (SCOTCH_Num *)malloc(edge*sizeof(SCOTCH_Num));
+    new_edlotab = (SCOTCH_Num *)malloc(edge*sizeof(SCOTCH_Num));
+
+    int edge_idx = 0;
+    new_verttab[0] = 0;
+    for (int v = 0; v < num_vertices; v++) {
+        if (velotab)
+            new_velotab[v] = velotab[vertices[v]];
+        if (vlbltab)
+            new_vlbltab[v] = vlbltab[vertices[v]];
+
+        for (int e = verttab[vertices[v]]; e < verttab[vertices[v]+1]; e++) {
+            int dest_vertex = edgetab[e];
+            int new_dest = vertex_is_present[dest_vertex];
+            if (new_dest != -1) {
+                new_edgetab[edge_idx] = new_dest;
+                new_edlotab[edge_idx] = edlotab[e];
+                edge_idx++;
+            }
+        }
+        new_verttab[v+1] = edge_idx;
+    }
+
+    new_edge = edge_idx;
+
+    SCOTCH_Num *old_edgetab = new_edgetab;
+    new_edgetab = (SCOTCH_Num *)
+        realloc(new_edgetab, new_edge*sizeof(SCOTCH_Num));
+    if (!new_edgetab) {
+        new_edgetab = old_edgetab;
+    }
+
+    SCOTCH_Num *old_edlotab = new_edlotab;
+    new_edlotab = (SCOTCH_Num *)
+        realloc(new_edlotab, new_edge*sizeof(SCOTCH_Num));
+    if (!new_edlotab) {
+        new_edlotab = old_edlotab;
+    }
+
+    ret = SCOTCH_graphBuild (nodegraph, base, num_vertices,
+                new_verttab, new_vendtab, new_velotab, new_vlbltab,
+                new_edge, new_edgetab, new_edlotab);
+
+    free(vertex_is_present);
+
+    return ret;
+}
+
+static int build_current_arch(SCOTCH_Arch *scotch_arch,
+        SCOTCH_Arch *scotch_subarch, netloc_arch_t *arch)
+{
+    int ret;
+    /* First we need to get the topology of the whole machine */
+    ret = netloc_arch_build(arch, 1);
+    if( NETLOC_SUCCESS != ret ) {
+        return ret;
+    }
+
+    if (scotch_subarch) {
+        /* Set the current nodes and slots in the arch */
+        ret = netloc_arch_set_current_resources(arch);
+    } else {
+        ret = netloc_arch_set_global_resources(arch);
+    }
+
+    if( NETLOC_SUCCESS != ret ) {
+        return ret;
+    }
+
+    SCOTCH_archInit(scotch_arch);
+    ret = arch_tree_to_scotch_arch(arch->arch.global_tree, scotch_arch);
+    if (NETLOC_SUCCESS != ret) {
+        return ret;
+    }
+
+    if (scotch_subarch) {
+        /* Now we can build the sub architecture */
+        SCOTCH_archInit(scotch_subarch);
+        ret = build_subarch(scotch_arch, arch->num_current_hosts,
+                arch->current_hosts, scotch_subarch);
+    }
+
+    return ret;
+}
+
+int netlocscotch_build_global_arch(SCOTCH_Arch *arch)
+{
+    int ret;
+    netloc_arch_t *netloc_arch = netloc_arch_construct();
+    ret = build_current_arch(arch, NULL, netloc_arch);
+
+    netloc_arch_destruct(netloc_arch);
+    return ret;
+}
+
+int netlocscotch_build_current_arch(SCOTCH_Arch *arch, SCOTCH_Arch *subarch)
+{
+    int ret;
+    netloc_arch_t *netloc_arch = netloc_arch_construct();
+    ret = build_current_arch(arch, subarch, netloc_arch);
+
+    if (ret == NETLOC_SUCCESS)
+        netloc_arch_destruct(netloc_arch);
+
+    return ret;
+}
+
+int netlocscotch_get_mapping_from_graph(SCOTCH_Graph *graph,
+        netlocscotch_core_t **pcores)
+{
+    int ret;
+
+    SCOTCH_Arch scotch_arch;
+    SCOTCH_Arch scotch_subarch;
+    netlocscotch_core_t *cores = NULL;
+    netloc_arch_t *arch = netloc_arch_construct();
+    ret = build_current_arch(&scotch_arch, &scotch_subarch, arch);
+    if (NETLOC_SUCCESS != ret) {
+        netloc_arch_destruct(arch);
+        return ret;
+    }
+
+    NETLOC_int graph_size;
+    SCOTCH_graphSize(graph, &graph_size, NULL);
+
+    int num_hosts = arch->num_current_hosts;
+
+    SCOTCH_Strat strategy;
+    SCOTCH_stratInit(&strategy);
+    /* We force Scotch to use all the processes
+     * barat is 0.01 as in SCOTCH_STRATDEFAULT */
+    SCOTCH_stratGraphMapBuild(&strategy, SCOTCH_STRATQUALITY, graph_size, 0.01);
+
+    /* The ranks are the indices of the nodes in the complete graph */
+    NETLOC_int *ranks = (NETLOC_int *)malloc(graph_size*sizeof(NETLOC_int));
+    ret = SCOTCH_graphMap(graph, &scotch_subarch, &strategy, ranks);
+
+    SCOTCH_stratExit(&strategy);
+
+    SCOTCH_archExit(&scotch_subarch);
+    SCOTCH_archExit(&scotch_arch);
+
+    if (ret != 0) {
+        fprintf(stderr, "Error: SCOTCH_graphMap failed\n");
+        goto ERROR;
+    }
+
+    cores = (netlocscotch_core_t *)
+        malloc(graph_size*sizeof(netlocscotch_core_t));
+    if (!arch->has_slots) {
+        /* We have the mapping but only for the nodes, not inside the nodes */
+
+        UT_array *process_by_node[num_hosts];
+        for (int n = 0; n < num_hosts; n++) {
+            utarray_new(process_by_node[n], &ut_int_icd);
+        }
+
+        /* Find the processes mapped to the nodes */
+        for (int p = 0; p < graph_size; p++) {
+            int rank = ranks[p];
+            if (rank >= num_hosts || rank < 0) {
+                ret = NETLOC_ERROR;
+                goto ERROR;
+            }
+            utarray_push_back(process_by_node[rank], &p);
+        }
+
+        /* Use the intranode topology */
+        for (int n = 0; n < num_hosts; n++) {
+            int *process_list = (int *)process_by_node[n]->d;
+            int num_processes = utarray_len(process_by_node[n]);
+            netloc_arch_node_t *node =
+                arch->node_slot_by_idx[arch->current_hosts[n]].node;
+            NETLOC_int node_ranks[num_processes];
+
+            /* We need to extract the subgraph with only the vertices mapped to the
+             * current node */
+            SCOTCH_Graph nodegraph; /* graph with only elements for node n */
+            build_subgraph(graph, process_list, num_processes, &nodegraph);
+
+            /* Build the scotch arch of the all node */
+            SCOTCH_Arch scotch_nodearch;
+            ret = arch_tree_to_scotch_arch(node->slot_tree, &scotch_nodearch);
+            if (NETLOC_SUCCESS != ret) {
+                goto ERROR;
+            }
+
+            /* Restrict the scotch arch to the available cores */
+            SCOTCH_Arch scotch_nodesubarch;
+            ret = build_subarch(&scotch_nodearch, node->num_current_slots,
+                    node->current_slots, &scotch_nodesubarch);
+            if (NETLOC_SUCCESS != ret) {
+                goto ERROR;
+            }
+
+            /* Find the mapping to the cores */
+            ret = SCOTCH_graphMap(&nodegraph, &scotch_nodesubarch, &strategy, node_ranks);
+            if (ret != 0) {
+                fprintf(stderr, "Error: SCOTCH_graphMap failed\n");
+                goto ERROR;
+            }
+
+            /* Report the node ranks in the global rank array */
+            for (int p = 0; p < num_processes; p++) {
+                int process = process_list[p];
+                int arch_idx = node->current_slots[node_ranks[p]];
+                cores[process].core = node->slot_os_idx[arch_idx];
+                cores[process].nodename = strdup(node->node->hostname);
+                cores[process].rank = node->slot_ranks[node_ranks[p]];
+            }
+        }
+        for (int n = 0; n < num_hosts; n++) {
+            utarray_free(process_by_node[n]);
+        }
+    } else {
+        for (int p = 0; p < graph_size; p++) {
+            int host_idx = arch->current_hosts[ranks[p]];
+            netloc_arch_node_t *node = arch->node_slot_by_idx[host_idx].node;
+            int slot_rank = arch->node_slot_by_idx[host_idx].slot;
+            cores[p].nodename = strdup(node->node->hostname);
+            cores[p].core = node->slot_os_idx[node->slot_idx[slot_rank]];
+            cores[p].rank = node->slot_ranks[node->slot_idx[slot_rank]];
+        }
+    }
+
+    *pcores = cores;
+
+ERROR:
+    free(ranks);
+    netloc_arch_destruct(arch);
+    if (ret == NETLOC_SUCCESS)
+        return ret;
+    free(cores);
+    return ret;
+}
+
+int netlocscotch_get_mapping_from_comm_matrix(double **comm, int num_vertices,
+        netlocscotch_core_t **pcores)
+{
+    int ret;
+
+    SCOTCH_Graph graph;
+    ret = comm_matrix_to_scotch_graph(comm, num_vertices, &graph);
+    if (NETLOC_SUCCESS != ret) {
+        return ret;
+    }
+
+    ret = netlocscotch_get_mapping_from_graph(&graph, pcores);
+
+    /* Free arrays */
+    {
+        SCOTCH_Num base;       /* Base value               */
+        SCOTCH_Num vert;       /* Number of vertices       */
+        SCOTCH_Num *verttab;   /* Vertex array [vertnbr+1] */
+        SCOTCH_Num *vendtab;   /* Vertex array [vertnbr]   */
+        SCOTCH_Num *velotab;   /* Vertex load array        */
+        SCOTCH_Num *vlbltab;   /* Vertex label array       */
+        SCOTCH_Num edge;       /* Number of edges (arcs)   */
+        SCOTCH_Num *edgetab;   /* Edge array [edgenbr]     */
+        SCOTCH_Num *edlotab;   /* Edge load array          */
+
+        SCOTCH_graphData(&graph, &base, &vert, &verttab, &vendtab, &velotab,
+                &vlbltab, &edge, &edgetab, &edlotab);
+        free(edlotab);
+        free(edgetab);
+        free(verttab);
+        SCOTCH_graphExit(&graph);
+    }
+
+    return ret;
+}
+
+int netlocscotch_get_mapping_from_comm_file(char *filename, int *pnum_processes,
+        netlocscotch_core_t **pcores)
+{
+    int ret;
+    int n;
+    double **mat;
+
+    ret = netloc_build_comm_mat(filename, &n, &mat);
+
+    if (ret != NETLOC_SUCCESS) {
+        return ret;
+    }
+
+    *pnum_processes = n;
+
+    ret = netlocscotch_get_mapping_from_comm_matrix(mat, n, pcores);
+
+    free(mat[0]);
+    free(mat);
+
+    return ret;
+}
+
+static int comm_matrix_to_scotch_graph(double **matrix, int n, SCOTCH_Graph *graph)
+{
+    int ret;
+
+    SCOTCH_Num base;       /* Base value               */
+    SCOTCH_Num vert;       /* Number of vertices       */
+    SCOTCH_Num *verttab;   /* Vertex array [vertnbr+1] */
+    SCOTCH_Num *vendtab;   /* Vertex array [vertnbr]   */
+    SCOTCH_Num *velotab;   /* Vertex load array        */
+    SCOTCH_Num *vlbltab;   /* Vertex label array       */
+    SCOTCH_Num edge;       /* Number of edges (arcs)   */
+    SCOTCH_Num *edgetab;   /* Edge array [edgenbr]     */
+    SCOTCH_Num *edlotab;   /* Edge load array          */
+
+    base = 0;
+    vert = n;
+
+    verttab = (SCOTCH_Num *)malloc((vert+1)*sizeof(SCOTCH_Num));
+    for (int v = 0; v < vert+1; v++) {
+        verttab[v] = v*(n-1);
+    }
+
+    vendtab = NULL;
+    velotab = NULL;
+    vlbltab = NULL;
+
+    edge = n*(n-1);
+
+    /* Compute the lowest load to reduce of the values of the load to avoid overflow */
+    double min_load = -1;
+    for (int v1 = 0; v1 < vert; v1++) {
+        for (int v2 = 0; v2 < vert; v2++) {
+            double load = matrix[v1][v2];
+            if (load >= 0.01 && (load < min_load || min_load < 0)) /* TODO set an epsilon */
+                min_load = load;
+        }
+    }
+
+    edgetab = (SCOTCH_Num *)malloc(n*(n-1)*sizeof(SCOTCH_Num));
+    edlotab = (SCOTCH_Num *)malloc(n*(n-1)*sizeof(SCOTCH_Num));
+    for (int v1 = 0; v1 < vert; v1++) {
+        for (int v2 = 0; v2 < vert; v2++) {
+            if (v2 == v1)
+                continue;
+            int idx = v1*(n-1)+((v2 < v1) ? v2: v2-1);
+            edgetab[idx] = v2;
+            edlotab[idx] = (int)(matrix[v1][v2]/min_load);
+        }
+    }
+
+    ret = SCOTCH_graphBuild(graph, base, vert,
+            verttab, vendtab, velotab, vlbltab, edge, edgetab, edlotab);
+
+    return ret;
+}
+
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/netloc/support.c b/opal/mca/hwloc/hwloc2x/hwloc/netloc/support.c
new file mode 100644
index 0000000000..7ad95130be
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/netloc/support.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright © 2013-2014 University of Wisconsin-La Crosse.
+ *                         All rights reserved.
+ * Copyright © 2016-2017 Inria.  All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ * See COPYING in top-level directory.
+ *
+ * $HEADER$
+ */
+
+#include <private/netloc.h>
+#include <netloc.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
/*
 * Split off the next token from *string, using c as the separator.
 * The separator (if found) is overwritten with '\0' and *string is
 * advanced past it; after the last token *string is set to NULL.
 * Returns the token, or NULL when *string is already NULL.
 */
char *netloc_line_get_next_token(char **string, char c)
{
    char *token = *string;

    if (!token)
        return NULL;

    char *sep = strchr(token, c);
    if (sep) {
        *sep = '\0';
        *string = sep+1;
    } else {
        *string = NULL;
    }

    return token;
}
+
/*
 * getline() wrapper that strips the trailing newline.
 * Returns the number of characters in the line (newline excluded), or
 * -1 on EOF/error, exactly like getline().
 */
ssize_t netloc_line_get(char **lineptr, size_t *n, FILE *stream)
{
    ssize_t read = getline(lineptr, n, stream);
    if (read == -1)
        return -1;

    /* BUG FIX: use the length reported by getline instead of strlen:
     * strlen(line)-1 underflowed (out-of-bounds read) when the buffer
     * started with a NUL byte, and mis-located the newline whenever the
     * line contained embedded NULs. */
    if (read > 0 && (*lineptr)[read-1] == '\n') {
        (*lineptr)[read-1] = '\0';
        read--;
    }
    return read;
}
+
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/netloc/topology.c b/opal/mca/hwloc/hwloc2x/hwloc/netloc/topology.c
new file mode 100644
index 0000000000..0efd04b93c
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/netloc/topology.c
@@ -0,0 +1,598 @@
+/*
+ * Copyright © 2013-2014 University of Wisconsin-La Crosse.
+ *                         All rights reserved.
+ * Copyright © 2016-2017 Inria.  All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ * See COPYING in top-level directory.
+ *
+ * $HEADER$
+ */
+
+#define _GNU_SOURCE	   /* See feature_test_macros(7) */
+#include <stdio.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <libgen.h>
+
+#include <private/netloc.h>
+
+static char *line_get_next_field(char **string);
+static void read_partition_list(char *list, UT_array *array);
+static int edges_sort_by_dest(netloc_edge_t *a, netloc_edge_t *b);
+static int find_reverse_edges(netloc_topology_t *topology);
+static int find_similar_nodes(netloc_topology_t *topology);
+static int netloc_node_get_virtual_id(char *id);
+static int edge_merge_into(netloc_edge_t *dest, netloc_edge_t *src, int keep);
+
/*
 * Build a netloc_topology_t by parsing the topology file at 'path'.
 *
 * File layout read here, in order: version number, subnet id, hwloc
 * directory path, number of nodes, one line per node, one line of edges
 * per node, the partition list, then one line per stored path.
 *
 * Ownership: the returned topology keeps 'path' itself (topopath), so
 * the caller must pass a heap string it no longer frees — confirmed by
 * netloc_topology_destruct which free()s topopath.
 *
 * Returns the new topology, or NULL on error.
 * NOTE(review): several early error paths leak 'line', 'subnet' or
 * 'hwlocpath'; also fopen failure calls exit(-1) instead of returning.
 */
netloc_topology_t *netloc_topology_construct(char *path)
{
    int ret;
    char *line = NULL;       /* getline buffer reused for every record */
    size_t linesize = 0;

    netloc_topology_t *topology = NULL;

    FILE *input = fopen(path, "r");

    if (!input ) {
        fprintf(stderr, "Cannot open topology file %s\n", path);
        perror("fopen");
        exit(-1);
    }

    /* The reader only understands one on-disk format version */
    int version;
    /* NOTE(review): the ',' at the end of the scanf format looks
     * accidental — verify against the file writer */
    if (fscanf(input , "%d\n,", &version) != 1) {
        fprintf(stderr, "Cannot read the version number in %s\n", path);
        perror("fscanf");
        fclose(input);
        return NULL;
    } else if (version != NETLOCFILE_VERSION) {
        fprintf(stderr, "Incorrect version number, "
                "please generate your input file again\n");
        fclose(input);
        return NULL;
    }

    /* Second record: subnet identifier (one full line) */
    char *subnet;
    if (netloc_line_get(&line, &linesize, input) == -1) {
        fprintf(stderr, "Cannot read the subnet in %s\n", path);
        perror("fscanf");
        free(line);
        fclose(input);
        return NULL;
    } else {
        subnet = strdup(line);
    }

    /* Third record: path to the per-node hwloc XML files (may be empty) */
    char *hwlocpath;
    if (netloc_line_get(&line, &linesize, input) == -1) {
        fprintf(stderr, "Cannot read hwloc path in %s\n", path);
        perror("fscanf");
        free(subnet);
        fclose(input);
        return NULL;
    } else if (!strlen(line)) {
        hwlocpath = NULL;
    } else {
        hwlocpath = strdup(line);
    }

    /* Check the hwloc directory exists; a relative path is resolved
     * against the directory containing the topology file */
    if (hwlocpath) {
        DIR *hwlocdir;
        char *realhwlocpath;
        if (hwlocpath[0] != '/') {
            /* dirname() may modify its argument, hence the copy */
            char *path_tmp = strdup(path);
            asprintf(&realhwlocpath, "%s/%s", dirname(path_tmp), hwlocpath);
            free(path_tmp);
        } else {
            realhwlocpath = strdup(hwlocpath);
        }
        if (!(hwlocdir = opendir(realhwlocpath))) {
            fprintf(stderr, "Couldn't open hwloc directory: \"%s\"\n", realhwlocpath);
            perror("opendir");
            free(subnet);
            free(realhwlocpath);
            fclose(input);
            return NULL;
        } else {
            /* Only probing for existence — close it right away */
            closedir(hwlocdir);
            free(realhwlocpath);
        }
    }

    /* Fourth record: number of node lines (and edge lines) that follow */
    int num_nodes;
    if (fscanf(input , "%d\n", &num_nodes) != 1) {
        fprintf(stderr, "Cannot read the number of nodes in %s\n", path);
        perror("fscanf");
        free(subnet);
        fclose(input);
        return NULL;
    }

    if (num_nodes <= 0) {
        fprintf(stderr, "Oups: incorrect number of nodes (%d) in %s\n",
                num_nodes, path);
        free(subnet);
        fclose(input);
        return NULL;
    }

    /*
     * Allocate Memory
     */
    topology = (netloc_topology_t *)malloc(sizeof(netloc_topology_t) * 1);
    if( NULL == topology ) {
        free(subnet);
        fclose(input);
        return NULL;
    }

    /*
     * Initialize the structure
     */
    topology->topopath = path;          /* takes ownership of caller's string */
    topology->hwlocpath = hwlocpath;
    topology->subnet_id = subnet;
    topology->nodes          = NULL;    /* uthash: filled below */
    topology->physical_links = NULL;    /* uthash: filled below */
    topology->type           = NETLOC_TOPOLOGY_TYPE_INVALID ;
    topology->nodesByHostname = NULL;   /* secondary hash (hh2) on hostname */
    topology->hwloc_topos = NULL;
    utarray_new(topology->partitions, &ut_str_icd);
    utarray_new(topology->topos, &ut_str_icd);

    /* Read nodes from file: one comma-separated line per node with
     * physical id, logical id, type, partitions, description, hostname */
    for (int n = 0; n < num_nodes; n++) {
        netloc_node_t *node = netloc_node_construct();
        netloc_line_get(&line, &linesize, input);
        char *remain_line = line;

        strncpy(node->physical_id, line_get_next_field(&remain_line), 20);
            /* Should be shorter than 20 */
        node->physical_id[19] = '\0'; /* If a problem occurs */
        node->logical_id = atoi(line_get_next_field(&remain_line));
        node->type = atoi(line_get_next_field(&remain_line));
        read_partition_list(line_get_next_field(&remain_line), node->partitions);
        node->description = strdup(line_get_next_field(&remain_line));
        node->hostname = strdup(line_get_next_field(&remain_line));

        HASH_ADD_STR(topology->nodes, physical_id, node);
        /* Switches have an empty hostname: only index real hosts */
        if (strlen(node->hostname) > 0) {
            HASH_ADD_KEYPTR(hh2, topology->nodesByHostname, node->hostname,
                    strlen(node->hostname), node);
        }
    }

    /* Read edges from file: each line starts with the source node id,
     * then repeats (dest id, gbits, partitions, link count, links...) */
    for (int n = 0; n < num_nodes; n++) {
        char *field;
        netloc_node_t *node;

        netloc_line_get(&line, &linesize, input);
        char *remain_line = line;

        field = line_get_next_field(&remain_line);
        if (strlen(field) > 19)
            field[19] = '\0';
        HASH_FIND_STR(topology->nodes, field, node);

        if (!node) {
            /* NOTE(review): this path leaks the topology, its nodes and
             * 'line' — only the two utarrays are released */
            fprintf(stderr, "Node not found: %s\n", field);
            utarray_free(topology->partitions);
            utarray_free(topology->topos);
            return NULL;
        }

        while ((field = line_get_next_field(&remain_line))) {
            /* There is an edge */
            netloc_edge_t *edge = netloc_edge_construct();

            HASH_FIND_STR(topology->nodes, field, edge->dest);
            edge->total_gbits = strtof(line_get_next_field(&remain_line), NULL);
            read_partition_list(line_get_next_field(&remain_line), edge->partitions);

            edge->node = node;
            HASH_ADD_PTR(node->edges, dest, edge);

            /* Read links: the physical cables that make up this edge */
            int num_links = atoi(line_get_next_field(&remain_line));
            assert(num_links >= 0);
            utarray_reserve(edge->physical_links, (unsigned int)num_links);
            utarray_reserve(node->physical_links, (unsigned int)num_links);
            for (int i = 0; i < num_links; i++) {
                netloc_physical_link_t *link;
                link =  netloc_physical_link_construct();

                link->id = atoi(line_get_next_field(&remain_line));

                link->src = node;
                link->dest = edge->dest;

                link->ports[0] = atoi(line_get_next_field(&remain_line));
                link->ports[1] = atoi(line_get_next_field(&remain_line));

                link->width = strdup(line_get_next_field(&remain_line));
                link->speed = strdup(line_get_next_field(&remain_line));
                link->gbits = strtof(line_get_next_field(&remain_line), NULL);
                link->description = strdup(line_get_next_field(&remain_line));
                /* id of the opposite-direction link; resolved later */
                link->other_way_id = atoi(line_get_next_field(&remain_line));

                read_partition_list(line_get_next_field(&remain_line),
                        link->partitions);

                HASH_ADD_INT(topology->physical_links, id, link);

                utarray_push_back(node->physical_links, &link);
                utarray_push_back(edge->physical_links, &link);
            }

        }
        /* Deterministic edge order, keyed by destination id */
        HASH_SRT(hh, node->edges, edges_sort_by_dest);
    }

    /* Read partitions from file: one comma-separated line of names */
    {
        netloc_line_get(&line, &linesize, input);
        char *remain_line = line;
        char *field;

        while ((field = line_get_next_field(&remain_line))) {
            /* ut_str_icd copies the string, so pushing pointers into the
             * reused 'line' buffer is safe */
            utarray_push_back(topology->partitions, &field);
        }
    }

    /* Read paths: each remaining line is src id, dest id, then the list
     * of physical link ids along the path */
    while (netloc_line_get(&line, &linesize, input) != -1) {
        netloc_node_t *node;
        netloc_path_t *path;
        char *field;

        char *remain_line = line;
        char *src_id = line_get_next_field(&remain_line);
        char *dest_id = line_get_next_field(&remain_line);

        HASH_FIND_STR(topology->nodes, src_id, node);

        path = netloc_path_construct();
        strncpy(path->dest_id, dest_id, 20); /* Should be shorter than 20 */
        path->dest_id[19] = '\0'; /* If a problem occurs */

        while ((field = line_get_next_field(&remain_line))) {
            int link_id = atoi(field);
            netloc_physical_link_t *link;

            HASH_FIND_INT(topology->physical_links, &link_id, link);
            utarray_push_back(path->links, &link);
        }

        /* NOTE(review): 'node' is used without a NULL check here —
         * a path line with an unknown src id would crash; confirm the
         * writer always emits known ids */
        HASH_ADD_STR(node->paths, dest_id, path);
    }

    fclose(input);
    free(line);

    /* Wire up edge->other_way pointers between opposite edges */
    if (find_reverse_edges(topology) != NETLOC_SUCCESS) {
        netloc_topology_destruct(topology);
        return NULL;
    }

    /* Merge switches with identical neighbor lists into virtual nodes */
    ret = find_similar_nodes(topology);
    if (ret != NETLOC_SUCCESS) {
        netloc_topology_destruct(topology);
        return NULL;
    }

    return topology;
}
+
+int netloc_topology_destruct(netloc_topology_t *topology)
+{
+    /*
+     * Sanity Check
+     */
+    if( NULL == topology ) {
+        fprintf(stderr, "Error: Detaching from a NULL pointer\n");
+        return NETLOC_ERROR;
+    }
+
+    free(topology->topopath);
+    free(topology->hwlocpath);
+    free(topology->subnet_id);
+
+    /* Nodes */
+    netloc_node_t *node, *node_tmp;
+    HASH_ITER(hh, topology->nodes, node, node_tmp) {
+        HASH_DELETE(hh, topology->nodes, node);
+    }
+
+    netloc_topology_iter_nodes(topology, node, node_tmp) {
+        HASH_DELETE(hh, topology->nodes, node);
+        netloc_node_destruct(node);
+    }
+
+    /** Partition List */
+    utarray_free(topology->partitions);
+
+    /** Physical links */
+    netloc_physical_link_t *link, *link_tmp;
+    HASH_ITER(hh, topology->physical_links, link, link_tmp) {
+        HASH_DEL(topology->physical_links, link);
+        netloc_physical_link_destruct(link);
+    }
+
+    /** Hwloc topology List */
+    for (unsigned int t = 0; t < utarray_len(topology->topos); t++) {
+        if (topology->hwloc_topos[t])
+            hwloc_topology_destroy(topology->hwloc_topos[t]);
+    }
+    free(topology->hwloc_topos);
+
+    /** Hwloc topology name List */
+    utarray_free(topology->topos);
+
+    free(topology);
+
+    return NETLOC_SUCCESS;
+}
+
+int netloc_topology_find_partition_idx(netloc_topology_t *topology, char *partition_name)
+{
+    if (!partition_name)
+        return -1;
+
+    /* Find the selected partition in the topology */
+    unsigned int p = 0;
+    int found = 0;
+    while (p < utarray_len(topology->partitions)) {
+        char *current_name = *(char **)utarray_eltptr(topology->partitions, p);
+
+        if (!strcmp(current_name, partition_name)) {
+            found = 1;
+            break;
+        }
+        p++;
+    }
+
+    if (!found)
+        return -2;
+
+    assert(p <= INT_MAX);
+
+    return (int)p;
+}
+
/* Fields in the topology file are comma-separated. */
static char *line_get_next_field(char **remain)
{
    return netloc_line_get_next_token(remain, ',');
}
+
+static void read_partition_list(char *list, UT_array *array)
+{
+    char *partition;
+    if (!strlen(list))
+        return;
+    while ((partition = netloc_line_get_next_token(&list, ':'))) {
+        int partition_num = atoi(partition);
+        utarray_push_back(array, &partition_num);
+    }
+}
+
/* HASH_SRT comparator: order a node's edges by destination physical id. */
static int edges_sort_by_dest(netloc_edge_t *a, netloc_edge_t *b)
{
    return strcmp(a->dest->physical_id, b->dest->physical_id);
}
+
+static int find_reverse_edges(netloc_topology_t *topology)
+{
+    netloc_node_t *node, *node_tmp;
+    HASH_ITER(hh, topology->nodes, node, node_tmp) {
+        netloc_edge_t *edge, *edge_tmp;
+        netloc_node_iter_edges(node, edge, edge_tmp) {
+            netloc_node_t *dest = edge->dest;
+            if (dest > node) {
+                netloc_edge_t *reverse_edge;
+                HASH_FIND_PTR(dest->edges, &node, reverse_edge);
+                if (reverse_edge == NULL) {
+                    return NETLOC_ERROR;
+                }
+                edge->other_way = reverse_edge;
+                reverse_edge->other_way = edge;
+            }
+        }
+    }
+    return NETLOC_SUCCESS;
+}
+
+static int find_similar_nodes(netloc_topology_t * topology)
+{
+    int ret;
+
+    /* Build edge lists by node */
+    int num_nodes = HASH_COUNT(topology->nodes);
+    netloc_node_t **nodes = (netloc_node_t **)malloc(num_nodes*sizeof(netloc_node_t *));
+    netloc_node_t ***edgedest_by_node = (netloc_node_t ***)malloc(num_nodes*sizeof(netloc_node_t **));
+    int *num_edges_by_node = (int *)malloc(num_nodes*sizeof(int));
+    netloc_node_t *node, *node_tmp;
+    int idx = -1;
+    netloc_topology_iter_nodes(topology, node, node_tmp) {
+        idx++;
+        if (netloc_node_is_host(node)) {
+            nodes[idx] = NULL;
+            edgedest_by_node[idx] = NULL;
+            continue;
+        }
+        int num_edges = HASH_COUNT(node->edges);
+        nodes[idx] = node;
+        num_edges_by_node[idx] = num_edges;
+        edgedest_by_node[idx] = (netloc_node_t **)malloc(num_edges*sizeof(netloc_node_t *));
+
+        netloc_edge_t *edge, *edge_tmp;
+        int edge_idx = 0;
+        netloc_node_iter_edges(node, edge, edge_tmp) {
+            edgedest_by_node[idx][edge_idx] = edge->dest;
+            edge_idx++;
+        }
+    }
+
+    /* We compare the edge lists to find similar nodes */
+    UT_array *similar_nodes;
+    utarray_new(similar_nodes, &ut_ptr_icd);
+    for (int idx1 = 0; idx1 < num_nodes; idx1++) {
+        netloc_node_t *node1 = nodes[idx1];
+        netloc_node_t *virtual_node = NULL;
+        netloc_edge_t *first_virtual_edge = NULL;
+        if (!node1)
+            continue;
+        for (int idx2 = idx1+1; idx2 < num_nodes; idx2++) {
+            netloc_node_t *node2 = nodes[idx2];
+            if (!node2)
+                continue;
+            if (num_edges_by_node[idx2] != num_edges_by_node[idx1])
+                continue;
+            if (idx2 == idx1)
+                continue;
+
+            int equal = 1;
+            for (int i = 0; i < num_edges_by_node[idx1]; i++) {
+                if (edgedest_by_node[idx2][i] != edgedest_by_node[idx1][i]) {
+                    equal = 0;
+                    break;
+                }
+            }
+
+            /* If we have similar nodes */
+            if (equal) {
+                /* We create a new virtual node to contain all of them */
+                if (!virtual_node) {
+                    virtual_node = netloc_node_construct();
+                    netloc_node_get_virtual_id(virtual_node->physical_id);
+
+                    virtual_node->type = node1->type;
+                    utarray_concat(virtual_node->physical_links, node1->physical_links);
+                    virtual_node->description = strdup(virtual_node->physical_id);
+
+                    utarray_push_back(virtual_node->subnodes, &node1);
+                    utarray_concat(virtual_node->partitions, node1->partitions);
+
+                    // TODO paths
+
+                    /* Set edges */
+                    netloc_edge_t *edge1, *edge_tmp1;
+                    netloc_node_iter_edges(node1, edge1, edge_tmp1) {
+                        netloc_edge_t *virtual_edge = netloc_edge_construct();
+                        if (!first_virtual_edge)
+                            first_virtual_edge = virtual_edge;
+                        virtual_edge->node = virtual_node;
+                        virtual_edge->dest = edge1->dest;
+                        ret = edge_merge_into(virtual_edge, edge1, 0);
+                        if (ret != NETLOC_SUCCESS) {
+                            netloc_edge_destruct(virtual_edge);
+                            goto ERROR;
+                        }
+                        HASH_ADD_PTR(virtual_node->edges, dest, virtual_edge);
+
+                        /* Change the reverse edge of the neighbours (reverse nodes) */
+                        netloc_node_t *reverse_node = edge1->dest;
+                        netloc_edge_t *reverse_edge = edge1->other_way;
+
+                        netloc_edge_t *reverse_virtual_edge =
+                            netloc_edge_construct();
+                        reverse_virtual_edge->dest = virtual_node;
+                        reverse_virtual_edge->node = reverse_node;
+                        reverse_virtual_edge->other_way = virtual_edge;
+                        virtual_edge->other_way = reverse_virtual_edge;
+                        HASH_ADD_PTR(reverse_node->edges, dest, reverse_virtual_edge);
+                        ret = edge_merge_into(reverse_virtual_edge, reverse_edge, 1);
+                        if (ret != NETLOC_SUCCESS) {
+                            goto ERROR;
+                        }
+                        HASH_DEL(reverse_node->edges, reverse_edge);
+                    }
+
+                    /* We remove the node from the list of nodes */
+                    HASH_DEL(topology->nodes, node1);
+                    HASH_ADD_STR(topology->nodes, physical_id, virtual_node);
+                    printf("First node found: %s (%s)\n", node1->description, node1->physical_id);
+                }
+
+                utarray_concat(virtual_node->physical_links, node2->physical_links);
+                utarray_push_back(virtual_node->subnodes, &node2);
+                utarray_concat(virtual_node->partitions, node2->partitions);
+
+                /* Set edges */
+                netloc_edge_t *edge2, *edge_tmp2;
+                netloc_edge_t *virtual_edge = first_virtual_edge;
+                netloc_node_iter_edges(node2, edge2, edge_tmp2) {
+                    /* Merge the edges from the physical node into the virtual node */
+                    ret = edge_merge_into(virtual_edge, edge2, 0);
+                    if (ret != NETLOC_SUCCESS) {
+                        goto ERROR;
+                    }
+
+                    /* Change the reverse edge of the neighbours (reverse nodes) */
+                    netloc_node_t *reverse_node = edge2->dest;
+                    netloc_edge_t *reverse_edge = edge2->other_way;
+
+                    netloc_edge_t *reverse_virtual_edge;
+                    HASH_FIND_PTR(reverse_node->edges, &virtual_node,
+                            reverse_virtual_edge);
+                    ret = edge_merge_into(reverse_virtual_edge, reverse_edge, 1);
+                    if (ret != NETLOC_SUCCESS) {
+                        goto ERROR;
+                    }
+                    HASH_DEL(reverse_node->edges, reverse_edge);
+
+                    /* Get the next edge */
+                    virtual_edge = virtual_edge->hh.next;
+                }
+
+                /* We remove the node from the list of nodes */
+                HASH_DEL(topology->nodes, node2);
+                printf("\t node found: %s (%s)\n", node2->description, node2->physical_id);
+
+                nodes[idx2] = NULL;
+            }
+        }
+        utarray_clear(similar_nodes);
+    }
+
+    ret = NETLOC_SUCCESS;
+ERROR:
+    free(nodes);
+
+    for (int idx = 0; idx < num_nodes; idx++) {
+        if (edgedest_by_node[idx])
+            free(edgedest_by_node[idx]);
+    }
+    free(edgedest_by_node);
+    free(num_edges_by_node);
+    free(similar_nodes);
+    return ret;
+}
+
+/* Generate a unique identifier for a virtual node.
+ * Writes "virtual<N>" into the caller-provided buffer 'id' and increments a
+ * function-local counter so every call yields a distinct id.  The buffer is
+ * assumed large enough for the formatted string (it is used with a node's
+ * physical_id field — TODO(review): confirm that field's size bound).
+ * Always returns 0. */
+static int netloc_node_get_virtual_id(char *id)
+{
+    static int virtual_id = 0;
+    sprintf(id, "virtual%d", virtual_id++);
+    return 0;
+}
+
+/* Merge edge 'src' into edge 'dest': concatenate the physical-link and
+ * partition arrays and accumulate the total bandwidth.  When 'keep' is
+ * non-zero, 'src' itself is also recorded in dest->subnode_edges so the
+ * (possibly duplicated) sub-edge is preserved for later use.
+ * Returns NETLOC_ERROR if either edge is NULL, NETLOC_SUCCESS otherwise. */
+static int edge_merge_into(netloc_edge_t *dest, netloc_edge_t *src, int keep)
+{
+    if (!dest || !src) {
+        return NETLOC_ERROR;
+    }
+
+    utarray_concat(dest->physical_links, src->physical_links);
+    dest->total_gbits += src->total_gbits;
+    utarray_concat(dest->partitions, src->partitions);
+    /* it will keep the duplicated edges */
+    if (keep)
+        utarray_push_back(dest->subnode_edges, &src);
+
+    return NETLOC_SUCCESS;
+}
+
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/netlocscotch.pc.in b/opal/mca/hwloc/hwloc2x/hwloc/netlocscotch.pc.in
new file mode 100644
index 0000000000..f8f4a425ce
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/netlocscotch.pc.in
@@ -0,0 +1,11 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: netlocscotch
+Description: Network locality to scotch architecture
+Version: @HWLOC_VERSION@
+Cflags: -I${includedir}
+Libs: -L${libdir}  -lnetlocscotch -lnetloc -lhwloc
+Libs.private: @LIBS@
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/tests/Makefile.am b/opal/mca/hwloc/hwloc2x/hwloc/tests/Makefile.am
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/tests/Makefile.am
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/Makefile.am b/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/Makefile.am
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/Makefile.am
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/Makefile.am b/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/Makefile.am
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/Makefile.am
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/allowed/Makefile.am b/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/allowed/Makefile.am
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/allowed/Makefile.am
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/allowed/test-topology.sh.in b/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/allowed/test-topology.sh.in
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/allowed/test-topology.sh.in
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/gather/Makefile.am b/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/gather/Makefile.am
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/gather/Makefile.am
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/gather/test-gather-topology.sh.in b/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/gather/test-gather-topology.sh.in
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/gather/test-gather-topology.sh.in
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/test-topology.sh.in b/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/test-topology.sh.in
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/linux/test-topology.sh.in
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/ports/Makefile.am b/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/ports/Makefile.am
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/ports/Makefile.am
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/rename/Makefile.am b/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/rename/Makefile.am
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/rename/Makefile.am
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/wrapper.sh.in b/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/wrapper.sh.in
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/wrapper.sh.in
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/x86/Makefile.am b/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/x86/Makefile.am
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/x86/Makefile.am
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/x86/test-topology.sh.in b/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/x86/test-topology.sh.in
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/x86/test-topology.sh.in
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/xml/Makefile.am b/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/xml/Makefile.am
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/xml/Makefile.am
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/xml/test-topology.sh.in b/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/xml/test-topology.sh.in
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/tests/hwloc/xml/test-topology.sh.in
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/tests/netloc/Makefile.am b/opal/mca/hwloc/hwloc2x/hwloc/tests/netloc/Makefile.am
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/tests/netloc/Makefile.am
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/tests/netloc/tests.sh.in b/opal/mca/hwloc/hwloc2x/hwloc/tests/netloc/tests.sh.in
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/tests/netloc/tests.sh.in
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/utils/Makefile.am b/opal/mca/hwloc/hwloc2x/hwloc/utils/Makefile.am
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/utils/Makefile.am
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/Makefile.am b/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/Makefile.am
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/Makefile.am
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/hwloc-compress-dir.in b/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/hwloc-compress-dir.in
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/hwloc-compress-dir.in
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/hwloc-gather-topology.in b/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/hwloc-gather-topology.in
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/hwloc-gather-topology.in
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-fake-plugin.sh.in b/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-fake-plugin.sh.in
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-fake-plugin.sh.in
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-annotate.sh.in b/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-annotate.sh.in
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-annotate.sh.in
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-calc.sh.in b/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-calc.sh.in
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-calc.sh.in
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-compress-dir.sh.in b/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-compress-dir.sh.in
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-compress-dir.sh.in
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-diffpatch.sh.in b/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-diffpatch.sh.in
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-diffpatch.sh.in
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-distrib.sh.in b/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-distrib.sh.in
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-distrib.sh.in
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-dump-hwdata/Makefile.am b/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-dump-hwdata/Makefile.am
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-dump-hwdata/Makefile.am
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-dump-hwdata/test-hwloc-dump-hwdata.sh.in b/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-dump-hwdata/test-hwloc-dump-hwdata.sh.in
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-dump-hwdata/test-hwloc-dump-hwdata.sh.in
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-info.sh.in b/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-info.sh.in
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/utils/hwloc/test-hwloc-info.sh.in
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/utils/lstopo/Makefile.am b/opal/mca/hwloc/hwloc2x/hwloc/utils/lstopo/Makefile.am
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/utils/lstopo/Makefile.am
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/utils/lstopo/lstopo-windows.c b/opal/mca/hwloc/hwloc2x/hwloc/utils/lstopo/lstopo-windows.c
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/utils/lstopo/lstopo-windows.c
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/utils/lstopo/test-lstopo.sh.in b/opal/mca/hwloc/hwloc2x/hwloc/utils/lstopo/test-lstopo.sh.in
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/utils/lstopo/test-lstopo.sh.in
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/utils/netloc/draw/Makefile.am b/opal/mca/hwloc/hwloc2x/hwloc/utils/netloc/draw/Makefile.am
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/utils/netloc/draw/Makefile.am
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/utils/netloc/infiniband/Makefile.am b/opal/mca/hwloc/hwloc2x/hwloc/utils/netloc/infiniband/Makefile.am
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/utils/netloc/infiniband/Makefile.am
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/utils/netloc/infiniband/netloc_ib_gather_raw.in b/opal/mca/hwloc/hwloc2x/hwloc/utils/netloc/infiniband/netloc_ib_gather_raw.in
new file mode 100644
index 0000000000..e93c6abd87
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/utils/netloc/infiniband/netloc_ib_gather_raw.in
@@ -0,0 +1,477 @@
+#!/usr/bin/env perl
+
+#
+# Copyright © 2013-2017 Inria.  All rights reserved.
+#
+# See COPYING in top-level directory.
+#
+# $HEADER$
+#
+
+use strict;
+
+use Getopt::Long;
+
+my $HWLOC_top_builddir = "@HWLOC_top_builddir@";
+my $prefix = "@prefix@";
+my $exec_prefix = "@exec_prefix@";
+my $bindir = "@bindir@";
+# this will be changed into $bindir/... during make install
+my $locallstopo = "$HWLOC_top_builddir/utils/lstopo/lstopo-no-graphics";
+
+my $hwlocdir = undef;
+my $outdir = undef;
+my @forcesubnets;
+my $needsudo = 0;
+my $ibnetdiscover = "/usr/sbin/ibnetdiscover";
+my $ibroute = "/usr/sbin/ibroute";
+my $verbose = 0;
+my $force = 0;
+my $dryrun = 0;
+my $ignoreerrors = 0;
+my $help = 0;
+my $sleep = 0;
+
+my $sleepcount = 0;
+# Optionally pause between successive network-probing commands (--sleep <n>).
+# Note the asymmetry: the very first invocation only prints the notice
+# (unless $sleepcount) so the initial probe starts immediately; actual
+# sleeping (sleep ... if $sleepcount) happens from the second call onward.
+sub sleep_between_probes {
+  my $indent = shift;
+  if ($sleep) {
+    print $indent."... Sleeping $sleep seconds between probes ...\n" unless $sleepcount;
+    sleep $sleep if $sleepcount;
+    $sleepcount++;
+  }
+}
+
+# Parse command-line options; "bundling" allows short options to be
+# combined (e.g. -vf).  $ok is false if any option fails to parse.
+&Getopt::Long::Configure("bundling");
+my $ok = Getopt::Long::GetOptions(
+	"hwloc-dir=s" => \$hwlocdir,
+	"force-subnet=s" => \@forcesubnets,
+	"sudo" => \$needsudo,
+	"ibnetdiscover=s" => \$ibnetdiscover,
+	"ibroute=s" => \$ibroute,
+	"sleep=i" => \$sleep,
+        "ignore-errors" => \$ignoreerrors,
+	"verbose|v" => \$verbose,
+	"force|f" => \$force,
+	"dry-run" => \$dryrun,
+	"help|h" => \$help
+    );
+
+# Print usage and exit when help was requested, option parsing failed,
+# or the mandatory <outdir> argument is missing.
+if ($help or !$ok or !defined $ARGV[0]) {
+    print "$0 [options] <outdir>\n";
+    print "  Dumps topology information to <outdir>/ib-raw/\n";
+    print "  Subnets are guessed from the <outdir>/hwloc/ directory where\n";
+    print "  the hwloc XML exports of some nodes are stored.\n";
+    print "Options:\n";
+    print " --sudo\n";
+    print "    Pass sudo to internal ibnetdiscover and ibroute invocations.\n";
+    print "    Useful when the entire script cannot run as root.\n";
+    print " --hwloc-dir <dir>\n";
+    print "    Use <dir> instead of <outdir>/hwloc/ for hwloc XML exports.\n";
+    print " --force-subnet [<subnet>:]<board>:<port> to force the discovery\n";
+    print "    Do not guess subnets from hwloc XML exports.\n";
+    print "    Force discovery on local board <board> port <port>\n";
+    print "    and optionally force the subnet id <subnet>\n";
+    print "    instead of reading it from the first GID.\n";
+    print "    Examples: --force-subnet mlx4_0:1\n";
+    print "              --force-subnet fe80:0000:0000:0000:mlx4_0:1\n";
+    print " --ibnetdiscover /path/to/ibnetdiscover\n";
+    print " --ibroute /path/to/ibroute\n";
+    # Fixed: the actual defaults above are /usr/sbin/..., not /usr/bin/...
+    print "    Specify exact location of programs. Default is /usr/sbin/<program>\n";
+    print " --sleep <n>\n";
+    print "    Sleep for <n> seconds between invocations of programs probing the network\n";
+    print " --ignore-errors\n";
+    print "    Ignore errors from ibnetdiscover and ibroute, assume their outputs are ok\n";
+    print " --force -f\n";
+    print "    Always rediscover to overwrite existing files without asking\n";
+    print " --verbose -v\n";
+    print "    Add verbose messages\n";
+    print " --dry-run\n";
+    print "    Do not actually run programs or modify anything\n";
+    print " --help -h\n";
+    print "    Show this help\n";
+    exit(1);
+}
+
+# <outdir> is mandatory (validated above).  Do NOT redeclare with 'my':
+# $outdir was already declared near the top of the script, and a second
+# 'my' would emit a "masks earlier declaration" warning under strict/-w.
+$outdir = $ARGV[0];
+
+mkdir $outdir unless $dryrun;
+die "$outdir isn't a directory\n" unless -d $outdir;
+mkdir "$outdir/ib-raw" unless $dryrun;
+die "$outdir/ib-raw isn't a directory\n" unless -d "$outdir/ib-raw";
+
+my $sudo = $needsudo ? "sudo" : "";
+
+# Use a numeric comparison: `id -u` output ends with a newline, so the
+# previous string compare ("0\n" ne 0) warned even when running as root.
+if (`id -u` != 0 and !$sudo and !$dryrun) {
+    print "WARNING: Not running as root.\n";
+}
+
+# subnets that will be discovered locally
+my %subnets_todiscover;
+
+#########################################
+# Read forced subnets
+if (@forcesubnets) {
+  print "Enforcing list of subnets to discover:\n";
+  foreach my $subnetstring (@forcesubnets) {
+    if ($subnetstring =~ /^([0-9a-fA-F:]{19}):([0-9a-z_-]+):([0-9]+)$/) {
+      my $subnet = $1;
+      my $boardname = $2;
+      my $portnum = $3;
+      print " Subnet $subnet from local board $boardname port $portnum.\n";
+      $subnets_todiscover{$subnet}->{localboardname} = $boardname;
+      $subnets_todiscover{$subnet}->{localportnum} = $portnum;
+
+    } elsif ($subnetstring =~ /^([0-9a-z_-]+):([0-9]+)$/) {
+      my $boardname = $1;
+      my $portnum = $2;
+      my $subnet;
+      print " Unknown subnet from local board $boardname port $portnum.\n";
+      my $filename = "/sys/class/infiniband/$boardname/ports/$portnum/gids/0";
+      if (open FILE, $filename) {
+        my $line = <FILE>;
+        if ($line =~ /^([0-9a-fA-F:]{19}):([0-9a-fA-F:]{19})$/) {
+	  $subnet = $1
+        }
+        close FILE;
+      }
+      if (defined $subnet) {
+	print "  Found subnet $subnet from first GID.\n";
+	$subnets_todiscover{$subnet}->{localboardname} = $boardname;
+	$subnets_todiscover{$subnet}->{localportnum} = $portnum;
+      } else {
+	print "  Couldn't read subnet from GID $filename, ignoring.\n";
+      }
+
+    } else {
+      print " Cannot parse --force-subnet $subnetstring, ignoring.\n";
+    }
+  }
+  print "\n";
+
+} else {
+  #########################################
+  # Guess subnets from hwloc
+  if (!defined $hwlocdir) {
+    $hwlocdir = "${outdir}/hwloc";
+    print "Using $hwlocdir as hwloc lstopo XML directory.\n\n";
+  }
+
+  mkdir $hwlocdir unless $dryrun;
+  die "$hwlocdir isn't a directory\n" unless -d $hwlocdir;
+
+  # at least get the local hwloc XML
+  my $hostname = `hostname`;
+  chomp $hostname;
+  my $lstopooutput = "$hwlocdir/${hostname}.xml";
+  if (!-f $lstopooutput) {
+    print "Exporting local node hwloc XML...\n";
+    print "  Running lstopo-no-graphics...\n";
+    my $cmd = "$locallstopo -f $lstopooutput";
+    if ($dryrun) {
+      print "   NOT running $cmd\n" if $verbose;
+    } else {
+      my $ret = system "$cmd" ;
+      if ($ret and !$ignoreerrors) {
+	print "   Failed (exit code $ret).\n";
+      }
+    }
+    print "\n";
+  }
+
+  # $servers{$hostname}->{gids}->{$boardname}->{$portnum}->{$gidnum}->{subnet} and ->{guid} = xxxx:xxxx:xxxx:xxxx
+  # $servers{$hostname}->{gids}->{$boardname}->{$portnum}->{invalid} = 1
+  # $servers{$hostname}->{subnets}->{$subnet} = 1
+  my %servers;
+
+  # $subnets{$subnet}->{$hostname} = 1;
+  my %subnets;
+
+  opendir DIR, $hwlocdir
+    or die "Failed to open hwloc directory ($!).\n";
+  # list subnets by ports
+  while (my $hwlocfile = readdir DIR) {
+    my $hostname;
+    if ($hwlocfile =~ /(.+).xml$/) {
+      $hostname = $1;
+    } else {
+      next;
+    }
+
+    open FILE, "$hwlocdir/$hwlocfile" or next;
+    my $boardname = undef;
+    my $portnum = undef;
+    while (my $line = <FILE>) {
+      if ($line =~ /<object type=\"OSDev\".* name=\"((?!hfi)(?!usnic).+)\".* osdev_type=\"3\".*>/) {
+        $boardname = $1;
+      } elsif (defined $boardname) {
+        if ($line =~ /<\/object>/) {
+          $boardname = undef;
+        } elsif ($line =~ /<info name=\"Port([0-9]+)GID([0-9]+)\".* value=\"([0-9a-fA-F:]{19}):([0-9a-fA-F:]{19})\"\/.*>/) {
+          $servers{$hostname}->{gids}->{$boardname}->{$1}->{$2}->{subnet} = $3;
+          $servers{$hostname}->{gids}->{$boardname}->{$1}->{$2}->{guid} = $4;
+        } elsif ($line =~ /<info name=\"Port([0-9]+)LID\" value=\"(0x[0-9a-fA-F]+)\"\/>/) {
+          # lid must be between 0x1 and 0xbfff
+          if ((hex $2) < 1 or (hex $2) > 49151) {
+            $servers{$hostname}->{gids}->{$boardname}->{$1}->{invalid} = 1;
+          }
+        } elsif ($line =~ /<info name=\"Port([0-9]+)State\" value=\"([0-9])\"\/>/) {
+          # state must be active = 4
+          if ($2 != 4) {
+            $servers{$hostname}->{gids}->{$boardname}->{$1}->{invalid} = 1;
+          }
+        }
+      }
+    }
+    close FILE;
+  }
+  closedir DIR;
+
+  # remove down/inactive ports/servers/...
+  foreach my $hostname (keys %servers) {
+    foreach my $boardname (keys %{$servers{$hostname}->{gids}}) {
+      foreach my $portnum (keys %{$servers{$hostname}->{gids}->{$boardname}}) {
+        delete $servers{$hostname}->{gids}->{$boardname}->{$portnum}
+	  if exists $servers{$hostname}->{gids}->{$boardname}->{$portnum}->{invalid};
+      }
+      delete $servers{$hostname}->{gids}->{$boardname}
+        unless keys %{$servers{$hostname}->{gids}->{$boardname}};
+    }
+    delete $servers{$hostname}
+      unless keys %{$servers{$hostname}->{gids}};
+  }
+
+  # fill list of hostnames per subnets and subnets per hostnames
+  foreach my $hostname (keys %servers) {
+    foreach my $boardname (keys %{$servers{$hostname}->{gids}}) {
+      foreach my $portnum (keys %{$servers{$hostname}->{gids}->{$boardname}}) {
+	foreach my $gidid (keys %{$servers{$hostname}->{gids}->{$boardname}->{$portnum}}) {
+	  my $subnet  = $servers{$hostname}->{gids}->{$boardname}->{$portnum}->{$gidid}->{subnet};
+	  $servers{$hostname}->{subnets}->{$subnet} = 1;
+	  $subnets{$subnet}->{$hostname} = 1;
+	}
+      }
+    }
+  }
+
+  my $nrsubnets = scalar (keys %subnets);
+  print "Found $nrsubnets subnets in hwloc directory:\n";
+  # find local subnets
+  my $localhostname = `hostname`; chomp $localhostname;
+  {
+    my $hostname = $localhostname;
+    foreach my $boardname (keys %{$servers{$hostname}->{gids}}) {
+      foreach my $portnum (keys %{$servers{$hostname}->{gids}->{$boardname}}) {
+        foreach my $gidid (keys %{$servers{$hostname}->{gids}->{$boardname}->{$portnum}}) {
+          my $subnet = $servers{$hostname}->{gids}->{$boardname}->{$portnum}->{$gidid}->{subnet};
+	  if (!exists $subnets_todiscover{$subnet}) {
+	    print " Subnet $subnet is locally accessible from board $boardname port $portnum.\n";
+	    $subnets_todiscover{$subnet}->{localboardname} = $boardname;
+	    $subnets_todiscover{$subnet}->{localportnum} = $portnum;
+	  } elsif ($verbose) {
+	    print " Subnet $subnet is also locally accessible from board $boardname port $portnum.\n";
+	  }
+        }
+      }
+    }
+  }
+  # find non-locally accessible subnets
+  foreach my $subnet (keys %subnets) {
+    next if exists $subnets{$subnet}->{$localhostname};
+    print " Subnet $subnet is NOT locally accessible.\n";
+    my @hostnames = (keys %{$subnets{$subnet}});
+    if ($verbose) {
+      print "  Subnet $subnet is accessible from nodes:\n";
+      foreach my $hostname (@hostnames) {
+	print "   $hostname\n";
+      }
+    } else {
+      print "  Subnet $subnet is accessible from node ".$hostnames[0];
+      print " (and ".(@hostnames-1)." others)" if (@hostnames > 1);
+      print "\n";
+    }
+  }
+  print "\n";
+
+  # list nodes that are connected to all subnets, if the local isn't
+  if (scalar keys %{$servers{$localhostname}->{subnets}} != $nrsubnets) {
+    my @fullyconnectedhostnames;
+    foreach my $hostname (keys %servers) {
+      if (scalar keys %{$servers{$hostname}->{subnets}} == $nrsubnets) {
+	push @fullyconnectedhostnames, $hostname;
+      }
+    }
+    if (@fullyconnectedhostnames) {
+      if ($verbose) {
+	print "All subnets are accessible from nodes:\n";
+	foreach my $hostname (@fullyconnectedhostnames) {
+	  print " $hostname\n";
+	}
+      } else {
+	print "All subnets are accessible from node ".$fullyconnectedhostnames[0];
+	print " (and ".(@fullyconnectedhostnames-1)." others)" if (@fullyconnectedhostnames > 1);
+	print "\n";
+      }
+    } else {
+      print "No node is connected to all subnets.\n";
+    }
+    print "\n";
+  }
+}
+
+###########################
+# Discovery routines
+
+# ibnetdiscover has GUIDs in the form of 0xXXXXXXXXXXXXXXXX, but hwloc
+# has GUIDs in the form of XXXX:XXXX:XXXX:XXXX.  So just arbitrarily
+# choose hwloc's form and convert everything to that format.
+# ibnetdiscover reports GUIDs as 0xXXXXXXXXXXXXXXXX while hwloc uses
+# XXXX:XXXX:XXXX:XXXX; normalize everything to the hwloc form.
+sub normalize_guid {
+    my ($guid) = @_;
+
+    return ""
+        if ($guid eq "");
+
+    # Guard the match: in Perl, a failed match leaves $1..$4 holding the
+    # captures of the previous successful match, which would silently
+    # produce a bogus GUID.  Return the input unchanged if it does not
+    # look like an ibnetdiscover-style GUID.
+    if ($guid =~ m/0x(.{4})(.{4})(.{4})(.{4})/) {
+        return "$1:$2:$3:$4";
+    }
+    return $guid;
+}
+
+sub getroutes {  # run ibroute once per switch LID found in the saved ibnetdiscover output
+    my $subnet = shift;
+    my $boardname = shift;
+    my $portnum = shift;
+    my $ibnetdiscoveroutput = shift;  # path to the ibnetdiscover capture to parse
+    my $ibrouteoutdir = shift;  # directory receiving one ibroute dump per switch LID
+    my $lids;  # hashref of switch LIDs already handled (autovivified on first store)
+
+    if (!open(FILE, $ibnetdiscoveroutput)) {
+      print "  Couldn't open $ibnetdiscoveroutput\n";
+      return;
+    }
+
+    while (<FILE>) {
+        # We only need lines that begin with SW
+        next
+            if (! /^SW /);
+
+        # Split out the columns.  Yay regexps.  One form of line has
+        # both source and destination information.  The other form
+        # only has source information (because it's not hooked up to
+        # anything -- usually a switch port that doesn't have anything
+        # plugged in to it).
+        chomp;
+        my $line = $_;
+
+        my ($have_peer, $src_name, $src_type, $src_lid, $src_port_id,
+            $src_guid, $width, $speed, $dest_type, $dest_lid, $dest_port_id,
+            $dest_guid, $dest_name);
+
+        # First, assume that the line has both a port and a peer.
+        if ($line !~ m/^SW\s+(\d+)\s+(\d+)\s+(0x[0-9a-f]{16})\s+(\d+x)\s([^\s]*)\s+-\s+(CA|SW)\s+(\d+)\s+(\d+)\s+(0x[0-9a-f]{16})\s+\(\s+'(.+?)'\s+-\s+'(.+?)'\s\)/) {
+            # If we get here, there was no peer -- just a port.
+            $have_peer = 0;
+
+            if ($line !~ m/^SW\s+(\d+)\s+(\d+)\s+(0x[0-9a-f]{16})\s+(\d+x)\s([^\s]*)\s+'(.+?)'/) {
+                print "Line cannot be parsed:\n$line\n";
+                next;
+            }
+            $src_lid = $1; # This is a decimal number
+            $src_port_id = $2; # This is a decimal number
+            $src_guid = $3;
+            $width = $4;
+            $speed = $5;
+            $src_name = $6;
+        } else {
+            $have_peer = 1;
+
+            $src_lid = $1; # This is a decimal number
+            $src_port_id = $2; # This is a decimal number
+            $src_guid = $3;
+            $width = $4;
+            $speed = $5;
+            $dest_type = $6;
+            $dest_lid = $7; # This is a decimal number
+            $dest_port_id = $8; # This is a decimal number
+            $dest_guid = $9;
+            $src_name = $10;
+            $dest_name = $11;
+        }
+
+        # Convert GUIDs to the form xxxx:xxxx:xxxx:xxxx
+        $src_guid = normalize_guid($src_guid);
+        $dest_guid = normalize_guid($dest_guid)
+            if ($have_peer);
+
+        # If the source switch LID already exists, then just keep
+        # going.
+        next
+            if (exists($lids->{$src_lid}));
+
+        # Run ibroute on this switch LID
+	my $ibrouteoutput = "$ibrouteoutdir/ibroute-$subnet-$src_lid.txt";
+        print "  Running ibroute for switch '$src_name' LID $src_lid...\n";
+	my $cmd = "$sudo $ibroute -C $boardname -P $portnum $src_lid";
+	if ($dryrun) {
+	  print "   NOT running $cmd\n" if $verbose;
+	} else {
+	  sleep_between_probes ("   ");
+	  my $ret = system "$cmd > ${ibrouteoutput}.new" ;  # write to a temp file; 0 wait status means success
+	  if (!$ret or $ignoreerrors) {
+	    unlink ${ibrouteoutput};  # drop any stale dump, then move the fresh one into place
+	    rename "${ibrouteoutput}.new", "${ibrouteoutput}";
+	  } else {
+	    unlink "${ibrouteoutput}.new";
+	    print "   Failed (exit code $ret).\n";  # NOTE(review): $ret is the raw wait status, not $? >> 8
+	    next;
+	  }
+	}
+
+        $lids->{$src_lid} = 1;  # mark this switch LID as done (recorded even in dry-run mode)
+    }
+
+    close FILE;
+}
+
+##############################
+# Discover subnets for real
+
+foreach my $subnet (keys %subnets_todiscover) {  # one ibnetdiscover + ibroute pass per subnet found earlier
+  my $boardname = $subnets_todiscover{$subnet}->{localboardname};
+  my $portnum = $subnets_todiscover{$subnet}->{localportnum};
+
+  print "Looking at $subnet (through local board $boardname port $portnum)...\n";
+
+  my $ibnetdiscoveroutput = "$outdir/ib-raw/ib-subnet-$subnet.txt";
+  my $ibrouteoutdir = "$outdir/ib-raw/ibroutes-$subnet";
+
+  if (-f $ibnetdiscoveroutput and !$force) {  # don't clobber an existing capture without asking
+    print " $ibnetdiscoveroutput already exists, discover again? (y/n) ";
+    my $answer = <STDIN>;
+    next if $answer !~ /^y/;  # NOTE(review): answering "n" also skips the route collection below
+  }
+
+  print " Running ibnetdiscover...\n";
+  my $cmd = "$sudo $ibnetdiscover -s -l -g -H -S -R -p -C $boardname -P $portnum";
+  if ($dryrun) {
+    print "  NOT running $cmd\n" if $verbose;
+  } else {
+    sleep_between_probes ("  ");
+    print "  $cmd\n" if $verbose;
+    my $ret = system "$cmd > ${ibnetdiscoveroutput}.new" ;  # write to a temp file; 0 wait status means success
+    if (!$ret or $ignoreerrors) {
+      unlink ${ibnetdiscoveroutput};  # drop any stale capture, then move the fresh one into place
+      rename "${ibnetdiscoveroutput}.new", "${ibnetdiscoveroutput}";
+    } else {
+      unlink "${ibnetdiscoveroutput}.new";
+      print "  Failed (exit code $ret).\n";  # NOTE(review): $ret is the raw wait status, not $? >> 8
+      next;
+    }
+  }
+
+  print " Getting routes...\n";
+  if (!$dryrun) {
+    system("rm -rf $ibrouteoutdir");  # start from a clean per-subnet route directory
+    mkdir $ibrouteoutdir unless $dryrun;  # "unless $dryrun" is redundant inside this !$dryrun branch
+    die "$ibrouteoutdir isn't a directory\n" unless -d $ibrouteoutdir;
+  }
+  getroutes $subnet, $boardname, $portnum, $ibnetdiscoveroutput, $ibrouteoutdir;
+}
diff --git a/opal/mca/hwloc/hwloc2x/hwloc/utils/netloc/mpi/Makefile.am b/opal/mca/hwloc/hwloc2x/hwloc/utils/netloc/mpi/Makefile.am
new file mode 100644
index 0000000000..38dcbf5ee1
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc/utils/netloc/mpi/Makefile.am
@@ -0,0 +1,2 @@
+# This is a dummy file that is not needed in embedded mode,
+# but sadly, automake *requires* it
diff --git a/opal/mca/hwloc/hwloc2x/hwloc2x.h b/opal/mca/hwloc/hwloc2x/hwloc2x.h
new file mode 100644
index 0000000000..c0410187e4
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc2x.h
@@ -0,0 +1,50 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
+/*
+ * Copyright (c) 2011-2017 Cisco Systems, Inc.  All rights reserved
+ * Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
+ * Copyright (c) 2016      Los Alamos National Security, LLC. All rights
+ *                         reserved.
+ * Copyright (c) 2017      Research Organization for Information Science
+ *                         and Technology (RIST). All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ *
+ * When this component is used, this file is included in the rest of
+ * the OPAL/ORTE/OMPI code base via opal/mca/hwloc/hwloc-internal.h.  As such,
+ * this header represents the public interface to this static component.
+ */
+
+#ifndef MCA_OPAL_HWLOC_HWLOC2X_H
+#define MCA_OPAL_HWLOC_HWLOC2X_H
+
+BEGIN_C_DECLS
+
+#include "hwloc/include/hwloc.h"
+
+/* If the including file requested it, also include the hwloc verbs
+   helper file.  We can't just always include this file (even if we
+   know we have <infiniband/verbs.h>) because there are some inline
+   functions in that file that invoke ibv_* functions.  Some linkers
+   (e.g., Solaris Studio Compilers) will instantiate those static
+   inline functions even if we don't use them, and therefore we need
+   to be able to resolve the ibv_* symbols at link time.
+
+   Since -libverbs is only specified in places where we use other
+   ibv_* functions (e.g., the OpenFabrics-based BTLs), that means that
+   linking random executables can/will fail (e.g., orterun).
+ */
+#if defined(OPAL_HWLOC_WANT_VERBS_HELPER) && OPAL_HWLOC_WANT_VERBS_HELPER
+#    if defined(HAVE_INFINIBAND_VERBS_H)
+#        include "hwloc/include/hwloc/openfabrics-verbs.h"
+#    else
+#        error Tried to include hwloc verbs helper file, but hwloc was compiled with no OpenFabrics support
+#    endif
+#endif
+
+END_C_DECLS
+
+#endif /* MCA_OPAL_HWLOC_HWLOC2X_H */
diff --git a/opal/mca/hwloc/hwloc2x/hwloc2x_component.c b/opal/mca/hwloc/hwloc2x/hwloc2x_component.c
new file mode 100644
index 0000000000..c9149c8417
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/hwloc2x_component.c
@@ -0,0 +1,57 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
+/*
+ * Copyright (c) 2011-2017 Cisco Systems, Inc.  All rights reserved
+ * Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
+ * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
+ *                         reserved.
+ * Copyright (c) 2017      Research Organization for Information Science
+ *                         and Technology (RIST). All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ *
+ * These symbols are in a file by themselves to provide nice linker
+ * semantics.  Since linkers generally pull in symbols by object
+ * files, keeping these symbols as the only symbols in this file
+ * prevents utility programs such as "ompi_info" from having to import
+ * entire components just to query their version and parameters.
+ */
+
+#include "opal_config.h"
+#include "opal/constants.h"
+
+#include "opal/mca/hwloc/hwloc-internal.h"
+#include "hwloc2x.h"
+
+/*
+ * Public string showing the opal hwloc hwloc2x component version number
+ */
+const char *opal_hwloc_hwloc2x_component_version_string =
+    "OPAL hwloc2x hwloc MCA component version " OPAL_VERSION;
+
+/*
+ * Instantiate the public struct with all of our public information
+ * and pointers to our public functions in it
+ */
+
+const opal_hwloc_component_t mca_hwloc_hwloc2x_component = {
+
+    /* First, the mca_component_t struct containing meta information
+       about the component itself */
+
+    .base_version = {
+        OPAL_HWLOC_BASE_VERSION_2_0_0,
+
+        /* Component name and version */
+        .mca_component_name = "hwloc2x",
+        MCA_BASE_MAKE_VERSION(component,  OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION,
+                              OPAL_RELEASE_VERSION),
+    },
+    .base_data = {
+        /* The component is checkpoint ready */
+        MCA_BASE_METADATA_PARAM_CHECKPOINT
+    }
+};
diff --git a/opal/mca/hwloc/hwloc2x/owner.txt b/opal/mca/hwloc/hwloc2x/owner.txt
new file mode 100644
index 0000000000..d72196b959
--- /dev/null
+++ b/opal/mca/hwloc/hwloc2x/owner.txt
@@ -0,0 +1,7 @@
+#
+# owner/status file
+# owner: institution that is responsible for this package
+# status: e.g. active, maintenance, unmaintained
+#
+owner: INTEL
+status: maintenance