From 6b0011f138daeb764a562bc272ad34c6fdb3c692 Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Thu, 28 Aug 2014 15:52:28 +0200 Subject: [PATCH 01/25] hwloc: v1.9.1 released, doing 1.9.2rc1 now Signed-off-by: Jeff Squyres --- opal/mca/hwloc/hwloc191/hwloc/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opal/mca/hwloc/hwloc191/hwloc/VERSION b/opal/mca/hwloc/hwloc191/hwloc/VERSION index 5c69e54d8d..9ba8c8caa1 100644 --- a/opal/mca/hwloc/hwloc191/hwloc/VERSION +++ b/opal/mca/hwloc/hwloc191/hwloc/VERSION @@ -13,7 +13,7 @@ snapshot_version=gitclone major=1 minor=9 -release=1 +release=2 # greek is used for alpha or beta release tags. If it is non-empty, # it will be appended to the version number. It does not have to be From 6764413aa348162f3b883880d2ffd27dfb9c2ea4 Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Thu, 4 Sep 2014 17:02:37 +0200 Subject: [PATCH 02/25] hwloc: misc.h: Fix hwloc_strncasecmp() build under strict flags on BSD strncasecmp() needs <strings.h>. Thanks to Pavan Balaji for reporting the failure. 
(cherry picked from commit open-mpi/hwloc@37439c4801f2f69f83599df3d6254df0aabebeb5) Signed-off-by: Jeff Squyres --- opal/mca/hwloc/hwloc191/hwloc/include/private/misc.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/opal/mca/hwloc/hwloc191/hwloc/include/private/misc.h b/opal/mca/hwloc/hwloc191/hwloc/include/private/misc.h index fd5195b422..de5cc34159 100644 --- a/opal/mca/hwloc/hwloc191/hwloc/include/private/misc.h +++ b/opal/mca/hwloc/hwloc191/hwloc/include/private/misc.h @@ -15,6 +15,12 @@ #include #include +#ifdef HWLOC_HAVE_DECL_STRNCASECMP +#ifdef HAVE_STRINGS_H +#include <strings.h> +#endif +#endif + /* Compile-time assertion */ #define HWLOC_BUILD_ASSERT(condition) ((void)sizeof(char[1 - 2*!(condition)])) From 50b035dddb5e6d318c41232972df93a25a5fa591 Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Mon, 8 Sep 2014 21:54:29 +0200 Subject: [PATCH 03/25] hwloc: misc.h: Fix hwloc_strncasecmp() with some icc tolower needs <ctype.h>. Thanks to Ralph Castain for reporting the failure. (cherry picked from commit open-mpi/hwloc@038c372a58da0a3e4b134459f19194853c8b7769) Signed-off-by: Jeff Squyres --- opal/mca/hwloc/hwloc191/hwloc/NEWS | 6 ++++++ opal/mca/hwloc/hwloc191/hwloc/config/hwloc.m4 | 1 + opal/mca/hwloc/hwloc191/hwloc/include/private/misc.h | 4 ++++ 3 files changed, 11 insertions(+) diff --git a/opal/mca/hwloc/hwloc191/hwloc/NEWS b/opal/mca/hwloc/hwloc191/hwloc/NEWS index f472802a7b..23bce4918f 100644 --- a/opal/mca/hwloc/hwloc191/hwloc/NEWS +++ b/opal/mca/hwloc/hwloc191/hwloc/NEWS @@ -17,6 +17,12 @@ bug fixes (and other actions) for each version of hwloc since version in v0.9.1). +Version 1.9.2 +------------- +* Fix some build failures in private/misc.h. + Thanks to Pavan Balaji and Ralph Castain for the reports. + + Version 1.9.1 ------------- * Fix a crash when the PCI locality is invalid. 
Attach to the root object diff --git a/opal/mca/hwloc/hwloc191/hwloc/config/hwloc.m4 b/opal/mca/hwloc/hwloc191/hwloc/config/hwloc.m4 index 470ad58386..4fc2c4784b 100644 --- a/opal/mca/hwloc/hwloc191/hwloc/config/hwloc.m4 +++ b/opal/mca/hwloc/hwloc191/hwloc/config/hwloc.m4 @@ -353,6 +353,7 @@ EOF]) AC_CHECK_HEADERS([unistd.h]) AC_CHECK_HEADERS([dirent.h]) AC_CHECK_HEADERS([strings.h]) + AC_CHECK_HEADERS([ctype.h]) AC_CHECK_FUNCS([strncasecmp], [ _HWLOC_CHECK_DECL([strncasecmp], [ diff --git a/opal/mca/hwloc/hwloc191/hwloc/include/private/misc.h b/opal/mca/hwloc/hwloc191/hwloc/include/private/misc.h index de5cc34159..5061d1cd86 100644 --- a/opal/mca/hwloc/hwloc191/hwloc/include/private/misc.h +++ b/opal/mca/hwloc/hwloc191/hwloc/include/private/misc.h @@ -19,6 +19,10 @@ #ifdef HAVE_STRINGS_H #include <strings.h> #endif +#else +#ifdef HAVE_CTYPE_H +#include <ctype.h> +#endif #endif /* Compile-time assertion */ From d5f8c89527b1be27c268cc3675f012562ff8d825 Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Thu, 18 Sep 2014 11:29:47 +0200 Subject: [PATCH 04/25] hwloc: configure: fix the check for X11/Xutil.h At least some solaris enforce the need to #include X11/Xlib.h first. Thanks to Siegmar Gross for reporting the issue. 
(cherry picked from commit open-mpi/hwloc@005a7e89b67010952f9d86e905a17b38d18dab7f) Signed-off-by: Jeff Squyres --- opal/mca/hwloc/hwloc191/hwloc/config/hwloc.m4 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opal/mca/hwloc/hwloc191/hwloc/config/hwloc.m4 b/opal/mca/hwloc/hwloc191/hwloc/config/hwloc.m4 index 4fc2c4784b..2999f35d9a 100644 --- a/opal/mca/hwloc/hwloc191/hwloc/config/hwloc.m4 +++ b/opal/mca/hwloc/hwloc191/hwloc/config/hwloc.m4 @@ -931,7 +931,7 @@ EOF]) [AC_CHECK_HEADERS([X11/keysym.h], [AC_DEFINE([HWLOC_HAVE_X11_KEYSYM], [1], [Define to 1 if X11 headers including Xutil.h and keysym.h are available.])]) AC_SUBST([HWLOC_X11_LIBS], ["-lX11"]) - ]) + ], [], [#include <X11/Xlib.h>]) ]) ]) CPPFLAGS=$CPPFLAGS_save From 7c96aecfaf5f36e7fa82095aec36e7d3bb1d321b Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Sun, 21 Sep 2014 20:48:05 +0200 Subject: [PATCH 05/25] hwloc: errors: improve the advice to send hwloc-gather-topology files in the OS error message (cherry picked from commit open-mpi/hwloc@f77aa01b3c23181107162ac2c8f6155c62d7c798) Signed-off-by: Jeff Squyres --- opal/mca/hwloc/hwloc191/hwloc/src/topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opal/mca/hwloc/hwloc191/hwloc/src/topology.c b/opal/mca/hwloc/hwloc191/hwloc/src/topology.c index d5d1c842f6..28bf670726 100644 --- a/opal/mca/hwloc/hwloc191/hwloc/src/topology.c +++ b/opal/mca/hwloc/hwloc191/hwloc/src/topology.c @@ -79,7 +79,7 @@ void hwloc_report_os_error(const char *msg, int line) fprintf(stderr, "*\n"); fprintf(stderr, "* Please report this error message to the hwloc user's mailing list,\n"); #ifdef HWLOC_LINUX_SYS - fprintf(stderr, "* along with the output from the hwloc-gather-topology script.\n"); + fprintf(stderr, "* along with the output+tarball generated by the hwloc-gather-topology script.\n"); #else fprintf(stderr, "* along with any relevant topology information from your platform.\n"); #endif From a636790604e620d74b7cbcf34c7ba60b67bae629 Mon Sep 17 
00:00:00 2001 From: Brice Goglin Date: Sun, 21 Sep 2014 20:56:08 +0200 Subject: [PATCH 06/25] hwloc: opal/mca/hwloc/hwloc191/hwloc/NEWS update Signed-off-by: Jeff Squyres --- opal/mca/hwloc/hwloc191/hwloc/NEWS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/opal/mca/hwloc/hwloc191/hwloc/NEWS b/opal/mca/hwloc/hwloc191/hwloc/NEWS index 23bce4918f..7c8e48a28a 100644 --- a/opal/mca/hwloc/hwloc191/hwloc/NEWS +++ b/opal/mca/hwloc/hwloc191/hwloc/NEWS @@ -21,6 +21,8 @@ Version 1.9.2 ------------- * Fix some build failures in private/misc.h. Thanks to Pavan Balaji and Ralph Castain for the reports. +* Fix failures to detect X11/Xutil.h on some Solaris platforms. + Thanks to Siegmar Gross for reporting the failure. Version 1.9.1 From db5bc724960b58c8c5be369f2cd7b2ce1a2db39d Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Mon, 10 Nov 2014 07:27:27 +0100 Subject: [PATCH 07/25] hwloc: API: clearly state that os_index isn't unique while logical_index is (cherry picked from commit open-mpi/hwloc@6c75302ab2b7d9d51bb12ec561cf71c6c9437947) Conflicts: opal/mca/hwloc/hwloc191/hwloc/include/hwloc.h Signed-off-by: Jeff Squyres --- opal/mca/hwloc/hwloc191/hwloc/include/hwloc.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/opal/mca/hwloc/hwloc191/hwloc/include/hwloc.h b/opal/mca/hwloc/hwloc191/hwloc/include/hwloc.h index 412e0e7d8e..4131a89a38 100644 --- a/opal/mca/hwloc/hwloc191/hwloc/include/hwloc.h +++ b/opal/mca/hwloc/hwloc191/hwloc/include/hwloc.h @@ -331,7 +331,10 @@ struct hwloc_obj_memory_s { struct hwloc_obj { /* physical information */ hwloc_obj_type_t type; /**< \brief Type of object */ - unsigned os_index; /**< \brief OS-provided physical index number */ + unsigned os_index; /**< \brief OS-provided physical index number. + * It is not guaranteed unique across the entire machine, + * except for PUs and NUMA nodes. 
+ */ char *name; /**< \brief Object description if any */ struct hwloc_obj_memory_s memory; /**< \brief Memory attributes */ @@ -346,7 +349,9 @@ struct hwloc_obj { * of parent/child links from the root object to here. */ unsigned logical_index; /**< \brief Horizontal index in the whole list of similar objects, - * could be a "cousin_rank" since it's the rank within the "cousin" list below */ + * hence guaranteed unique across the entire machine. + * Could be a "cousin_rank" since it's the rank within the "cousin" list below + */ signed os_level; /**< \brief OS-provided physical level, -1 if unknown or meaningless */ /* cousins are all objects of the same type (and depth) across the entire topology */ From 86a536ca58f4eccd25174372a2cf23c746531521 Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Mon, 10 Nov 2014 17:13:02 +0100 Subject: [PATCH 08/25] hwloc: x86 and OSF: Don't forget to set NUMA node nodeset x86: Not critical since BSDs that use this backend have no membind support, but better fix it for uniformization. (cherry picked from commit open-mpi/hwloc@a431361c7d2a3772ea7a2ffee922b3cf61e858cd) OSF: Looks like nobody ever tried to play with memory binding on OSF/Tru64. (cherry picked from commit open-mpi/hwloc@2d6c73356d8d2dd7a22b99b3d51fbecb002e77b9) Conflicts: NEWS Signed-off-by: Jeff Squyres --- opal/mca/hwloc/hwloc191/hwloc/src/topology-osf.c | 4 +++- opal/mca/hwloc/hwloc191/hwloc/src/topology-x86.c | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/opal/mca/hwloc/hwloc191/hwloc/src/topology-osf.c b/opal/mca/hwloc/hwloc191/hwloc/src/topology-osf.c index c7ed7b2457..c2f1e59fb6 100644 --- a/opal/mca/hwloc/hwloc191/hwloc/src/topology-osf.c +++ b/opal/mca/hwloc/hwloc191/hwloc/src/topology-osf.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2012 Inria. All rights reserved. + * Copyright © 2009-2014 Inria. All rights reserved. * Copyright © 2009-2011 Université Bordeaux 1 * Copyright © 2011 Cisco Systems, Inc. 
All rights reserved. * See COPYING in top-level directory. @@ -283,6 +283,8 @@ hwloc_look_osf(struct hwloc_backend *backend) indexes[radid] = radid; nodes[radid] = obj = hwloc_alloc_setup_object(HWLOC_OBJ_NODE, radid); + obj->nodeset = hwloc_bitmap_alloc(); + hwloc_bitmap_set(obj->nodeset, radid); obj->cpuset = hwloc_bitmap_alloc(); obj->memory.local_memory = rad_get_physmem(radid) * hwloc_getpagesize(); obj->memory.page_types_len = 2; diff --git a/opal/mca/hwloc/hwloc191/hwloc/src/topology-x86.c b/opal/mca/hwloc/hwloc191/hwloc/src/topology-x86.c index 611aea1e92..81d8bd4cb5 100644 --- a/opal/mca/hwloc/hwloc191/hwloc/src/topology-x86.c +++ b/opal/mca/hwloc/hwloc191/hwloc/src/topology-x86.c @@ -517,6 +517,8 @@ static void summarize(hwloc_topology_t topology, struct procinfo *infos, unsigne } node = hwloc_alloc_setup_object(HWLOC_OBJ_NODE, nodeid); node->cpuset = node_cpuset; + node->nodeset = hwloc_bitmap_alloc(); + hwloc_bitmap_set(node->nodeset, nodeid); hwloc_debug_1arg_bitmap("os node %u has cpuset %s\n", nodeid, node_cpuset); hwloc_insert_object_by_cpuset(topology, node); From 9b59d532fcf12378f4e01f9cb9c8f3df0cfe2483 Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Thu, 20 Nov 2014 11:12:20 +0100 Subject: [PATCH 09/25] hwloc: cpuid-x86: Fix duplicate asm labels in case of heavy inlining on x86-32 hwloc_x86_discover() calls hwloc_look_x86() twice, which calls hwloc_have_x86_cpuid(). If everything gets inlined, the asm label inside hwloc_have_x86_cpuid() is duplicated. Use a local label with f annotation in jumps to avoid the problem. Thanks to Thomas Van Doren for reporting the issue (found with gcc -m32). 
(cherry picked from commit open-mpi/hwloc@50e447f5bc6db88aefdc2f7e55dd43c6f575c870) Signed-off-by: Jeff Squyres --- opal/mca/hwloc/hwloc191/hwloc/include/private/cpuid-x86.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/opal/mca/hwloc/hwloc191/hwloc/include/private/cpuid-x86.h b/opal/mca/hwloc/hwloc191/hwloc/include/private/cpuid-x86.h index 63d6bf6064..1de12bec84 100644 --- a/opal/mca/hwloc/hwloc191/hwloc/include/private/cpuid-x86.h +++ b/opal/mca/hwloc/hwloc191/hwloc/include/private/cpuid-x86.h @@ -32,14 +32,14 @@ static __hwloc_inline int hwloc_have_x86_cpuid(void) "pushfl \n\t" \ "pop %1 \n\t" \ "cmp %1,%2\n\t" /* Compare with expected value */ \ - "jnz Lhwloc1\n\t" /* Unexpected, failure */ \ + "jnz 0f\n\t" /* Unexpected, failure */ \ TRY_TOGGLE /* Try to set/clear */ TRY_TOGGLE /* Try to clear/set */ "mov $1,%0\n\t" /* Passed the test! */ - "Lhwloc1: \n\t" + "0: \n\t" "popfl \n\t" /* Restore flags */ : "=r" (ret), "=&r" (tmp), "=&r" (tmp2)); From 5427b33caf3191f9e3af7a6cb9e2472556c8af16 Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Tue, 25 Nov 2014 10:10:56 +0100 Subject: [PATCH 10/25] hwloc: debug: fix an overzealous assertion about the parent cpuset vs its children When I/O are attached under a PU, removing the children's cpusets from the parent cpuset doesn't give 0, it gives the PU cpuset. The assertion fails on single-pu machines with I/O when --merge is given, only one PU remains with I/O under it. But if we insert Misc by cpuset under PU, it gives 0 as expected. Fix the assertion accordingly. Thanks to Thomas Van Doren for reporting the issue. 
(cherry picked from commit open-mpi/hwloc@45c94c336d828210b2e7fe8cfd6c528066567bc5) Signed-off-by: Jeff Squyres --- opal/mca/hwloc/hwloc191/hwloc/src/topology.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/opal/mca/hwloc/hwloc191/hwloc/src/topology.c b/opal/mca/hwloc/hwloc191/hwloc/src/topology.c index 28bf670726..a86f1ffd7a 100644 --- a/opal/mca/hwloc/hwloc191/hwloc/src/topology.c +++ b/opal/mca/hwloc/hwloc191/hwloc/src/topology.c @@ -3035,7 +3035,17 @@ hwloc__check_children(struct hwloc_obj *parent) assert(hwloc_bitmap_isincluded(parent->children[j]->cpuset, remaining_parent_set)); hwloc_bitmap_andnot(remaining_parent_set, remaining_parent_set, parent->children[j]->cpuset); } - assert(hwloc_bitmap_iszero(remaining_parent_set)); + if (parent->type == HWLOC_OBJ_PU) { + /* if parent is a PU, its os_index bit may remain. + * it may be in a Misc child inserted by cpuset, or could be in no child */ + if (hwloc_bitmap_weight(remaining_parent_set) == 1) + assert((unsigned) hwloc_bitmap_first(remaining_parent_set) == parent->os_index); + else + assert(hwloc_bitmap_iszero(remaining_parent_set)); + } else { + /* nothing remains */ + assert(hwloc_bitmap_iszero(remaining_parent_set)); + } hwloc_bitmap_free(remaining_parent_set); } From 77978a846e54fde5a4f0544b7c91469dae4c2c31 Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Tue, 28 Oct 2014 16:47:21 +0100 Subject: [PATCH 11/25] hwloc: core: fix the merging of identical objects in presence of Misc objects If object A contains B + I/O as children, we can "ignore" I/Os and still try to merge A and B. We now do the same for Misc objects without cpusets instead of I/Os. This fixes a corner case when export/reimport to XML creates a slightly different topology (making hwloc_insert_misc fail inside a Linux cgroup). Thanks to Dave Love for reporting the problem. 
Fixes #118 (cherry picked from commit open-mpi/hwloc@650371e1159fb3826afae8842545c4d16bdad1c8) Signed-off-by: Jeff Squyres --- opal/mca/hwloc/hwloc191/hwloc/src/topology.c | 21 +++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/opal/mca/hwloc/hwloc191/hwloc/src/topology.c b/opal/mca/hwloc/hwloc191/hwloc/src/topology.c index a86f1ffd7a..8652804444 100644 --- a/opal/mca/hwloc/hwloc191/hwloc/src/topology.c +++ b/opal/mca/hwloc/hwloc191/hwloc/src/topology.c @@ -1818,15 +1818,22 @@ merge_useless_child(hwloc_topology_t topology, hwloc_obj_t *pparent) /* There are no child, nothing to merge. */ return; - if (child->next_sibling && !hwloc_obj_type_is_io(child->next_sibling->type)) - /* There are several non-I/O children */ + /* we don't merge if there are multiple "important" children. + * non-important ones are at the end of the list. + * look at the second child to find out. + */ + if (child->next_sibling + /* I/O objects may be ignored when trying to merge */ + && !hwloc_obj_type_is_io(child->next_sibling->type) + /* Misc objects without cpuset may be ignored as well */ + && !(child->next_sibling->type == HWLOC_OBJ_MISC && !child->next_sibling->cpuset)) + /* There are several children that prevent from merging */ return; - /* There is one non-I/O child and possible some I/O children. - * I/O children shouldn't prevent merging because they can be attached - * to anything with the same locality. + /* There is one important child, and some children that may be ignored + * during merging because they can be attached to anything with the same locality. * Move them to the side during merging, and append them back later. - * This is easy because I/O children are always last in the list. + * This is easy because children with no cpuset are always last in the list. 
*/ ios = child->next_sibling; child->next_sibling = NULL; @@ -1873,7 +1880,7 @@ merge_useless_child(hwloc_topology_t topology, hwloc_obj_t *pparent) } if (ios) { - /* append I/O children to the list of children of the remaining object */ + /* append the remaining list of children to the remaining object */ pchild = &((*pparent)->first_child); while (*pchild) pchild = &((*pchild)->next_sibling); From fff1bb5dcd1964e047de4094f5d61ef165d88c4f Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Tue, 2 Dec 2014 08:57:20 +0100 Subject: [PATCH 12/25] hwloc: core: reorder children in merge_useless_child() as well When ignore_keep_structure is enabled, intermediate level can disappear between parent and child, making the new child complete_cpuset smaller, causing the child list to require a reorder just like in remove_ignored(). (cherry picked from commit open-mpi/hwloc@88afbe6b62d1fae99bf36b8f03e26eff37f08240) Embed this related commit: core: abstract out reorder_children(), needed when merging modifies the list of children (cherry picked from commit open-mpi/hwloc@14db82d3917c5de40dfe12ead316f3fc602197f4) Signed-off-by: Jeff Squyres --- opal/mca/hwloc/hwloc191/hwloc/src/topology.c | 64 +++++++++++--------- 1 file changed, 37 insertions(+), 27 deletions(-) diff --git a/opal/mca/hwloc/hwloc191/hwloc/src/topology.c b/opal/mca/hwloc/hwloc191/hwloc/src/topology.c index 8652804444..9dc0d78792 100644 --- a/opal/mca/hwloc/hwloc191/hwloc/src/topology.c +++ b/opal/mca/hwloc/hwloc191/hwloc/src/topology.c @@ -1635,6 +1635,28 @@ unlink_and_free_single_object(hwloc_obj_t *pparent) hwloc_free_unlinked_object(parent); } +static void +reorder_children(hwloc_obj_t parent) +{ + /* move the children list on the side */ + hwloc_obj_t *prev, child, children = parent->first_child; + parent->first_child = NULL; + while (children) { + /* dequeue child */ + child = children; + children = child->next_sibling; + /* find where to enqueue it */ + prev = &parent->first_child; + while (*prev + && 
(!child->cpuset || !(*prev)->cpuset + || hwloc__object_cpusets_compare_first(child, *prev) > 0)) + prev = &((*prev)->next_sibling); + /* enqueue */ + child->next_sibling = *prev; + *prev = child; + } +} + /* Remove all ignored objects. */ static int remove_ignored(hwloc_topology_t topology, hwloc_obj_t *pparent) @@ -1656,25 +1678,8 @@ remove_ignored(hwloc_topology_t topology, hwloc_obj_t *pparent) dropped = 1; } else if (dropped_children) { - /* we keep this object but its children changed, reorder them by cpuset */ - - /* move the children list on the side */ - hwloc_obj_t *prev, children = parent->first_child; - parent->first_child = NULL; - while (children) { - /* dequeue child */ - child = children; - children = child->next_sibling; - /* find where to enqueue it */ - prev = &parent->first_child; - while (*prev - && (!child->cpuset || !(*prev)->cpuset - || hwloc__object_cpusets_compare_first(child, *prev) > 0)) - prev = &((*prev)->next_sibling); - /* enqueue */ - child->next_sibling = *prev; - *prev = child; - } + /* we keep this object but its children changed, reorder them by complete_cpuset */ + reorder_children(parent); } return dropped; @@ -1804,20 +1809,23 @@ can_merge_group(hwloc_topology_t topology, hwloc_obj_t obj) * Merge with the only child if either the parent or the child has a type to be * ignored while keeping structure */ -static void +static int merge_useless_child(hwloc_topology_t topology, hwloc_obj_t *pparent) { hwloc_obj_t parent = *pparent, child, *pchild, ios; - int replacechild = 0, replaceparent = 0; + int replacechild = 0, replaceparent = 0, droppedchildren = 0; + + if (!parent->first_child) + /* There are no child, nothing to merge. */ + return 0; for_each_child_safe(child, parent, pchild) - merge_useless_child(topology, pchild); + droppedchildren += merge_useless_child(topology, pchild); + + if (droppedchildren) + reorder_children(parent); child = parent->first_child; - if (!child) - /* There are no child, nothing to merge. 
*/ - return; - /* we don't merge if there are multiple "important" children. * non-important ones are at the end of the list. * look at the second child to find out. @@ -1828,7 +1836,7 @@ merge_useless_child(hwloc_topology_t topology, hwloc_obj_t *pparent) /* Misc objects without cpuset may be ignored as well */ && !(child->next_sibling->type == HWLOC_OBJ_MISC && !child->next_sibling->cpuset)) /* There are several children that prevent from merging */ - return; + return 0; /* There is one important child, and some children that may be ignored * during merging because they can be attached to anything with the same locality. @@ -1886,6 +1894,8 @@ merge_useless_child(hwloc_topology_t topology, hwloc_obj_t *pparent) pchild = &((*pchild)->next_sibling); *pchild = ios; } + + return replaceparent ? 1 : 0; } static void From 6caf9edbeab45bd125ad2e24649b62aa6ed3231f Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Fri, 5 Dec 2014 22:19:39 +0100 Subject: [PATCH 13/25] hwloc: hpux: improve hwloc_hpux_find_ldom() looking for NUMA node hwloc_get_first_largest_obj_inside_cpuset() returns the largest/highest object, but it could still have a child with the same cpuset. So check children as well in case there's a matching NUMA node there. 
(cherry picked from commit open-mpi/hwloc@57a1c4fbe4bc83a6d65b2e752cc7f8b6f28bddc7) Signed-off-by: Jeff Squyres --- opal/mca/hwloc/hwloc191/hwloc/src/topology-hpux.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/opal/mca/hwloc/hwloc191/hwloc/src/topology-hpux.c b/opal/mca/hwloc/hwloc191/hwloc/src/topology-hpux.c index 02c3940eb4..5bc61be36d 100644 --- a/opal/mca/hwloc/hwloc191/hwloc/src/topology-hpux.c +++ b/opal/mca/hwloc/hwloc191/hwloc/src/topology-hpux.c @@ -43,9 +43,17 @@ hwloc_hpux_find_ldom(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set) return -1; obj = hwloc_get_first_largest_obj_inside_cpuset(topology, hwloc_set); - if (!hwloc_bitmap_isequal(obj->cpuset, hwloc_set) || obj->type != HWLOC_OBJ_NODE) { + if (!hwloc_bitmap_isequal(obj->cpuset, hwloc_set)) /* Does not correspond to exactly one node */ return -1; + /* obj is the highest possibly matching object, but some (single) child (with same cpuset) could match too */ + while (obj->type != HWLOC_OBJ_NODE) { + /* try the first child, in case it has the same cpuset */ + if (!obj->first_child + || !obj->first_child->cpuset + || !hwloc_bitmap_isequal(obj->cpuset, obj->first_child->cpuset)) + return -1; + obj = obj->first_child; } return obj->os_index; From 379c7b0d8b26c999ade2d0e91bdc75987e1f5a92 Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Fri, 5 Dec 2014 22:57:04 +0100 Subject: [PATCH 14/25] hwloc: x86: use ulong for cache sizes, uint won't be enough in the near future (cherry picked from commit open-mpi/hwloc@ae825977730c7d6b88ccab744e777cace3586c06) Signed-off-by: Jeff Squyres --- opal/mca/hwloc/hwloc191/hwloc/src/topology-x86.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/opal/mca/hwloc/hwloc191/hwloc/src/topology-x86.c b/opal/mca/hwloc/hwloc191/hwloc/src/topology-x86.c index 81d8bd4cb5..e1db138e32 100644 --- a/opal/mca/hwloc/hwloc191/hwloc/src/topology-x86.c +++ b/opal/mca/hwloc/hwloc191/hwloc/src/topology-x86.c @@ -33,7 
+33,7 @@ struct cacheinfo { unsigned linepart; int ways; unsigned sets; - unsigned size; + unsigned long size; }; struct procinfo { @@ -68,7 +68,7 @@ static void fill_amd_cache(struct procinfo *infos, unsigned level, unsigned cpui { struct cacheinfo *cache; unsigned cachenum; - unsigned size = 0; + unsigned long size = 0; if (level == 1) size = ((cpuid >> 24)) << 10; @@ -104,7 +104,7 @@ static void fill_amd_cache(struct procinfo *infos, unsigned level, unsigned cpui cache->size = size; cache->sets = 0; - hwloc_debug("cache L%u t%u linesize %u ways %u size %uKB\n", cache->level, cache->nbthreads_sharing, cache->linesize, cache->ways, cache->size >> 10); + hwloc_debug("cache L%u t%u linesize %u ways %u size %luKB\n", cache->level, cache->nbthreads_sharing, cache->linesize, cache->ways, cache->size >> 10); } /* Fetch information from the processor itself thanks to cpuid and store it in @@ -221,7 +221,7 @@ static void look_proc(struct procinfo *infos, unsigned highest_cpuid, unsigned h cache = infos->cache = malloc(infos->numcaches * sizeof(*infos->cache)); for (cachenum = 0; ; cachenum++) { - unsigned linesize, linepart, ways, sets; + unsigned long linesize, linepart, ways, sets; unsigned type; eax = 0x8000001d; ecx = cachenum; @@ -249,7 +249,7 @@ static void look_proc(struct procinfo *infos, unsigned highest_cpuid, unsigned h cache->sets = sets = ecx + 1; cache->size = linesize * linepart * ways * sets; - hwloc_debug("cache %u type %u L%u t%u c%u linesize %u linepart %u ways %u sets %u, size %uKB\n", cachenum, cache->type, cache->level, cache->nbthreads_sharing, infos->max_nbcores, linesize, linepart, ways, sets, cache->size >> 10); + hwloc_debug("cache %u type %u L%u t%u c%u linesize %lu linepart %lu ways %lu sets %lu, size %luKB\n", cachenum, cache->type, cache->level, cache->nbthreads_sharing, infos->max_nbcores, linesize, linepart, ways, sets, cache->size >> 10); cache++; } @@ -290,7 +290,7 @@ static void look_proc(struct procinfo *infos, unsigned highest_cpuid, 
unsigned h cache = infos->cache = malloc(infos->numcaches * sizeof(*infos->cache)); for (cachenum = 0; ; cachenum++) { - unsigned linesize, linepart, ways, sets; + unsigned long linesize, linepart, ways, sets; unsigned type; eax = 0x04; ecx = cachenum; @@ -317,7 +317,7 @@ static void look_proc(struct procinfo *infos, unsigned highest_cpuid, unsigned h cache->sets = sets = ecx + 1; cache->size = linesize * linepart * ways * sets; - hwloc_debug("cache %u type %u L%u t%u c%u linesize %u linepart %u ways %u sets %u, size %uKB\n", cachenum, cache->type, cache->level, cache->nbthreads_sharing, infos->max_nbcores, linesize, linepart, ways, sets, cache->size >> 10); + hwloc_debug("cache %u type %u L%u t%u c%u linesize %lu linepart %lu ways %lu sets %lu, size %luKB\n", cachenum, cache->type, cache->level, cache->nbthreads_sharing, infos->max_nbcores, linesize, linepart, ways, sets, cache->size >> 10); infos->max_nbthreads = infos->max_log_proc / infos->max_nbcores; hwloc_debug("thus %u threads\n", infos->max_nbthreads); infos->threadid = infos->logprocid % infos->max_nbthreads; From 050bb35feb96c6ff4729b631b3fe76fd04290bc8 Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Sat, 6 Dec 2014 15:35:14 +0100 Subject: [PATCH 15/25] hwloc: x86: use Group instead of Misc for unknown x2apic levels Misc are reserved for annotating the topology, the core doesn't like merging them. Group is more appropriate. 
(cherry picked from commit open-mpi/hwloc@3c476495912979ef6967c4d1db80d2657df2c347) Signed-off-by: Jeff Squyres --- opal/mca/hwloc/hwloc191/hwloc/src/topology-x86.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/opal/mca/hwloc/hwloc191/hwloc/src/topology-x86.c b/opal/mca/hwloc/hwloc191/hwloc/src/topology-x86.c index e1db138e32..18ba6e8529 100644 --- a/opal/mca/hwloc/hwloc191/hwloc/src/topology-x86.c +++ b/opal/mca/hwloc/hwloc191/hwloc/src/topology-x86.c @@ -401,6 +401,7 @@ static void summarize(hwloc_topology_t topology, struct procinfo *infos, unsigne unsigned i, j, l, level, type; unsigned nbsockets = 0; int one = -1; + unsigned next_group_depth = topology->next_group_depth; for (i = 0; i < nbprocs; i++) if (infos[i].present) { @@ -580,9 +581,12 @@ static void summarize(hwloc_topology_t topology, struct procinfo *infos, unsigne hwloc_bitmap_clr(unknowns_cpuset, j); } } - unknown_obj = hwloc_alloc_setup_object(HWLOC_OBJ_MISC, unknownid); + unknown_obj = hwloc_alloc_setup_object(HWLOC_OBJ_GROUP, unknownid); unknown_obj->cpuset = unknown_cpuset; unknown_obj->os_level = level; + unknown_obj->attr->group.depth = topology->next_group_depth + level; + if (next_group_depth <= topology->next_group_depth + level) + next_group_depth = topology->next_group_depth + level + 1; hwloc_debug_2args_bitmap("os unknown%d %u has cpuset %s\n", level, unknownid, unknown_cpuset); hwloc_insert_object_by_cpuset(topology, unknown_obj); @@ -714,6 +718,7 @@ static void summarize(hwloc_topology_t topology, struct procinfo *infos, unsigne } hwloc_bitmap_free(complete_cpuset); + topology->next_group_depth = next_group_depth; } static int From 3f96e7a2714ce87edf2479ec40b3f034a19439a5 Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Sun, 7 Dec 2014 09:27:25 +0100 Subject: [PATCH 16/25] hwloc: synthetic: Misc levels are not allowed in the synthetic description Misc objects were used between system and machine in the past but quickly got replaced with groups. 
(cherry picked from commit open-mpi/hwloc@6c2aa6d1eabea4be6b436610d79ab245cdffebb5) Signed-off-by: Jeff Squyres --- .../hwloc/hwloc191/hwloc/src/topology-synthetic.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/opal/mca/hwloc/hwloc191/hwloc/src/topology-synthetic.c b/opal/mca/hwloc/hwloc191/hwloc/src/topology-synthetic.c index c374a3d4c8..758d5111d4 100644 --- a/opal/mca/hwloc/hwloc191/hwloc/src/topology-synthetic.c +++ b/opal/mca/hwloc/hwloc191/hwloc/src/topology-synthetic.c @@ -73,6 +73,12 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data, errno = EINVAL; return -1; } + if (type == HWLOC_OBJ_MISC) { + if (verbose) + fprintf(stderr, "Synthetic string with disallow object type at '%s'\n", pos); + errno = EINVAL; + return -1; + } next_pos = strchr(pos, ':'); if (!next_pos) { @@ -134,9 +140,8 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data, case HWLOC_OBJ_CACHE: type = HWLOC_OBJ_SOCKET; break; case HWLOC_OBJ_SOCKET: type = HWLOC_OBJ_NODE; break; case HWLOC_OBJ_NODE: - case HWLOC_OBJ_GROUP: type = HWLOC_OBJ_GROUP; break; case HWLOC_OBJ_MACHINE: - case HWLOC_OBJ_MISC: type = HWLOC_OBJ_MISC; break; + case HWLOC_OBJ_GROUP: type = HWLOC_OBJ_GROUP; break; default: assert(0); } @@ -242,8 +247,6 @@ hwloc__look_synthetic(struct hwloc_topology *topology, /* pre-hooks */ switch (type) { - case HWLOC_OBJ_MISC: - break; case HWLOC_OBJ_GROUP: break; case HWLOC_OBJ_SYSTEM: @@ -265,6 +268,7 @@ hwloc__look_synthetic(struct hwloc_topology *topology, break; case HWLOC_OBJ_PU: break; + case HWLOC_OBJ_MISC: case HWLOC_OBJ_TYPE_MAX: /* Should never happen */ assert(0); @@ -290,8 +294,6 @@ hwloc__look_synthetic(struct hwloc_topology *topology, /* post-hooks */ switch (type) { - case HWLOC_OBJ_MISC: - break; case HWLOC_OBJ_GROUP: obj->attr->group.depth = curlevel->depth; break; @@ -330,6 +332,7 @@ hwloc__look_synthetic(struct hwloc_topology *topology, break; case HWLOC_OBJ_PU: break; + case 
HWLOC_OBJ_MISC: case HWLOC_OBJ_TYPE_MAX: /* Should never happen */ assert(0); From da164be0efdcdb48aea4b64bdb2da69918a3a8f7 Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Sat, 20 Dec 2014 19:49:27 +0100 Subject: [PATCH 17/25] hwloc: error: point to the FAQ when displaying the big OS error message (cherry picked from commit open-mpi/hwloc@b191f816f66eafba0e46427e6609953ed43144bf) Signed-off-by: Jeff Squyres --- opal/mca/hwloc/hwloc191/hwloc/src/topology.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/opal/mca/hwloc/hwloc191/hwloc/src/topology.c b/opal/mca/hwloc/hwloc191/hwloc/src/topology.c index 9dc0d78792..911a5422fc 100644 --- a/opal/mca/hwloc/hwloc191/hwloc/src/topology.c +++ b/opal/mca/hwloc/hwloc191/hwloc/src/topology.c @@ -77,7 +77,9 @@ void hwloc_report_os_error(const char *msg, int line) fprintf(stderr, "* %s\n", msg); fprintf(stderr, "* Error occurred in topology.c line %d\n", line); fprintf(stderr, "*\n"); - fprintf(stderr, "* Please report this error message to the hwloc user's mailing list,\n"); + fprintf(stderr, "* The following FAQ entry in a recent hwloc documentation may help:\n"); + fprintf(stderr, "* What should I do when hwloc reports \"operating system\" warnings?\n"); + fprintf(stderr, "* Otherwise please report this error message to the hwloc user's mailing list,\n"); #ifdef HWLOC_LINUX_SYS fprintf(stderr, "* along with the output+tarball generated by the hwloc-gather-topology script.\n"); #else From 29c99156cfa4c2427878d4e57c80e15e86890fee Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Sat, 10 Jan 2015 10:56:25 +0100 Subject: [PATCH 18/25] hwloc: pci: fix SR-IOV VF vendor/device names Commit 626129d2818693e62b83c1cfa2ba6e058e5bed66 fixed the hwloc device/vendor numbers obtained from libpciaccess. But the corresponding names are still retrieved from pciaccess numbers, so fix these numbers inside pciaccess structures before retrieving the names. 
(cherry picked from commit open-mpi/hwloc@85ea6e4acc456d398fa995d671960ccc0dff0d42) Signed-off-by: Jeff Squyres --- .../hwloc/hwloc191/hwloc/src/topology-pci.c | 53 ++++++++++--------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/opal/mca/hwloc/hwloc191/hwloc/src/topology-pci.c b/opal/mca/hwloc/hwloc191/hwloc/src/topology-pci.c index eaf6da215f..3bd8044584 100644 --- a/opal/mca/hwloc/hwloc191/hwloc/src/topology-pci.c +++ b/opal/mca/hwloc/hwloc191/hwloc/src/topology-pci.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2013 Inria. All rights reserved. + * Copyright © 2009-2015 Inria. All rights reserved. * Copyright © 2009-2011, 2013 Université Bordeaux 1 * See COPYING in top-level directory. */ @@ -203,27 +203,7 @@ hwloc_look_pci(struct hwloc_backend *backend) #endif #endif - /* might be useful for debugging (note that domain might be truncated) */ - os_index = (domain << 20) + (pcidev->bus << 12) + (pcidev->dev << 4) + pcidev->func; - - obj = hwloc_alloc_setup_object(HWLOC_OBJ_PCI_DEVICE, os_index); - obj->attr->pcidev.domain = domain; - obj->attr->pcidev.bus = pcidev->bus; - obj->attr->pcidev.dev = pcidev->dev; - obj->attr->pcidev.func = pcidev->func; - obj->attr->pcidev.vendor_id = pcidev->vendor_id; - obj->attr->pcidev.device_id = pcidev->device_id; - obj->attr->pcidev.class_id = device_class; - obj->attr->pcidev.revision = config_space_cache[PCI_REVISION_ID]; - - obj->attr->pcidev.linkspeed = 0; /* unknown */ -#ifdef HWLOC_HAVE_PCI_FIND_CAP - cap = pci_find_cap(pcidev, PCI_CAP_ID_EXP, PCI_CAP_NORMAL); - offset = cap ? cap->addr : 0; -#else - offset = hwloc_pci_find_cap(config_space_cache, PCI_CAP_ID_EXP); -#endif /* HWLOC_HAVE_PCI_FIND_CAP */ - + /* fixup SR-IOV buggy VF device/vendor IDs */ if (0xffff == pcidev->vendor_id && 0xffff == pcidev->device_id) { /* SR-IOV puts ffff:ffff in Virtual Function config space. * The actual VF device ID is stored at a special (dynamic) location in the Physical Function config space. 
@@ -231,7 +211,7 @@ hwloc_look_pci(struct hwloc_backend *backend) * * libpciaccess just returns ffff:ffff, needs to be fixed. * linuxpci is OK because sysfs files are already fixed the kernel. - * pciutils is OK when it uses those Linux sysfs files. + * (pciutils is OK when it uses those Linux sysfs files.) * * Reading these files is an easy way to work around the libpciaccess issue on Linux, * but we have no way to know if this is caused by SR-IOV or not. @@ -258,7 +238,8 @@ hwloc_look_pci(struct hwloc_backend *backend) read = fread(value, 1, sizeof(value), file); fclose(file); if (read) - obj->attr->pcidev.vendor_id = strtoul(value, NULL, 16); + /* fixup the pciaccess struct so that pci_device_get_vendor_name() is correct later. */ + pcidev->vendor_id = strtoul(value, NULL, 16); } snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/device", @@ -268,11 +249,33 @@ hwloc_look_pci(struct hwloc_backend *backend) read = fread(value, 1, sizeof(value), file); fclose(file); if (read) - obj->attr->pcidev.device_id = strtoul(value, NULL, 16); + /* fixup the pciaccess struct so that pci_device_get_device_name() is correct later. */ + pcidev->device_id = strtoul(value, NULL, 16); } #endif } + /* might be useful for debugging (note that domain might be truncated) */ + os_index = (domain << 20) + (pcidev->bus << 12) + (pcidev->dev << 4) + pcidev->func; + + obj = hwloc_alloc_setup_object(HWLOC_OBJ_PCI_DEVICE, os_index); + obj->attr->pcidev.domain = domain; + obj->attr->pcidev.bus = pcidev->bus; + obj->attr->pcidev.dev = pcidev->dev; + obj->attr->pcidev.func = pcidev->func; + obj->attr->pcidev.vendor_id = pcidev->vendor_id; + obj->attr->pcidev.device_id = pcidev->device_id; + obj->attr->pcidev.class_id = device_class; + obj->attr->pcidev.revision = config_space_cache[PCI_REVISION_ID]; + + obj->attr->pcidev.linkspeed = 0; /* unknown */ +#ifdef HWLOC_HAVE_PCI_FIND_CAP + cap = pci_find_cap(pcidev, PCI_CAP_ID_EXP, PCI_CAP_NORMAL); + offset = cap ? 
cap->addr : 0; +#else + offset = hwloc_pci_find_cap(config_space_cache, PCI_CAP_ID_EXP); +#endif /* HWLOC_HAVE_PCI_FIND_CAP */ + if (offset > 0 && offset + 20 /* size of PCI express block up to link status */ <= CONFIG_SPACE_CACHESIZE) hwloc_pci_find_linkspeed(config_space_cache, offset, &obj->attr->pcidev.linkspeed); From 80140bbe7bd271ed0de7d4232b7dfbf9be228736 Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Mon, 12 Jan 2015 16:51:44 +0100 Subject: [PATCH 19/25] hwloc: distances: when we fail to insert an intermediate group, don't try to group further above Otherwise we'll have some NULL objects above, would be annoying. No need to dig further, the distance matrix is likely buggy. We still keep the inserted groups at this level (incomplete level) because removing them is hard. (cherry picked from commit open-mpi/hwloc@312a971ec9dcacf3397a519d621e5bd004fbe60d) Signed-off-by: Jeff Squyres --- opal/mca/hwloc/hwloc191/hwloc/src/distances.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/opal/mca/hwloc/hwloc191/hwloc/src/distances.c b/opal/mca/hwloc/hwloc191/hwloc/src/distances.c index 00d589afd9..ba9ec219e0 100644 --- a/opal/mca/hwloc/hwloc191/hwloc/src/distances.c +++ b/opal/mca/hwloc/hwloc191/hwloc/src/distances.c @@ -1,5 +1,5 @@ /* - * Copyright © 2010-2014 Inria. All rights reserved. + * Copyright © 2010-2015 Inria. All rights reserved. * Copyright © 2011-2012 Université Bordeaux 1 * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. 
@@ -832,6 +832,7 @@ hwloc__groups_by_distances(struct hwloc_topology *topology, hwloc_obj_t *groupobjs = NULL; unsigned *groupsizes = NULL; float *groupdistances = NULL; + unsigned failed = 0; groupobjs = malloc(sizeof(hwloc_obj_t) * nbgroups); groupsizes = malloc(sizeof(unsigned) * nbgroups); @@ -863,10 +864,17 @@ hwloc__groups_by_distances(struct hwloc_topology *topology, groupsizes[i], group_obj->cpuset); res_obj = hwloc__insert_object_by_cpuset(topology, group_obj, fromuser ? hwloc_report_user_distance_error : hwloc_report_os_error); - /* res_obj may be different from group_objs if we got groups from XML import before grouping */ + /* res_obj may be NULL on failure to insert. */ + if (!res_obj) + failed++; + /* or it may be different from groupobjs if we got groups from XML import before grouping */ groupobjs[i] = res_obj; } + if (failed) + /* don't try to group above if we got a NULL group here, just keep this incomplete level */ + goto inner_free; + /* factorize distances */ memset(&(groupdistances[0]), 0, sizeof(groupdistances[0]) * nbgroups * nbgroups); #undef DISTANCE From d6e415cd41c4c8b28481d288ca7947353dac7fdd Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Fri, 16 Jan 2015 18:56:37 +0100 Subject: [PATCH 20/25] hwloc: AIX: Fix PU os_index When looking for PUs inside R_MAXSDL rads, some AIX 6.1 releases return one first rad without any PU. AIX 6.1 00F63F144C00 does (on quad-power7). AIX 6.1 00CBAAC24C00 doesn't (on 16x power6). So we can't assume rad #x contains PU #x. But we already have the right code to fill the cpuset from the rad, so use that to obtain the PU os_index as well. Cannot be used to obtain NUMA node os_index since there's no way to directly retrieve NUMA nodes from rads (mempools seem unrelated). Just keep using #rad for NUMA nodes os_index and document that convention when converting back in set_membind(). Thanks to Hendryk Bockelmann and Erik Schnetter for helping debugging. 
(cherry picked from commit open-mpi/hwloc@60006c7b88ef517c8be0c10b75bf84aefba7ea87) Signed-off-by: Jeff Squyres --- .../hwloc/hwloc191/hwloc/src/topology-aix.c | 32 ++++++++++++++----- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/opal/mca/hwloc/hwloc191/hwloc/src/topology-aix.c b/opal/mca/hwloc/hwloc191/hwloc/src/topology-aix.c index e2d34eb2f9..5180e43f99 100644 --- a/opal/mca/hwloc/hwloc191/hwloc/src/topology-aix.c +++ b/opal/mca/hwloc/hwloc191/hwloc/src/topology-aix.c @@ -1,7 +1,7 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2012 Inria. All rights reserved. - * Copyright © 2009-2011, 2013 Université Bordeaux 1 + * Copyright © 2009-2015 Inria. All rights reserved. + * Copyright © 2009-2011, 2013 Université Bordeaux * Copyright © 2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. */ @@ -347,6 +347,7 @@ hwloc_aix_prepare_membind(hwloc_topology_t topology, rsethandle_t *rad, hwloc_co noderad = rs_alloc(RS_EMPTY); hwloc_bitmap_foreach_begin(node, nodeset) + /* we used MCMlevel rad number for node->os_index during lookup */ rs_getrad(rset, noderad, MCMlevel, node, 0); rs_op(RS_UNION, noderad, *rad, 0, 0); hwloc_bitmap_foreach_end(); @@ -608,6 +609,9 @@ look_rset(int sdl, hwloc_obj_type_t type, struct hwloc_topology *topology, int l } for (i = 0; i < nbnodes; i++) { + hwloc_bitmap_t cpuset; + unsigned os_index = (unsigned) -1; /* no os_index except for PU and NODE below */ + if (rs_getrad(rset, rad, sdl, i, 0)) { fprintf(stderr,"rs_getrad(%d) failed: %s\n", i, strerror(errno)); continue; @@ -615,16 +619,28 @@ look_rset(int sdl, hwloc_obj_type_t type, struct hwloc_topology *topology, int l if (!rs_getinfo(rad, R_NUMPROCS, 0)) continue; - /* It seems logical processors are numbered from 1 here, while the - * bindprocessor functions numbers them from 0... 
*/ - obj = hwloc_alloc_setup_object(type, i - (type == HWLOC_OBJ_PU)); - obj->cpuset = hwloc_bitmap_alloc(); - obj->os_level = sdl; maxcpus = rs_getinfo(rad, R_MAXPROCS, 0); + cpuset = hwloc_bitmap_alloc(); for (j = 0; j < maxcpus; j++) { if (rs_op(RS_TESTRESOURCE, rad, NULL, R_PROCS, j)) - hwloc_bitmap_set(obj->cpuset, j); + hwloc_bitmap_set(cpuset, j); } + + if (type == HWLOC_OBJ_PU) { + os_index = hwloc_bitmap_first(cpuset); + hwloc_debug("Found PU #%u inside node %d for sdl %d\n", os_index, i, sdl); + assert(hwloc_bitmap_weight(cpuset) == 1); + } else if (type == HWLOC_OBJ_NODE) { + /* NUMA node os_index isn't used for binding, just use the rad number to get unique values. + * Note that we'll use that fact in hwloc_aix_prepare_membind(). */ + os_index = i; + hwloc_debug("Using os_index #%u for NUMA node inside node %d for sdl %d\n", os_index, i, sdl); + } + + obj = hwloc_alloc_setup_object(type, os_index); + obj->cpuset = cpuset; + obj->os_level = sdl; + switch(type) { case HWLOC_OBJ_NODE: obj->nodeset = hwloc_bitmap_alloc(); From dec01097f8ae99a8d297c25319b2bf1564a4fb34 Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Sun, 18 Jan 2015 09:30:20 +0100 Subject: [PATCH 21/25] hwloc: groups: add complete sets when inserting distance/pci groups Make sure we define complete cpuset/nodeset when we define groups' main cpuset/nodeset during later insert of groups (for PCI hostbridges or distances). Otherwise they may end up clearing child/parent complete sets which suddenly become incoherent while they were fixed earlier. Needed to fix allowed_nodeset meaning. 
(cherry picked from commit open-mpi/hwloc@7c88d17addba12c74c7accc69e1a7685e2c01d06) Signed-off-by: Jeff Squyres --- opal/mca/hwloc/hwloc191/hwloc/src/distances.c | 20 +++++++++++++++++++ .../mca/hwloc/hwloc191/hwloc/src/pci-common.c | 3 ++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/opal/mca/hwloc/hwloc191/hwloc/src/distances.c b/opal/mca/hwloc/hwloc191/hwloc/src/distances.c index ba9ec219e0..47085296fc 100644 --- a/opal/mca/hwloc/hwloc191/hwloc/src/distances.c +++ b/opal/mca/hwloc/hwloc191/hwloc/src/distances.c @@ -852,12 +852,22 @@ hwloc__groups_by_distances(struct hwloc_topology *topology, if (groupids[j] == i+1) { /* assemble the group cpuset */ hwloc_bitmap_or(group_obj->cpuset, group_obj->cpuset, objs[j]->cpuset); + if (objs[i]->complete_cpuset) { + if (!group_obj->complete_cpuset) + group_obj->complete_cpuset = hwloc_bitmap_alloc(); + hwloc_bitmap_or(group_obj->complete_cpuset, group_obj->complete_cpuset, objs[j]->complete_cpuset); + } /* if one obj has a nodeset, assemble a group nodeset */ if (objs[j]->nodeset) { if (!group_obj->nodeset) group_obj->nodeset = hwloc_bitmap_alloc(); hwloc_bitmap_or(group_obj->nodeset, group_obj->nodeset, objs[j]->nodeset); } + if (objs[i]->complete_nodeset) { + if (!group_obj->complete_nodeset) + group_obj->complete_nodeset = hwloc_bitmap_alloc(); + hwloc_bitmap_or(group_obj->complete_nodeset, group_obj->complete_nodeset, objs[j]->complete_nodeset); + } groupsizes[i]++; } hwloc_debug_1arg_bitmap("adding Group object with %u objects and cpuset %s\n", @@ -1010,12 +1020,22 @@ hwloc_group_by_distances(struct hwloc_topology *topology) for(i=0; i<nbobjs; i++) { /* assemble the group cpuset */ hwloc_bitmap_or(group_obj->cpuset, group_obj->cpuset, osdist->objs[i]->cpuset); + if (osdist->objs[i]->complete_cpuset) { + if (!group_obj->complete_cpuset) + group_obj->complete_cpuset = hwloc_bitmap_alloc(); + hwloc_bitmap_or(group_obj->complete_cpuset, group_obj->complete_cpuset, osdist->objs[i]->complete_cpuset); + } /* if one obj has a nodeset, assemble a group nodeset */ if
(osdist->objs[i]->nodeset) { if (!group_obj->nodeset) group_obj->nodeset = hwloc_bitmap_alloc(); hwloc_bitmap_or(group_obj->nodeset, group_obj->nodeset, osdist->objs[i]->nodeset); } + if (osdist->objs[i]->complete_nodeset) { + if (!group_obj->complete_nodeset) + group_obj->complete_nodeset = hwloc_bitmap_alloc(); + hwloc_bitmap_or(group_obj->complete_nodeset, group_obj->complete_nodeset, osdist->objs[i]->complete_nodeset); + } } hwloc_debug_1arg_bitmap("adding Group object (as root of distance matrix with %u objects) with cpuset %s\n", nbobjs, group_obj->cpuset); diff --git a/opal/mca/hwloc/hwloc191/hwloc/src/pci-common.c b/opal/mca/hwloc/hwloc191/hwloc/src/pci-common.c index 562ee3bab8..c48093a95c 100644 --- a/opal/mca/hwloc/hwloc191/hwloc/src/pci-common.c +++ b/opal/mca/hwloc/hwloc191/hwloc/src/pci-common.c @@ -1,5 +1,5 @@ /* - * Copyright © 2009-2014 Inria. All rights reserved. + * Copyright © 2009-2015 Inria. All rights reserved. * See COPYING in top-level directory. */ @@ -263,6 +263,7 @@ hwloc_pci_find_hostbridge_parent(struct hwloc_topology *topology, struct hwloc_b hwloc_obj_t group_obj = hwloc_alloc_setup_object(HWLOC_OBJ_GROUP, -1); if (group_obj) { group_obj->cpuset = hwloc_bitmap_dup(cpuset); + group_obj->complete_cpuset = hwloc_bitmap_dup(cpuset); group_obj->attr->group.depth = (unsigned) -1; parent = hwloc__insert_object_by_cpuset(topology, group_obj, hwloc_report_os_error); if (parent == group_obj) From 5c9157c547f1bb4079f86b867f7826c478a435ea Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Sun, 18 Jan 2015 12:33:57 +0100 Subject: [PATCH 22/25] hwloc: core: only update root->complete sets if insert succeeds Otherwise we get spurious bits for crazy topologies such as 8em64t-2s2ca2c-buggynuma.output Will make debug asserts easier. 
(cherry picked from commit open-mpi/hwloc@546cd9330a401c0c910760c316b33cfc8fd16bef) Signed-off-by: Jeff Squyres --- opal/mca/hwloc/hwloc191/hwloc/src/topology.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/opal/mca/hwloc/hwloc191/hwloc/src/topology.c b/opal/mca/hwloc/hwloc191/hwloc/src/topology.c index 911a5422fc..55340ea2dc 100644 --- a/opal/mca/hwloc/hwloc191/hwloc/src/topology.c +++ b/opal/mca/hwloc/hwloc191/hwloc/src/topology.c @@ -1026,14 +1026,16 @@ hwloc__insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t obj, { struct hwloc_obj *result; /* Start at the top. */ - /* Add the cpuset to the top */ - hwloc_bitmap_or(topology->levels[0][0]->complete_cpuset, topology->levels[0][0]->complete_cpuset, obj->cpuset); - if (obj->nodeset) - hwloc_bitmap_or(topology->levels[0][0]->complete_nodeset, topology->levels[0][0]->complete_nodeset, obj->nodeset); result = hwloc___insert_object_by_cpuset(topology, topology->levels[0][0], obj, report_error); - if (result != obj) + if (result != obj) { /* either failed to insert, or got merged, free the original object */ hwloc_free_unlinked_object(obj); + } else { + /* Add the cpuset to the top */ + hwloc_bitmap_or(topology->levels[0][0]->complete_cpuset, topology->levels[0][0]->complete_cpuset, obj->cpuset); + if (obj->nodeset) + hwloc_bitmap_or(topology->levels[0][0]->complete_nodeset, topology->levels[0][0]->complete_nodeset, obj->nodeset); + } return result; } From 1905f35a1e64a23b1e34a5fc46db09a1b0bda684 Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Wed, 21 Jan 2015 15:53:43 +0100 Subject: [PATCH 23/25] hwloc: bitmap: fix a corner case in hwloc_bitmap_isincluded() with infinite sets If super_set contains more allocated ulongs than sub_set, we did not check the last ulongs. We would return true instead of false when sub_set is infinite while the last ulongs in super_set are not full. This fixes tests/hwloc_bitmap_compare_inclusion on some platforms. 
(cherry picked from commit open-mpi/hwloc@299e6e846f1d00930607d5d6d120174024f576e7) Signed-off-by: Jeff Squyres --- opal/mca/hwloc/hwloc191/hwloc/src/bitmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/opal/mca/hwloc/hwloc191/hwloc/src/bitmap.c b/opal/mca/hwloc/hwloc191/hwloc/src/bitmap.c index 39f4dbfe3a..2fc3b831e3 100644 --- a/opal/mca/hwloc/hwloc191/hwloc/src/bitmap.c +++ b/opal/mca/hwloc/hwloc191/hwloc/src/bitmap.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2011 inria. All rights reserved. + * Copyright © 2009-2015 Inria. All rights reserved. * Copyright © 2009-2011 Université Bordeaux 1 * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * See COPYING in top-level directory. @@ -903,7 +903,7 @@ int hwloc_bitmap_isincluded (const struct hwloc_bitmap_s *sub_set, const struct HWLOC__BITMAP_CHECK(sub_set); HWLOC__BITMAP_CHECK(super_set); - for(i=0; i<sub_set->ulongs_count; i++) + for(i=0; i<sub_set->ulongs_count || i<super_set->ulongs_count; i++) if (HWLOC_SUBBITMAP_READULONG(super_set, i) != (HWLOC_SUBBITMAP_READULONG(super_set, i) | HWLOC_SUBBITMAP_READULONG(sub_set, i))) return 0; From 29ccbfd5909fc0347f546bd28ae096ed53ed27ca Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Tue, 17 Feb 2015 16:09:57 +0100 Subject: [PATCH 24/25] hwloc pci: fix bridge depth It was setup in the PCI backend before filtering, and partially updated after filtering in the core. Only setup once correctly after filtering in the core.
(cherry picked from commit open-mpi/hwloc@9659653d24d6acbd6162503b706615be26f327e4) Conflicts: tests/hwloc/linux/40intel64-2g2n4c+pci.output tests/hwloc/xml/192em64t-12gr2n8c2t-distancegroups.xml tests/hwloc/xml/192em64t-24n8c2t-distancegroups.xml tests/hwloc/xml/192em64t-24n8c2t-nodistancegroups.xml tests/hwloc/xml/24em64t-2n6c2t-pci.xml tests/hwloc/xml/32em64t-2n8c2t-pci-normalio.xml tests/hwloc/xml/96em64t-4n4d3ca2co-pci.xml utils/hwloc/test-hwloc-compress-dir.input.tar.gz utils/hwloc/test-hwloc-compress-dir.output.tar.gz Signed-off-by: Jeff Squyres --- opal/mca/hwloc/hwloc191/hwloc/src/pci-common.c | 11 +---------- opal/mca/hwloc/hwloc191/hwloc/src/topology.c | 2 ++ 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/opal/mca/hwloc/hwloc191/hwloc/src/pci-common.c b/opal/mca/hwloc/hwloc191/hwloc/src/pci-common.c index c48093a95c..111e65a943 100644 --- a/opal/mca/hwloc/hwloc191/hwloc/src/pci-common.c +++ b/opal/mca/hwloc/hwloc191/hwloc/src/pci-common.c @@ -31,14 +31,6 @@ hwloc_pci_traverse_print_cb(void * cbdata __hwloc_attribute_unused, pcidev->attr->pcidev.revision, pcidev->attr->pcidev.class_id); } -static void -hwloc_pci_traverse_setbridgedepth_cb(void * cbdata __hwloc_attribute_unused, - struct hwloc_obj *pcidev, int depth) -{ - if (pcidev->type == HWLOC_OBJ_BRIDGE) - pcidev->attr->bridge.depth = depth; -} - static void hwloc_pci_traverse_lookuposdevices_cb(void * cbdata, struct hwloc_obj *pcidev, int depth __hwloc_attribute_unused) @@ -305,8 +297,7 @@ hwloc_insert_pci_device_list(struct hwloc_backend *backend, hwloc_debug("%s", "\nPCI hierarchy under fake parent:\n"); hwloc_pci_traverse(NULL, &fakeparent, hwloc_pci_traverse_print_cb); - /* walk the hierarchy, set bridge depth and lookup OS devices */ - hwloc_pci_traverse(NULL, &fakeparent, hwloc_pci_traverse_setbridgedepth_cb); + /* walk the hierarchy, and lookup OS devices */ hwloc_pci_traverse(backend, &fakeparent, hwloc_pci_traverse_lookuposdevices_cb); /* diff --git 
a/opal/mca/hwloc/hwloc191/hwloc/src/topology.c b/opal/mca/hwloc/hwloc191/hwloc/src/topology.c index 55340ea2dc..bcab9a0734 100644 --- a/opal/mca/hwloc/hwloc191/hwloc/src/topology.c +++ b/opal/mca/hwloc/hwloc191/hwloc/src/topology.c @@ -1984,6 +1984,8 @@ hwloc_propagate_bridge_depth(hwloc_topology_t topology, hwloc_obj_t root, unsign if (child->type == HWLOC_OBJ_BRIDGE) { child->attr->bridge.depth = depth; hwloc_propagate_bridge_depth(topology, child, depth+1); + } else if (!hwloc_obj_type_is_io(child->type)) { + hwloc_propagate_bridge_depth(topology, child, 0); } child = child->next_sibling; } From 0c502d90cdd2efa9a03eff32efc0d09f8d7c388b Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Fri, 27 Mar 2015 06:05:03 -0700 Subject: [PATCH 25/25] hwloc README-ompi.txt: update for what we pulled from hwloc Document what we pulled from the hwloc tree. --- opal/mca/hwloc/hwloc191/README-ompi.txt | 32 +++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/opal/mca/hwloc/hwloc191/README-ompi.txt b/opal/mca/hwloc/hwloc191/README-ompi.txt index 242e4fc022..948285aaff 100644 --- a/opal/mca/hwloc/hwloc191/README-ompi.txt +++ b/opal/mca/hwloc/hwloc191/README-ompi.txt @@ -1,3 +1,35 @@ Applied the following patches from the upstream hwloc 1.9 branch after the v1.9.1 release: +All relevant commits up to open-mpi/hwloc@4e23b12 (i.e., the HEAD as +of 27 March 2015). "Relevant" commits are defined as those that +included files that are embedded in the Open MPI tree (e.g., updates +to files in docs/, utils/, etc. aren't relevant because they are not +embedded in the Open MPI tree). 
To be specific, the following commits +have been cherry-picked over to Open MPI: + +* open-mpi/hwloc@7c03216 v1.9.1 released, doing 1.9.2rc1 now +* open-mpi/hwloc@b35ced8 misc.h: Fix hwloc_strncasecmp() build under strict flags on BSD +* open-mpi/hwloc@d8c3f3d misc.h: Fix hwloc_strncasecmp() with some icc +* open-mpi/hwloc@f705a23 Use gcc's __asm__ version of the asm extension, which can be used in all standards +* open-mpi/hwloc@307726a configure: fix the check for X11/Xutil.h +* open-mpi/hwloc@ec58c05 errors: improve the advice to send hwloc-gather-topology files in the OS error message +* open-mpi/hwloc@35c743d NEWS update +* open-mpi/hwloc@868170e API: clearly state that os_index isn't unique while logical_index is +* open-mpi/hwloc@851532d x86 and OSF: Don't forget to set NUMA node nodeset +* open-mpi/hwloc@790aa2e cpuid-x86: Fix duplicate asm labels in case of heavy inlining on x86-32 +* open-mpi/hwloc@dd09aa5 debug: fix an overzealous assertion about the parent cpuset vs its children +* open-mpi/hwloc@769b9b5 core: fix the merging of identical objects in presence of Misc objects +* open-mpi/hwloc@71da0f1 core: reorder children in merge_useless_child() as well +* open-mpi/hwloc@c9cef07 hpux: improve hwloc_hpux_find_ldom() looking for NUMA node +* open-mpi/hwloc@cdffea6 x86: use ulong for cache sizes, uint won't be enough in the near future +* open-mpi/hwloc@55b0676 x86: use Group instead of Misc for unknown x2apic levels +* open-mpi/hwloc@7764ce5 synthetic: Misc levels are not allowed in the synthetic description +* open-mpi/hwloc@5b2dce1 error: point to the FAQ when displaying the big OS error message +* open-mpi/hwloc@c7bd9e6 pci: fix SR-IOV VF vendor/device names +* open-mpi/hwloc@a0f72ef distances: when we fail to insert an intermediate group, don't try to group further above +* open-mpi/hwloc@e419811 AIX: Fix PU os_index +* open-mpi/hwloc@08ab793 groups: add complete sets when inserting distance/pci groups +* open-mpi/hwloc@c66e714 core: only update 
root->complete sets if insert succeeds +* open-mpi/hwloc@01da9b9 bitmap: fix a corner case in hwloc_bitmap_isincluded() with infinite sets +* open-mpi/hwloc@e7b192b pci: fix bridge depth