From 82567996f76c6438379a20b9492856b167c20bcc Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Wed, 27 Nov 2019 12:32:27 +0100 Subject: [PATCH 1/4] hwloc/base: fix opal proc locality wrt to NUMA nodes on hwloc 2.0 Both opal_hwloc_base_get_relative_locality() and _get_locality_string() iterate over hwloc levels to build the proc locality information. Unfortunately, NUMA nodes are not in those normal levels anymore since 2.0. We have to explicitly look a the special NUMA level to get that locality info. I am factorizing the core of the iterations inside dedicated "_by_depth" functions and calling them again for the NUMA level at the end of the loops. Thanks to Hatem Elshazly for reporting the NUMA communicator split failure at https://www.mail-archive.com/users@lists.open-mpi.org/msg33589.html It looks like only the opal_hwloc_base_get_locality_string() part is needed to fix that split, but there's no reason not to fix get_relative_locality() as well. Signed-off-by: Brice Goglin (cherry picked from commit ea80a20e108cb69efc67ad04ad968da7b85772af) --- opal/mca/hwloc/base/hwloc_base_util.c | 207 ++++++++++++++++---------- 1 file changed, 127 insertions(+), 80 deletions(-) diff --git a/opal/mca/hwloc/base/hwloc_base_util.c b/opal/mca/hwloc/base/hwloc_base_util.c index ba26ba0ac6..7bb9520f35 100644 --- a/opal/mca/hwloc/base/hwloc_base_util.c +++ b/opal/mca/hwloc/base/hwloc_base_util.c @@ -20,6 +20,7 @@ * All rights reserved. * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. * Copyright (c) 2019 IBM Corporation. All rights reserved. + * Copyright (c) 2019 Inria. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -1215,16 +1216,84 @@ int opal_hwloc_base_cpu_list_parse(const char *slot_str, return OPAL_SUCCESS; } +static void opal_hwloc_base_get_relative_locality_by_depth(hwloc_topology_t topo, unsigned d, + hwloc_cpuset_t loc1, hwloc_cpuset_t loc2, + opal_hwloc_locality_t *locality, bool *shared) +{ + unsigned width, w; + hwloc_obj_t obj; + int sect1, sect2; + + /* get the width of the topology at this depth */ + width = hwloc_get_nbobjs_by_depth(topo, d); + + /* scan all objects at this depth to see if + * our locations overlap with them + */ + for (w=0; w < width; w++) { + /* get the object at this depth/index */ + obj = hwloc_get_obj_by_depth(topo, d, w); + /* see if our locations intersect with the cpuset for this obj */ + sect1 = hwloc_bitmap_intersects(obj->cpuset, loc1); + sect2 = hwloc_bitmap_intersects(obj->cpuset, loc2); + /* if both intersect, then we share this level */ + if (sect1 && sect2) { + *shared = true; + switch(obj->type) { + case HWLOC_OBJ_NODE: + *locality |= OPAL_PROC_ON_NUMA; + break; + case HWLOC_OBJ_SOCKET: + *locality |= OPAL_PROC_ON_SOCKET; + break; +#if HWLOC_API_VERSION < 0x20000 + case HWLOC_OBJ_CACHE: + if (3 == obj->attr->cache.depth) { + *locality |= OPAL_PROC_ON_L3CACHE; + } else if (2 == obj->attr->cache.depth) { + *locality |= OPAL_PROC_ON_L2CACHE; + } else { + *locality |= OPAL_PROC_ON_L1CACHE; + } + break; +#else + case HWLOC_OBJ_L3CACHE: + *locality |= OPAL_PROC_ON_L3CACHE; + break; + case HWLOC_OBJ_L2CACHE: + *locality |= OPAL_PROC_ON_L2CACHE; + break; + case HWLOC_OBJ_L1CACHE: + *locality |= OPAL_PROC_ON_L1CACHE; + break; +#endif + case HWLOC_OBJ_CORE: + *locality |= OPAL_PROC_ON_CORE; + break; + case HWLOC_OBJ_PU: + *locality |= OPAL_PROC_ON_HWTHREAD; + break; + default: + /* just ignore it */ + break; + } + break; + } + /* otherwise, we don't share this + * object - but we still might share another object + * on this level, so we have to keep searching + */ + } +} + opal_hwloc_locality_t opal_hwloc_base_get_relative_locality(hwloc_topology_t topo, char *cpuset1, char *cpuset2) { opal_hwloc_locality_t locality; - hwloc_obj_t obj; - unsigned depth, d, width, w; + hwloc_cpuset_t loc1, loc2; + unsigned depth, d; bool shared; hwloc_obj_type_t type; - int sect1, sect2; - hwloc_cpuset_t loc1, loc2; /* start with what we know - they share a node on a cluster * NOTE: we may alter that latter part as hwloc's ability to @@ -1265,66 +1334,8 @@ opal_hwloc_locality_t opal_hwloc_base_get_relative_locality(hwloc_topology_t top HWLOC_OBJ_PU != type) { continue; } - /* get the width of the topology at this depth */ - width = hwloc_get_nbobjs_by_depth(topo, d); + opal_hwloc_base_get_relative_locality_by_depth(topo, d, loc1, loc2, &locality, &shared); - /* scan all objects at this depth to see if - * our locations overlap with them - */ - for (w=0; w < width; w++) { - /* get the object at this depth/index */ - obj = hwloc_get_obj_by_depth(topo, d, w); - /* see if our locations intersect with the cpuset for this obj */ - sect1 = hwloc_bitmap_intersects(obj->cpuset, loc1); - sect2 = hwloc_bitmap_intersects(obj->cpuset, loc2); - /* if both intersect, then we share this level */ - if (sect1 && sect2) { - shared = true; - switch(obj->type) { - case HWLOC_OBJ_NODE: - locality |= OPAL_PROC_ON_NUMA; - break; - case HWLOC_OBJ_SOCKET: - locality |= OPAL_PROC_ON_SOCKET; - break; -#if HWLOC_API_VERSION < 0x20000 - case HWLOC_OBJ_CACHE: - if (3 == obj->attr->cache.depth) { - locality |= OPAL_PROC_ON_L3CACHE; - } else if (2 == obj->attr->cache.depth) { - locality |= OPAL_PROC_ON_L2CACHE; - } else { - locality |= OPAL_PROC_ON_L1CACHE; - } - break; -#else - case HWLOC_OBJ_L3CACHE: - locality |= OPAL_PROC_ON_L3CACHE; - break; - case HWLOC_OBJ_L2CACHE: - locality |= OPAL_PROC_ON_L2CACHE; - break; - case HWLOC_OBJ_L1CACHE: - locality |= OPAL_PROC_ON_L1CACHE; - break; -#endif - case HWLOC_OBJ_CORE: - locality |= OPAL_PROC_ON_CORE; - break; - case HWLOC_OBJ_PU: - locality |= OPAL_PROC_ON_HWTHREAD; - break; - default: - /* just ignore it */ - break; - } - break; - } - /* otherwise, we don't share this - * object - but we still might share another object - * on this level, so we have to keep searching - */ - } /* if we spanned the entire width without finding * a point of intersection, then no need to go * deeper @@ -1333,6 +1344,9 @@ opal_hwloc_locality_t opal_hwloc_base_get_relative_locality(hwloc_topology_t top break; } } +#if HWLOC_API_VERSION >= 0x20000 + opal_hwloc_base_get_relative_locality_by_depth(topo, HWLOC_TYPE_DEPTH_NUMANODE, loc1, loc2, &locality, &shared); +#endif opal_output_verbose(5, opal_hwloc_base_framework.framework_output, "locality: %s", @@ -2063,12 +2077,40 @@ char* opal_hwloc_base_get_topo_signature(hwloc_topology_t topo) return sig; } +static int opal_hwloc_base_get_locality_string_by_depth(hwloc_topology_t topo, + int d, + hwloc_cpuset_t cpuset, + hwloc_cpuset_t result) +{ + hwloc_obj_t obj; + unsigned width, w; + + /* get the width of the topology at this depth */ + width = hwloc_get_nbobjs_by_depth(topo, d); + if (0 == width) { + return -1; + } + + /* scan all objects at this depth to see if + * the location overlaps with them + */ + for (w=0; w < width; w++) { + /* get the object at this depth/index */ + obj = hwloc_get_obj_by_depth(topo, d, w); + /* see if the location intersects with it */ + if (hwloc_bitmap_intersects(obj->cpuset, cpuset)) { + hwloc_bitmap_set(result, w); + } + } + + return 0; +} + char* opal_hwloc_base_get_locality_string(hwloc_topology_t topo, char *bitmap) { - hwloc_obj_t obj; char *locality=NULL, *tmp, *t2; - unsigned depth, d, width, w; + unsigned depth, d; hwloc_cpuset_t cpuset, result; hwloc_obj_type_t type; @@ -2111,28 +2153,15 @@ char* opal_hwloc_base_get_locality_string(hwloc_topology_t topo, continue; } - /* get the width of the topology at this depth */ - width = hwloc_get_nbobjs_by_depth(topo, d); - if (0 == width) { + if (opal_hwloc_base_get_locality_string_by_depth(topo, d, cpuset, result) < 0) { continue; } - /* scan all objects at this depth to see if - * the location overlaps with them - */ - for (w=0; w < width; w++) { - /* get the object at this depth/index */ - obj = hwloc_get_obj_by_depth(topo, d, w); - /* see if the location intersects with it */ - if (hwloc_bitmap_intersects(obj->cpuset, cpuset)) { - hwloc_bitmap_set(result, w); - } - } /* it should be impossible, but allow for the possibility * that we came up empty at this depth */ if (!hwloc_bitmap_iszero(result)) { hwloc_bitmap_list_asprintf(&tmp, result); - switch(obj->type) { + switch(type) { case HWLOC_OBJ_NODE: asprintf(&t2, "%sNM%s:", (NULL == locality) ? "" : locality, tmp); if (NULL != locality) { @@ -2217,6 +2246,24 @@ char* opal_hwloc_base_get_locality_string(hwloc_topology_t topo, } hwloc_bitmap_zero(result); } + +#if HWLOC_API_VERSION >= 0x20000 + if (opal_hwloc_base_get_locality_string_by_depth(topo, HWLOC_TYPE_DEPTH_NUMANODE, cpuset, result) == 0) { + /* it should be impossible, but allow for the possibility + * that we came up empty at this depth */ + if (!hwloc_bitmap_iszero(result)) { + hwloc_bitmap_list_asprintf(&tmp, result); + opal_asprintf(&t2, "%sNM%s:", (NULL == locality) ? "" : locality, tmp); + if (NULL != locality) { + free(locality); + } + locality = t2; + free(tmp); + } + hwloc_bitmap_zero(result); + } +#endif + hwloc_bitmap_free(result); hwloc_bitmap_free(cpuset); From bed0ce70a7436b31e7a6f86ddb0d22dda29f75ae Mon Sep 17 00:00:00 2001 From: Howard Pritchard Date: Mon, 3 Feb 2020 14:19:12 -0700 Subject: [PATCH 2/4] fix a problem with opal_asprintf not being defined. related to #7201 Signed-off-by: Howard Pritchard --- opal/mca/hwloc/base/hwloc_base_util.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/opal/mca/hwloc/base/hwloc_base_util.c b/opal/mca/hwloc/base/hwloc_base_util.c index 7bb9520f35..34885f9f19 100644 --- a/opal/mca/hwloc/base/hwloc_base_util.c +++ b/opal/mca/hwloc/base/hwloc_base_util.c @@ -21,6 +21,7 @@ * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. * Copyright (c) 2019 IBM Corporation. All rights reserved. * Copyright (c) 2019 Inria. All rights reserved. + * Copyright (c) 2020 Triad National Security, LLC. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -59,6 +60,7 @@ #include "opal/mca/hwloc/hwloc-internal.h" #include "opal/mca/hwloc/base/base.h" +#include "opal/util/printf.h" static bool topo_in_shmem = false; From 6702a4febbf25e345455eeaf73f6435960b8079f Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Tue, 4 Feb 2020 22:22:10 +0100 Subject: [PATCH 3/4] opal/hwloc: remove some unused variables when building with hwloc < 1.7 Refs #7362 Signed-off-by: Brice Goglin (cherry picked from commit 329d4451a6cdd544e532a29f594f6e5ee63e06da) --- opal/mca/hwloc/base/hwloc_base_util.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/opal/mca/hwloc/base/hwloc_base_util.c b/opal/mca/hwloc/base/hwloc_base_util.c index 34885f9f19..f88f990311 100644 --- a/opal/mca/hwloc/base/hwloc_base_util.c +++ b/opal/mca/hwloc/base/hwloc_base_util.c @@ -1365,9 +1365,10 @@ opal_hwloc_locality_t opal_hwloc_base_get_relative_locality(hwloc_topology_t top */ char* opal_hwloc_base_find_coprocessors(hwloc_topology_t topo) { +#if HAVE_DECL_HWLOC_OBJ_OSDEV_COPROC hwloc_obj_t osdev; - unsigned i; char **cps = NULL; +#endif char *cpstring = NULL; int depth; @@ -1385,6 +1386,7 @@ char* opal_hwloc_base_find_coprocessors(hwloc_topology_t topo) while (NULL != osdev) { if (HWLOC_OBJ_OSDEV_COPROC == osdev->attr->osdev.type) { /* got one! find and save its serial number */ + unsigned i; for (i=0; i < osdev->infos_count; i++) { if (0 == strncmp(osdev->infos[i].name, "MICSerialNumber", strlen("MICSerialNumber"))) { OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output, From f136804c453a75763241158d9821ccee4ac4b5f7 Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Tue, 4 Feb 2020 22:20:27 +0100 Subject: [PATCH 4/4] hwloc/base: fix opal proc locality wrt to NUMA nodes on hwloc 1.11 Build was broken by mistake in commit d40662edc41a5a4d09ae690b640cfdeeb24e15a1 Fixes #7362 Signed-off-by: Brice Goglin (cherry picked from commit 907ad854b46b42ae7cb1e9c87238691a5cc25e36) --- opal/mca/hwloc/base/hwloc_base_util.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/opal/mca/hwloc/base/hwloc_base_util.c b/opal/mca/hwloc/base/hwloc_base_util.c index f88f990311..80d6b95ab0 100644 --- a/opal/mca/hwloc/base/hwloc_base_util.c +++ b/opal/mca/hwloc/base/hwloc_base_util.c @@ -19,8 +19,8 @@ * Copyright (C) 2018 Mellanox Technologies, Ltd. * All rights reserved. * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. - * Copyright (c) 2019 IBM Corporation. All rights reserved. - * Copyright (c) 2019 Inria. All rights reserved. + * Copyright (c) 2019 IBM Corporation. All rights reserved. + * Copyright (c) 2019-2020 Inria. All rights reserved. * Copyright (c) 2020 Triad National Security, LLC. All rights reserved. * $COPYRIGHT$ * @@ -2181,15 +2181,16 @@ char* opal_hwloc_base_get_locality_string(hwloc_topology_t topo, locality = t2; break; #if HWLOC_API_VERSION < 0x20000 - case HWLOC_OBJ_CACHE: - if (3 == obj->attr->cache.depth) { + case HWLOC_OBJ_CACHE: { + unsigned cachedepth = hwloc_get_obj_by_depth(topo, d, 0)->attr->cache.depth; + if (3 == cachedepth) { asprintf(&t2, "%sL3%s:", (NULL == locality) ? "" : locality, tmp); if (NULL != locality) { free(locality); } locality = t2; break; - } else if (2 == obj->attr->cache.depth) { + } else if (2 == cachedepth) { asprintf(&t2, "%sL2%s:", (NULL == locality) ? "" : locality, tmp); if (NULL != locality) { free(locality); @@ -2205,6 +2206,7 @@ char* opal_hwloc_base_get_locality_string(hwloc_topology_t topo, break; } break; + } #else case HWLOC_OBJ_L3CACHE: asprintf(&t2, "%sL3%s:", (NULL == locality) ? "" : locality, tmp); @@ -2257,7 +2259,7 @@ char* opal_hwloc_base_get_locality_string(hwloc_topology_t topo, * that we came up empty at this depth */ if (!hwloc_bitmap_iszero(result)) { hwloc_bitmap_list_asprintf(&tmp, result); - opal_asprintf(&t2, "%sNM%s:", (NULL == locality) ? "" : locality, tmp); + asprintf(&t2, "%sNM%s:", (NULL == locality) ? "" : locality, tmp); if (NULL != locality) { free(locality); }