1
1

Merge pull request #498 from jsquyres/pr/hwloc-19x-updates

hwloc 1.9.x updates
Этот коммит содержится в:
Jeff Squyres 2015-03-27 10:19:46 -04:00
родитель a85edb8ad4 0c502d90cd
Коммит 2145dfb2e8
17 изменённых файлов: 260 добавлений и 112 удалений

Просмотреть файл

@ -1,3 +1,35 @@
Applied the following patches from the upstream hwloc 1.9 branch after
the v1.9.1 release:
All relevant commits up to open-mpi/hwloc@4e23b12 (i.e., the HEAD as
of 27 March 2015). "Relevant" commits are defined as those that
included files that are embedded in the Open MPI tree (e.g., updates
to files in docs/, utils/, etc. aren't relevant because they are not
embedded in the Open MPI tree). To be specific, the following commits
have been cherry-picked over to Open MPI:
* open-mpi/hwloc@7c03216 v1.9.1 released, doing 1.9.2rc1 now
* open-mpi/hwloc@b35ced8 misc.h: Fix hwloc_strncasecmp() build under strict flags on BSD
* open-mpi/hwloc@d8c3f3d misc.h: Fix hwloc_strncasecmp() with some icc
* open-mpi/hwloc@f705a23 Use gcc's __asm__ version of the asm extension, which can be used in all standards
* open-mpi/hwloc@307726a configure: fix the check for X11/Xutil.h
* open-mpi/hwloc@ec58c05 errors: improve the advice to send hwloc-gather-topology files in the OS error message
* open-mpi/hwloc@35c743d NEWS update
* open-mpi/hwloc@868170e API: clearly state that os_index isn't unique while logical_index is
* open-mpi/hwloc@851532d x86 and OSF: Don't forget to set NUMA node nodeset
* open-mpi/hwloc@790aa2e cpuid-x86: Fix duplicate asm labels in case of heavy inlining on x86-32
* open-mpi/hwloc@dd09aa5 debug: fix an overzealous assertion about the parent cpuset vs its children
* open-mpi/hwloc@769b9b5 core: fix the merging of identical objects in presence of Misc objects
* open-mpi/hwloc@71da0f1 core: reorder children in merge_useless_child() as well
* open-mpi/hwloc@c9cef07 hpux: improve hwloc_hpux_find_ldom() looking for NUMA node
* open-mpi/hwloc@cdffea6 x86: use ulong for cache sizes, uint won't be enough in the near future
* open-mpi/hwloc@55b0676 x86: use Group instead of Misc for unknown x2apic levels
* open-mpi/hwloc@7764ce5 synthetic: Misc levels are not allowed in the synthetic description
* open-mpi/hwloc@5b2dce1 error: point to the FAQ when displaying the big OS error message
* open-mpi/hwloc@c7bd9e6 pci: fix SR-IOV VF vendor/device names
* open-mpi/hwloc@a0f72ef distances: when we fail to insert an intermediate group, don't try to group further above
* open-mpi/hwloc@e419811 AIX: Fix PU os_index
* open-mpi/hwloc@08ab793 groups: add complete sets when inserting distance/pci groups
* open-mpi/hwloc@c66e714 core: only update root->complete sets if insert succeeds
* open-mpi/hwloc@01da9b9 bitmap: fix a corner case in hwloc_bitmap_isincluded() with infinite sets
* open-mpi/hwloc@e7b192b pci: fix bridge depth

Просмотреть файл

@ -17,6 +17,14 @@ bug fixes (and other actions) for each version of hwloc since version
in v0.9.1).
Version 1.9.2
-------------
* Fix some build failures in private/misc.h.
Thanks to Pavan Balaji and Ralph Castain for the reports.
* Fix failures to detect X11/Xutil.h on some Solaris platforms.
Thanks to Siegmar Gross for reporting the failure.
Version 1.9.1
-------------
* Fix a crash when the PCI locality is invalid. Attach to the root object

Просмотреть файл

@ -13,7 +13,7 @@ snapshot_version=gitclone
major=1
minor=9
release=1
release=2
# greek is used for alpha or beta release tags. If it is non-empty,
# it will be appended to the version number. It does not have to be

Просмотреть файл

@ -353,6 +353,7 @@ EOF])
AC_CHECK_HEADERS([unistd.h])
AC_CHECK_HEADERS([dirent.h])
AC_CHECK_HEADERS([strings.h])
AC_CHECK_HEADERS([ctype.h])
AC_CHECK_FUNCS([strncasecmp], [
_HWLOC_CHECK_DECL([strncasecmp], [
@ -930,7 +931,7 @@ EOF])
[AC_CHECK_HEADERS([X11/keysym.h],
[AC_DEFINE([HWLOC_HAVE_X11_KEYSYM], [1], [Define to 1 if X11 headers including Xutil.h and keysym.h are available.])])
AC_SUBST([HWLOC_X11_LIBS], ["-lX11"])
])
], [], [#include <X11/Xlib.h>])
])
])
CPPFLAGS=$CPPFLAGS_save

Просмотреть файл

@ -331,7 +331,10 @@ struct hwloc_obj_memory_s {
struct hwloc_obj {
/* physical information */
hwloc_obj_type_t type; /**< \brief Type of object */
unsigned os_index; /**< \brief OS-provided physical index number */
unsigned os_index; /**< \brief OS-provided physical index number.
* It is not guaranteed unique across the entire machine,
* except for PUs and NUMA nodes.
*/
char *name; /**< \brief Object description if any */
struct hwloc_obj_memory_s memory; /**< \brief Memory attributes */
@ -346,7 +349,9 @@ struct hwloc_obj {
* of parent/child links from the root object to here.
*/
unsigned logical_index; /**< \brief Horizontal index in the whole list of similar objects,
* could be a "cousin_rank" since it's the rank within the "cousin" list below */
* hence guaranteed unique across the entire machine.
* Could be a "cousin_rank" since it's the rank within the "cousin" list below
*/
signed os_level; /**< \brief OS-provided physical level, -1 if unknown or meaningless */
/* cousins are all objects of the same type (and depth) across the entire topology */

Просмотреть файл

@ -32,14 +32,14 @@ static __hwloc_inline int hwloc_have_x86_cpuid(void)
"pushfl \n\t" \
"pop %1 \n\t" \
"cmp %1,%2\n\t" /* Compare with expected value */ \
"jnz Lhwloc1\n\t" /* Unexpected, failure */ \
"jnz 0f\n\t" /* Unexpected, failure */ \
TRY_TOGGLE /* Try to set/clear */
TRY_TOGGLE /* Try to clear/set */
"mov $1,%0\n\t" /* Passed the test! */
"Lhwloc1: \n\t"
"0: \n\t"
"popfl \n\t" /* Restore flags */
: "=r" (ret), "=&r" (tmp), "=&r" (tmp2));

Просмотреть файл

@ -15,6 +15,16 @@
#include <private/autogen/config.h>
#include <ctype.h>
#ifdef HWLOC_HAVE_DECL_STRNCASECMP
#ifdef HAVE_STRINGS_H
#include <strings.h>
#endif
#else
#ifdef HAVE_CTYPE_H
#include <ctype.h>
#endif
#endif
/* Compile-time assertion */
#define HWLOC_BUILD_ASSERT(condition) ((void)sizeof(char[1 - 2*!(condition)]))

Просмотреть файл

@ -1,6 +1,6 @@
/*
* Copyright © 2009 CNRS
* Copyright © 2009-2011 inria. All rights reserved.
* Copyright © 2009-2015 Inria. All rights reserved.
* Copyright © 2009-2011 Université Bordeaux 1
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory.
@ -903,7 +903,7 @@ int hwloc_bitmap_isincluded (const struct hwloc_bitmap_s *sub_set, const struct
HWLOC__BITMAP_CHECK(sub_set);
HWLOC__BITMAP_CHECK(super_set);
for(i=0; i<sub_set->ulongs_count; i++)
for(i=0; i<sub_set->ulongs_count || i<super_set->ulongs_count; i++)
if (HWLOC_SUBBITMAP_READULONG(super_set, i) != (HWLOC_SUBBITMAP_READULONG(super_set, i) | HWLOC_SUBBITMAP_READULONG(sub_set, i)))
return 0;

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright © 2010-2014 Inria. All rights reserved.
* Copyright © 2010-2015 Inria. All rights reserved.
* Copyright © 2011-2012 Université Bordeaux 1
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory.
@ -832,6 +832,7 @@ hwloc__groups_by_distances(struct hwloc_topology *topology,
hwloc_obj_t *groupobjs = NULL;
unsigned *groupsizes = NULL;
float *groupdistances = NULL;
unsigned failed = 0;
groupobjs = malloc(sizeof(hwloc_obj_t) * nbgroups);
groupsizes = malloc(sizeof(unsigned) * nbgroups);
@ -851,22 +852,39 @@ hwloc__groups_by_distances(struct hwloc_topology *topology,
if (groupids[j] == i+1) {
  /* objs[j] carries group id i+1: merge its sets into the Group object being built */
  /* assemble the group cpuset */
  hwloc_bitmap_or(group_obj->cpuset, group_obj->cpuset, objs[j]->cpuset);
  /* test the very object whose complete_cpuset is read below (objs[j], not objs[i]),
   * so that a missing bitmap is never passed to hwloc_bitmap_or() */
  if (objs[j]->complete_cpuset) {
    if (!group_obj->complete_cpuset)
      group_obj->complete_cpuset = hwloc_bitmap_alloc();
    hwloc_bitmap_or(group_obj->complete_cpuset, group_obj->complete_cpuset, objs[j]->complete_cpuset);
  }
  /* if one obj has a nodeset, assemble a group nodeset */
  if (objs[j]->nodeset) {
    if (!group_obj->nodeset)
      group_obj->nodeset = hwloc_bitmap_alloc();
    hwloc_bitmap_or(group_obj->nodeset, group_obj->nodeset, objs[j]->nodeset);
  }
  /* same rule for the complete nodeset: guard and read the same object */
  if (objs[j]->complete_nodeset) {
    if (!group_obj->complete_nodeset)
      group_obj->complete_nodeset = hwloc_bitmap_alloc();
    hwloc_bitmap_or(group_obj->complete_nodeset, group_obj->complete_nodeset, objs[j]->complete_nodeset);
  }
  /* one more member in group #i */
  groupsizes[i]++;
}
hwloc_debug_1arg_bitmap("adding Group object with %u objects and cpuset %s\n",
groupsizes[i], group_obj->cpuset);
res_obj = hwloc__insert_object_by_cpuset(topology, group_obj,
fromuser ? hwloc_report_user_distance_error : hwloc_report_os_error);
/* res_obj may be different from group_objs if we got groups from XML import before grouping */
/* res_obj may be NULL on failure to insert. */
if (!res_obj)
failed++;
/* or it may be different from groupobjs if we got groups from XML import before grouping */
groupobjs[i] = res_obj;
}
if (failed)
/* don't try to group above if we got a NULL group here, just keep this incomplete level */
goto inner_free;
/* factorize distances */
memset(&(groupdistances[0]), 0, sizeof(groupdistances[0]) * nbgroups * nbgroups);
#undef DISTANCE
@ -1002,12 +1020,22 @@ hwloc_group_by_distances(struct hwloc_topology *topology)
for(i=0; i<nbobjs; i++) {
/* assemble the group cpuset */
hwloc_bitmap_or(group_obj->cpuset, group_obj->cpuset, osdist->objs[i]->cpuset);
if (osdist->objs[i]->complete_cpuset) {
if (!group_obj->complete_cpuset)
group_obj->complete_cpuset = hwloc_bitmap_alloc();
hwloc_bitmap_or(group_obj->complete_cpuset, group_obj->complete_cpuset, osdist->objs[i]->complete_cpuset);
}
/* if one obj has a nodeset, assemble a group nodeset */
if (osdist->objs[i]->nodeset) {
if (!group_obj->nodeset)
group_obj->nodeset = hwloc_bitmap_alloc();
hwloc_bitmap_or(group_obj->nodeset, group_obj->nodeset, osdist->objs[i]->nodeset);
}
if (osdist->objs[i]->complete_nodeset) {
if (!group_obj->complete_nodeset)
group_obj->complete_nodeset = hwloc_bitmap_alloc();
hwloc_bitmap_or(group_obj->complete_nodeset, group_obj->complete_nodeset, osdist->objs[i]->complete_nodeset);
}
}
hwloc_debug_1arg_bitmap("adding Group object (as root of distance matrix with %u objects) with cpuset %s\n",
nbobjs, group_obj->cpuset);

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright © 2009-2014 Inria. All rights reserved.
* Copyright © 2009-2015 Inria. All rights reserved.
* See COPYING in top-level directory.
*/
@ -31,14 +31,6 @@ hwloc_pci_traverse_print_cb(void * cbdata __hwloc_attribute_unused,
pcidev->attr->pcidev.revision, pcidev->attr->pcidev.class_id);
}
/* Traversal callback: record the given depth in a bridge object.
 * Objects of any other type are left untouched; cbdata is not used.
 */
static void
hwloc_pci_traverse_setbridgedepth_cb(void * cbdata __hwloc_attribute_unused,
struct hwloc_obj *pcidev, int depth)
{
  /* only bridges have a bridge.depth attribute to fill */
  if (pcidev->type != HWLOC_OBJ_BRIDGE)
    return;

  pcidev->attr->bridge.depth = depth;
}
static void
hwloc_pci_traverse_lookuposdevices_cb(void * cbdata,
struct hwloc_obj *pcidev, int depth __hwloc_attribute_unused)
@ -263,6 +255,7 @@ hwloc_pci_find_hostbridge_parent(struct hwloc_topology *topology, struct hwloc_b
hwloc_obj_t group_obj = hwloc_alloc_setup_object(HWLOC_OBJ_GROUP, -1);
if (group_obj) {
group_obj->cpuset = hwloc_bitmap_dup(cpuset);
group_obj->complete_cpuset = hwloc_bitmap_dup(cpuset);
group_obj->attr->group.depth = (unsigned) -1;
parent = hwloc__insert_object_by_cpuset(topology, group_obj, hwloc_report_os_error);
if (parent == group_obj)
@ -304,8 +297,7 @@ hwloc_insert_pci_device_list(struct hwloc_backend *backend,
hwloc_debug("%s", "\nPCI hierarchy under fake parent:\n");
hwloc_pci_traverse(NULL, &fakeparent, hwloc_pci_traverse_print_cb);
/* walk the hierarchy, set bridge depth and lookup OS devices */
hwloc_pci_traverse(NULL, &fakeparent, hwloc_pci_traverse_setbridgedepth_cb);
/* walk the hierarchy, and lookup OS devices */
hwloc_pci_traverse(backend, &fakeparent, hwloc_pci_traverse_lookuposdevices_cb);
/*

Просмотреть файл

@ -1,7 +1,7 @@
/*
* Copyright © 2009 CNRS
* Copyright © 2009-2012 Inria. All rights reserved.
* Copyright © 2009-2011, 2013 Université Bordeaux 1
* Copyright © 2009-2015 Inria. All rights reserved.
* Copyright © 2009-2011, 2013 Université Bordeaux
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory.
*/
@ -347,6 +347,7 @@ hwloc_aix_prepare_membind(hwloc_topology_t topology, rsethandle_t *rad, hwloc_co
noderad = rs_alloc(RS_EMPTY);
hwloc_bitmap_foreach_begin(node, nodeset)
/* we used MCMlevel rad number for node->os_index during lookup */
rs_getrad(rset, noderad, MCMlevel, node, 0);
rs_op(RS_UNION, noderad, *rad, 0, 0);
hwloc_bitmap_foreach_end();
@ -608,6 +609,9 @@ look_rset(int sdl, hwloc_obj_type_t type, struct hwloc_topology *topology, int l
}
for (i = 0; i < nbnodes; i++) {
hwloc_bitmap_t cpuset;
unsigned os_index = (unsigned) -1; /* no os_index except for PU and NODE below */
if (rs_getrad(rset, rad, sdl, i, 0)) {
fprintf(stderr,"rs_getrad(%d) failed: %s\n", i, strerror(errno));
continue;
@ -615,16 +619,28 @@ look_rset(int sdl, hwloc_obj_type_t type, struct hwloc_topology *topology, int l
if (!rs_getinfo(rad, R_NUMPROCS, 0))
continue;
/* It seems logical processors are numbered from 1 here, while the
* bindprocessor functions numbers them from 0... */
obj = hwloc_alloc_setup_object(type, i - (type == HWLOC_OBJ_PU));
obj->cpuset = hwloc_bitmap_alloc();
obj->os_level = sdl;
maxcpus = rs_getinfo(rad, R_MAXPROCS, 0);
cpuset = hwloc_bitmap_alloc();
for (j = 0; j < maxcpus; j++) {
if (rs_op(RS_TESTRESOURCE, rad, NULL, R_PROCS, j))
hwloc_bitmap_set(obj->cpuset, j);
hwloc_bitmap_set(cpuset, j);
}
if (type == HWLOC_OBJ_PU) {
os_index = hwloc_bitmap_first(cpuset);
hwloc_debug("Found PU #%u inside node %d for sdl %d\n", os_index, i, sdl);
assert(hwloc_bitmap_weight(cpuset) == 1);
} else if (type == HWLOC_OBJ_NODE) {
/* NUMA node os_index isn't used for binding, just use the rad number to get unique values.
* Note that we'll use that fact in hwloc_aix_prepare_membind(). */
os_index = i;
hwloc_debug("Using os_index #%u for NUMA node inside node %d for sdl %d\n", os_index, i, sdl);
}
obj = hwloc_alloc_setup_object(type, os_index);
obj->cpuset = cpuset;
obj->os_level = sdl;
switch(type) {
case HWLOC_OBJ_NODE:
obj->nodeset = hwloc_bitmap_alloc();

Просмотреть файл

@ -43,9 +43,17 @@ hwloc_hpux_find_ldom(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set)
return -1;
obj = hwloc_get_first_largest_obj_inside_cpuset(topology, hwloc_set);
if (!hwloc_bitmap_isequal(obj->cpuset, hwloc_set) || obj->type != HWLOC_OBJ_NODE) {
if (!hwloc_bitmap_isequal(obj->cpuset, hwloc_set))
/* Does not correspond to exactly one node */
return -1;
/* obj is the highest possibly matching object, but some (single) child (with same cpuset) could match too */
while (obj->type != HWLOC_OBJ_NODE) {
/* try the first child, in case it has the same cpuset */
if (!obj->first_child
|| !obj->first_child->cpuset
|| !hwloc_bitmap_isequal(obj->cpuset, obj->first_child->cpuset))
return -1;
obj = obj->first_child;
}
return obj->os_index;

Просмотреть файл

@ -1,6 +1,6 @@
/*
* Copyright © 2009 CNRS
* Copyright © 2009-2012 Inria. All rights reserved.
* Copyright © 2009-2014 Inria. All rights reserved.
* Copyright © 2009-2011 Université Bordeaux 1
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory.
@ -283,6 +283,8 @@ hwloc_look_osf(struct hwloc_backend *backend)
indexes[radid] = radid;
nodes[radid] = obj = hwloc_alloc_setup_object(HWLOC_OBJ_NODE, radid);
obj->nodeset = hwloc_bitmap_alloc();
hwloc_bitmap_set(obj->nodeset, radid);
obj->cpuset = hwloc_bitmap_alloc();
obj->memory.local_memory = rad_get_physmem(radid) * hwloc_getpagesize();
obj->memory.page_types_len = 2;

Просмотреть файл

@ -1,6 +1,6 @@
/*
* Copyright © 2009 CNRS
* Copyright © 2009-2013 Inria. All rights reserved.
* Copyright © 2009-2015 Inria. All rights reserved.
* Copyright © 2009-2011, 2013 Université Bordeaux 1
* See COPYING in top-level directory.
*/
@ -203,27 +203,7 @@ hwloc_look_pci(struct hwloc_backend *backend)
#endif
#endif
/* might be useful for debugging (note that domain might be truncated) */
os_index = (domain << 20) + (pcidev->bus << 12) + (pcidev->dev << 4) + pcidev->func;
obj = hwloc_alloc_setup_object(HWLOC_OBJ_PCI_DEVICE, os_index);
obj->attr->pcidev.domain = domain;
obj->attr->pcidev.bus = pcidev->bus;
obj->attr->pcidev.dev = pcidev->dev;
obj->attr->pcidev.func = pcidev->func;
obj->attr->pcidev.vendor_id = pcidev->vendor_id;
obj->attr->pcidev.device_id = pcidev->device_id;
obj->attr->pcidev.class_id = device_class;
obj->attr->pcidev.revision = config_space_cache[PCI_REVISION_ID];
obj->attr->pcidev.linkspeed = 0; /* unknown */
#ifdef HWLOC_HAVE_PCI_FIND_CAP
cap = pci_find_cap(pcidev, PCI_CAP_ID_EXP, PCI_CAP_NORMAL);
offset = cap ? cap->addr : 0;
#else
offset = hwloc_pci_find_cap(config_space_cache, PCI_CAP_ID_EXP);
#endif /* HWLOC_HAVE_PCI_FIND_CAP */
/* fixup SR-IOV buggy VF device/vendor IDs */
if (0xffff == pcidev->vendor_id && 0xffff == pcidev->device_id) {
/* SR-IOV puts ffff:ffff in Virtual Function config space.
* The actual VF device ID is stored at a special (dynamic) location in the Physical Function config space.
@ -231,7 +211,7 @@ hwloc_look_pci(struct hwloc_backend *backend)
*
* libpciaccess just returns ffff:ffff, needs to be fixed.
* linuxpci is OK because sysfs files are already fixed by the kernel.
* pciutils is OK when it uses those Linux sysfs files.
* (pciutils is OK when it uses those Linux sysfs files.)
*
* Reading these files is an easy way to work around the libpciaccess issue on Linux,
* but we have no way to know if this is caused by SR-IOV or not.
@ -258,7 +238,8 @@ hwloc_look_pci(struct hwloc_backend *backend)
read = fread(value, 1, sizeof(value), file);
fclose(file);
if (read)
obj->attr->pcidev.vendor_id = strtoul(value, NULL, 16);
/* fixup the pciaccess struct so that pci_device_get_vendor_name() is correct later. */
pcidev->vendor_id = strtoul(value, NULL, 16);
}
snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/device",
@ -268,11 +249,33 @@ hwloc_look_pci(struct hwloc_backend *backend)
read = fread(value, 1, sizeof(value), file);
fclose(file);
if (read)
obj->attr->pcidev.device_id = strtoul(value, NULL, 16);
/* fixup the pciaccess struct so that pci_device_get_device_name() is correct later. */
pcidev->device_id = strtoul(value, NULL, 16);
}
#endif
}
/* might be useful for debugging (note that domain might be truncated) */
os_index = (domain << 20) + (pcidev->bus << 12) + (pcidev->dev << 4) + pcidev->func;
obj = hwloc_alloc_setup_object(HWLOC_OBJ_PCI_DEVICE, os_index);
obj->attr->pcidev.domain = domain;
obj->attr->pcidev.bus = pcidev->bus;
obj->attr->pcidev.dev = pcidev->dev;
obj->attr->pcidev.func = pcidev->func;
obj->attr->pcidev.vendor_id = pcidev->vendor_id;
obj->attr->pcidev.device_id = pcidev->device_id;
obj->attr->pcidev.class_id = device_class;
obj->attr->pcidev.revision = config_space_cache[PCI_REVISION_ID];
obj->attr->pcidev.linkspeed = 0; /* unknown */
#ifdef HWLOC_HAVE_PCI_FIND_CAP
cap = pci_find_cap(pcidev, PCI_CAP_ID_EXP, PCI_CAP_NORMAL);
offset = cap ? cap->addr : 0;
#else
offset = hwloc_pci_find_cap(config_space_cache, PCI_CAP_ID_EXP);
#endif /* HWLOC_HAVE_PCI_FIND_CAP */
if (offset > 0 && offset + 20 /* size of PCI express block up to link status */ <= CONFIG_SPACE_CACHESIZE)
hwloc_pci_find_linkspeed(config_space_cache, offset, &obj->attr->pcidev.linkspeed);

Просмотреть файл

@ -73,6 +73,12 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
errno = EINVAL;
return -1;
}
if (type == HWLOC_OBJ_MISC) {
if (verbose)
fprintf(stderr, "Synthetic string with disallow object type at '%s'\n", pos);
errno = EINVAL;
return -1;
}
next_pos = strchr(pos, ':');
if (!next_pos) {
@ -134,9 +140,8 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
case HWLOC_OBJ_CACHE: type = HWLOC_OBJ_SOCKET; break;
case HWLOC_OBJ_SOCKET: type = HWLOC_OBJ_NODE; break;
case HWLOC_OBJ_NODE:
case HWLOC_OBJ_GROUP: type = HWLOC_OBJ_GROUP; break;
case HWLOC_OBJ_MACHINE:
case HWLOC_OBJ_MISC: type = HWLOC_OBJ_MISC; break;
case HWLOC_OBJ_GROUP: type = HWLOC_OBJ_GROUP; break;
default:
assert(0);
}
@ -242,8 +247,6 @@ hwloc__look_synthetic(struct hwloc_topology *topology,
/* pre-hooks */
switch (type) {
case HWLOC_OBJ_MISC:
break;
case HWLOC_OBJ_GROUP:
break;
case HWLOC_OBJ_SYSTEM:
@ -265,6 +268,7 @@ hwloc__look_synthetic(struct hwloc_topology *topology,
break;
case HWLOC_OBJ_PU:
break;
case HWLOC_OBJ_MISC:
case HWLOC_OBJ_TYPE_MAX:
/* Should never happen */
assert(0);
@ -290,8 +294,6 @@ hwloc__look_synthetic(struct hwloc_topology *topology,
/* post-hooks */
switch (type) {
case HWLOC_OBJ_MISC:
break;
case HWLOC_OBJ_GROUP:
obj->attr->group.depth = curlevel->depth;
break;
@ -330,6 +332,7 @@ hwloc__look_synthetic(struct hwloc_topology *topology,
break;
case HWLOC_OBJ_PU:
break;
case HWLOC_OBJ_MISC:
case HWLOC_OBJ_TYPE_MAX:
/* Should never happen */
assert(0);

Просмотреть файл

@ -33,7 +33,7 @@ struct cacheinfo {
unsigned linepart;
int ways;
unsigned sets;
unsigned size;
unsigned long size;
};
struct procinfo {
@ -68,7 +68,7 @@ static void fill_amd_cache(struct procinfo *infos, unsigned level, unsigned cpui
{
struct cacheinfo *cache;
unsigned cachenum;
unsigned size = 0;
unsigned long size = 0;
if (level == 1)
size = ((cpuid >> 24)) << 10;
@ -104,7 +104,7 @@ static void fill_amd_cache(struct procinfo *infos, unsigned level, unsigned cpui
cache->size = size;
cache->sets = 0;
hwloc_debug("cache L%u t%u linesize %u ways %u size %uKB\n", cache->level, cache->nbthreads_sharing, cache->linesize, cache->ways, cache->size >> 10);
hwloc_debug("cache L%u t%u linesize %u ways %u size %luKB\n", cache->level, cache->nbthreads_sharing, cache->linesize, cache->ways, cache->size >> 10);
}
/* Fetch information from the processor itself thanks to cpuid and store it in
@ -221,7 +221,7 @@ static void look_proc(struct procinfo *infos, unsigned highest_cpuid, unsigned h
cache = infos->cache = malloc(infos->numcaches * sizeof(*infos->cache));
for (cachenum = 0; ; cachenum++) {
unsigned linesize, linepart, ways, sets;
unsigned long linesize, linepart, ways, sets;
unsigned type;
eax = 0x8000001d;
ecx = cachenum;
@ -249,7 +249,7 @@ static void look_proc(struct procinfo *infos, unsigned highest_cpuid, unsigned h
cache->sets = sets = ecx + 1;
cache->size = linesize * linepart * ways * sets;
hwloc_debug("cache %u type %u L%u t%u c%u linesize %u linepart %u ways %u sets %u, size %uKB\n", cachenum, cache->type, cache->level, cache->nbthreads_sharing, infos->max_nbcores, linesize, linepart, ways, sets, cache->size >> 10);
/* the cache size field is an unsigned long: print it with %lu, like the other unsigned long values on this line */
hwloc_debug("cache %u type %u L%u t%u c%u linesize %lu linepart %lu ways %lu sets %lu, size %luKB\n", cachenum, cache->type, cache->level, cache->nbthreads_sharing, infos->max_nbcores, linesize, linepart, ways, sets, cache->size >> 10);
cache++;
}
@ -290,7 +290,7 @@ static void look_proc(struct procinfo *infos, unsigned highest_cpuid, unsigned h
cache = infos->cache = malloc(infos->numcaches * sizeof(*infos->cache));
for (cachenum = 0; ; cachenum++) {
unsigned linesize, linepart, ways, sets;
unsigned long linesize, linepart, ways, sets;
unsigned type;
eax = 0x04;
ecx = cachenum;
@ -317,7 +317,7 @@ static void look_proc(struct procinfo *infos, unsigned highest_cpuid, unsigned h
cache->sets = sets = ecx + 1;
cache->size = linesize * linepart * ways * sets;
hwloc_debug("cache %u type %u L%u t%u c%u linesize %u linepart %u ways %u sets %u, size %uKB\n", cachenum, cache->type, cache->level, cache->nbthreads_sharing, infos->max_nbcores, linesize, linepart, ways, sets, cache->size >> 10);
/* the cache size field is an unsigned long: print it with %lu, like the other unsigned long values on this line */
hwloc_debug("cache %u type %u L%u t%u c%u linesize %lu linepart %lu ways %lu sets %lu, size %luKB\n", cachenum, cache->type, cache->level, cache->nbthreads_sharing, infos->max_nbcores, linesize, linepart, ways, sets, cache->size >> 10);
infos->max_nbthreads = infos->max_log_proc / infos->max_nbcores;
hwloc_debug("thus %u threads\n", infos->max_nbthreads);
infos->threadid = infos->logprocid % infos->max_nbthreads;
@ -401,6 +401,7 @@ static void summarize(hwloc_topology_t topology, struct procinfo *infos, unsigne
unsigned i, j, l, level, type;
unsigned nbsockets = 0;
int one = -1;
unsigned next_group_depth = topology->next_group_depth;
for (i = 0; i < nbprocs; i++)
if (infos[i].present) {
@ -517,6 +518,8 @@ static void summarize(hwloc_topology_t topology, struct procinfo *infos, unsigne
}
node = hwloc_alloc_setup_object(HWLOC_OBJ_NODE, nodeid);
node->cpuset = node_cpuset;
node->nodeset = hwloc_bitmap_alloc();
hwloc_bitmap_set(node->nodeset, nodeid);
hwloc_debug_1arg_bitmap("os node %u has cpuset %s\n",
nodeid, node_cpuset);
hwloc_insert_object_by_cpuset(topology, node);
@ -578,9 +581,12 @@ static void summarize(hwloc_topology_t topology, struct procinfo *infos, unsigne
hwloc_bitmap_clr(unknowns_cpuset, j);
}
}
unknown_obj = hwloc_alloc_setup_object(HWLOC_OBJ_MISC, unknownid);
unknown_obj = hwloc_alloc_setup_object(HWLOC_OBJ_GROUP, unknownid);
unknown_obj->cpuset = unknown_cpuset;
unknown_obj->os_level = level;
unknown_obj->attr->group.depth = topology->next_group_depth + level;
if (next_group_depth <= topology->next_group_depth + level)
next_group_depth = topology->next_group_depth + level + 1;
hwloc_debug_2args_bitmap("os unknown%d %u has cpuset %s\n",
level, unknownid, unknown_cpuset);
hwloc_insert_object_by_cpuset(topology, unknown_obj);
@ -712,6 +718,7 @@ static void summarize(hwloc_topology_t topology, struct procinfo *infos, unsigne
}
hwloc_bitmap_free(complete_cpuset);
topology->next_group_depth = next_group_depth;
}
static int

Просмотреть файл

@ -77,9 +77,11 @@ void hwloc_report_os_error(const char *msg, int line)
fprintf(stderr, "* %s\n", msg);
fprintf(stderr, "* Error occurred in topology.c line %d\n", line);
fprintf(stderr, "*\n");
fprintf(stderr, "* Please report this error message to the hwloc user's mailing list,\n");
fprintf(stderr, "* The following FAQ entry in a recent hwloc documentation may help:\n");
fprintf(stderr, "* What should I do when hwloc reports \"operating system\" warnings?\n");
fprintf(stderr, "* Otherwise please report this error message to the hwloc user's mailing list,\n");
#ifdef HWLOC_LINUX_SYS
fprintf(stderr, "* along with the output from the hwloc-gather-topology script.\n");
fprintf(stderr, "* along with the output+tarball generated by the hwloc-gather-topology script.\n");
#else
fprintf(stderr, "* along with any relevant topology information from your platform.\n");
#endif
@ -1024,14 +1026,16 @@ hwloc__insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t obj,
{
struct hwloc_obj *result;
/* Start at the top. */
/* Add the cpuset to the top */
hwloc_bitmap_or(topology->levels[0][0]->complete_cpuset, topology->levels[0][0]->complete_cpuset, obj->cpuset);
if (obj->nodeset)
hwloc_bitmap_or(topology->levels[0][0]->complete_nodeset, topology->levels[0][0]->complete_nodeset, obj->nodeset);
result = hwloc___insert_object_by_cpuset(topology, topology->levels[0][0], obj, report_error);
if (result != obj)
if (result != obj) {
/* either failed to insert, or got merged, free the original object */
hwloc_free_unlinked_object(obj);
} else {
/* Add the cpuset to the top */
hwloc_bitmap_or(topology->levels[0][0]->complete_cpuset, topology->levels[0][0]->complete_cpuset, obj->cpuset);
if (obj->nodeset)
hwloc_bitmap_or(topology->levels[0][0]->complete_nodeset, topology->levels[0][0]->complete_nodeset, obj->nodeset);
}
return result;
}
@ -1635,6 +1639,28 @@ unlink_and_free_single_object(hwloc_obj_t *pparent)
hwloc_free_unlinked_object(parent);
}
/* Rebuild the child list of parent by re-inserting every child one at a time.
 *
 * The existing list is detached, then each child is placed in front of the
 * first already-inserted sibling for which both objects have a cpuset and
 * hwloc__object_cpusets_compare_first(child, sibling) is not positive.
 * A child without cpuset therefore goes to the end of the list, and
 * already-inserted siblings without cpuset are always skipped over.
 */
static void
reorder_children(hwloc_obj_t parent)
{
  hwloc_obj_t pending = parent->first_child; /* children not re-inserted yet */
  hwloc_obj_t cur;
  hwloc_obj_t *slot;

  /* start again from an empty child list */
  parent->first_child = NULL;

  while (pending != NULL) {
    /* take the head of the pending list */
    cur = pending;
    pending = cur->next_sibling;

    /* walk the new list until the insertion point is found */
    for (slot = &parent->first_child; *slot != NULL; slot = &((*slot)->next_sibling)) {
      if (cur->cpuset && (*slot)->cpuset
          && hwloc__object_cpusets_compare_first(cur, *slot) <= 0)
        break;
    }

    /* link cur in front of *slot (or at the tail when *slot is NULL) */
    cur->next_sibling = *slot;
    *slot = cur;
  }
}
/* Remove all ignored objects. */
static int
remove_ignored(hwloc_topology_t topology, hwloc_obj_t *pparent)
@ -1656,25 +1682,8 @@ remove_ignored(hwloc_topology_t topology, hwloc_obj_t *pparent)
dropped = 1;
} else if (dropped_children) {
/* we keep this object but its children changed, reorder them by cpuset */
/* move the children list on the side */
hwloc_obj_t *prev, children = parent->first_child;
parent->first_child = NULL;
while (children) {
/* dequeue child */
child = children;
children = child->next_sibling;
/* find where to enqueue it */
prev = &parent->first_child;
while (*prev
&& (!child->cpuset || !(*prev)->cpuset
|| hwloc__object_cpusets_compare_first(child, *prev) > 0))
prev = &((*prev)->next_sibling);
/* enqueue */
child->next_sibling = *prev;
*prev = child;
}
/* we keep this object but its children changed, reorder them by complete_cpuset */
reorder_children(parent);
}
return dropped;
@ -1804,29 +1813,39 @@ can_merge_group(hwloc_topology_t topology, hwloc_obj_t obj)
* Merge with the only child if either the parent or the child has a type to be
* ignored while keeping structure
*/
static void
static int
merge_useless_child(hwloc_topology_t topology, hwloc_obj_t *pparent)
{
hwloc_obj_t parent = *pparent, child, *pchild, ios;
int replacechild = 0, replaceparent = 0;
int replacechild = 0, replaceparent = 0, droppedchildren = 0;
if (!parent->first_child)
/* There is no child, nothing to merge. */
return 0;
for_each_child_safe(child, parent, pchild)
merge_useless_child(topology, pchild);
droppedchildren += merge_useless_child(topology, pchild);
if (droppedchildren)
reorder_children(parent);
child = parent->first_child;
if (!child)
/* There are no child, nothing to merge. */
return;
/* we don't merge if there are multiple "important" children.
* non-important ones are at the end of the list.
* look at the second child to find out.
*/
if (child->next_sibling
/* I/O objects may be ignored when trying to merge */
&& !hwloc_obj_type_is_io(child->next_sibling->type)
/* Misc objects without cpuset may be ignored as well */
&& !(child->next_sibling->type == HWLOC_OBJ_MISC && !child->next_sibling->cpuset))
/* There are several children that prevent from merging */
return 0;
if (child->next_sibling && !hwloc_obj_type_is_io(child->next_sibling->type))
/* There are several non-I/O children */
return;
/* There is one non-I/O child and possible some I/O children.
* I/O children shouldn't prevent merging because they can be attached
* to anything with the same locality.
/* There is one important child, and some children that may be ignored
* during merging because they can be attached to anything with the same locality.
* Move them to the side during merging, and append them back later.
* This is easy because I/O children are always last in the list.
* This is easy because children with no cpuset are always last in the list.
*/
ios = child->next_sibling;
child->next_sibling = NULL;
@ -1873,12 +1892,14 @@ merge_useless_child(hwloc_topology_t topology, hwloc_obj_t *pparent)
}
if (ios) {
/* append I/O children to the list of children of the remaining object */
/* append the remaining list of children to the remaining object */
pchild = &((*pparent)->first_child);
while (*pchild)
pchild = &((*pchild)->next_sibling);
*pchild = ios;
}
return replaceparent ? 1 : 0;
}
static void
@ -1963,6 +1984,8 @@ hwloc_propagate_bridge_depth(hwloc_topology_t topology, hwloc_obj_t root, unsign
if (child->type == HWLOC_OBJ_BRIDGE) {
child->attr->bridge.depth = depth;
hwloc_propagate_bridge_depth(topology, child, depth+1);
} else if (!hwloc_obj_type_is_io(child->type)) {
hwloc_propagate_bridge_depth(topology, child, 0);
}
child = child->next_sibling;
}
@ -3035,7 +3058,17 @@ hwloc__check_children(struct hwloc_obj *parent)
assert(hwloc_bitmap_isincluded(parent->children[j]->cpuset, remaining_parent_set));
hwloc_bitmap_andnot(remaining_parent_set, remaining_parent_set, parent->children[j]->cpuset);
}
assert(hwloc_bitmap_iszero(remaining_parent_set));
if (parent->type == HWLOC_OBJ_PU) {
/* if parent is a PU, its os_index bit may remain.
* it may be in a Misc child inserted by cpuset, or could be in no child */
if (hwloc_bitmap_weight(remaining_parent_set) == 1)
assert((unsigned) hwloc_bitmap_first(remaining_parent_set) == parent->os_index);
else
assert(hwloc_bitmap_iszero(remaining_parent_set));
} else {
/* nothing remains */
assert(hwloc_bitmap_iszero(remaining_parent_set));
}
hwloc_bitmap_free(remaining_parent_set);
}