Merge pull request #498 from jsquyres/pr/hwloc-19x-updates
hwloc 1.9.x updates
Этот коммит содержится в:
Коммит
2145dfb2e8
@ -1,3 +1,35 @@
|
||||
Applied the following patches from the upstream hwloc 1.9 branch after
|
||||
the v1.9.1 release:
|
||||
|
||||
All relevant commits up to open-mpi/hwloc@4e23b12 (i.e., the HEAD as
|
||||
of 27 March 2015). "Relevant" commits are defined as those that
|
||||
included files that are embedded in the Open MPI tree (e.g., updates
|
||||
to files in docs/, utils/, etc. aren't relevant because they are not
|
||||
embedded in the Open MPI tree). To be specific, the following commits
|
||||
have been cherry-picked over to Open MPI:
|
||||
|
||||
* open-mpi/hwloc@7c03216 v1.9.1 released, doing 1.9.2rc1 now
|
||||
* open-mpi/hwloc@b35ced8 misc.h: Fix hwloc_strncasecmp() build under strict flags on BSD
|
||||
* open-mpi/hwloc@d8c3f3d misc.h: Fix hwloc_strncasecmp() with some icc
|
||||
* open-mpi/hwloc@f705a23 Use gcc's __asm__ version of the asm extension, which can be used in all standards
|
||||
* open-mpi/hwloc@307726a configure: fix the check for X11/Xutil.h
|
||||
* open-mpi/hwloc@ec58c05 errors: improve the advice to send hwloc-gather-topology files in the OS error message
|
||||
* open-mpi/hwloc@35c743d NEWS update
|
||||
* open-mpi/hwloc@868170e API: clearly state that os_index isn't unique while logical_index is
|
||||
* open-mpi/hwloc@851532d x86 and OSF: Don't forget to set NUMA node nodeset
|
||||
* open-mpi/hwloc@790aa2e cpuid-x86: Fix duplicate asm labels in case of heavy inlining on x86-32
|
||||
* open-mpi/hwloc@dd09aa5 debug: fix an overzealous assertion about the parent cpuset vs its children
|
||||
* open-mpi/hwloc@769b9b5 core: fix the merging of identical objects in presence of Misc objects
|
||||
* open-mpi/hwloc@71da0f1 core: reorder children in merge_useless_child() as well
|
||||
* open-mpi/hwloc@c9cef07 hpux: improve hwloc_hpux_find_ldom() looking for NUMA node
|
||||
* open-mpi/hwloc@cdffea6 x86: use ulong for cache sizes, uint won't be enough in the near future
|
||||
* open-mpi/hwloc@55b0676 x86: use Group instead of Misc for unknown x2apic levels
|
||||
* open-mpi/hwloc@7764ce5 synthetic: Misc levels are not allowed in the synthetic description
|
||||
* open-mpi/hwloc@5b2dce1 error: point to the FAQ when displaying the big OS error message
|
||||
* open-mpi/hwloc@c7bd9e6 pci: fix SR-IOV VF vendor/device names
|
||||
* open-mpi/hwloc@a0f72ef distances: when we fail to insert an intermediate group, don't try to group further above
|
||||
* open-mpi/hwloc@e419811 AIX: Fix PU os_index
|
||||
* open-mpi/hwloc@08ab793 groups: add complete sets when inserting distance/pci groups
|
||||
* open-mpi/hwloc@c66e714 core: only update root->complete sets if insert succeeds
|
||||
* open-mpi/hwloc@01da9b9 bitmap: fix a corner case in hwloc_bitmap_isincluded() with infinite sets
|
||||
* open-mpi/hwloc@e7b192b pci: fix bridge depth
|
||||
|
@ -17,6 +17,14 @@ bug fixes (and other actions) for each version of hwloc since version
|
||||
in v0.9.1).
|
||||
|
||||
|
||||
Version 1.9.2
|
||||
-------------
|
||||
* Fix some build failures in private/misc.h.
|
||||
Thanks to Pavan Balaji and Ralph Castain for the reports.
|
||||
* Fix failures to detect X11/Xutil.h on some Solaris platforms.
|
||||
Thanks to Siegmar Gross for reporting the failure.
|
||||
|
||||
|
||||
Version 1.9.1
|
||||
-------------
|
||||
* Fix a crash when the PCI locality is invalid. Attach to the root object
|
||||
|
@ -13,7 +13,7 @@ snapshot_version=gitclone
|
||||
|
||||
major=1
|
||||
minor=9
|
||||
release=1
|
||||
release=2
|
||||
|
||||
# greek is used for alpha or beta release tags. If it is non-empty,
|
||||
# it will be appended to the version number. It does not have to be
|
||||
|
@ -353,6 +353,7 @@ EOF])
|
||||
AC_CHECK_HEADERS([unistd.h])
|
||||
AC_CHECK_HEADERS([dirent.h])
|
||||
AC_CHECK_HEADERS([strings.h])
|
||||
AC_CHECK_HEADERS([ctype.h])
|
||||
|
||||
AC_CHECK_FUNCS([strncasecmp], [
|
||||
_HWLOC_CHECK_DECL([strncasecmp], [
|
||||
@ -930,7 +931,7 @@ EOF])
|
||||
[AC_CHECK_HEADERS([X11/keysym.h],
|
||||
[AC_DEFINE([HWLOC_HAVE_X11_KEYSYM], [1], [Define to 1 if X11 headers including Xutil.h and keysym.h are available.])])
|
||||
AC_SUBST([HWLOC_X11_LIBS], ["-lX11"])
|
||||
])
|
||||
], [], [#include <X11/Xlib.h>])
|
||||
])
|
||||
])
|
||||
CPPFLAGS=$CPPFLAGS_save
|
||||
|
@ -331,7 +331,10 @@ struct hwloc_obj_memory_s {
|
||||
struct hwloc_obj {
|
||||
/* physical information */
|
||||
hwloc_obj_type_t type; /**< \brief Type of object */
|
||||
unsigned os_index; /**< \brief OS-provided physical index number */
|
||||
unsigned os_index; /**< \brief OS-provided physical index number.
|
||||
* It is not guaranteed unique across the entire machine,
|
||||
* except for PUs and NUMA nodes.
|
||||
*/
|
||||
char *name; /**< \brief Object description if any */
|
||||
|
||||
struct hwloc_obj_memory_s memory; /**< \brief Memory attributes */
|
||||
@ -346,7 +349,9 @@ struct hwloc_obj {
|
||||
* of parent/child links from the root object to here.
|
||||
*/
|
||||
unsigned logical_index; /**< \brief Horizontal index in the whole list of similar objects,
|
||||
* could be a "cousin_rank" since it's the rank within the "cousin" list below */
|
||||
* hence guaranteed unique across the entire machine.
|
||||
* Could be a "cousin_rank" since it's the rank within the "cousin" list below
|
||||
*/
|
||||
signed os_level; /**< \brief OS-provided physical level, -1 if unknown or meaningless */
|
||||
|
||||
/* cousins are all objects of the same type (and depth) across the entire topology */
|
||||
|
@ -32,14 +32,14 @@ static __hwloc_inline int hwloc_have_x86_cpuid(void)
|
||||
"pushfl \n\t" \
|
||||
"pop %1 \n\t" \
|
||||
"cmp %1,%2\n\t" /* Compare with expected value */ \
|
||||
"jnz Lhwloc1\n\t" /* Unexpected, failure */ \
|
||||
"jnz 0f\n\t" /* Unexpected, failure */ \
|
||||
|
||||
TRY_TOGGLE /* Try to set/clear */
|
||||
TRY_TOGGLE /* Try to clear/set */
|
||||
|
||||
"mov $1,%0\n\t" /* Passed the test! */
|
||||
|
||||
"Lhwloc1: \n\t"
|
||||
"0: \n\t"
|
||||
"popfl \n\t" /* Restore flags */
|
||||
|
||||
: "=r" (ret), "=&r" (tmp), "=&r" (tmp2));
|
||||
|
@ -15,6 +15,16 @@
|
||||
#include <private/autogen/config.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#ifdef HWLOC_HAVE_DECL_STRNCASECMP
|
||||
#ifdef HAVE_STRINGS_H
|
||||
#include <strings.h>
|
||||
#endif
|
||||
#else
|
||||
#ifdef HAVE_CTYPE_H
|
||||
#include <ctype.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Compile-time assertion */
|
||||
#define HWLOC_BUILD_ASSERT(condition) ((void)sizeof(char[1 - 2*!(condition)]))
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2011 inria. All rights reserved.
|
||||
* Copyright © 2009-2015 Inria. All rights reserved.
|
||||
* Copyright © 2009-2011 Université Bordeaux 1
|
||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
@ -903,7 +903,7 @@ int hwloc_bitmap_isincluded (const struct hwloc_bitmap_s *sub_set, const struct
|
||||
HWLOC__BITMAP_CHECK(sub_set);
|
||||
HWLOC__BITMAP_CHECK(super_set);
|
||||
|
||||
for(i=0; i<sub_set->ulongs_count; i++)
|
||||
for(i=0; i<sub_set->ulongs_count || i<super_set->ulongs_count; i++)
|
||||
if (HWLOC_SUBBITMAP_READULONG(super_set, i) != (HWLOC_SUBBITMAP_READULONG(super_set, i) | HWLOC_SUBBITMAP_READULONG(sub_set, i)))
|
||||
return 0;
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright © 2010-2014 Inria. All rights reserved.
|
||||
* Copyright © 2010-2015 Inria. All rights reserved.
|
||||
* Copyright © 2011-2012 Université Bordeaux 1
|
||||
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
@ -832,6 +832,7 @@ hwloc__groups_by_distances(struct hwloc_topology *topology,
|
||||
hwloc_obj_t *groupobjs = NULL;
|
||||
unsigned *groupsizes = NULL;
|
||||
float *groupdistances = NULL;
|
||||
unsigned failed = 0;
|
||||
|
||||
groupobjs = malloc(sizeof(hwloc_obj_t) * nbgroups);
|
||||
groupsizes = malloc(sizeof(unsigned) * nbgroups);
|
||||
@ -851,22 +852,39 @@ hwloc__groups_by_distances(struct hwloc_topology *topology,
|
||||
if (groupids[j] == i+1) {
|
||||
/* assemble the group cpuset */
|
||||
hwloc_bitmap_or(group_obj->cpuset, group_obj->cpuset, objs[j]->cpuset);
|
||||
if (objs[i]->complete_cpuset) {
|
||||
if (!group_obj->complete_cpuset)
|
||||
group_obj->complete_cpuset = hwloc_bitmap_alloc();
|
||||
hwloc_bitmap_or(group_obj->complete_cpuset, group_obj->complete_cpuset, objs[j]->complete_cpuset);
|
||||
}
|
||||
/* if one obj has a nodeset, assemble a group nodeset */
|
||||
if (objs[j]->nodeset) {
|
||||
if (!group_obj->nodeset)
|
||||
group_obj->nodeset = hwloc_bitmap_alloc();
|
||||
hwloc_bitmap_or(group_obj->nodeset, group_obj->nodeset, objs[j]->nodeset);
|
||||
}
|
||||
if (objs[i]->complete_nodeset) {
|
||||
if (!group_obj->complete_nodeset)
|
||||
group_obj->complete_nodeset = hwloc_bitmap_alloc();
|
||||
hwloc_bitmap_or(group_obj->complete_nodeset, group_obj->complete_nodeset, objs[j]->complete_nodeset);
|
||||
}
|
||||
groupsizes[i]++;
|
||||
}
|
||||
hwloc_debug_1arg_bitmap("adding Group object with %u objects and cpuset %s\n",
|
||||
groupsizes[i], group_obj->cpuset);
|
||||
res_obj = hwloc__insert_object_by_cpuset(topology, group_obj,
|
||||
fromuser ? hwloc_report_user_distance_error : hwloc_report_os_error);
|
||||
/* res_obj may be different from group_objs if we got groups from XML import before grouping */
|
||||
/* res_obj may be NULL on failure to insert. */
|
||||
if (!res_obj)
|
||||
failed++;
|
||||
/* or it may be different from groupobjs if we got groups from XML import before grouping */
|
||||
groupobjs[i] = res_obj;
|
||||
}
|
||||
|
||||
if (failed)
|
||||
/* don't try to group above if we got a NULL group here, just keep this incomplete level */
|
||||
goto inner_free;
|
||||
|
||||
/* factorize distances */
|
||||
memset(&(groupdistances[0]), 0, sizeof(groupdistances[0]) * nbgroups * nbgroups);
|
||||
#undef DISTANCE
|
||||
@ -1002,12 +1020,22 @@ hwloc_group_by_distances(struct hwloc_topology *topology)
|
||||
for(i=0; i<nbobjs; i++) {
|
||||
/* assemble the group cpuset */
|
||||
hwloc_bitmap_or(group_obj->cpuset, group_obj->cpuset, osdist->objs[i]->cpuset);
|
||||
if (osdist->objs[i]->complete_cpuset) {
|
||||
if (!group_obj->complete_cpuset)
|
||||
group_obj->complete_cpuset = hwloc_bitmap_alloc();
|
||||
hwloc_bitmap_or(group_obj->complete_cpuset, group_obj->complete_cpuset, osdist->objs[i]->complete_cpuset);
|
||||
}
|
||||
/* if one obj has a nodeset, assemble a group nodeset */
|
||||
if (osdist->objs[i]->nodeset) {
|
||||
if (!group_obj->nodeset)
|
||||
group_obj->nodeset = hwloc_bitmap_alloc();
|
||||
hwloc_bitmap_or(group_obj->nodeset, group_obj->nodeset, osdist->objs[i]->nodeset);
|
||||
}
|
||||
if (osdist->objs[i]->complete_nodeset) {
|
||||
if (!group_obj->complete_nodeset)
|
||||
group_obj->complete_nodeset = hwloc_bitmap_alloc();
|
||||
hwloc_bitmap_or(group_obj->complete_nodeset, group_obj->complete_nodeset, osdist->objs[i]->complete_nodeset);
|
||||
}
|
||||
}
|
||||
hwloc_debug_1arg_bitmap("adding Group object (as root of distance matrix with %u objects) with cpuset %s\n",
|
||||
nbobjs, group_obj->cpuset);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright © 2009-2014 Inria. All rights reserved.
|
||||
* Copyright © 2009-2015 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
@ -31,14 +31,6 @@ hwloc_pci_traverse_print_cb(void * cbdata __hwloc_attribute_unused,
|
||||
pcidev->attr->pcidev.revision, pcidev->attr->pcidev.class_id);
|
||||
}
|
||||
|
||||
static void
|
||||
hwloc_pci_traverse_setbridgedepth_cb(void * cbdata __hwloc_attribute_unused,
|
||||
struct hwloc_obj *pcidev, int depth)
|
||||
{
|
||||
if (pcidev->type == HWLOC_OBJ_BRIDGE)
|
||||
pcidev->attr->bridge.depth = depth;
|
||||
}
|
||||
|
||||
static void
|
||||
hwloc_pci_traverse_lookuposdevices_cb(void * cbdata,
|
||||
struct hwloc_obj *pcidev, int depth __hwloc_attribute_unused)
|
||||
@ -263,6 +255,7 @@ hwloc_pci_find_hostbridge_parent(struct hwloc_topology *topology, struct hwloc_b
|
||||
hwloc_obj_t group_obj = hwloc_alloc_setup_object(HWLOC_OBJ_GROUP, -1);
|
||||
if (group_obj) {
|
||||
group_obj->cpuset = hwloc_bitmap_dup(cpuset);
|
||||
group_obj->complete_cpuset = hwloc_bitmap_dup(cpuset);
|
||||
group_obj->attr->group.depth = (unsigned) -1;
|
||||
parent = hwloc__insert_object_by_cpuset(topology, group_obj, hwloc_report_os_error);
|
||||
if (parent == group_obj)
|
||||
@ -304,8 +297,7 @@ hwloc_insert_pci_device_list(struct hwloc_backend *backend,
|
||||
hwloc_debug("%s", "\nPCI hierarchy under fake parent:\n");
|
||||
hwloc_pci_traverse(NULL, &fakeparent, hwloc_pci_traverse_print_cb);
|
||||
|
||||
/* walk the hierarchy, set bridge depth and lookup OS devices */
|
||||
hwloc_pci_traverse(NULL, &fakeparent, hwloc_pci_traverse_setbridgedepth_cb);
|
||||
/* walk the hierarchy, and lookup OS devices */
|
||||
hwloc_pci_traverse(backend, &fakeparent, hwloc_pci_traverse_lookuposdevices_cb);
|
||||
|
||||
/*
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2012 Inria. All rights reserved.
|
||||
* Copyright © 2009-2011, 2013 Université Bordeaux 1
|
||||
* Copyright © 2009-2015 Inria. All rights reserved.
|
||||
* Copyright © 2009-2011, 2013 Université Bordeaux
|
||||
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
@ -347,6 +347,7 @@ hwloc_aix_prepare_membind(hwloc_topology_t topology, rsethandle_t *rad, hwloc_co
|
||||
noderad = rs_alloc(RS_EMPTY);
|
||||
|
||||
hwloc_bitmap_foreach_begin(node, nodeset)
|
||||
/* we used MCMlevel rad number for node->os_index during lookup */
|
||||
rs_getrad(rset, noderad, MCMlevel, node, 0);
|
||||
rs_op(RS_UNION, noderad, *rad, 0, 0);
|
||||
hwloc_bitmap_foreach_end();
|
||||
@ -608,6 +609,9 @@ look_rset(int sdl, hwloc_obj_type_t type, struct hwloc_topology *topology, int l
|
||||
}
|
||||
|
||||
for (i = 0; i < nbnodes; i++) {
|
||||
hwloc_bitmap_t cpuset;
|
||||
unsigned os_index = (unsigned) -1; /* no os_index except for PU and NODE below */
|
||||
|
||||
if (rs_getrad(rset, rad, sdl, i, 0)) {
|
||||
fprintf(stderr,"rs_getrad(%d) failed: %s\n", i, strerror(errno));
|
||||
continue;
|
||||
@ -615,16 +619,28 @@ look_rset(int sdl, hwloc_obj_type_t type, struct hwloc_topology *topology, int l
|
||||
if (!rs_getinfo(rad, R_NUMPROCS, 0))
|
||||
continue;
|
||||
|
||||
/* It seems logical processors are numbered from 1 here, while the
|
||||
* bindprocessor functions numbers them from 0... */
|
||||
obj = hwloc_alloc_setup_object(type, i - (type == HWLOC_OBJ_PU));
|
||||
obj->cpuset = hwloc_bitmap_alloc();
|
||||
obj->os_level = sdl;
|
||||
maxcpus = rs_getinfo(rad, R_MAXPROCS, 0);
|
||||
cpuset = hwloc_bitmap_alloc();
|
||||
for (j = 0; j < maxcpus; j++) {
|
||||
if (rs_op(RS_TESTRESOURCE, rad, NULL, R_PROCS, j))
|
||||
hwloc_bitmap_set(obj->cpuset, j);
|
||||
hwloc_bitmap_set(cpuset, j);
|
||||
}
|
||||
|
||||
if (type == HWLOC_OBJ_PU) {
|
||||
os_index = hwloc_bitmap_first(cpuset);
|
||||
hwloc_debug("Found PU #%u inside node %d for sdl %d\n", os_index, i, sdl);
|
||||
assert(hwloc_bitmap_weight(cpuset) == 1);
|
||||
} else if (type == HWLOC_OBJ_NODE) {
|
||||
/* NUMA node os_index isn't used for binding, just use the rad number to get unique values.
|
||||
* Note that we'll use that fact in hwloc_aix_prepare_membind(). */
|
||||
os_index = i;
|
||||
hwloc_debug("Using os_index #%u for NUMA node inside node %d for sdl %d\n", os_index, i, sdl);
|
||||
}
|
||||
|
||||
obj = hwloc_alloc_setup_object(type, os_index);
|
||||
obj->cpuset = cpuset;
|
||||
obj->os_level = sdl;
|
||||
|
||||
switch(type) {
|
||||
case HWLOC_OBJ_NODE:
|
||||
obj->nodeset = hwloc_bitmap_alloc();
|
||||
|
@ -43,9 +43,17 @@ hwloc_hpux_find_ldom(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set)
|
||||
return -1;
|
||||
|
||||
obj = hwloc_get_first_largest_obj_inside_cpuset(topology, hwloc_set);
|
||||
if (!hwloc_bitmap_isequal(obj->cpuset, hwloc_set) || obj->type != HWLOC_OBJ_NODE) {
|
||||
if (!hwloc_bitmap_isequal(obj->cpuset, hwloc_set))
|
||||
/* Does not correspond to exactly one node */
|
||||
return -1;
|
||||
/* obj is the highest possibly matching object, but some (single) child (with same cpuset) could match too */
|
||||
while (obj->type != HWLOC_OBJ_NODE) {
|
||||
/* try the first child, in case it has the same cpuset */
|
||||
if (!obj->first_child
|
||||
|| !obj->first_child->cpuset
|
||||
|| !hwloc_bitmap_isequal(obj->cpuset, obj->first_child->cpuset))
|
||||
return -1;
|
||||
obj = obj->first_child;
|
||||
}
|
||||
|
||||
return obj->os_index;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2012 Inria. All rights reserved.
|
||||
* Copyright © 2009-2014 Inria. All rights reserved.
|
||||
* Copyright © 2009-2011 Université Bordeaux 1
|
||||
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
@ -283,6 +283,8 @@ hwloc_look_osf(struct hwloc_backend *backend)
|
||||
|
||||
indexes[radid] = radid;
|
||||
nodes[radid] = obj = hwloc_alloc_setup_object(HWLOC_OBJ_NODE, radid);
|
||||
obj->nodeset = hwloc_bitmap_alloc();
|
||||
hwloc_bitmap_set(obj->nodeset, radid);
|
||||
obj->cpuset = hwloc_bitmap_alloc();
|
||||
obj->memory.local_memory = rad_get_physmem(radid) * hwloc_getpagesize();
|
||||
obj->memory.page_types_len = 2;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2013 Inria. All rights reserved.
|
||||
* Copyright © 2009-2015 Inria. All rights reserved.
|
||||
* Copyright © 2009-2011, 2013 Université Bordeaux 1
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
@ -203,27 +203,7 @@ hwloc_look_pci(struct hwloc_backend *backend)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* might be useful for debugging (note that domain might be truncated) */
|
||||
os_index = (domain << 20) + (pcidev->bus << 12) + (pcidev->dev << 4) + pcidev->func;
|
||||
|
||||
obj = hwloc_alloc_setup_object(HWLOC_OBJ_PCI_DEVICE, os_index);
|
||||
obj->attr->pcidev.domain = domain;
|
||||
obj->attr->pcidev.bus = pcidev->bus;
|
||||
obj->attr->pcidev.dev = pcidev->dev;
|
||||
obj->attr->pcidev.func = pcidev->func;
|
||||
obj->attr->pcidev.vendor_id = pcidev->vendor_id;
|
||||
obj->attr->pcidev.device_id = pcidev->device_id;
|
||||
obj->attr->pcidev.class_id = device_class;
|
||||
obj->attr->pcidev.revision = config_space_cache[PCI_REVISION_ID];
|
||||
|
||||
obj->attr->pcidev.linkspeed = 0; /* unknown */
|
||||
#ifdef HWLOC_HAVE_PCI_FIND_CAP
|
||||
cap = pci_find_cap(pcidev, PCI_CAP_ID_EXP, PCI_CAP_NORMAL);
|
||||
offset = cap ? cap->addr : 0;
|
||||
#else
|
||||
offset = hwloc_pci_find_cap(config_space_cache, PCI_CAP_ID_EXP);
|
||||
#endif /* HWLOC_HAVE_PCI_FIND_CAP */
|
||||
|
||||
/* fixup SR-IOV buggy VF device/vendor IDs */
|
||||
if (0xffff == pcidev->vendor_id && 0xffff == pcidev->device_id) {
|
||||
/* SR-IOV puts ffff:ffff in Virtual Function config space.
|
||||
* The actual VF device ID is stored at a special (dynamic) location in the Physical Function config space.
|
||||
@ -231,7 +211,7 @@ hwloc_look_pci(struct hwloc_backend *backend)
|
||||
*
|
||||
* libpciaccess just returns ffff:ffff, needs to be fixed.
|
||||
* linuxpci is OK because sysfs files are already fixed in the kernel.
|
||||
* pciutils is OK when it uses those Linux sysfs files.
|
||||
* (pciutils is OK when it uses those Linux sysfs files.)
|
||||
*
|
||||
* Reading these files is an easy way to work around the libpciaccess issue on Linux,
|
||||
* but we have no way to know if this is caused by SR-IOV or not.
|
||||
@ -258,7 +238,8 @@ hwloc_look_pci(struct hwloc_backend *backend)
|
||||
read = fread(value, 1, sizeof(value), file);
|
||||
fclose(file);
|
||||
if (read)
|
||||
obj->attr->pcidev.vendor_id = strtoul(value, NULL, 16);
|
||||
/* fixup the pciaccess struct so that pci_device_get_vendor_name() is correct later. */
|
||||
pcidev->vendor_id = strtoul(value, NULL, 16);
|
||||
}
|
||||
|
||||
snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/device",
|
||||
@ -268,11 +249,33 @@ hwloc_look_pci(struct hwloc_backend *backend)
|
||||
read = fread(value, 1, sizeof(value), file);
|
||||
fclose(file);
|
||||
if (read)
|
||||
obj->attr->pcidev.device_id = strtoul(value, NULL, 16);
|
||||
/* fixup the pciaccess struct so that pci_device_get_device_name() is correct later. */
|
||||
pcidev->device_id = strtoul(value, NULL, 16);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* might be useful for debugging (note that domain might be truncated) */
|
||||
os_index = (domain << 20) + (pcidev->bus << 12) + (pcidev->dev << 4) + pcidev->func;
|
||||
|
||||
obj = hwloc_alloc_setup_object(HWLOC_OBJ_PCI_DEVICE, os_index);
|
||||
obj->attr->pcidev.domain = domain;
|
||||
obj->attr->pcidev.bus = pcidev->bus;
|
||||
obj->attr->pcidev.dev = pcidev->dev;
|
||||
obj->attr->pcidev.func = pcidev->func;
|
||||
obj->attr->pcidev.vendor_id = pcidev->vendor_id;
|
||||
obj->attr->pcidev.device_id = pcidev->device_id;
|
||||
obj->attr->pcidev.class_id = device_class;
|
||||
obj->attr->pcidev.revision = config_space_cache[PCI_REVISION_ID];
|
||||
|
||||
obj->attr->pcidev.linkspeed = 0; /* unknown */
|
||||
#ifdef HWLOC_HAVE_PCI_FIND_CAP
|
||||
cap = pci_find_cap(pcidev, PCI_CAP_ID_EXP, PCI_CAP_NORMAL);
|
||||
offset = cap ? cap->addr : 0;
|
||||
#else
|
||||
offset = hwloc_pci_find_cap(config_space_cache, PCI_CAP_ID_EXP);
|
||||
#endif /* HWLOC_HAVE_PCI_FIND_CAP */
|
||||
|
||||
if (offset > 0 && offset + 20 /* size of PCI express block up to link status */ <= CONFIG_SPACE_CACHESIZE)
|
||||
hwloc_pci_find_linkspeed(config_space_cache, offset, &obj->attr->pcidev.linkspeed);
|
||||
|
||||
|
@ -73,6 +73,12 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
if (type == HWLOC_OBJ_MISC) {
|
||||
if (verbose)
|
||||
fprintf(stderr, "Synthetic string with disallow object type at '%s'\n", pos);
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
next_pos = strchr(pos, ':');
|
||||
if (!next_pos) {
|
||||
@ -134,9 +140,8 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
|
||||
case HWLOC_OBJ_CACHE: type = HWLOC_OBJ_SOCKET; break;
|
||||
case HWLOC_OBJ_SOCKET: type = HWLOC_OBJ_NODE; break;
|
||||
case HWLOC_OBJ_NODE:
|
||||
case HWLOC_OBJ_GROUP: type = HWLOC_OBJ_GROUP; break;
|
||||
case HWLOC_OBJ_MACHINE:
|
||||
case HWLOC_OBJ_MISC: type = HWLOC_OBJ_MISC; break;
|
||||
case HWLOC_OBJ_GROUP: type = HWLOC_OBJ_GROUP; break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
@ -242,8 +247,6 @@ hwloc__look_synthetic(struct hwloc_topology *topology,
|
||||
|
||||
/* pre-hooks */
|
||||
switch (type) {
|
||||
case HWLOC_OBJ_MISC:
|
||||
break;
|
||||
case HWLOC_OBJ_GROUP:
|
||||
break;
|
||||
case HWLOC_OBJ_SYSTEM:
|
||||
@ -265,6 +268,7 @@ hwloc__look_synthetic(struct hwloc_topology *topology,
|
||||
break;
|
||||
case HWLOC_OBJ_PU:
|
||||
break;
|
||||
case HWLOC_OBJ_MISC:
|
||||
case HWLOC_OBJ_TYPE_MAX:
|
||||
/* Should never happen */
|
||||
assert(0);
|
||||
@ -290,8 +294,6 @@ hwloc__look_synthetic(struct hwloc_topology *topology,
|
||||
|
||||
/* post-hooks */
|
||||
switch (type) {
|
||||
case HWLOC_OBJ_MISC:
|
||||
break;
|
||||
case HWLOC_OBJ_GROUP:
|
||||
obj->attr->group.depth = curlevel->depth;
|
||||
break;
|
||||
@ -330,6 +332,7 @@ hwloc__look_synthetic(struct hwloc_topology *topology,
|
||||
break;
|
||||
case HWLOC_OBJ_PU:
|
||||
break;
|
||||
case HWLOC_OBJ_MISC:
|
||||
case HWLOC_OBJ_TYPE_MAX:
|
||||
/* Should never happen */
|
||||
assert(0);
|
||||
|
@ -33,7 +33,7 @@ struct cacheinfo {
|
||||
unsigned linepart;
|
||||
int ways;
|
||||
unsigned sets;
|
||||
unsigned size;
|
||||
unsigned long size;
|
||||
};
|
||||
|
||||
struct procinfo {
|
||||
@ -68,7 +68,7 @@ static void fill_amd_cache(struct procinfo *infos, unsigned level, unsigned cpui
|
||||
{
|
||||
struct cacheinfo *cache;
|
||||
unsigned cachenum;
|
||||
unsigned size = 0;
|
||||
unsigned long size = 0;
|
||||
|
||||
if (level == 1)
|
||||
size = ((cpuid >> 24)) << 10;
|
||||
@ -104,7 +104,7 @@ static void fill_amd_cache(struct procinfo *infos, unsigned level, unsigned cpui
|
||||
cache->size = size;
|
||||
cache->sets = 0;
|
||||
|
||||
hwloc_debug("cache L%u t%u linesize %u ways %u size %uKB\n", cache->level, cache->nbthreads_sharing, cache->linesize, cache->ways, cache->size >> 10);
|
||||
hwloc_debug("cache L%u t%u linesize %u ways %u size %luKB\n", cache->level, cache->nbthreads_sharing, cache->linesize, cache->ways, cache->size >> 10);
|
||||
}
|
||||
|
||||
/* Fetch information from the processor itself thanks to cpuid and store it in
|
||||
@ -221,7 +221,7 @@ static void look_proc(struct procinfo *infos, unsigned highest_cpuid, unsigned h
|
||||
cache = infos->cache = malloc(infos->numcaches * sizeof(*infos->cache));
|
||||
|
||||
for (cachenum = 0; ; cachenum++) {
|
||||
unsigned linesize, linepart, ways, sets;
|
||||
unsigned long linesize, linepart, ways, sets;
|
||||
unsigned type;
|
||||
eax = 0x8000001d;
|
||||
ecx = cachenum;
|
||||
@ -249,7 +249,7 @@ static void look_proc(struct procinfo *infos, unsigned highest_cpuid, unsigned h
|
||||
cache->sets = sets = ecx + 1;
|
||||
cache->size = linesize * linepart * ways * sets;
|
||||
|
||||
hwloc_debug("cache %u type %u L%u t%u c%u linesize %u linepart %u ways %u sets %u, size %uKB\n", cachenum, cache->type, cache->level, cache->nbthreads_sharing, infos->max_nbcores, linesize, linepart, ways, sets, cache->size >> 10);
|
||||
hwloc_debug("cache %u type %u L%u t%u c%u linesize %lu linepart %lu ways %lu sets %lu, size %uKB\n", cachenum, cache->type, cache->level, cache->nbthreads_sharing, infos->max_nbcores, linesize, linepart, ways, sets, cache->size >> 10);
|
||||
|
||||
cache++;
|
||||
}
|
||||
@ -290,7 +290,7 @@ static void look_proc(struct procinfo *infos, unsigned highest_cpuid, unsigned h
|
||||
cache = infos->cache = malloc(infos->numcaches * sizeof(*infos->cache));
|
||||
|
||||
for (cachenum = 0; ; cachenum++) {
|
||||
unsigned linesize, linepart, ways, sets;
|
||||
unsigned long linesize, linepart, ways, sets;
|
||||
unsigned type;
|
||||
eax = 0x04;
|
||||
ecx = cachenum;
|
||||
@ -317,7 +317,7 @@ static void look_proc(struct procinfo *infos, unsigned highest_cpuid, unsigned h
|
||||
cache->sets = sets = ecx + 1;
|
||||
cache->size = linesize * linepart * ways * sets;
|
||||
|
||||
hwloc_debug("cache %u type %u L%u t%u c%u linesize %u linepart %u ways %u sets %u, size %uKB\n", cachenum, cache->type, cache->level, cache->nbthreads_sharing, infos->max_nbcores, linesize, linepart, ways, sets, cache->size >> 10);
|
||||
hwloc_debug("cache %u type %u L%u t%u c%u linesize %lu linepart %lu ways %lu sets %lu, size %uKB\n", cachenum, cache->type, cache->level, cache->nbthreads_sharing, infos->max_nbcores, linesize, linepart, ways, sets, cache->size >> 10);
|
||||
infos->max_nbthreads = infos->max_log_proc / infos->max_nbcores;
|
||||
hwloc_debug("thus %u threads\n", infos->max_nbthreads);
|
||||
infos->threadid = infos->logprocid % infos->max_nbthreads;
|
||||
@ -401,6 +401,7 @@ static void summarize(hwloc_topology_t topology, struct procinfo *infos, unsigne
|
||||
unsigned i, j, l, level, type;
|
||||
unsigned nbsockets = 0;
|
||||
int one = -1;
|
||||
unsigned next_group_depth = topology->next_group_depth;
|
||||
|
||||
for (i = 0; i < nbprocs; i++)
|
||||
if (infos[i].present) {
|
||||
@ -517,6 +518,8 @@ static void summarize(hwloc_topology_t topology, struct procinfo *infos, unsigne
|
||||
}
|
||||
node = hwloc_alloc_setup_object(HWLOC_OBJ_NODE, nodeid);
|
||||
node->cpuset = node_cpuset;
|
||||
node->nodeset = hwloc_bitmap_alloc();
|
||||
hwloc_bitmap_set(node->nodeset, nodeid);
|
||||
hwloc_debug_1arg_bitmap("os node %u has cpuset %s\n",
|
||||
nodeid, node_cpuset);
|
||||
hwloc_insert_object_by_cpuset(topology, node);
|
||||
@ -578,9 +581,12 @@ static void summarize(hwloc_topology_t topology, struct procinfo *infos, unsigne
|
||||
hwloc_bitmap_clr(unknowns_cpuset, j);
|
||||
}
|
||||
}
|
||||
unknown_obj = hwloc_alloc_setup_object(HWLOC_OBJ_MISC, unknownid);
|
||||
unknown_obj = hwloc_alloc_setup_object(HWLOC_OBJ_GROUP, unknownid);
|
||||
unknown_obj->cpuset = unknown_cpuset;
|
||||
unknown_obj->os_level = level;
|
||||
unknown_obj->attr->group.depth = topology->next_group_depth + level;
|
||||
if (next_group_depth <= topology->next_group_depth + level)
|
||||
next_group_depth = topology->next_group_depth + level + 1;
|
||||
hwloc_debug_2args_bitmap("os unknown%d %u has cpuset %s\n",
|
||||
level, unknownid, unknown_cpuset);
|
||||
hwloc_insert_object_by_cpuset(topology, unknown_obj);
|
||||
@ -712,6 +718,7 @@ static void summarize(hwloc_topology_t topology, struct procinfo *infos, unsigne
|
||||
}
|
||||
|
||||
hwloc_bitmap_free(complete_cpuset);
|
||||
topology->next_group_depth = next_group_depth;
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -77,9 +77,11 @@ void hwloc_report_os_error(const char *msg, int line)
|
||||
fprintf(stderr, "* %s\n", msg);
|
||||
fprintf(stderr, "* Error occurred in topology.c line %d\n", line);
|
||||
fprintf(stderr, "*\n");
|
||||
fprintf(stderr, "* Please report this error message to the hwloc user's mailing list,\n");
|
||||
fprintf(stderr, "* The following FAQ entry in a recent hwloc documentation may help:\n");
|
||||
fprintf(stderr, "* What should I do when hwloc reports \"operating system\" warnings?\n");
|
||||
fprintf(stderr, "* Otherwise please report this error message to the hwloc user's mailing list,\n");
|
||||
#ifdef HWLOC_LINUX_SYS
|
||||
fprintf(stderr, "* along with the output from the hwloc-gather-topology script.\n");
|
||||
fprintf(stderr, "* along with the output+tarball generated by the hwloc-gather-topology script.\n");
|
||||
#else
|
||||
fprintf(stderr, "* along with any relevant topology information from your platform.\n");
|
||||
#endif
|
||||
@ -1024,14 +1026,16 @@ hwloc__insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t obj,
|
||||
{
|
||||
struct hwloc_obj *result;
|
||||
/* Start at the top. */
|
||||
/* Add the cpuset to the top */
|
||||
hwloc_bitmap_or(topology->levels[0][0]->complete_cpuset, topology->levels[0][0]->complete_cpuset, obj->cpuset);
|
||||
if (obj->nodeset)
|
||||
hwloc_bitmap_or(topology->levels[0][0]->complete_nodeset, topology->levels[0][0]->complete_nodeset, obj->nodeset);
|
||||
result = hwloc___insert_object_by_cpuset(topology, topology->levels[0][0], obj, report_error);
|
||||
if (result != obj)
|
||||
if (result != obj) {
|
||||
/* either failed to insert, or got merged, free the original object */
|
||||
hwloc_free_unlinked_object(obj);
|
||||
} else {
|
||||
/* Add the cpuset to the top */
|
||||
hwloc_bitmap_or(topology->levels[0][0]->complete_cpuset, topology->levels[0][0]->complete_cpuset, obj->cpuset);
|
||||
if (obj->nodeset)
|
||||
hwloc_bitmap_or(topology->levels[0][0]->complete_nodeset, topology->levels[0][0]->complete_nodeset, obj->nodeset);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -1635,6 +1639,28 @@ unlink_and_free_single_object(hwloc_obj_t *pparent)
|
||||
hwloc_free_unlinked_object(parent);
|
||||
}
|
||||
|
||||
static void
|
||||
reorder_children(hwloc_obj_t parent)
|
||||
{
|
||||
/* move the children list on the side */
|
||||
hwloc_obj_t *prev, child, children = parent->first_child;
|
||||
parent->first_child = NULL;
|
||||
while (children) {
|
||||
/* dequeue child */
|
||||
child = children;
|
||||
children = child->next_sibling;
|
||||
/* find where to enqueue it */
|
||||
prev = &parent->first_child;
|
||||
while (*prev
|
||||
&& (!child->cpuset || !(*prev)->cpuset
|
||||
|| hwloc__object_cpusets_compare_first(child, *prev) > 0))
|
||||
prev = &((*prev)->next_sibling);
|
||||
/* enqueue */
|
||||
child->next_sibling = *prev;
|
||||
*prev = child;
|
||||
}
|
||||
}
|
||||
|
||||
/* Remove all ignored objects. */
|
||||
static int
|
||||
remove_ignored(hwloc_topology_t topology, hwloc_obj_t *pparent)
|
||||
@ -1656,25 +1682,8 @@ remove_ignored(hwloc_topology_t topology, hwloc_obj_t *pparent)
|
||||
dropped = 1;
|
||||
|
||||
} else if (dropped_children) {
|
||||
/* we keep this object but its children changed, reorder them by cpuset */
|
||||
|
||||
/* move the children list on the side */
|
||||
hwloc_obj_t *prev, children = parent->first_child;
|
||||
parent->first_child = NULL;
|
||||
while (children) {
|
||||
/* dequeue child */
|
||||
child = children;
|
||||
children = child->next_sibling;
|
||||
/* find where to enqueue it */
|
||||
prev = &parent->first_child;
|
||||
while (*prev
|
||||
&& (!child->cpuset || !(*prev)->cpuset
|
||||
|| hwloc__object_cpusets_compare_first(child, *prev) > 0))
|
||||
prev = &((*prev)->next_sibling);
|
||||
/* enqueue */
|
||||
child->next_sibling = *prev;
|
||||
*prev = child;
|
||||
}
|
||||
/* we keep this object but its children changed, reorder them by complete_cpuset */
|
||||
reorder_children(parent);
|
||||
}
|
||||
|
||||
return dropped;
|
||||
@ -1804,29 +1813,39 @@ can_merge_group(hwloc_topology_t topology, hwloc_obj_t obj)
|
||||
* Merge with the only child if either the parent or the child has a type to be
|
||||
* ignored while keeping structure
|
||||
*/
|
||||
static void
|
||||
static int
|
||||
merge_useless_child(hwloc_topology_t topology, hwloc_obj_t *pparent)
|
||||
{
|
||||
hwloc_obj_t parent = *pparent, child, *pchild, ios;
|
||||
int replacechild = 0, replaceparent = 0;
|
||||
int replacechild = 0, replaceparent = 0, droppedchildren = 0;
|
||||
|
||||
if (!parent->first_child)
|
||||
/* There are no child, nothing to merge. */
|
||||
return 0;
|
||||
|
||||
for_each_child_safe(child, parent, pchild)
|
||||
merge_useless_child(topology, pchild);
|
||||
droppedchildren += merge_useless_child(topology, pchild);
|
||||
|
||||
if (droppedchildren)
|
||||
reorder_children(parent);
|
||||
|
||||
child = parent->first_child;
|
||||
if (!child)
|
||||
/* There are no child, nothing to merge. */
|
||||
return;
|
||||
/* we don't merge if there are multiple "important" children.
|
||||
* non-important ones are at the end of the list.
|
||||
* look at the second child to find out.
|
||||
*/
|
||||
if (child->next_sibling
|
||||
/* I/O objects may be ignored when trying to merge */
|
||||
&& !hwloc_obj_type_is_io(child->next_sibling->type)
|
||||
/* Misc objects without cpuset may be ignored as well */
|
||||
&& !(child->next_sibling->type == HWLOC_OBJ_MISC && !child->next_sibling->cpuset))
|
||||
/* There are several children that prevent from merging */
|
||||
return 0;
|
||||
|
||||
if (child->next_sibling && !hwloc_obj_type_is_io(child->next_sibling->type))
|
||||
/* There are several non-I/O children */
|
||||
return;
|
||||
|
||||
/* There is one non-I/O child and possible some I/O children.
|
||||
* I/O children shouldn't prevent merging because they can be attached
|
||||
* to anything with the same locality.
|
||||
/* There is one important child, and some children that may be ignored
|
||||
* during merging because they can be attached to anything with the same locality.
|
||||
* Move them to the side during merging, and append them back later.
|
||||
* This is easy because I/O children are always last in the list.
|
||||
* This is easy because children with no cpuset are always last in the list.
|
||||
*/
|
||||
ios = child->next_sibling;
|
||||
child->next_sibling = NULL;
|
||||
@ -1873,12 +1892,14 @@ merge_useless_child(hwloc_topology_t topology, hwloc_obj_t *pparent)
|
||||
}
|
||||
|
||||
if (ios) {
|
||||
/* append I/O children to the list of children of the remaining object */
|
||||
/* append the remaining list of children to the remaining object */
|
||||
pchild = &((*pparent)->first_child);
|
||||
while (*pchild)
|
||||
pchild = &((*pchild)->next_sibling);
|
||||
*pchild = ios;
|
||||
}
|
||||
|
||||
return replaceparent ? 1 : 0;
|
||||
}
|
||||
|
||||
static void
|
||||
@ -1963,6 +1984,8 @@ hwloc_propagate_bridge_depth(hwloc_topology_t topology, hwloc_obj_t root, unsign
|
||||
if (child->type == HWLOC_OBJ_BRIDGE) {
|
||||
child->attr->bridge.depth = depth;
|
||||
hwloc_propagate_bridge_depth(topology, child, depth+1);
|
||||
} else if (!hwloc_obj_type_is_io(child->type)) {
|
||||
hwloc_propagate_bridge_depth(topology, child, 0);
|
||||
}
|
||||
child = child->next_sibling;
|
||||
}
|
||||
@ -3035,7 +3058,17 @@ hwloc__check_children(struct hwloc_obj *parent)
|
||||
assert(hwloc_bitmap_isincluded(parent->children[j]->cpuset, remaining_parent_set));
|
||||
hwloc_bitmap_andnot(remaining_parent_set, remaining_parent_set, parent->children[j]->cpuset);
|
||||
}
|
||||
assert(hwloc_bitmap_iszero(remaining_parent_set));
|
||||
if (parent->type == HWLOC_OBJ_PU) {
|
||||
/* if parent is a PU, its os_index bit may remain.
|
||||
* it may be in a Misc child inserted by cpuset, or could be in no child */
|
||||
if (hwloc_bitmap_weight(remaining_parent_set) == 1)
|
||||
assert((unsigned) hwloc_bitmap_first(remaining_parent_set) == parent->os_index);
|
||||
else
|
||||
assert(hwloc_bitmap_iszero(remaining_parent_set));
|
||||
} else {
|
||||
/* nothing remains */
|
||||
assert(hwloc_bitmap_iszero(remaining_parent_set));
|
||||
}
|
||||
hwloc_bitmap_free(remaining_parent_set);
|
||||
}
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user