diff --git a/README b/README index d4d8fcfb3a..8d3e67d82d 100644 --- a/README +++ b/README @@ -584,6 +584,24 @@ for a full list); a summary of the more commonly used ones follows: look in /lib and /lib64, which covers most cases. This option is only needed for special configurations. +--with-hwloc=<location> + Build hwloc support. If <location> is "internal", Open MPI's + internal copy of hwloc is used. If <location> is "external", Open + MPI will search in default locations for an hwloc installation. + Finally, if <location> is a directory, that directory will be + searched for a valid hwloc installation, just like other + --with-FOO=<directory> configure options. + + hwloc is a support library that provides processor and memory + affinity information for NUMA platforms. + +--with-hwloc-libdir=<directory> + + Look in directory for the hwloc libraries. This option is only + usable when building Open MPI against an external hwloc + installation. Just like other --with-FOO-libdir configure options, + this option is only needed for special configurations. + --with-knem= Specify the directory where the knem libraries and header files are located. 
This option is generally only necessary if the kenm headers diff --git a/configure.ac b/configure.ac index 7f18366095..8592088606 100644 --- a/configure.ac +++ b/configure.ac @@ -30,6 +30,8 @@ # Load project list and names m4_include([config/project_list.m4]) +m4_pattern_allow([m4_ifval]) + # Init autoconf # We don't have the version number to put in here yet, and we can't @@ -44,6 +46,27 @@ AC_PREREQ(2.60) AC_CONFIG_AUX_DIR(config) AC_CONFIG_MACRO_DIR(config) +# +# Start it up +# + +OMPI_CONFIGURE_SETUP +ompi_show_title "Configuring project_name_long" + +# +# Setup some things that must be done before AM-INIT-AUTOMAKE +# + +ompi_show_subtitle "Startup tests" +AC_CANONICAL_HOST +AC_CANONICAL_TARGET +AC_DEFINE_UNQUOTED(OPAL_ARCH, "$target", [OMPI architecture string]) +AS_IF([test "$host" != "$target"], + [AC_MSG_WARN([Cross-compile detected]) + AC_MSG_WARN([Cross-compiling is only partially supported]) + AC_MSG_WARN([Proceed at your own risk!])]) +AC_USE_SYSTEM_EXTENSIONS + # Get our platform support file. This has to be done very, very early # because it twiddles random bits of autoconf OMPI_LOAD_PLATFORM @@ -53,8 +76,6 @@ OMPI_LOAD_PLATFORM # AM_INIT_AUTOMAKE([foreign dist-bzip2 subdir-objects no-define 1.10 tar-ustar]) -m4_pattern_allow([m4_ifval]) - # If Automake supports silent rules, enable them. 
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) @@ -65,13 +86,6 @@ AC_SUBST([CONFIGURE_DEPENDENCIES], ['$(top_srcdir)/VERSION']) AM_CONDITIONAL([PROJECT_OMPI], m4_ifdef([project_ompi], [true], [false])) AM_CONDITIONAL([PROJECT_ORTE], m4_ifdef([project_orte], [true], [false])) -# -# Start it up -# - -OMPI_CONFIGURE_SETUP -ompi_show_title "Configuring project_name_long" - ompi_show_subtitle "Checking versions" # Get the version of OMPI that we are installing diff --git a/opal/config/opal_functions.m4 b/opal/config/opal_functions.m4 index 717d667109..db030b5175 100644 --- a/opal/config/opal_functions.m4 +++ b/opal/config/opal_functions.m4 @@ -109,13 +109,6 @@ OMPI_CONFIGURE_DATE="`date`" CLEANFILES="*~ .\#*" AC_SUBST(CLEANFILES) -# -# This is useful later (ompi_info, and therefore mpiexec) -# - -AC_CANONICAL_HOST -AC_DEFINE_UNQUOTED(OPAL_ARCH, "$host", [OMPI architecture string]) - # # See if we can find an old installation of OMPI to overwrite # diff --git a/opal/mca/paffinity/hwloc/.ompi_ignore b/opal/mca/paffinity/hwloc/.ompi_ignore deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/opal/mca/paffinity/hwloc/.ompi_unignore b/opal/mca/paffinity/hwloc/.ompi_unignore deleted file mode 100644 index 814285c7e5..0000000000 --- a/opal/mca/paffinity/hwloc/.ompi_unignore +++ /dev/null @@ -1 +0,0 @@ -jsquyres diff --git a/opal/mca/paffinity/hwloc/Makefile.am b/opal/mca/paffinity/hwloc/Makefile.am new file mode 100644 index 0000000000..a8a385914a --- /dev/null +++ b/opal/mca/paffinity/hwloc/Makefile.am @@ -0,0 +1,72 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. 
+# Copyright (c) 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# Copyright (c) 2010      Cisco Systems, Inc.  All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+SUBDIRS =
+if OPAL_PAFFINITY_HWLOC_INTERNAL
+SUBDIRS += hwloc
+endif
+
+# To find hwloc_bottom.h.  The EMBEDDED flags apply when OMPI's
+# internal hwloc is used; paffinity_hwloc_CPPFLAGS applies when we are
+# using an external install.
+AM_CPPFLAGS = $(HWLOC_EMBEDDED_CPPFLAGS) $(paffinity_hwloc_CPPFLAGS)
+# To get the cflags for the stuff in hwloc.h
+AM_CFLAGS = $(HWLOC_EMBEDDED_CFLAGS) $(paffinity_hwloc_CFLAGS)
+
+sources = \
+        paffinity_hwloc.h \
+        paffinity_hwloc_component.c \
+        paffinity_hwloc_module.c
+
+# Make the output library in this directory, and name it either
+# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
+# (for static builds).
+
+if OMPI_BUILD_paffinity_hwloc_DSO
+component_noinst =
+component_install = mca_paffinity_hwloc.la
+else
+component_noinst = libmca_paffinity_hwloc.la
+component_install =
+endif
+
+mcacomponentdir = $(pkglibdir)
+mcacomponent_LTLIBRARIES = $(component_install)
+mca_paffinity_hwloc_la_SOURCES = $(sources)
+mca_paffinity_hwloc_la_LDFLAGS = -module -avoid-version
+mca_paffinity_hwloc_la_LIBADD =
+if OPAL_PAFFINITY_HWLOC_INTERNAL
+mca_paffinity_hwloc_la_LIBADD += \
+        $(top_ompi_builddir)/opal/mca/paffinity/hwloc/hwloc/src/libhwloc_embedded.la
+else
+mca_paffinity_hwloc_la_LDFLAGS += $(paffinity_hwloc_LDFLAGS)
+mca_paffinity_hwloc_la_LIBADD += $(paffinity_hwloc_LIBS)
+endif
+
+noinst_LTLIBRARIES = $(component_noinst)
+libmca_paffinity_hwloc_la_SOURCES =$(sources)
+libmca_paffinity_hwloc_la_LDFLAGS = -module -avoid-version
+libmca_paffinity_hwloc_la_LIBADD =
+if OPAL_PAFFINITY_HWLOC_INTERNAL
+libmca_paffinity_hwloc_la_LIBADD += \
+        $(top_ompi_builddir)/opal/mca/paffinity/hwloc/hwloc/src/libhwloc_embedded.la
+else
+libmca_paffinity_hwloc_la_LDFLAGS += $(paffinity_hwloc_LDFLAGS)
+libmca_paffinity_hwloc_la_LIBADD += $(paffinity_hwloc_LIBS)
+endif
diff --git a/opal/mca/paffinity/hwloc/configure.m4 b/opal/mca/paffinity/hwloc/configure.m4
new file mode 100644
index 0000000000..e0ddbc651a
--- /dev/null
+++ b/opal/mca/paffinity/hwloc/configure.m4
@@ -0,0 +1,138 @@
+# -*- shell-script -*-
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation.  All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation.  All rights
+#                         reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart.  All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# Copyright (c) 2007-2010 Cisco Systems, Inc.  All rights reserved.
+#
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# Include hwloc m4 files
+m4_include(opal/mca/paffinity/hwloc/hwloc/config/hwloc.m4)
+m4_include(opal/mca/paffinity/hwloc/hwloc/config/hwloc_pkg.m4)
+m4_include(opal/mca/paffinity/hwloc/hwloc/config/hwloc_check_attributes.m4)
+m4_include(opal/mca/paffinity/hwloc/hwloc/config/hwloc_check_visibility.m4)
+
+# MCA_paffinity_hwloc_POST_CONFIG()
+# ---------------------------------
+AC_DEFUN([MCA_paffinity_hwloc_POST_CONFIG],[
+    HWLOC_DO_AM_CONDITIONALS
+])dnl
+
+
+# MCA_paffinity_hwloc_CONFIG([action-if-found], [action-if-not-found])
+# --------------------------------------------------------------------
+AC_DEFUN([MCA_paffinity_hwloc_CONFIG],[
+    OMPI_VAR_SCOPE_PUSH([HWLOC_VERSION opal_check_hwloc_happy paffinity_hwloc_location opal_check_hwloc_dir opal_check_hwloc_libdir opal_check_hwloc_save_CPPFLAGS opal_check_hwloc_save_LDFLAGS opal_check_hwloc_save_LIBS])
+
+    # Allowing building using either the internal copy of
+    # hwloc, or an external version.
+    AC_ARG_WITH([hwloc],
+                [AC_HELP_STRING([--with-hwloc(=DIR)],
+                                [Build hwloc support. DIR can take one of three values: "internal", "external", or a valid directory name. "internal" (or no DIR value) forces Open MPI to use its internal copy of hwloc. "external" forces Open MPI to use an external installation of hwloc. Supplying a valid directory name also forces Open MPI to use an external installation of hwloc, and adds DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries.])])
+
+    # Default to building the internal copy.  After this,
+    # paffinity_hwloc_location is guaranteed to be set to one of:
+    # "internal", "external", a directory name (i.e., whatever the
+    # user supplied), or "no".
+    paffinity_hwloc_location=$with_hwloc
+    AS_IF([test -z "$paffinity_hwloc_location" -o "$paffinity_hwloc_location" = "yes"],
+          [paffinity_hwloc_location=internal])
+
+    # Check the DIR value if it's a directory
+    case $paffinity_hwloc_location in
+    no|internal|external) ;;
+    *) OMPI_CHECK_WITHDIR([hwloc], [$paffinity_hwloc_location], [include/hwloc.h]) ;;
+    esac
+
+    AC_ARG_WITH([hwloc-libdir],
+                [AC_HELP_STRING([--with-hwloc-libdir=DIR],
+                                [Search for hwloc libraries in DIR. Should only be used if an external copy of hwloc is being used.])])
+    # --with-hwloc-libdir only makes sense with an external hwloc, so
+    # test the resolved location (not the libdir value itself).
+    AS_IF([test "$paffinity_hwloc_location" = "internal" -a "$with_hwloc_libdir" != "" -a "$with_hwloc_libdir" != "no"],
+          [AC_MSG_WARN([Both --with-hwloc=internal and --with-hwloc-libdir=DIR])
+           AC_MSG_WARN([were specified, which does not make sense.])
+           AC_MSG_ERROR([Cannot continue])])
+    OMPI_CHECK_WITHDIR([hwloc-libdir], [$with_hwloc_libdir], [libhwloc.*])
+
+    opal_check_hwloc_save_CPPFLAGS=$CPPFLAGS
+    opal_check_hwloc_save_LDFLAGS=$LDFLAGS
+    opal_check_hwloc_save_LIBS=$LIBS
+
+    AS_IF([test "$paffinity_hwloc_location" != "no"],
+          [AC_MSG_CHECKING([where to look for hwloc])])
+
+    # If we're building internal, run the hwloc configuration.
+    AS_IF([test "$paffinity_hwloc_location" = "internal"],
+          [# Main hwloc configuration
+           AC_MSG_RESULT([internal copy])
+           HWLOC_SET_SYMBOL_PREFIX([opal_paffinity_])
+           HWLOC_SETUP_CORE([opal/mca/paffinity/hwloc/hwloc],
+                            [AC_MSG_CHECKING([whether hwloc configure succeeded])
+                             AC_MSG_RESULT([yes])
+                             HWLOC_VERSION="internal v`$srcdir/opal/mca/paffinity/hwloc/hwloc/config/hwloc_get_version.sh $srcdir/opal/mca/paffinity/hwloc/hwloc/VERSION`"
+                             opal_check_hwloc_happy=yes],
+                            [AC_MSG_CHECKING([whether hwloc configure succeeded])
+                             AC_MSG_RESULT([no])
+                             opal_check_hwloc_happy=no])
+          ])
+
+    # If we are not building internal, then run all the normal checks
+    AS_IF([test "$paffinity_hwloc_location" != "internal" -a "$paffinity_hwloc_location" != "no"],
+          [# The keyword external is not a path: only a real directory
+           # name may be handed to the package check as a search prefix.
+           AS_IF([test "$paffinity_hwloc_location" != "external"],
+                 [opal_check_hwloc_dir=$paffinity_hwloc_location
+                  AC_MSG_RESULT([external install ($paffinity_hwloc_location)])],
+                 [AC_MSG_RESULT([external install (default search paths)])])
+           AS_IF([test ! -z "$with_hwloc_libdir" -a "$with_hwloc_libdir" != "yes"],
+                 [opal_check_hwloc_libdir="$with_hwloc_libdir"])
+
+           HWLOC_VERSION=external
+           OMPI_CHECK_PACKAGE([paffinity_hwloc],
+                              [hwloc.h],
+                              [hwloc],
+                              [hwloc_topology_init],
+                              [],
+                              [$opal_check_hwloc_dir],
+                              [$opal_check_hwloc_libdir],
+                              [opal_check_hwloc_happy=yes],
+                              [opal_check_hwloc_happy=no])
+          ])
+
+    CPPFLAGS=$opal_check_hwloc_save_CPPFLAGS
+    LDFLAGS=$opal_check_hwloc_save_LDFLAGS
+    LIBS=$opal_check_hwloc_save_LIBS
+
+    AC_SUBST([paffinity_hwloc_CFLAGS])
+    AC_SUBST([paffinity_hwloc_CPPFLAGS])
+    AC_SUBST([paffinity_hwloc_LDFLAGS])
+    AC_SUBST([paffinity_hwloc_LIBS])
+
+    # Done!
+    AM_CONDITIONAL([OPAL_PAFFINITY_HWLOC_INTERNAL],
+                   [test "$paffinity_hwloc_location" = "internal"])
+    AS_IF([test "$opal_check_hwloc_happy" = "yes"],
+          [AC_DEFINE_UNQUOTED([PAFFINITY_HWLOC_HWLOC_VERSION],
+                              ["$HWLOC_VERSION"],
+                              [Version of hwloc])
+           $1],
+          [AS_IF([test ! -z "$with_hwloc" -a "$with_hwloc" != "no"],
+                 [AC_MSG_WARN([hwloc support requested (via --with-hwloc) but not found.])
+                  AC_MSG_ERROR([Cannot continue.])])
+           $2])
+    OMPI_VAR_SCOPE_POP
+])dnl
diff --git a/opal/mca/paffinity/hwloc/configure.params b/opal/mca/paffinity/hwloc/configure.params
new file mode 100644
index 0000000000..b4a6df72a6
--- /dev/null
+++ b/opal/mca/paffinity/hwloc/configure.params
@@ -0,0 +1,28 @@
+# -*- shell-script -*-
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation.  All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation.  All rights
+#                         reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart.  All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# Copyright (c) 2007      Los Alamos National Security, LLC.  All rights
+#                         reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+PARAM_CONFIG_FILES="Makefile"
+
+#
+# Set the config priority so that, if we can build,
+# only this component will build
+
+PARAM_CONFIG_PRIORITY=75
diff --git a/opal/mca/paffinity/hwloc/paffinity_hwloc.h b/opal/mca/paffinity/hwloc/paffinity_hwloc.h
new file mode 100644
index 0000000000..16d00e9939
--- /dev/null
+++ b/opal/mca/paffinity/hwloc/paffinity_hwloc.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2006-2010 Cisco Systems, Inc.  All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#ifndef MCA_PAFFINITY_HWLOC_EXPORT_H
+#define MCA_PAFFINITY_HWLOC_EXPORT_H
+
+#include "opal_config.h"
+
+#include "opal/mca/mca.h"
+#include "opal/mca/paffinity/paffinity.h"
+#include "hwloc.h"
+
+
+BEGIN_C_DECLS
+
+typedef struct {
+    /* Base paffinity component */
+    opal_paffinity_base_component_2_0_0_t super;
+
+    /* This component's data */
+    hwloc_topology_t topology;    /* topology set up by component open */
+    bool topology_need_destroy;   /* true while "topology" must be destroyed */
+    int cpuset_max_size;          /* number of CPUs an hwloc_cpuset_t can hold */
+} opal_paffinity_hwloc_component_t;
+
+/**
+ * Globally exported variable
+ */
+OPAL_DECLSPEC extern opal_paffinity_hwloc_component_t
+    mca_paffinity_hwloc_component;
+
+/**
+ * paffinity query API function
+ *
+ * Query function for paffinity components.  Simply returns a priority
+ * to rank it against other available paffinity components (presumably,
+ * only one component will be available per platform, but it's
+ * possible that there could be more than one available).
+ */
+int opal_paffinity_hwloc_component_query(mca_base_module_t **module,
+                                         int *priority);
+
+END_C_DECLS
+
+#endif /* MCA_PAFFINITY_HWLOC_EXPORT_H */
diff --git a/opal/mca/paffinity/hwloc/paffinity_hwloc_component.c b/opal/mca/paffinity/hwloc/paffinity_hwloc_component.c
new file mode 100644
index 0000000000..ae0fa5f59d
--- /dev/null
+++ b/opal/mca/paffinity/hwloc/paffinity_hwloc_component.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2007-2010 Cisco Systems, Inc.  All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "opal_config.h"
+
+#include "opal/constants.h"
+#include "opal/mca/base/mca_base_param.h"
+
+#include "opal/mca/paffinity/paffinity.h"
+#include "paffinity_hwloc.h"
+
+/*
+ * Public string showing the paffinity ompi_hwloc component version number
+ */
+const char *opal_paffinity_hwloc_component_version_string =
+    "OPAL hwloc paffinity MCA component version " OPAL_VERSION;
+
+/*
+ * Local function
+ */
+static int hwloc_open(void);
+static int hwloc_close(void);
+static int hwloc_register(void);
+
+/*
+ * Instantiate the public struct with all of our public information
+ * and pointers to our public functions in it
+ */
+
+opal_paffinity_hwloc_component_t mca_paffinity_hwloc_component = {
+    {
+        /* First, the mca_component_t struct containing meta information
+           about the component itself */
+        {
+            OPAL_PAFFINITY_BASE_VERSION_2_0_0,
+
+            /* Component name and version */
+            "hwloc",
+            OPAL_MAJOR_VERSION,
+            OPAL_MINOR_VERSION,
+            OPAL_RELEASE_VERSION,
+
+            /* Component open and close functions */
+            hwloc_open,
+            hwloc_close,
+            opal_paffinity_hwloc_component_query,
+            hwloc_register,
+        },
+        {
+            /* The component is checkpoint ready */
+            MCA_BASE_METADATA_PARAM_CHECKPOINT
+        }
+    },
+    /* NULL fill the rest of the component data */
+};
+
+
+/*
+ * Register this component's MCA parameters: its selection priority
+ * and a string reporting the hwloc version in use.
+ */
+static int hwloc_register(void)
+{
+    mca_base_param_reg_int(&mca_paffinity_hwloc_component.super.base_version,
+                           "priority",
+                           "Priority of the hwloc paffinity component",
+                           false, false, 40, NULL);
+    mca_base_param_reg_string(&mca_paffinity_hwloc_component.super.base_version,
+                              "hwloc_version",
+                              "Version of HWLOC that is embedded in Open MPI",
+                              false, true, PAFFINITY_HWLOC_HWLOC_VERSION, NULL);
+
+    return OPAL_SUCCESS;
+}
+
+
+/*
+ * Component open: set up the hwloc topology and record how many CPUs
+ * an hwloc_cpuset_t can represent.
+ */
+static int hwloc_open(void)
+{
+    hwloc_cpuset_t set;
+
+    /* Initialize hwloc.  Init and load are checked separately: a
+       topology that was initialized but failed to load must still
+       be destroyed, or it is leaked. */
+    if (0 != hwloc_topology_init(&(mca_paffinity_hwloc_component.topology))) {
+        return OPAL_ERR_NOT_AVAILABLE;
+    }
+    if (0 != hwloc_topology_load(mca_paffinity_hwloc_component.topology)) {
+        hwloc_topology_destroy(mca_paffinity_hwloc_component.topology);
+        return OPAL_ERR_NOT_AVAILABLE;
+    }
+    mca_paffinity_hwloc_component.topology_need_destroy = true;
+
+    /* Find out how many cpu's an hwloc_cpuset_t can hold */
+    set = hwloc_cpuset_alloc();
+    if (NULL == set) {
+        /* Undo the topology setup rather than rely on the caller */
+        hwloc_close();
+        return OPAL_ERR_OUT_OF_RESOURCE;
+    }
+    hwloc_cpuset_fill(set);
+    mca_paffinity_hwloc_component.cpuset_max_size = hwloc_cpuset_weight(set);
+    hwloc_cpuset_free(set);
+
+    return OPAL_SUCCESS;
+}
+
+/*
+ * Component close: release the topology if hwloc_open() set it up.
+ */
+static int hwloc_close(void)
+{
+    /* If we set up hwloc, tear it down */
+    if (mca_paffinity_hwloc_component.topology_need_destroy) {
+        hwloc_topology_destroy(mca_paffinity_hwloc_component.topology);
+        mca_paffinity_hwloc_component.topology_need_destroy = false;
+    }
+
+    return OPAL_SUCCESS;
+}
diff --git a/opal/mca/paffinity/hwloc/paffinity_hwloc_module.c b/opal/mca/paffinity/hwloc/paffinity_hwloc_module.c
new file mode 100644
index 0000000000..44aba09f48
--- /dev/null
+++ b/opal/mca/paffinity/hwloc/paffinity_hwloc_module.c
@@ -0,0 +1,516 @@
+/*
+ * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2006-2010 Cisco Systems, Inc.  All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "opal_config.h"
+
+/* This component will only be compiled on Hwloc, where we are
+   guaranteed to have and friends */
+#include
+
+#include
+#include
+#include
+#include
+
+#include "opal/constants.h"
+#include "opal/mca/base/mca_base_param.h"
+#include "opal/mca/paffinity/paffinity.h"
+#include "opal/mca/paffinity/base/base.h"
+#include "paffinity_hwloc.h"
+
+/*
+ * Local functions
+ */
+static int module_init(void);
+static int module_set(opal_paffinity_base_cpu_set_t cpumask);
+static int module_get(opal_paffinity_base_cpu_set_t *cpumask);
+static int module_map_to_processor_id(int socket, int core, int *processor_id);
+static int module_map_to_socket_core(int processor_id, int *socket, int *core);
+static int module_get_processor_info(int *num_processors);
+static int module_get_socket_info(int *num_sockets);
+static int module_get_core_info(int socket, int *num_cores);
+static int module_get_physical_processor_id(int logical_processor_id);
+static int module_get_physical_socket_id(int logical_socket_id);
+static int module_get_physical_core_id(int physical_socket_id, int logical_core_id);
+
+/*
+ * Hwloc paffinity module
+ */
+static const opal_paffinity_base_module_1_1_0_t loc_module = {
+    /* Initialization function */
+    module_init,
+
+    /* Module function pointers */
+    module_set,
+    module_get,
+    module_map_to_processor_id,
+    module_map_to_socket_core,
+    module_get_processor_info,
+    module_get_socket_info,
+    module_get_core_info,
+    module_get_physical_processor_id,
+    module_get_physical_socket_id,
+    module_get_physical_core_id,
+    NULL
+};
+
+/*
+ * Depth-first search of the subtree rooted at "root" for the first
+ * object of the given type whose OS (physical) index is os_index.
+ * Returns NULL if the subtree contains no such object.
+ */
+static hwloc_obj_t dfs_find_os_index(hwloc_obj_t root, hwloc_obj_type_t type,
+                                     unsigned os_index)
+{
+    unsigned i;
+    hwloc_obj_t ret;
+
+    if (root->type == type && root->os_index == os_index) {
+        return root;
+    }
+    for (i = 0; i < root->arity; ++i) {
+        ret = dfs_find_os_index(root->children[i], type, os_index);
+        if (NULL != ret) {
+            return ret;
+        }
+    }
+
+    return NULL;
+}
+
+/*
+ * Depth-first search for the n-th (0-based, in traversal order)
+ * object of the given type under "root".  *current is the running
+ * count of matches seen so far; callers initialize it to 0.
+ */
+static hwloc_obj_t dfs_find_nth_item(hwloc_obj_t root,
+                                     hwloc_obj_type_t type,
+                                     unsigned *current,
+                                     unsigned n)
+{
+    unsigned i;
+    hwloc_obj_t ret;
+
+    if (root->type == type) {
+        if (*current == n) {
+            return root;
+        }
+        ++(*current);
+    }
+    for (i = 0; i < root->arity; ++i) {
+        ret = dfs_find_nth_item(root->children[i], type, current, n);
+        if (NULL != ret) {
+            return ret;
+        }
+    }
+
+    return NULL;
+}
+
+/*
+ * Count the objects of the given type in the subtree rooted at
+ * "root" (the root itself included).
+ */
+static int dfs_count_type(hwloc_obj_t root, hwloc_obj_type_t type)
+{
+    unsigned i;
+    int count = 0;
+
+    if (root->type == type) {
+        ++count;
+    }
+    for (i = 0; i < root->arity; ++i) {
+        count += dfs_count_type(root->children[i], type);
+    }
+
+    return count;
+}
+
+
+/*
+ * Find the first PU of a core that is both online and allowed, and
+ * store its OS index in *pu.  hwloc understands hardware threads (so
+ * a core may hold several PUs) but paffinity does not, hence "first".
+ * Returns OPAL_SUCCESS, OPAL_ERR_OUT_OF_RESOURCE if the scratch
+ * cpuset cannot be allocated, or OPAL_ERR_NOT_FOUND if the core has
+ * no usable PU.
+ */
+static int first_usable_pu(hwloc_obj_t core, int *pu)
+{
+    int i;
+    hwloc_cpuset_t good;
+
+    good = hwloc_cpuset_alloc();
+    if (NULL == good) {
+        return OPAL_ERR_OUT_OF_RESOURCE;
+    }
+    hwloc_cpuset_and(good, core->online_cpuset, core->allowed_cpuset);
+
+    for (i = 0; i < mca_paffinity_hwloc_component.cpuset_max_size; ++i) {
+        if (hwloc_cpuset_isset(good, i)) {
+            *pu = i;
+            hwloc_cpuset_free(good);
+            return OPAL_SUCCESS;
+        }
+    }
+
+    /* Huh.  This shouldn't happen. */
+    hwloc_cpuset_free(good);
+    return OPAL_ERR_NOT_FOUND;
+}
+
+
+/*
+ * Component query: report the MCA "priority" parameter and hand back
+ * this component's (only) module.
+ */
+int opal_paffinity_hwloc_component_query(mca_base_module_t **module,
+                                         int *priority)
+{
+    int param;
+
+    param = mca_base_param_find("paffinity", "hwloc", "priority");
+    mca_base_param_lookup_int(param, priority);
+
+    *module = (mca_base_module_t *)&loc_module;
+
+    return OPAL_SUCCESS;
+}
+
+
+/*
+ * Module init: the topology is set up at component open time.
+ */
+static int module_init(void)
+{
+    /* Nothing to do */
+
+    return OPAL_SUCCESS;
+}
+
+
+/*
+ * Bind the current process to the set of processors in "mask".
+ */
+static int module_set(opal_paffinity_base_cpu_set_t mask)
+{
+    int i, ret = OPAL_SUCCESS;
+    hwloc_cpuset_t set;
+    hwloc_topology_t *t = &mca_paffinity_hwloc_component.topology;
+
+    set = hwloc_cpuset_alloc();
+    if (NULL == set) {
+        return OPAL_ERR_OUT_OF_RESOURCE;
+    }
+    hwloc_cpuset_zero(set);
+
+    /* Walk every bit of the paffinity mask (same bound as module_get)
+       and accumulate with hwloc_cpuset_set(); hwloc_cpuset_cpu() would
+       empty the set each time and leave only the last CPU. */
+    for (i = 0; ((unsigned int) i) < 8 * sizeof(mask); ++i) {
+        if (OPAL_PAFFINITY_CPU_ISSET(i, mask) &&
+            i < mca_paffinity_hwloc_component.cpuset_max_size) {
+            hwloc_cpuset_set(set, i);
+        }
+    }
+
+    if (0 != hwloc_set_cpubind(*t, set, 0)) {
+        ret = OPAL_ERR_IN_ERRNO;
+    }
+    hwloc_cpuset_free(set);
+
+    return ret;
+}
+
+
+/*
+ * Report the current binding of this process in "mask".
+ * Returns OPAL_ERR_BAD_PARAM if mask is NULL.
+ */
+static int module_get(opal_paffinity_base_cpu_set_t *mask)
+{
+    int i, ret = OPAL_SUCCESS;
+    hwloc_cpuset_t set;
+    hwloc_topology_t *t = &mca_paffinity_hwloc_component.topology;
+
+    if (NULL == mask) {
+        return OPAL_ERR_BAD_PARAM;
+    }
+
+    set = hwloc_cpuset_alloc();
+    if (NULL == set) {
+        return OPAL_ERR_OUT_OF_RESOURCE;
+    }
+    if (0 != hwloc_get_cpubind(*t, set, 0)) {
+        ret = OPAL_ERR_IN_ERRNO;
+    } else {
+        OPAL_PAFFINITY_CPU_ZERO(*mask);
+        for (i = 0; ((unsigned int) i) < 8 * sizeof(*mask); i++) {
+            if (i < mca_paffinity_hwloc_component.cpuset_max_size &&
+                hwloc_cpuset_isset(set, i)) {
+                OPAL_PAFFINITY_CPU_SET(i, *mask);
+            }
+        }
+    }
+    hwloc_cpuset_free(set);
+
+    return ret;
+}
+
+/*
+ * Returns mapping of PHYSICAL socket:core -> PHYSICAL processor id.
+ *
+ * Since paffinity currently does not understand hardware threads,
+ * return the processor ID of the first hardware thread in the target
+ * core.
+ */
+static int module_map_to_processor_id(int socket, int core, int *processor_id)
+{
+    hwloc_topology_t *t = &mca_paffinity_hwloc_component.topology;
+    hwloc_obj_t sock, obj;
+
+    /* Traverse all sockets, looking for the right physical ID number.
+       Once we find it, traverse all that socket's cores looking for
+       the right physical ID number.  Once we find it, return the
+       physical processor ID number. */
+    for (sock = hwloc_get_next_obj_by_type(*t, HWLOC_OBJ_SOCKET, NULL);
+         NULL != sock;
+         sock = hwloc_get_next_obj_by_type(*t, HWLOC_OBJ_SOCKET, sock)) {
+        if (sock->os_index != (unsigned int) socket) {
+            continue;
+        }
+
+        /* Ok, we found the right socket.  Browse its descendants
+           looking for the core with the right os_index (don't
+           assume all cores are at the same level). */
+        obj = dfs_find_os_index(sock, HWLOC_OBJ_CORE, core);
+        if (NULL == obj) {
+            /* Right socket but no such core: we didn't find it. */
+            return OPAL_ERR_NOT_FOUND;
+        }
+
+        /* Ok, we found the right core; report its first usable PU. */
+        return first_usable_pu(obj, processor_id);
+    }
+
+    /* If we didn't even find the right socket, we didn't find it. */
+    return OPAL_ERR_NOT_FOUND;
+}
+
+/*
+ * Provides mapping of PHYSICAL processor id -> PHYSICAL socket:core.
+ */
+static int module_map_to_socket_core(int processor_id, int *socket, int *core)
+{
+    int ret = OPAL_ERR_NOT_FOUND;
+    hwloc_obj_t obj;
+    hwloc_topology_t *t = &mca_paffinity_hwloc_component.topology;
+    hwloc_cpuset_t good;
+
+    good = hwloc_cpuset_alloc();
+    if (NULL == good) {
+        return OPAL_ERR_OUT_OF_RESOURCE;
+    }
+
+    /* Iterate through every core and find one that contains the
+       processor_id.  Then find the corresponding socket. */
+    for (obj = hwloc_get_next_obj_by_type(*t, HWLOC_OBJ_CORE, NULL);
+         NULL != obj;
+         obj = hwloc_get_next_obj_by_type(*t, HWLOC_OBJ_CORE, obj)) {
+        hwloc_cpuset_and(good, obj->online_cpuset,
+                         obj->allowed_cpuset);
+
+        /* Does this core contain the processor_id in question? */
+        if (!hwloc_cpuset_isset(good, processor_id)) {
+            continue;
+        }
+        *core = obj->os_index;
+
+        /* Go upward from the core object until we find its parent
+           socket.  If we get to the root without finding one,
+           er..  Hmm.  Error! */
+        while (NULL != obj && HWLOC_OBJ_SOCKET != obj->type) {
+            obj = obj->parent;
+        }
+        if (NULL != obj) {
+            *socket = obj->os_index;
+            ret = OPAL_SUCCESS;
+        }
+        break;
+    }
+
+    /* Single exit so the scratch cpuset is released on every path. */
+    hwloc_cpuset_free(good);
+    return ret;
+}
+
+/*
+ * Provides number of LOGICAL processors in a host.  Since paffinity
+ * does not currently understand hardware threads, we interpret
+ * "processors" to mean "cores".
+ */
+static int module_get_processor_info(int *num_processors)
+{
+    hwloc_topology_t *t = &mca_paffinity_hwloc_component.topology;
+
+    /* Try the simple hwloc_get_nbobjs_by_type() first.  If we get -1,
+       go aggregate ourselves (because it means that there are cores
+       at multiple levels in the topology). */
+    *num_processors = (int) hwloc_get_nbobjs_by_type(*t, HWLOC_OBJ_CORE);
+    if (-1 == *num_processors) {
+        hwloc_obj_t obj;
+
+        *num_processors = 0;
+        for (obj = hwloc_get_next_obj_by_type(*t, HWLOC_OBJ_CORE, NULL);
+             NULL != obj;
+             obj = hwloc_get_next_obj_by_type(*t, HWLOC_OBJ_CORE, obj)) {
+            if (HWLOC_OBJ_CORE == obj->type) {
+                ++*num_processors;
+            }
+        }
+    }
+
+    return OPAL_SUCCESS;
+}
+
+/*
+ * Provides the number of LOGICAL sockets in a host.
+ */
+static int module_get_socket_info(int *num_sockets)
+{
+    hwloc_topology_t *t = &mca_paffinity_hwloc_component.topology;
+
+    /* Try the simple hwloc_get_nbobjs_by_type() first.  If we get -1,
+       go aggregate ourselves (because it means that there are sockets
+       at multiple levels in the topology). */
+    *num_sockets = (int) hwloc_get_nbobjs_by_type(*t, HWLOC_OBJ_SOCKET);
+    if (-1 == *num_sockets) {
+        hwloc_obj_t obj;
+
+        *num_sockets = 0;
+        for (obj = hwloc_get_next_obj_by_type(*t, HWLOC_OBJ_SOCKET, NULL);
+             NULL != obj;
+             obj = hwloc_get_next_obj_by_type(*t, HWLOC_OBJ_SOCKET, obj)) {
+            if (HWLOC_OBJ_SOCKET == obj->type) {
+                ++*num_sockets;
+            }
+        }
+    }
+
+    return OPAL_SUCCESS;
+}
+
+/*
+ * Provides the number of LOGICAL cores in a PHYSICAL socket.
+ */
+static int module_get_core_info(int socket, int *num_cores)
+{
+    hwloc_obj_t obj;
+    hwloc_topology_t *t = &mca_paffinity_hwloc_component.topology;
+
+    /* Traverse all sockets, looking for the right physical ID
+       number. */
+    for (obj = hwloc_get_next_obj_by_type(*t, HWLOC_OBJ_SOCKET, NULL);
+         NULL != obj;
+         obj = hwloc_get_next_obj_by_type(*t, HWLOC_OBJ_SOCKET, obj)) {
+        if (obj->os_index == (unsigned int) socket) {
+            /* Ok, we found the right socket.  Browse its descendants
+               looking for all cores. */
+            *num_cores = dfs_count_type(obj, HWLOC_OBJ_CORE);
+            return OPAL_SUCCESS;
+        }
+    }
+
+    /* If we didn't even find the right socket, we didn't find it. */
+    return OPAL_ERR_NOT_FOUND;
+}
+
+/*
+ * Return the PHYSICAL processor id that corresponds to the given
+ * LOGICAL processor id (remember: paffinity does not understand
+ * hardware threads, so "processor" here means "core").
+ */
+static int module_get_physical_processor_id(int logical_processor_id)
+{
+    int ret, pu;
+    hwloc_obj_t obj;
+    hwloc_topology_t *t = &mca_paffinity_hwloc_component.topology;
+
+    obj = hwloc_get_obj_by_type(*t, HWLOC_OBJ_CORE, logical_processor_id);
+    if (NULL == obj) {
+        return OPAL_ERR_NOT_FOUND;
+    }
+
+    /* Found the right core.  Now find the processor ID of the first
+       PU available in that core. */
+    ret = first_usable_pu(obj, &pu);
+    return (OPAL_SUCCESS == ret) ? pu : ret;
+}
+
+/*
+ * Return the PHYSICAL socket id that corresponds to the given
+ * LOGICAL socket id
+ */
+static int module_get_physical_socket_id(int logical_socket_id)
+{
+    hwloc_obj_t obj;
+    hwloc_topology_t *t = &mca_paffinity_hwloc_component.topology;
+
+    obj = hwloc_get_obj_by_type(*t, HWLOC_OBJ_SOCKET, logical_socket_id);
+    if (NULL == obj) {
+        return OPAL_ERR_NOT_FOUND;
+    }
+    return obj->os_index;
+}
+
+/*
+ * Return the PHYSICAL core id that corresponds to the given LOGICAL
+ * core id on the given PHYSICAL socket id
+ */
+static int module_get_physical_core_id(int physical_socket_id,
+                                       int logical_core_id)
+{
+    unsigned count = 0;
+    hwloc_obj_t obj;
+    hwloc_topology_t *t = &mca_paffinity_hwloc_component.topology;
+
+    obj = hwloc_get_root_obj(*t);
+    if (NULL == obj) {
+        return OPAL_ERR_NOT_FOUND;
+    }
+    obj = dfs_find_os_index(obj, HWLOC_OBJ_SOCKET, physical_socket_id);
+    if (NULL == obj) {
+        return OPAL_ERR_NOT_FOUND;
+    }
+
+    /* Note that we can't look at hwloc's logical_index here -- hwloc
+       counts logically across *all* cores.  We only want to find the
+       Nth logical core under this particular socket. */
+    obj = dfs_find_nth_item(obj, HWLOC_OBJ_CORE, &count, logical_core_id);
+    if (NULL == obj) {
+        return OPAL_ERR_NOT_FOUND;
+    }
+
+    /* Found the right core.  Its physical id is the core's own
+       os_index (the same id module_map_to_processor_id() matches
+       cores against), not the id of one of its PUs. */
+    return obj->os_index;
+}
+