diff --git a/opal/mca/hwloc/base/base.h b/opal/mca/hwloc/base/base.h
index 845e9e1e7f..45ab2e771d 100644
--- a/opal/mca/hwloc/base/base.h
+++ b/opal/mca/hwloc/base/base.h
@@ -139,6 +139,17 @@ OPAL_DECLSPEC int opal_hwloc_base_filter_cpus(hwloc_topology_t topo);
  */
 OPAL_DECLSPEC int opal_hwloc_base_get_topology(void);
 
+/**
+ * Replace the current hwloc topology with the one described in the
+ * given hwloc XML topology file.
+ *
+ * @param topofile Path to an hwloc XML topology file.
+ * @return OPAL_SUCCESS on success; OPAL_ERR_BAD_PARAM if topofile is
+ *         NULL; OPAL_ERR_NOT_SUPPORTED if the file cannot be loaded;
+ *         otherwise the error returned while filtering cpus.
+ */
+OPAL_DECLSPEC int opal_hwloc_base_set_topology(char *topofile);
+
 /**
  * Free the hwloc topology.
  */
diff --git a/opal/mca/hwloc/base/hwloc_base_util.c b/opal/mca/hwloc/base/hwloc_base_util.c
index 7aeb92ab8f..6c77aca4f2 100644
--- a/opal/mca/hwloc/base/hwloc_base_util.c
+++ b/opal/mca/hwloc/base/hwloc_base_util.c
@@ -231,6 +231,114 @@ int opal_hwloc_base_get_topology(void)
     return rc;
 }
 
+/*
+ * Replace the global hwloc topology with the one described by an hwloc
+ * XML file.
+ *
+ * Any topology already held in opal_hwloc_topology is destroyed first.
+ * On failure to initialize or load the new topology the global is left
+ * NULL, never pointing at a destroyed topology.
+ *
+ * Returns OPAL_SUCCESS on success, OPAL_ERR_BAD_PARAM if topofile is
+ * NULL, OPAL_ERR_NOT_SUPPORTED if hwloc cannot initialize or load the
+ * topology, or the error code from opal_hwloc_base_filter_cpus().
+ */
+int opal_hwloc_base_set_topology(char *topofile)
+{
+    hwloc_obj_t obj;
+    unsigned j, k;
+    struct hwloc_topology_support *support;
+    int rc;
+
+    OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output,
+                         "hwloc:base:set_topology"));
+
+    if (NULL == topofile) {
+        return OPAL_ERR_BAD_PARAM;
+    }
+
+    /* drop any topology we already hold, clearing the global so it is
+     * never left pointing at freed memory if a later step fails.
+     * NOTE(review): this does not release any data attached to the old
+     * topology's objects - confirm whether that needs separate cleanup
+     */
+    if (NULL != opal_hwloc_topology) {
+        hwloc_topology_destroy(opal_hwloc_topology);
+        opal_hwloc_topology = NULL;
+    }
+    if (0 != hwloc_topology_init(&opal_hwloc_topology)) {
+        opal_hwloc_topology = NULL;
+        return OPAL_ERR_NOT_SUPPORTED;
+    }
+    if (0 != hwloc_topology_set_xml(opal_hwloc_topology, topofile)) {
+        goto load_failed;
+    }
+    /* since we are loading this from an external source, we have to
+     * explicitly set a flag so hwloc sets things up correctly
+     */
+    if (0 != hwloc_topology_set_flags(opal_hwloc_topology,
+                                      (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM |
+                                       HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM |
+                                       HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) {
+        goto load_failed;
+    }
+    if (0 != hwloc_topology_load(opal_hwloc_topology)) {
+        goto load_failed;
+    }
+    /* remove the hostname from the topology. Unfortunately, hwloc
+     * decided to add the source hostname to the "topology", thus
+     * rendering it unusable as a pure topological description. So
+     * we remove that information here.
+     */
+    obj = hwloc_get_root_obj(opal_hwloc_topology);
+    for (k=0; k < obj->infos_count; k++) {
+        if (NULL == obj->infos[k].name ||
+            NULL == obj->infos[k].value) {
+            continue;
+        }
+        if (0 == strncmp(obj->infos[k].name, "HostName", strlen("HostName"))) {
+            free(obj->infos[k].name);
+            free(obj->infos[k].value);
+            /* left justify the array */
+            for (j=k; j < obj->infos_count-1; j++) {
+                obj->infos[j] = obj->infos[j+1];
+            }
+            obj->infos[obj->infos_count-1].name = NULL;
+            obj->infos[obj->infos_count-1].value = NULL;
+            obj->infos_count--;
+            break;
+        }
+    }
+    /* unfortunately, hwloc does not include support info in its
+     * xml output :-(( We default to assuming it is present as
+     * systems that use this option are likely to provide
+     * binding support
+     */
+    support = (struct hwloc_topology_support*)hwloc_topology_get_support(opal_hwloc_topology);
+    support->cpubind->set_thisproc_cpubind = true;
+    support->membind->set_thisproc_membind = true;
+
+    /* filter the cpus through any default cpu set */
+    rc = opal_hwloc_base_filter_cpus(opal_hwloc_topology);
+    if (OPAL_SUCCESS != rc) {
+        return rc;
+    }
+
+    /* fill opal_cache_line_size global with the smallest L1 cache
+       line size */
+    fill_cache_line_size();
+
+    /* all done */
+    return OPAL_SUCCESS;
+
+load_failed:
+    /* the partially configured topology is unusable: release it and
+     * clear the global so nobody can see a dangling handle */
+    hwloc_topology_destroy(opal_hwloc_topology);
+    opal_hwloc_topology = NULL;
+    return OPAL_ERR_NOT_SUPPORTED;
+}
+
 static void free_object(hwloc_obj_t obj)
 {
     opal_hwloc_obj_data_t *data;
diff --git a/orte/mca/rmaps/base/help-orte-rmaps-base.txt b/orte/mca/rmaps/base/help-orte-rmaps-base.txt
index c1604ca239..1451c0c260 100644
--- a/orte/mca/rmaps/base/help-orte-rmaps-base.txt
+++ b/orte/mca/rmaps/base/help-orte-rmaps-base.txt
@@ -204,3 +204,14 @@ No objects of the specified type were found on at least one node:
   Node: %s
 
 The map cannot be done as specified.
+# +[topo-file] +A topology file was given for the compute nodes, but +we were unable to correctly process it. Common errors +include incorrectly specifying the path to the file, +or the file being generated in a way that is incompatible +with the version of hwloc being used by OMPI. + + File: %s + +Please correct the problem and try again. diff --git a/orte/mca/rmaps/base/rmaps_base_frame.c b/orte/mca/rmaps/base/rmaps_base_frame.c index 7dce0f1cc7..0994404b11 100644 --- a/orte/mca/rmaps/base/rmaps_base_frame.c +++ b/orte/mca/rmaps/base/rmaps_base_frame.c @@ -33,6 +33,7 @@ #include "orte/runtime/orte_globals.h" #include "orte/util/show_help.h" +#include "orte/mca/errmgr/errmgr.h" #include "orte/mca/rmaps/base/rmaps_private.h" #include "orte/mca/rmaps/base/base.h" @@ -65,6 +66,7 @@ static bool rmaps_base_no_oversubscribe = false; static bool rmaps_base_oversubscribe = false; static bool rmaps_base_display_devel_map = false; static bool rmaps_base_display_diffable_map = false; +static char *rmaps_base_topo_file = NULL; static int orte_rmaps_base_register(mca_base_register_flag_t flags) { @@ -202,6 +204,13 @@ static int orte_rmaps_base_register(mca_base_register_flag_t flags) OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &rmaps_base_display_diffable_map); + rmaps_base_topo_file = NULL; + (void) mca_base_var_register("orte", "rmaps", "base", "topology", + "hwloc topology file (xml format) describing the topology of the compute nodes [default: none]", + MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, &rmaps_base_topo_file); + + return ORTE_SUCCESS; } @@ -237,6 +246,18 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags) orte_rmaps_base.mapping = 0; orte_rmaps_base.ranking = 0; + /* if a topology file was given, then set our topology + * from it. 
Even though our actual topology may differ, + * mpirun only needs to see the compute node topology + * for mapping purposes + */ + if (NULL != rmaps_base_topo_file) { + if (OPAL_SUCCESS != (rc = opal_hwloc_base_set_topology(rmaps_base_topo_file))) { + orte_show_help("help-orte-rmaps-base.txt", "topo-file", true, rmaps_base_topo_file); + return ORTE_ERR_SILENT; + } + } + if (NULL == rmaps_base_mapping_policy) { ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYSLOT); ORTE_UNSET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);