diff --git a/opal/mca/hwloc/base/base.h b/opal/mca/hwloc/base/base.h index 18a9aec491..1f1e5e3097 100644 --- a/opal/mca/hwloc/base/base.h +++ b/opal/mca/hwloc/base/base.h @@ -191,10 +191,11 @@ OPAL_DECLSPEC bool opal_hwloc_base_single_cpu(hwloc_cpuset_t cpuset); /** * Provide a utility to parse a slot list against the local - * logical cpus, and produce a cpuset for the described binding + * cpus of given type, and produce a cpuset for the described binding */ OPAL_DECLSPEC int opal_hwloc_base_slot_list_parse(const char *slot_str, hwloc_topology_t topo, + opal_hwloc_resource_type_t rtype, hwloc_cpuset_t cpumask); OPAL_DECLSPEC char* opal_hwloc_base_find_coprocessors(hwloc_topology_t topo); @@ -266,8 +267,10 @@ OPAL_DECLSPEC int opal_hwloc_base_cset2mapstr(char *str, int len, hwloc_topology_t topo, hwloc_cpuset_t cpuset); -/* get the hwloc object that corresponds to the given LOGICAL processor id */ -OPAL_DECLSPEC hwloc_obj_t opal_hwloc_base_get_pu(hwloc_topology_t topo, int lid); +/* get the hwloc object that corresponds to the given processor id and type */ +OPAL_DECLSPEC hwloc_obj_t opal_hwloc_base_get_pu(hwloc_topology_t topo, + int lid, + opal_hwloc_resource_type_t rtype); #endif diff --git a/opal/mca/hwloc/base/help-opal-hwloc-base.txt b/opal/mca/hwloc/base/help-opal-hwloc-base.txt index 271cf734b8..68b353a936 100644 --- a/opal/mca/hwloc/base/help-opal-hwloc-base.txt +++ b/opal/mca/hwloc/base/help-opal-hwloc-base.txt @@ -28,9 +28,10 @@ The specified %s policy is not recognized: Please check for a typo or ensure that the option is a supported one. # -[logical-cpu-not-found] -A specified logical processor does not exist in this topology: +[cpu-not-found] +A specified %s processor does not exist in this topology: + CPU number: %d Cpu set given: %s # [redefining-policy] diff --git a/opal/mca/hwloc/base/hwloc_base_util.c b/opal/mca/hwloc/base/hwloc_base_util.c index e209ee06c0..04cfea8848 100644 --- a/opal/mca/hwloc/base/hwloc_base_util.c +++ b/opal/mca/hwloc/base/hwloc_base_util.c @@ -43,16 +43,19 @@ /* * Provide the hwloc object that corresponds to the given - * LOGICAL processor id. Remember: "processor" here [usually] means "core" -- + * processor id of the given type. Remember: "processor" here [usually] means "core" -- * except that on some platforms, hwloc won't find any cores; it'll * only find PUs (!). On such platforms, then do the same calculation * but with PUs instead of COREs. */ -hwloc_obj_t opal_hwloc_base_get_pu(hwloc_topology_t topo, int lid) +hwloc_obj_t opal_hwloc_base_get_pu(hwloc_topology_t topo, + int lid, + opal_hwloc_resource_type_t rtype) { hwloc_obj_type_t obj_type = HWLOC_OBJ_CORE; hwloc_obj_t obj; - + int cnt; + /* hwloc isn't able to find cores on all platforms. Example: PPC64 running RHEL 5.4 (linux kernel 2.6.18) only reports NUMA nodes and PU's. Fine. @@ -70,12 +73,34 @@ hwloc_obj_t opal_hwloc_base_get_pu(hwloc_topology_t topo, int lid) obj_type = HWLOC_OBJ_PU; } + if (OPAL_HWLOC_PHYSICAL == rtype) { + /* find the pu */ + obj = hwloc_get_obj_by_type(topo, obj_type, 0); + cnt = 0; + opal_output_verbose(5, opal_hwloc_base_framework.framework_output, + "Searching for %d PHYSICAL PU", lid); + while (lid != cnt && NULL != obj) { + obj = obj->next_cousin; + cnt++; + } + if (lid != cnt) { + opal_show_help("help-opal-hwloc-base.txt", + "cpu-not-found", true, "physical", + lid, opal_hwloc_base_cpu_set); + return NULL; // failed to find it + } + return obj; + } + + opal_output_verbose(5, opal_hwloc_base_framework.framework_output, + "Searching for %d LOGICAL PU", lid); + /* Now do the actual lookup. */ obj = hwloc_get_obj_by_type(topo, obj_type, lid); if (NULL == obj) { opal_show_help("help-opal-hwloc-base.txt", - "logical-cpu-not-found", true, - opal_hwloc_base_cpu_set); + "cpu-not-found", true, "logical", + lid, opal_hwloc_base_cpu_set); return NULL; } @@ -130,7 +155,7 @@ int opal_hwloc_base_filter_cpus(hwloc_topology_t topo) case 1: /* only one cpu given - get that object */ cpu = strtoul(range[0], NULL, 10); - if (NULL == (pu = opal_hwloc_base_get_pu(topo, cpu))) { + if (NULL == (pu = opal_hwloc_base_get_pu(topo, cpu, OPAL_HWLOC_LOGICAL))) { opal_argv_free(ranges); opal_argv_free(range); return OPAL_ERROR; @@ -144,7 +169,7 @@ int opal_hwloc_base_filter_cpus(hwloc_topology_t topo) start = strtoul(range[0], NULL, 10); end = strtoul(range[1], NULL, 10); for (cpu=start; cpu <= end; cpu++) { - if (NULL == (pu = opal_hwloc_base_get_pu(topo, cpu))) { + if (NULL == (pu = opal_hwloc_base_get_pu(topo, cpu, OPAL_HWLOC_LOGICAL))) { opal_argv_free(ranges); opal_argv_free(range); hwloc_bitmap_free(avail); @@ -1030,6 +1055,7 @@ void opal_hwloc_base_clear_usage(hwloc_topology_t topo) static int socket_to_cpu_set(char *cpus, hwloc_topology_t topo, + opal_hwloc_resource_type_t rtype, hwloc_bitmap_t cpumask) { char **range; @@ -1042,7 +1068,7 @@ static int socket_to_cpu_set(char *cpus, if ('*' == cpus[0]) { /* requesting cpumask for ALL sockets */ obj = hwloc_get_root_obj(topo); - /* set to all available logical processors - essentially, + /* set to all available processors - essentially, * this specification equates to unbound */ res = opal_hwloc_base_get_available_cpus(topo, obj); @@ -1055,8 +1081,8 @@ static int socket_to_cpu_set(char *cpus, switch (range_cnt) { case 1: /* no range was present, so just one socket given */ socket_id = atoi(range[0]); - obj = opal_hwloc_base_get_obj_by_type(topo, HWLOC_OBJ_SOCKET, 0, socket_id, OPAL_HWLOC_LOGICAL); - /* get the available logical cpus for this socket */ + obj = opal_hwloc_base_get_obj_by_type(topo, HWLOC_OBJ_SOCKET, 0, socket_id, rtype); + /* get the available cpus for this socket */ res = opal_hwloc_base_get_available_cpus(topo, obj); hwloc_bitmap_or(cpumask, cpumask, res); break; @@ -1066,8 +1092,8 @@ static int socket_to_cpu_set(char *cpus, upper_range = atoi(range[1]); /* cycle across the range of sockets */ for (socket_id=lower_range; socket_id<=upper_range; socket_id++) { - obj = opal_hwloc_base_get_obj_by_type(topo, HWLOC_OBJ_SOCKET, 0, socket_id, OPAL_HWLOC_LOGICAL); - /* get the available logical cpus for this socket */ + obj = opal_hwloc_base_get_obj_by_type(topo, HWLOC_OBJ_SOCKET, 0, socket_id, rtype); + /* get the available cpus for this socket */ res = opal_hwloc_base_get_available_cpus(topo, obj); /* set the corresponding bits in the bitmask */ hwloc_bitmap_or(cpumask, cpumask, res); @@ -1084,6 +1110,7 @@ static int socket_to_cpu_set(char *cpus, static int socket_core_to_cpu_set(char *socket_core_list, hwloc_topology_t topo, + opal_hwloc_resource_type_t rtype, hwloc_bitmap_t cpumask) { int rc=OPAL_SUCCESS, i, j; @@ -1102,7 +1129,7 @@ static int socket_core_to_cpu_set(char *socket_core_list, /* get the object for this socket id */ if (NULL == (socket = opal_hwloc_base_get_obj_by_type(topo, HWLOC_OBJ_SOCKET, 0, - socket_id, OPAL_HWLOC_LOGICAL))) { + socket_id, rtype))) { opal_argv_free(socket_core); return OPAL_ERR_NOT_FOUND; } @@ -1123,7 +1150,7 @@ static int socket_core_to_cpu_set(char *socket_core_list, corestr = socket_core[i]; } if ('*' == corestr[0]) { - /* set to all available logical cpus on this socket */ + /* set to all available cpus on this socket */ res = opal_hwloc_base_get_available_cpus(topo, socket); hwloc_bitmap_or(cpumask, cpumask, res); /* we are done - already assigned all cores! */ @@ -1188,6 +1215,7 @@ static int socket_core_to_cpu_set(char *socket_core_list, int opal_hwloc_base_slot_list_parse(const char *slot_str, hwloc_topology_t topo, + opal_hwloc_resource_type_t rtype, hwloc_cpuset_t cpumask) { char **item; @@ -1233,7 +1261,7 @@ int opal_hwloc_base_slot_list_parse(const char *slot_str, * it could specify multiple sockets */ if (OPAL_SUCCESS != (rc = socket_to_cpu_set(&item[i][1], /* skip the 'S' */ - topo, cpumask))) { + topo, rtype, cpumask))) { opal_argv_free(item); return rc; } @@ -1242,13 +1270,13 @@ int opal_hwloc_base_slot_list_parse(const char *slot_str, if ('S' == item[i][0] || 's' == item[i][0]) { if (OPAL_SUCCESS != (rc = socket_core_to_cpu_set(&item[i][1], /* skip the 'S' */ - topo, cpumask))) { + topo, rtype, cpumask))) { opal_argv_free(item); return rc; } } else { if (OPAL_SUCCESS != (rc = socket_core_to_cpu_set(item[i], - topo, cpumask))) { + topo, rtype, cpumask))) { opal_argv_free(item); return rc; } @@ -1263,9 +1291,10 @@ int opal_hwloc_base_slot_list_parse(const char *slot_str, case 1: /* only one core, or a list of cores, specified */ list = opal_argv_split(range[0], ','); for (j=0; NULL != list[j]; j++) { + opal_output(0, "LIST %d VAL %s", j, list[j]); core_id = atoi(list[j]); - /* find the specified logical available cpu */ - if (NULL == (pu = opal_hwloc_base_get_pu(topo, core_id))) { + /* find the specified available cpu */ + if (NULL == (pu = opal_hwloc_base_get_pu(topo, core_id, rtype))) { opal_argv_free(range); opal_argv_free(item); return OPAL_ERROR; @@ -1283,7 +1312,7 @@ int opal_hwloc_base_slot_list_parse(const char *slot_str, upper_range = atoi(range[1]); for (core_id=lower_range; core_id <= upper_range; core_id++) { /* find the specified logical available cpu */ - if (NULL == (pu = opal_hwloc_base_get_pu(topo, core_id))) { + if (NULL == (pu = opal_hwloc_base_get_pu(topo, core_id, rtype))) { opal_argv_free(range); opal_argv_free(item); return OPAL_ERROR; diff --git a/orte/mca/ess/base/ess_base_fns.c b/orte/mca/ess/base/ess_base_fns.c index 0928f60c08..25c701d6ee 100644 --- a/orte/mca/ess/base/ess_base_fns.c +++ b/orte/mca/ess/base/ess_base_fns.c @@ -118,7 +118,8 @@ int orte_ess_base_proc_binding(void) hwloc_bitmap_zero(cpus); if (OPAL_BIND_TO_CPUSET == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { if (OPAL_SUCCESS != (ret = opal_hwloc_base_slot_list_parse(opal_hwloc_base_slot_list, - opal_hwloc_topology, cpus))) { + opal_hwloc_topology, + OPAL_HWLOC_LOGICAL, cpus))) { error = "Setting processor affinity failed"; hwloc_bitmap_free(cpus); goto error; diff --git a/orte/mca/rmaps/rank_file/rmaps_rank_file.c b/orte/mca/rmaps/rank_file/rmaps_rank_file.c index a26cb7ae0c..095069c12e 100644 --- a/orte/mca/rmaps/rank_file/rmaps_rank_file.c +++ b/orte/mca/rmaps/rank_file/rmaps_rank_file.c @@ -81,7 +81,10 @@ static int orte_rmaps_rf_map(orte_job_t *jdata) mca_base_component_t *c = &mca_rmaps_rank_file_component.super.base_version; char *slots; bool initial_map=true; - +#if OPAL_HAVE_HWLOC + opal_hwloc_resource_type_t rtype; +#endif + /* only handle initial launch of rf job */ if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) { opal_output_verbose(5, orte_rmaps_base_framework.framework_output, @@ -113,6 +116,19 @@ static int orte_rmaps_rf_map(orte_job_t *jdata) /* convenience def */ map = jdata->map; + +#if OPAL_HAVE_HWLOC + /* default to LOGICAL processors */ + if (mca_rmaps_rank_file_component.physical) { + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps:rank_file: using PHYSICAL processors"); + rtype = OPAL_HWLOC_PHYSICAL; + } else { + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "mca:rmaps:rank_file: using LOGICAL processors"); + rtype = OPAL_HWLOC_LOGICAL; + } +#endif /* setup the node list */ OBJ_CONSTRUCT(&node_list, opal_list_t); @@ -276,7 +292,7 @@ static int orte_rmaps_rf_map(orte_job_t *jdata) } bitmap = hwloc_bitmap_alloc(); /* parse the slot_list to find the socket and core */ - if (ORTE_SUCCESS != (rc = opal_hwloc_base_slot_list_parse(slots, node->topology, bitmap))) { + if (ORTE_SUCCESS != (rc = opal_hwloc_base_slot_list_parse(slots, node->topology, rtype, bitmap))) { ORTE_ERROR_LOG(rc); goto error; } diff --git a/orte/mca/rmaps/rank_file/rmaps_rank_file.h b/orte/mca/rmaps/rank_file/rmaps_rank_file.h index aa2cb96779..17d7b7b347 100644 --- a/orte/mca/rmaps/rank_file/rmaps_rank_file.h +++ b/orte/mca/rmaps/rank_file/rmaps_rank_file.h @@ -10,8 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Voltaire. All rights reserved - * - * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -41,6 +41,7 @@ int orte_rmaps_rank_file_lex_destroy (void); struct orte_rmaps_rf_component_t { orte_rmaps_base_component_t super; char *slot_list; + bool physical; }; typedef struct orte_rmaps_rf_component_t orte_rmaps_rf_component_t; diff --git a/orte/mca/rmaps/rank_file/rmaps_rank_file_component.c b/orte/mca/rmaps/rank_file/rmaps_rank_file_component.c index 38969e91e3..4aea047baf 100644 --- a/orte/mca/rmaps/rank_file/rmaps_rank_file_component.c +++ b/orte/mca/rmaps/rank_file/rmaps_rank_file_component.c @@ -10,8 +10,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Voltaire. All rights reserved - * - * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -92,6 +92,14 @@ static int orte_rmaps_rank_file_register(void) MCA_BASE_VAR_SCOPE_READONLY, &orte_rankfile); (void) mca_base_var_register_synonym(tmp, "orte", "orte", NULL, "rankfile", 0); + mca_rmaps_rank_file_component.physical = false; + (void) mca_base_component_var_register(c, "physical", "Rankfile contains physical cpu designations", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_rmaps_rank_file_component.physical); + + return ORTE_SUCCESS; } diff --git a/orte/orted/orted_main.c b/orte/orted/orted_main.c index d00f960f28..3695ffb37c 100644 --- a/orte/orted/orted_main.c +++ b/orte/orted/orted_main.c @@ -391,7 +391,7 @@ int orte_daemon(int argc, char *argv[]) res = hwloc_bitmap_alloc(); for (i=0; NULL != cores[i]; i++) { core = strtoul(cores[i], NULL, 10); - if (NULL == (pu = opal_hwloc_base_get_pu(opal_hwloc_topology, core))) { + if (NULL == (pu = opal_hwloc_base_get_pu(opal_hwloc_topology, core, OPAL_HWLOC_LOGICAL))) { /* turn off the show help forwarding as we won't * be able to cycle the event library to send */