From ed93154e43ed7c7fdf09205c42922e1790a8a67c Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 7 Jul 2015 12:52:16 -0700 Subject: [PATCH] Fix hetero operations. An error in the hwloc utilities only allocated memory for the first display of a binding map, and then assumed that all nodes had the same number of cores in them. This resulted in memory corruption whenever someone displayed a binding pattern for a hetero cluster, and a smaller node was first in line. --- opal/mca/hwloc/base/hwloc_base_util.c | 38 +++--- orte/mca/ras/simulator/ras_sim.h | 2 + orte/mca/ras/simulator/ras_sim_component.c | 8 ++ orte/mca/ras/simulator/ras_sim_module.c | 124 +++++++++++++++--- orte/mca/rmaps/base/rmaps_base_binding.c | 5 +- orte/mca/rmaps/base/rmaps_base_map_job.c | 11 +- .../data_type_support/orte_dt_print_fns.c | 2 +- 7 files changed, 146 insertions(+), 44 deletions(-) diff --git a/opal/mca/hwloc/base/hwloc_base_util.c b/opal/mca/hwloc/base/hwloc_base_util.c index 68f66fb394..8cb187028f 100644 --- a/opal/mca/hwloc/base/hwloc_base_util.c +++ b/opal/mca/hwloc/base/hwloc_base_util.c @@ -1703,26 +1703,23 @@ static char *bitmap2rangestr(int bitmap) static int build_map(int *num_sockets_arg, int *num_cores_arg, hwloc_cpuset_t cpuset, int ***map, hwloc_topology_t topo) { - static int num_sockets = -1, num_cores = -1; + int num_sockets, num_cores; int socket_index, core_index, pu_index; hwloc_obj_t socket, core, pu; int **data; - /* Find out how many sockets we have (cached so that we don't have - to look this up every time) */ - if (num_sockets < 0) { - num_sockets = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_SOCKET); - /* some systems (like the iMac) only have one - * socket and so don't report a socket - */ - if (0 == num_sockets) { - num_sockets = 1; - } - /* Lazy: take the total number of cores that we have in the - topology; that'll be more than the max number of cores - under any given socket */ - num_cores = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_CORE); + /* Find out how many sockets we have */ + num_sockets = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_SOCKET); + /* some systems (like the iMac) only have one + * socket and so don't report a socket + */ + if (0 == num_sockets) { + num_sockets = 1; } + /* Lazy: take the total number of cores that we have in the + topology; that'll be more than the max number of cores + under any given socket */ + num_cores = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_CORE); *num_sockets_arg = num_sockets; *num_cores_arg = num_cores; @@ -1791,7 +1788,7 @@ int opal_hwloc_base_cset2str(char *str, int len, int ret, socket_index, core_index; char tmp[BUFSIZ]; const int stmp = sizeof(tmp) - 1; - int **map; + int **map=NULL; hwloc_obj_t root; opal_hwloc_topo_data_t *sum; @@ -1819,7 +1816,6 @@ int opal_hwloc_base_cset2str(char *str, int len, if (OPAL_SUCCESS != (ret = build_map(&num_sockets, &num_cores, cpuset, &map, topo))) { return ret; } - /* Iterate over the data matrix and build up the string */ first = true; for (socket_index = 0; socket_index < num_sockets; ++socket_index) { @@ -1837,8 +1833,12 @@ int opal_hwloc_base_cset2str(char *str, int len, } } } - free(map[0]); - free(map); + if (NULL != map) { + if (NULL != map[0]) { + free(map[0]); + } + free(map); + } return OPAL_SUCCESS; } diff --git a/orte/mca/ras/simulator/ras_sim.h b/orte/mca/ras/simulator/ras_sim.h index ecd2829e6e..b47886accf 100644 --- a/orte/mca/ras/simulator/ras_sim.h +++ b/orte/mca/ras/simulator/ras_sim.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -22,6 +23,7 @@ struct orte_ras_sim_component_t { char * slots; char * slots_max; char *topofiles; + char *topologies; bool have_cpubind; bool have_membind; }; diff --git a/orte/mca/ras/simulator/ras_sim_component.c b/orte/mca/ras/simulator/ras_sim_component.c index 1de654ca53..368426b2c7 100644 --- a/orte/mca/ras/simulator/ras_sim_component.c +++ b/orte/mca/ras/simulator/ras_sim_component.c @@ -12,6 +12,7 @@ * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -95,6 +96,13 @@ static int ras_sim_register(void) OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &mca_ras_simulator_component.topofiles); + mca_ras_simulator_component.topologies = NULL; + (void) mca_base_component_var_register (component, "topologies", + "Comma-separated list of topology descriptions for simulated nodes", + MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_ras_simulator_component.topologies); mca_ras_simulator_component.have_cpubind = true; (void) mca_base_component_var_register (component, "have_cpubind", "Topology supports binding to cpus", diff --git a/orte/mca/ras/simulator/ras_sim_module.c b/orte/mca/ras/simulator/ras_sim_module.c index 997763b765..2cbf8e22b3 100644 --- a/orte/mca/ras/simulator/ras_sim_module.c +++ b/orte/mca/ras/simulator/ras_sim_module.c @@ -3,6 +3,7 @@ * Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved * * $COPYRIGHT$ * @@ -56,6 +57,7 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes) unsigned j, k; struct hwloc_topology_support *support; char **files=NULL; + char **topos = NULL; bool use_local_topology = false; #endif char **node_cnt=NULL; @@ -65,32 +67,39 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes) char prefix[6]; node_cnt = opal_argv_split(mca_ras_simulator_component.num_nodes, ','); - slot_cnt = opal_argv_split(mca_ras_simulator_component.slots, ','); - max_slot_cnt = opal_argv_split(mca_ras_simulator_component.slots_max, ','); - - /* backfill the slot_cnt as reqd so we don't have to - * specify slot_cnt for each set of nodes - we'll set - * */ - tmp = slot_cnt[opal_argv_count(slot_cnt)-1]; - for (n=opal_argv_count(slot_cnt); n < opal_argv_count(node_cnt); n++) { - opal_argv_append_nosize(&slot_cnt, tmp); + if (NULL != mca_ras_simulator_component.slots) { + slot_cnt = opal_argv_split(mca_ras_simulator_component.slots, ','); + /* backfile the slot_cnt so every topology has a cnt */ + tmp = slot_cnt[opal_argv_count(slot_cnt)-1]; + for (n=opal_argv_count(slot_cnt); n < opal_argv_count(node_cnt); n++) { + opal_argv_append_nosize(&slot_cnt, tmp); + } } - /* backfill the max_slot_cnt as reqd */ - tmp = max_slot_cnt[opal_argv_count(slot_cnt)-1]; - for (n=opal_argv_count(max_slot_cnt); n < opal_argv_count(max_slot_cnt); n++) { - opal_argv_append_nosize(&max_slot_cnt, tmp); + if (NULL != mca_ras_simulator_component.slots_max) { + max_slot_cnt = opal_argv_split(mca_ras_simulator_component.slots_max, ','); + /* backfill the max_slot_cnt as reqd */ + tmp = max_slot_cnt[opal_argv_count(slot_cnt)-1]; + for (n=opal_argv_count(max_slot_cnt); n < opal_argv_count(max_slot_cnt); n++) { + opal_argv_append_nosize(&max_slot_cnt, tmp); + } } - + #if OPAL_HAVE_HWLOC - if (NULL == mca_ras_simulator_component.topofiles) { - /* use our topology */ - use_local_topology = true; - } else { + if (NULL != mca_ras_simulator_component.topofiles) { files = opal_argv_split(mca_ras_simulator_component.topofiles, ','); if (opal_argv_count(files) != opal_argv_count(node_cnt)) { orte_show_help("help-ras-base.txt", "ras-sim:mismatch", true); goto error_silent; } + } else if (NULL != mca_ras_simulator_component.topologies) { + topos = opal_argv_split(mca_ras_simulator_component.topologies, ','); + if (opal_argv_count(topos) != opal_argv_count(node_cnt)) { + orte_show_help("help-ras-base.txt", "ras-sim:mismatch", true); + goto error_silent; + } + } else { + /* use our topology */ + use_local_topology = true; } #else /* If we don't have hwloc and hwloc files were specified, then @@ -123,7 +132,7 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes) if (use_local_topology) { /* use our topology */ topo = opal_hwloc_topology; - } else { + } else if (NULL != files) { if (0 != hwloc_topology_init(&topo)) { orte_show_help("help-ras-simulator.txt", "hwloc API fail", true, @@ -188,6 +197,69 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes) t->topo = topo; t->sig = opal_hwloc_base_get_topo_signature(topo); opal_pointer_array_add(orte_node_topologies, t); + } else { + if (0 != hwloc_topology_init(&topo)) { + orte_show_help("help-ras-simulator.txt", + "hwloc API fail", true, + __FILE__, __LINE__, "hwloc_topology_init"); + goto error_silent; + } + if (0 != hwloc_topology_set_synthetic(topo, topos[n])) { + orte_show_help("help-ras-simulator.txt", + "hwloc API fail", true, + __FILE__, __LINE__, "hwloc_topology_set_synthetic"); + hwloc_topology_destroy(topo); + goto error_silent; + } + if (0 != hwloc_topology_load(topo)) { + orte_show_help("help-ras-simulator.txt", + "hwloc API fail", true, + __FILE__, __LINE__, "hwloc_topology_load"); + hwloc_topology_destroy(topo); + goto error_silent; + } + if (OPAL_SUCCESS != opal_hwloc_base_filter_cpus(topo)) { + orte_show_help("help-ras-simulator.txt", + "hwloc API fail", true, + __FILE__, __LINE__, "opal_hwloc_base_filter_cpus"); + hwloc_topology_destroy(topo); + goto error_silent; + } + /* remove the hostname from the topology. Unfortunately, hwloc + * decided to add the source hostname to the "topology", thus + * rendering it unusable as a pure topological description. So + * we remove that information here. + */ + obj = hwloc_get_root_obj(topo); + for (k=0; k < obj->infos_count; k++) { + if (NULL == obj->infos[k].name || + NULL == obj->infos[k].value) { + continue; + } + if (0 == strncmp(obj->infos[k].name, "HostName", strlen("HostName"))) { + free(obj->infos[k].name); + free(obj->infos[k].value); + /* left justify the array */ + for (j=k; j < obj->infos_count-1; j++) { + obj->infos[j] = obj->infos[j+1]; + } + obj->infos[obj->infos_count-1].name = NULL; + obj->infos[obj->infos_count-1].value = NULL; + obj->infos_count--; + break; + } + } + /* unfortunately, hwloc does not include support info in its + * xml output :-(( To aid in debugging, we set it here + */ + support = (struct hwloc_topology_support*)hwloc_topology_get_support(topo); + support->cpubind->set_thisproc_cpubind = mca_ras_simulator_component.have_cpubind; + support->membind->set_thisproc_membind = mca_ras_simulator_component.have_membind; + /* add it to our array */ + t = OBJ_NEW(orte_topology_t); + t->topo = topo; + t->sig = opal_hwloc_base_get_topo_signature(topo); + opal_pointer_array_add(orte_node_topologies, t); } #endif @@ -196,9 +268,19 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes) asprintf(&node->name, "%s%0*d", prefix, dig, i); node->state = ORTE_NODE_STATE_UP; node->slots_inuse = 0; - node->slots_max = (NULL == max_slot_cnt[n] ? 0 : atoi(max_slot_cnt[n])); - node->slots = (NULL == slot_cnt[n] ? 0 : atoi(slot_cnt[n])); #if OPAL_HAVE_HWLOC + if (NULL == max_slot_cnt || NULL == max_slot_cnt[n]) { + node->slots_max = 0; + } else { + obj = hwloc_get_root_obj(topo); + node->slots_max = opal_hwloc_base_get_npus(topo, obj); + } + if (NULL == slot_cnt || NULL == slot_cnt[n]) { + node->slots = 0; + } else { + obj = hwloc_get_root_obj(topo); + node->slots = opal_hwloc_base_get_npus(topo, obj); + } node->topology = topo; #endif opal_output_verbose(1, orte_ras_base_framework.framework_output, diff --git a/orte/mca/rmaps/base/rmaps_base_binding.c b/orte/mca/rmaps/base/rmaps_base_binding.c index a46c2b441a..155b297823 100644 --- a/orte/mca/rmaps/base/rmaps_base_binding.c +++ b/orte/mca/rmaps/base/rmaps_base_binding.c @@ -12,7 +12,7 @@ * Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -277,7 +277,8 @@ static int bind_downwards(orte_job_t *jdata, } /* bozo check */ locale = NULL; - if (!orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR)) { + if (!orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR) || + NULL == locale) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-locale", true, ORTE_NAME_PRINT(&proc->name)); hwloc_bitmap_free(totalcpuset); return ORTE_ERR_SILENT; diff --git a/orte/mca/rmaps/base/rmaps_base_map_job.c b/orte/mca/rmaps/base/rmaps_base_map_job.c index 9f8e163ee8..0a5669c973 100644 --- a/orte/mca/rmaps/base/rmaps_base_map_job.c +++ b/orte/mca/rmaps/base/rmaps_base_map_job.c @@ -93,7 +93,16 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata) nprocs = 0; for (i=0; i < jdata->apps->size; i++) { if (NULL != (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) { - nprocs += app->num_procs; + if (0 == app->num_procs) { + opal_list_t nodes; + orte_std_cntr_t slots; + OBJ_CONSTRUCT(&nodes, opal_list_t); + orte_rmaps_base_get_target_nodes(&nodes, &slots, app, ORTE_MAPPING_BYNODE, true, true); + OPAL_LIST_DESTRUCT(&nodes); + nprocs += slots; + } else { + nprocs += app->num_procs; + } } } opal_output_verbose(5, orte_rmaps_base_framework.framework_output, diff --git a/orte/runtime/data_type_support/orte_dt_print_fns.c b/orte/runtime/data_type_support/orte_dt_print_fns.c index 76af17c6c1..5dc55efe9f 100644 --- a/orte/runtime/data_type_support/orte_dt_print_fns.c +++ b/orte/runtime/data_type_support/orte_dt_print_fns.c @@ -529,7 +529,7 @@ int orte_dt_print_proc(char **output, char *prefix, orte_proc_t *src, opal_data_ if (orte_get_attribute(&src->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&loc, OPAL_PTR)) { if (NULL != loc) { if (OPAL_ERR_NOT_BOUND == opal_hwloc_base_cset2mapstr(locale, sizeof(locale), src->node->topology, loc->cpuset)) { - strcpy(locale, "UNBOUND"); + strcpy(locale, "NODE"); } } else { strcpy(locale, "UNKNOWN");