diff --git a/opal/mca/hwloc/base/help-opal-hwloc-base.txt b/opal/mca/hwloc/base/help-opal-hwloc-base.txt index 68b353a936..7e9274f163 100644 --- a/opal/mca/hwloc/base/help-opal-hwloc-base.txt +++ b/opal/mca/hwloc/base/help-opal-hwloc-base.txt @@ -28,12 +28,6 @@ The specified %s policy is not recognized: Please check for a typo or ensure that the option is a supported one. # -[cpu-not-found] -A specified %s processor does not exist in this topology: - - CPU number: %d - Cpu set given: %s -# [redefining-policy] Conflicting directives for binding policy are causing the policy to be redefined: diff --git a/opal/mca/hwloc/base/hwloc_base_dt.c b/opal/mca/hwloc/base/hwloc_base_dt.c index cf076e5139..10e3af8123 100644 --- a/opal/mca/hwloc/base/hwloc_base_dt.c +++ b/opal/mca/hwloc/base/hwloc_base_dt.c @@ -104,7 +104,9 @@ int opal_hwloc_unpack(opal_buffer_t *buffer, void *dest, /* since we are loading this from an external source, we have to * explicitly set a flag so hwloc sets things up correctly */ - if (0 != hwloc_topology_set_flags(t, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM | HWLOC_TOPOLOGY_FLAG_IO_DEVICES)) { + if (0 != hwloc_topology_set_flags(t, (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM | + HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM | + HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) { rc = OPAL_ERROR; hwloc_topology_destroy(t); goto cleanup; @@ -133,6 +135,11 @@ int opal_hwloc_unpack(opal_buffer_t *buffer, void *dest, goto cleanup; } + /* filter the cpus thru any default cpu set */ + if (OPAL_SUCCESS != (rc = opal_hwloc_base_filter_cpus(t))) { + goto cleanup; + } + /* pass it back */ tarray[i] = t; diff --git a/opal/mca/hwloc/base/hwloc_base_frame.c b/opal/mca/hwloc/base/hwloc_base_frame.c index b50cce42e9..0e6935f27f 100644 --- a/opal/mca/hwloc/base/hwloc_base_frame.c +++ b/opal/mca/hwloc/base/hwloc_base_frame.c @@ -449,7 +449,8 @@ char* opal_hwloc_base_print_locality(opal_hwloc_locality_t locality) static void obj_data_const(opal_hwloc_obj_data_t *ptr) { ptr->available = NULL; - ptr->npus = UINT_MAX; + ptr->npus_calculated = false; + ptr->npus = 0; ptr->idx = UINT_MAX; ptr->num_bound = 0; } diff --git a/opal/mca/hwloc/base/hwloc_base_util.c b/opal/mca/hwloc/base/hwloc_base_util.c index e59c0eefd0..71a9a34a60 100644 --- a/opal/mca/hwloc/base/hwloc_base_util.c +++ b/opal/mca/hwloc/base/hwloc_base_util.c @@ -80,14 +80,12 @@ hwloc_obj_t opal_hwloc_base_get_pu(hwloc_topology_t topo, * numbered within their sockets instead). So we find the * specified PU, and then return the core object that contains it */ obj = hwloc_get_pu_obj_by_os_index(topo, lid); - if (NULL == obj) { - opal_show_help("help-opal-hwloc-base.txt", - "cpu-not-found", true, "physical", - lid, (NULL == opal_hwloc_base_cpu_set) ? "None" : opal_hwloc_base_cpu_set); - return NULL; // failed to find it - } + OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output, + "physical cpu %d %s found in cpuset %s", + lid, (NULL == obj) ? "not" : "is", + (NULL == opal_hwloc_base_cpu_set) ? "None" : opal_hwloc_base_cpu_set)); /* we now need to shift upward to the core including this PU */ - if (HWLOC_OBJ_CORE == obj_type) { + if (NULL != obj && HWLOC_OBJ_CORE == obj_type) { obj = obj->parent; } return obj; @@ -98,12 +96,10 @@ hwloc_obj_t opal_hwloc_base_get_pu(hwloc_topology_t topo, /* Now do the actual lookup. */ obj = hwloc_get_obj_by_type(topo, obj_type, lid); - if (NULL == obj) { - opal_show_help("help-opal-hwloc-base.txt", - "cpu-not-found", true, "logical", - lid, (NULL == opal_hwloc_base_cpu_set) ? "None" : opal_hwloc_base_cpu_set); - return NULL; - } + OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output, + "logical cpu %d %s found in cpuset %s", + lid, (NULL == obj) ? "not" : "is", + (NULL == opal_hwloc_base_cpu_set) ? "None" : opal_hwloc_base_cpu_set)); /* Found the right core (or PU). Return the object */ return obj; @@ -117,6 +113,7 @@ int opal_hwloc_base_filter_cpus(hwloc_topology_t topo) hwloc_obj_t root, pu; hwloc_cpuset_t avail = NULL, pucpus, res; opal_hwloc_topo_data_t *sum; + opal_hwloc_obj_data_t *data; char **ranges=NULL, **range=NULL; int idx, cpu, start, end; @@ -129,8 +126,6 @@ int opal_hwloc_base_filter_cpus(hwloc_topology_t topo) /* should only ever enter here once, but check anyway */ if (NULL != sum->available) { - OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output, - "hwloc:base:filter_cpus specified - already done")); return OPAL_SUCCESS; } @@ -156,43 +151,38 @@ int opal_hwloc_base_filter_cpus(hwloc_topology_t topo) case 1: /* only one cpu given - get that object */ cpu = strtoul(range[0], NULL, 10); - if (NULL == (pu = opal_hwloc_base_get_pu(topo, cpu, OPAL_HWLOC_LOGICAL))) { - opal_argv_free(ranges); - opal_argv_free(range); - hwloc_bitmap_free(avail); - hwloc_bitmap_free(res); - hwloc_bitmap_free(pucpus); - return OPAL_ERR_SILENT; + if (NULL != (pu = opal_hwloc_base_get_pu(topo, cpu, OPAL_HWLOC_LOGICAL))) { + hwloc_bitmap_and(pucpus, pu->online_cpuset, pu->allowed_cpuset); + hwloc_bitmap_or(res, avail, pucpus); + hwloc_bitmap_copy(avail, res); + data = (opal_hwloc_obj_data_t*)pu->userdata; + if (NULL == data) { + pu->userdata = (void*)OBJ_NEW(opal_hwloc_obj_data_t); + data = (opal_hwloc_obj_data_t*)pu->userdata; + } + data->npus++; } - hwloc_bitmap_and(pucpus, pu->online_cpuset, pu->allowed_cpuset); - hwloc_bitmap_or(res, avail, pucpus); - hwloc_bitmap_copy(avail, res); break; case 2: /* range given */ start = strtoul(range[0], NULL, 10); end = strtoul(range[1], NULL, 10); for (cpu=start; cpu <= end; cpu++) { - if (NULL == (pu = opal_hwloc_base_get_pu(topo, cpu, OPAL_HWLOC_LOGICAL))) { - opal_argv_free(ranges); - opal_argv_free(range); - hwloc_bitmap_free(avail); - hwloc_bitmap_free(res); - hwloc_bitmap_free(pucpus); - return OPAL_ERR_SILENT; + if (NULL != (pu = opal_hwloc_base_get_pu(topo, cpu, OPAL_HWLOC_LOGICAL))) { + hwloc_bitmap_and(pucpus, pu->online_cpuset, pu->allowed_cpuset); + hwloc_bitmap_or(res, avail, pucpus); + hwloc_bitmap_copy(avail, res); + data = (opal_hwloc_obj_data_t*)pu->userdata; + if (NULL == data) { + pu->userdata = (void*)OBJ_NEW(opal_hwloc_obj_data_t); + data = (opal_hwloc_obj_data_t*)pu->userdata; + } + data->npus++; } - hwloc_bitmap_and(pucpus, pu->online_cpuset, pu->allowed_cpuset); - hwloc_bitmap_or(res, avail, pucpus); - hwloc_bitmap_copy(avail, res); } break; default: - hwloc_bitmap_free(avail); - hwloc_bitmap_free(res); - hwloc_bitmap_free(pucpus); - opal_argv_free(ranges); - opal_argv_free(range); - return OPAL_ERR_BAD_PARAM; + break; } opal_argv_free(range); } @@ -249,7 +239,7 @@ static void fill_cache_line_size(void) int opal_hwloc_base_get_topology(void) { - int rc; + int rc=OPAL_SUCCESS; OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output, "hwloc:base:get_topology")); @@ -262,15 +252,11 @@ int opal_hwloc_base_get_topology(void) 0 != hwloc_topology_load(opal_hwloc_topology)) { return OPAL_ERR_NOT_SUPPORTED; } - - /* filter the cpus thru any default cpu set */ - rc = opal_hwloc_base_filter_cpus(opal_hwloc_topology); - if (OPAL_SUCCESS != rc) { + if (OPAL_SUCCESS != (rc = opal_hwloc_base_filter_cpus(opal_hwloc_topology))) { return rc; } } else { - rc = opal_hwloc_base_set_topology(opal_hwloc_base_topo_file); - if (OPAL_SUCCESS != rc) { + if (OPAL_SUCCESS != (rc = opal_hwloc_base_set_topology(opal_hwloc_base_topo_file))) { return rc; } } @@ -435,7 +421,7 @@ hwloc_cpuset_t opal_hwloc_base_get_available_cpus(hwloc_topology_t topo, opal_hwloc_topo_data_t *rdata; opal_hwloc_obj_data_t *data; - OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output, + OPAL_OUTPUT_VERBOSE((10, opal_hwloc_base_framework.framework_output, "hwloc:base: get available cpus")); /* get the node-level information */ @@ -448,8 +434,6 @@ hwloc_cpuset_t opal_hwloc_base_get_available_cpus(hwloc_topology_t topo, OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output, "hwloc:base:get_available_cpus first time - filtering cpus")); } - /* ensure the topo-level cpuset was prepared */ - opal_hwloc_base_filter_cpus(topo); /* are we asking about the root object? */ if (obj == root) { @@ -498,9 +482,17 @@ hwloc_cpuset_t opal_hwloc_base_get_available_cpus(hwloc_topology_t topo, static void df_search_cores(hwloc_obj_t obj, unsigned int *cnt) { unsigned k; + opal_hwloc_obj_data_t *data; if (HWLOC_OBJ_CORE == obj->type) { - *cnt += 1; + data = (opal_hwloc_obj_data_t*)obj->userdata; + if (NULL == data) { + return; + } + if (NULL == opal_hwloc_base_cpu_set) { + data->npus = 1; + } + *cnt += data->npus; return; } @@ -547,28 +539,8 @@ unsigned int opal_hwloc_base_get_npus(hwloc_topology_t topo, unsigned int cnt = 0; hwloc_cpuset_t cpuset; - /* if the object is a hwthread (i.e., HWLOC_OBJ_PU), - * then the answer is always 1 since there isn't - * anything underneath it - */ - if (HWLOC_OBJ_PU == obj->type) { - return 1; - } - - /* if the object is a core (i.e., HWLOC_OBJ_CORE) and - * we are NOT treating hwthreads as independent cpus, - * then the answer is also 1 since we don't allow - * you to use the underlying hwthreads as separate - * entities - */ - if (HWLOC_OBJ_CORE == obj->type && - !opal_hwloc_use_hwthreads_as_cpus) { - return 1; - } - data = (opal_hwloc_obj_data_t*)obj->userdata; - - if (NULL == data || UINT_MAX == data->npus) { + if (NULL == data || !data->npus_calculated) { if (!opal_hwloc_use_hwthreads_as_cpus) { /* if we are treating cores as cpus, then we really * want to know how many cores are in this object. @@ -618,6 +590,7 @@ unsigned int opal_hwloc_base_get_npus(hwloc_topology_t topo, obj->userdata = (void*)data; } data->npus = cnt; + data->npus_calculated = true; } return data->npus; @@ -876,6 +849,10 @@ static hwloc_obj_t df_search_min_bound(hwloc_topology_t topo, opal_hwloc_obj_data_t *data; if (target == start->type) { + /* only consider procs that are allowed */ + if (0 == (k = opal_hwloc_base_get_npus(topo, start))) { + goto notfound; + } if (HWLOC_OBJ_CACHE == start->type && cache_level != start->attr->cache.depth) { goto notfound; } @@ -885,6 +862,7 @@ static hwloc_obj_t df_search_min_bound(hwloc_topology_t topo, data = OBJ_NEW(opal_hwloc_obj_data_t); start->userdata = data; } + OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output, "hwloc:base:min_bound_under_obj object %s:%u nbound %u min %u", hwloc_obj_type_string(target), start->logical_index, diff --git a/opal/mca/hwloc/hwloc.h b/opal/mca/hwloc/hwloc.h index a1bb24aa70..f9ef947621 100644 --- a/opal/mca/hwloc/hwloc.h +++ b/opal/mca/hwloc/hwloc.h @@ -143,6 +143,7 @@ typedef uint8_t opal_hwloc_resource_type_t; typedef struct { opal_object_t super; hwloc_cpuset_t available; + bool npus_calculated; unsigned int npus; unsigned int idx; unsigned int num_bound; diff --git a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c index 9dcdfd04eb..c536631eec 100644 --- a/orte/mca/ess/base/ess_base_std_orted.c +++ b/orte/mca/ess/base/ess_base_std_orted.c @@ -145,7 +145,7 @@ int orte_ess_base_orted_setup(char **hosts) /* get the local topology */ if (NULL == opal_hwloc_topology) { - if (OPAL_SUCCESS != opal_hwloc_base_get_topology()) { + if (OPAL_SUCCESS != (ret = opal_hwloc_base_get_topology())) { error = "topology discovery"; goto error; } diff --git a/orte/mca/ess/hnp/ess_hnp_module.c b/orte/mca/ess/hnp/ess_hnp_module.c index b81e4bec4b..b6991f685e 100644 --- a/orte/mca/ess/hnp/ess_hnp_module.c +++ b/orte/mca/ess/hnp/ess_hnp_module.c @@ -197,7 +197,7 @@ static int rte_init(void) { /* get the local topology */ if (NULL == opal_hwloc_topology) { - if (OPAL_SUCCESS != opal_hwloc_base_get_topology()) { + if (OPAL_SUCCESS != (ret = opal_hwloc_base_get_topology())) { error = "topology discovery"; goto error; } diff --git a/orte/mca/ess/pmi/ess_pmi_module.c b/orte/mca/ess/pmi/ess_pmi_module.c index 6dcab96051..6ce8c6f90a 100644 --- a/orte/mca/ess/pmi/ess_pmi_module.c +++ b/orte/mca/ess/pmi/ess_pmi_module.c @@ -96,16 +96,6 @@ static int rte_init(void) goto error; } -#if OPAL_HAVE_HWLOC - /* get the topology */ - if (NULL == opal_hwloc_topology) { - if (OPAL_SUCCESS != opal_hwloc_base_get_topology()) { - error = "topology discovery"; - goto error; - } - } -#endif - /* we don't have to call pmix.init because the pmix select did it */ /**** THE FOLLOWING ARE REQUIRED VALUES ***/ @@ -202,6 +192,16 @@ static int rte_init(void) free(string_key); } +#if OPAL_HAVE_HWLOC + /* if it wasn't passed down to us, get the topology */ + if (NULL == opal_hwloc_topology) { + if (OPAL_SUCCESS != (ret = opal_hwloc_base_get_topology())) { + error = "topology discovery"; + goto error; + } + } +#endif + /* we don't need to force the routed system to pick the * "direct" component as that should happen automatically * in those cases where we are direct launched (i.e., no diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index 5dc8d5cb81..09b138ce5f 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -13,7 +13,7 @@ * Copyright (c) 2009 Institut National de Recherche en Informatique * et Automatique. All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -85,21 +85,32 @@ void orte_plm_base_daemons_reported(int fd, short args, void *cbdata) #if OPAL_HAVE_HWLOC { - orte_topology_t *t; + hwloc_topology_t t; orte_node_t *node; int i; - /* set the nodes to point to the topology - * of mpirun's node for any nodes that didn't send - * back their topology, thus indicating they are different - */ - t = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, 0); - for (i=1; i < orte_node_pool->size; i++) { - if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { - continue; + /* if we got back topology info from the first node, then we use + * it as the "standard" for all other nodes unless they sent + * back their own topology */ + if (1 < orte_process_info.num_procs) { + if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 1)) || + NULL == (t = node->topology)) { + /* something is wrong */ + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + ORTE_FORCED_TERMINATE(ORTE_ERR_NOT_FOUND); + OBJ_RELEASE(caddy); + return; } - if (NULL == node->topology) { - node->topology = t->topo; + OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, + "%s plm:base:setting topo to that from node %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name)); + for (i=2; i < orte_node_pool->size; i++) { + if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { + continue; + } + if (NULL == node->topology) { + node->topology = t; + } } } /* if this is an unmanaged allocation, then set the default @@ -874,12 +885,14 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, if (10 < opal_output_get_verbosity(orte_plm_base_framework.framework_output)) { opal_dss.dump(0, topo, OPAL_HWLOC_TOPO); } - if (orte_hetero_nodes) { + if (1 == dname.vpid || orte_hetero_nodes) { /* the user has told us that something is different, so just store it */ OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, "%s ADDING TOPOLOGY PER USER REQUEST", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); t = OBJ_NEW(orte_topology_t); + /* filter the topology as we'll need it that way later */ + opal_hwloc_base_filter_cpus(topo); t->topo = topo; t->sig = sig; opal_pointer_array_add(orte_node_topologies, t); @@ -909,6 +922,8 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, "%s NEW TOPOLOGY - ADDING", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); t = OBJ_NEW(orte_topology_t); + /* filter the topology as we'll need it that way later */ + opal_hwloc_base_filter_cpus(topo); t->topo = topo; t->sig = sig; opal_pointer_array_add(orte_node_topologies, t); diff --git a/orte/mca/rmaps/base/rmaps_base_binding.c b/orte/mca/rmaps/base/rmaps_base_binding.c index cc831cc6b9..daac358862 100644 --- a/orte/mca/rmaps/base/rmaps_base_binding.c +++ b/orte/mca/rmaps/base/rmaps_base_binding.c @@ -351,6 +351,10 @@ static int bind_downwards(orte_job_t *jdata, nxt_obj = trg_obj->next_cousin; } while (total_cpus < orte_rmaps_base.cpus_per_rank); hwloc_bitmap_list_asprintf(&cpu_bitmap, totalcpuset); + opal_output_verbose(5, orte_rmaps_base_framework.framework_output, + "%s PROC %s BITMAP %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&proc->name), cpu_bitmap); orte_set_attribute(&proc->attributes, ORTE_PROC_CPU_BITMAP, ORTE_ATTR_GLOBAL, cpu_bitmap, OPAL_STRING); if (NULL != cpu_bitmap) { free(cpu_bitmap); @@ -680,9 +684,9 @@ int orte_rmaps_base_compute_bindings(orte_job_t *jdata) int bind_depth, map_depth; opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps: compute bindings for job %s with policy %s", + "mca:rmaps: compute bindings for job %s with policy %s[%x]", ORTE_JOBID_PRINT(jdata->jobid), - opal_hwloc_base_print_binding(jdata->map->binding)); + opal_hwloc_base_print_binding(jdata->map->binding), jdata->map->binding); map = ORTE_GET_MAPPING_POLICY(jdata->map->mapping); bind = OPAL_GET_BINDING_POLICY(jdata->map->binding); diff --git a/orte/mca/rmaps/base/rmaps_base_map_job.c b/orte/mca/rmaps/base/rmaps_base_map_job.c index 4dd9a83e5d..0a15737494 100644 --- a/orte/mca/rmaps/base/rmaps_base_map_job.c +++ b/orte/mca/rmaps/base/rmaps_base_map_job.c @@ -153,10 +153,6 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata) /* ranking was already handled, so just use it here */ map->ranking = orte_rmaps_base.ranking; -#if OPAL_HAVE_HWLOC - map->binding = opal_hwloc_binding_policy; -#endif - if (NULL != orte_rmaps_base.ppr) { map->ppr = strdup(orte_rmaps_base.ppr); } @@ -231,14 +227,42 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata) if (!ORTE_RANKING_POLICY_IS_SET(jdata->map->ranking)) { jdata->map->ranking = orte_rmaps_base.ranking; } -#if OPAL_HAVE_HWLOC - if (!OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) { - jdata->map->binding = opal_hwloc_binding_policy; - } -#endif } #if OPAL_HAVE_HWLOC + /* define the binding policy for this job - if the user specified one + * already (e.g., during the call to comm_spawn), then we don't + * override it */ + if (!OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) { + /* if the user specified a default binding policy via + * MCA param, then we use it */ + if (OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy)) { + jdata->map->binding = opal_hwloc_binding_policy; + } else { + /* if nothing was specified, then we default to a policy + * based on number of procs and cpus_per_rank */ + if (2 <= nprocs) { + if (1 < orte_rmaps_base.cpus_per_rank) { + /* assigning multiple cpus to a rank implies threading, + * so we only bind to the NUMA level */ + OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NUMA); + } else { + /* for performance, bind to core */ + OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_CORE); + } + } else { + if (1 < orte_rmaps_base.cpus_per_rank) { + /* assigning multiple cpus to a rank implies threading, + * so we only bind to the NUMA level */ + OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_NUMA); + } else { + /* for performance, bind to socket */ + OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_SOCKET); + } + } + } + } + /* if we are not going to launch, then we need to set any * undefined topologies to match our own so the mapper * can operate diff --git a/orte/orted/orted_main.c b/orte/orted/orted_main.c index d2c646e04d..ae9582d7d4 100644 --- a/orte/orted/orted_main.c +++ b/orte/orted/orted_main.c @@ -15,7 +15,7 @@ * Copyright (c) 2009 Institut National de Recherche en Informatique * et Automatique. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -752,8 +752,10 @@ int orte_daemon(int argc, char *argv[]) char *coprocessors; uint8_t tflag; - /* add the local topology, if different from the HNP's or user directed us to */ - if (orte_hetero_nodes || 0 != strcmp(orte_topo_signature, orted_globals.hnp_topo_sig)) { + /* add the local topology, if different from the HNP's or user directed us to, + * but always if we are the first daemon to ensure we get a compute node */ + if (1 == ORTE_PROC_MY_NAME->vpid || orte_hetero_nodes || + 0 != strcmp(orte_topo_signature, orted_globals.hnp_topo_sig)) { tflag = 1; if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &tflag, 1, OPAL_UINT8))) { ORTE_ERROR_LOG(ret);