There are cases where we want to use the novm state machine, but the topology of the backend (compute) nodes differs from that of the node where mpirun is executing. In those cases, we can wind up thinking we are oversubscribed because the head node has fewer cores than the compute nodes.
To resolve this situation, add the ability to specify a backend topology file that mpirun shall use for its mapping operations. Create a new "set_topology" function in opal hwloc to support it. This commit was SVN r28682.
Этот коммит содержится в:
родитель
75e4b92edd
Коммит
446e33a5d8
@ -139,6 +139,11 @@ OPAL_DECLSPEC int opal_hwloc_base_filter_cpus(hwloc_topology_t topo);
|
||||
*/
|
||||
OPAL_DECLSPEC int opal_hwloc_base_get_topology(void);
|
||||
|
||||
/**
|
||||
* Set the hwloc topology to the one described by the given XML topology file
|
||||
*/
|
||||
OPAL_DECLSPEC int opal_hwloc_base_set_topology(char *topofile);
|
||||
|
||||
/**
|
||||
* Free the hwloc topology.
|
||||
*/
|
||||
|
@ -231,6 +231,87 @@ int opal_hwloc_base_get_topology(void)
|
||||
return rc;
|
||||
}
|
||||
|
||||
int opal_hwloc_base_set_topology(char *topofile)
|
||||
{
|
||||
hwloc_obj_t obj;
|
||||
unsigned j, k;
|
||||
struct hwloc_topology_support *support;
|
||||
int rc;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output,
|
||||
"hwloc:base:set_topology"));
|
||||
|
||||
if (NULL != opal_hwloc_topology) {
|
||||
hwloc_topology_destroy(opal_hwloc_topology);
|
||||
}
|
||||
if (0 != hwloc_topology_init(&opal_hwloc_topology)) {
|
||||
return OPAL_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
if (0 != hwloc_topology_set_xml(opal_hwloc_topology, topofile)) {
|
||||
hwloc_topology_destroy(opal_hwloc_topology);
|
||||
return OPAL_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
/* since we are loading this from an external source, we have to
|
||||
* explicitly set a flag so hwloc sets things up correctly
|
||||
*/
|
||||
if (0 != hwloc_topology_set_flags(opal_hwloc_topology,
|
||||
(HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM |
|
||||
HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM |
|
||||
HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) {
|
||||
hwloc_topology_destroy(opal_hwloc_topology);
|
||||
return OPAL_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
if (0 != hwloc_topology_load(opal_hwloc_topology)) {
|
||||
hwloc_topology_destroy(opal_hwloc_topology);
|
||||
return OPAL_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
/* remove the hostname from the topology. Unfortunately, hwloc
|
||||
* decided to add the source hostname to the "topology", thus
|
||||
* rendering it unusable as a pure topological description. So
|
||||
* we remove that information here.
|
||||
*/
|
||||
obj = hwloc_get_root_obj(opal_hwloc_topology);
|
||||
for (k=0; k < obj->infos_count; k++) {
|
||||
if (NULL == obj->infos[k].name ||
|
||||
NULL == obj->infos[k].value) {
|
||||
continue;
|
||||
}
|
||||
if (0 == strncmp(obj->infos[k].name, "HostName", strlen("HostName"))) {
|
||||
free(obj->infos[k].name);
|
||||
free(obj->infos[k].value);
|
||||
/* left justify the array */
|
||||
for (j=k; j < obj->infos_count-1; j++) {
|
||||
obj->infos[j] = obj->infos[j+1];
|
||||
}
|
||||
obj->infos[obj->infos_count-1].name = NULL;
|
||||
obj->infos[obj->infos_count-1].value = NULL;
|
||||
obj->infos_count--;
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* unfortunately, hwloc does not include support info in its
|
||||
* xml output :-(( We default to assuming it is present as
|
||||
* systems that use this option are likely to provide
|
||||
* binding support
|
||||
*/
|
||||
support = (struct hwloc_topology_support*)hwloc_topology_get_support(opal_hwloc_topology);
|
||||
support->cpubind->set_thisproc_cpubind = true;
|
||||
support->membind->set_thisproc_membind = true;
|
||||
|
||||
/* filter the cpus thru any default cpu set */
|
||||
rc = opal_hwloc_base_filter_cpus(opal_hwloc_topology);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* fill opal_cache_line_size global with the smallest L1 cache
|
||||
line size */
|
||||
fill_cache_line_size();
|
||||
|
||||
/* all done */
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
static void free_object(hwloc_obj_t obj)
|
||||
{
|
||||
opal_hwloc_obj_data_t *data;
|
||||
|
@ -204,3 +204,14 @@ No objects of the specified type were found on at least one node:
|
||||
Node: %s
|
||||
|
||||
The map cannot be done as specified.
|
||||
#
|
||||
[topo-file]
|
||||
A topology file was given for the compute nodes, but
|
||||
we were unable to correctly process it. Common errors
|
||||
include incorrectly specifying the path to the file,
|
||||
or the file being generated in a way that is incompatible
|
||||
with the version of hwloc being used by OMPI.
|
||||
|
||||
File: %s
|
||||
|
||||
Please correct the problem and try again.
|
||||
|
@ -33,6 +33,7 @@
|
||||
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
|
||||
#include "orte/mca/rmaps/base/rmaps_private.h"
|
||||
#include "orte/mca/rmaps/base/base.h"
|
||||
@ -65,6 +66,7 @@ static bool rmaps_base_no_oversubscribe = false;
|
||||
static bool rmaps_base_oversubscribe = false;
|
||||
static bool rmaps_base_display_devel_map = false;
|
||||
static bool rmaps_base_display_diffable_map = false;
|
||||
static char *rmaps_base_topo_file = NULL;
|
||||
|
||||
static int orte_rmaps_base_register(mca_base_register_flag_t flags)
|
||||
{
|
||||
@ -202,6 +204,13 @@ static int orte_rmaps_base_register(mca_base_register_flag_t flags)
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY, &rmaps_base_display_diffable_map);
|
||||
|
||||
rmaps_base_topo_file = NULL;
|
||||
(void) mca_base_var_register("orte", "rmaps", "base", "topology",
|
||||
"hwloc topology file (xml format) describing the topology of the compute nodes [default: none]",
|
||||
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY, &rmaps_base_topo_file);
|
||||
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
@ -237,6 +246,18 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
|
||||
orte_rmaps_base.mapping = 0;
|
||||
orte_rmaps_base.ranking = 0;
|
||||
|
||||
/* if a topology file was given, then set our topology
|
||||
* from it. Even though our actual topology may differ,
|
||||
* mpirun only needs to see the compute node topology
|
||||
* for mapping purposes
|
||||
*/
|
||||
if (NULL != rmaps_base_topo_file) {
|
||||
if (OPAL_SUCCESS != (rc = opal_hwloc_base_set_topology(rmaps_base_topo_file))) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "topo-file", true, rmaps_base_topo_file);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
}
|
||||
|
||||
if (NULL == rmaps_base_mapping_policy) {
|
||||
ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYSLOT);
|
||||
ORTE_UNSET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user