Define a new binding method and qualifier
Allow users to request that procs be bound to a cpu in a given cpu-list based on their corresponding local rank. Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
родитель
151d13c248
Коммит
f17d47087a
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved
|
||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2016-2017 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -119,9 +119,9 @@ static int opal_hwloc_base_register(mca_base_register_flag_t flags)
|
||||
opal_hwloc_base_binding_policy = NULL;
|
||||
(void) mca_base_var_register("opal", "hwloc", "base", "binding_policy",
|
||||
"Policy for binding processes. Allowed values: none, hwthread, core, l1cache, l2cache, "
|
||||
"l3cache, socket, numa, board (\"none\" is the default when oversubscribed, \"core\" is "
|
||||
"l3cache, socket, numa, board, cpuset (\"none\" is the default when oversubscribed, \"core\" is "
|
||||
"the default when np<=2, and \"numa\" is the default when np>2). Allowed qualifiers: "
|
||||
"overload-allowed, if-supported",
|
||||
"overload-allowed, if-supported, ordered",
|
||||
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY, &opal_hwloc_base_binding_policy);
|
||||
|
||||
@ -504,6 +504,8 @@ int opal_hwloc_base_set_binding_policy(opal_binding_policy_t *policy, char *spec
|
||||
} else if (0 == strncasecmp(quals[i], "overload-allowed", strlen(quals[i])) ||
|
||||
0 == strncasecmp(quals[i], "oversubscribe-allowed", strlen(quals[i]))) {
|
||||
tmp |= OPAL_BIND_ALLOW_OVERLOAD;
|
||||
} else if (0 == strncasecmp(quals[i], "ordered", strlen(quals[i]))) {
|
||||
tmp |= OPAL_BIND_ORDERED;
|
||||
} else {
|
||||
/* unknown option */
|
||||
opal_output(0, "Unknown qualifier to binding policy: %s", spec);
|
||||
@ -534,6 +536,8 @@ int opal_hwloc_base_set_binding_policy(opal_binding_policy_t *policy, char *spec
|
||||
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_NUMA);
|
||||
} else if (0 == strcasecmp(tmpvals[0], "board")) {
|
||||
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_BOARD);
|
||||
} else if (0 == strcasecmp(tmpvals[0], "cpuset")) {
|
||||
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_CPUSET);
|
||||
} else {
|
||||
opal_show_help("help-opal-hwloc-base.txt", "invalid binding_policy", true, "binding", spec);
|
||||
opal_argv_free(tmpvals);
|
||||
|
@ -1,7 +1,7 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
*
|
||||
@ -172,8 +172,15 @@ typedef uint16_t opal_binding_policy_t;
|
||||
|
||||
/* binding directives */
|
||||
#define OPAL_BIND_IF_SUPPORTED 0x1000
|
||||
/* allow assignment of multiple procs to
|
||||
* same cpu */
|
||||
#define OPAL_BIND_ALLOW_OVERLOAD 0x2000
|
||||
/* the binding policy was specified by the user */
|
||||
#define OPAL_BIND_GIVEN 0x4000
|
||||
/* bind each rank to the cpu in the given
|
||||
* cpu list based on its node-local-rank */
|
||||
#define OPAL_BIND_ORDERED 0x8000
|
||||
|
||||
/* binding policies - any changes in these
|
||||
* values must be reflected in orte/mca/rmaps/rmaps.h
|
||||
*/
|
||||
@ -190,7 +197,7 @@ typedef uint16_t opal_binding_policy_t;
|
||||
#define OPAL_GET_BINDING_POLICY(pol) \
|
||||
((pol) & 0x0fff)
|
||||
#define OPAL_SET_BINDING_POLICY(target, pol) \
|
||||
(target) = (pol) | (((target) & 0x2000) | OPAL_BIND_GIVEN)
|
||||
(target) = (pol) | (((target) & 0xf000) | OPAL_BIND_GIVEN)
|
||||
#define OPAL_SET_DEFAULT_BINDING_POLICY(target, pol) \
|
||||
do { \
|
||||
if (!OPAL_BINDING_POLICY_IS_SET((target))) { \
|
||||
@ -208,6 +215,8 @@ typedef uint16_t opal_binding_policy_t;
|
||||
/* macros to detect if overloading is allowed or ordered binding was requested */
|
||||
#define OPAL_BIND_OVERLOAD_ALLOWED(n) \
|
||||
(OPAL_BIND_ALLOW_OVERLOAD & (n))
|
||||
#define OPAL_BIND_ORDERED_REQUESTED(n) \
|
||||
(OPAL_BIND_ORDERED & (n))
|
||||
|
||||
/* some global values */
|
||||
OPAL_DECLSPEC extern hwloc_topology_t opal_hwloc_topology;
|
||||
|
@ -13,7 +13,7 @@
|
||||
# Copyright (c) 2011-2015 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2011 Los Alamos National Security, LLC.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
# Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -420,3 +420,16 @@ compute node failed:
|
||||
|
||||
We cannot continue - please check that the policy is in
|
||||
accordance with the actual available hardware.
|
||||
#
|
||||
[rmaps:insufficient-cpus]
|
||||
The request to bind processes to cpus in a provided list
|
||||
of logical IDs based on their local rank on a node cannot
|
||||
be met due to there being more processes on a node than
|
||||
available cpus:
|
||||
|
||||
Node: %s
|
||||
Local rank: %d
|
||||
Cpu list: %s
|
||||
|
||||
Please adjust either the number of processes per node or
|
||||
the list of cpus.
|
||||
|
@ -505,6 +505,9 @@ static int bind_to_cpuset(orte_job_t *jdata)
|
||||
opal_hwloc_topo_data_t *sum;
|
||||
hwloc_obj_t root;
|
||||
char *cpu_bitmap;
|
||||
unsigned id;
|
||||
orte_local_rank_t lrank;
|
||||
hwloc_bitmap_t mycpuset;
|
||||
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps: bind job %s to cpus %s",
|
||||
@ -512,6 +515,7 @@ static int bind_to_cpuset(orte_job_t *jdata)
|
||||
opal_hwloc_base_cpu_list);
|
||||
/* initialize */
|
||||
map = jdata->map;
|
||||
mycpuset = hwloc_bitmap_alloc();
|
||||
|
||||
for (i=0; i < map->nodes->size; i++) {
|
||||
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
|
||||
@ -569,6 +573,8 @@ static int bind_to_cpuset(orte_job_t *jdata)
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
/* the cpu list in sum->available has already been filtered
|
||||
* to include _only_ the cpus defined by the user */
|
||||
for (j=0; j < node->procs->size; j++) {
|
||||
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
|
||||
continue;
|
||||
@ -577,13 +583,38 @@ static int bind_to_cpuset(orte_job_t *jdata)
|
||||
if (proc->name.jobid != jdata->jobid) {
|
||||
continue;
|
||||
}
|
||||
hwloc_bitmap_list_asprintf(&cpu_bitmap, sum->available);
|
||||
if (OPAL_BIND_ORDERED_REQUESTED(jdata->map->binding)) {
|
||||
/* assign each proc, in local rank order, to
|
||||
* the corresponding cpu in the list */
|
||||
id = hwloc_bitmap_first(sum->available);
|
||||
lrank = 0;
|
||||
while (lrank != proc->local_rank) {
|
||||
id = hwloc_bitmap_next(sum->available, id);
|
||||
if ((unsigned)-1 == id) {
|
||||
break;
|
||||
}
|
||||
++lrank;
|
||||
}
|
||||
if ((unsigned)-1 ==id) {
|
||||
/* ran out of cpus - that's an error */
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:insufficient-cpus", true,
|
||||
node->name, (int)proc->local_rank, opal_hwloc_base_cpu_list);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
/* set the bit of interest */
|
||||
hwloc_bitmap_only(mycpuset, id);
|
||||
} else {
|
||||
/* bind the proc to all assigned cpus */
|
||||
mycpuset = sum->available;
|
||||
}
|
||||
hwloc_bitmap_list_asprintf(&cpu_bitmap, mycpuset);
|
||||
orte_set_attribute(&proc->attributes, ORTE_PROC_CPU_BITMAP, ORTE_ATTR_GLOBAL, cpu_bitmap, OPAL_STRING);
|
||||
if (NULL != cpu_bitmap) {
|
||||
free(cpu_bitmap);
|
||||
}
|
||||
}
|
||||
}
|
||||
hwloc_bitmap_free(mycpuset);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -280,6 +280,7 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
}
|
||||
|
||||
if (0 < orte_rmaps_base.cpus_per_rank) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
|
||||
"--cpus-per-proc, -cpus-per-proc, --cpus-per-rank, -cpus-per-rank",
|
||||
|
@ -199,6 +199,7 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* check for oversubscribe directives */
|
||||
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping))) {
|
||||
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
|
||||
@ -212,12 +213,14 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* check for no-use-local directive */
|
||||
if (!(ORTE_MAPPING_LOCAL_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping))) {
|
||||
if (ORTE_MAPPING_NO_USE_LOCAL & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
|
||||
ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_USE_LOCAL);
|
||||
}
|
||||
}
|
||||
|
||||
/* ditto for rank policy */
|
||||
if (!ORTE_RANKING_POLICY_IS_SET(jdata->map->ranking)) {
|
||||
jdata->map->ranking = orte_rmaps_base.ranking;
|
||||
|
@ -14,7 +14,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008 Voltaire. All rights reserved
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2016 IBM Corporation. All rights reserved.
|
||||
@ -110,6 +110,10 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
|
||||
/* NOT FOR US */
|
||||
return ORTE_ERR_TAKE_NEXT_OPTION;
|
||||
}
|
||||
if (OPAL_BIND_ORDERED_REQUESTED(jdata->map->binding)) {
|
||||
/* NOT FOR US */
|
||||
return ORTE_ERR_TAKE_NEXT_OPTION;
|
||||
}
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps:rank_file: mapping job %s",
|
||||
ORTE_JOBID_PRINT(jdata->jobid));
|
||||
|
@ -12,7 +12,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008 Voltaire. All rights reserved
|
||||
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -106,7 +106,8 @@ static int orte_rmaps_rank_file_register(void)
|
||||
static int orte_rmaps_rank_file_open(void)
|
||||
{
|
||||
/* ensure we flag mapping by user */
|
||||
if (NULL != opal_hwloc_base_cpu_list || NULL != orte_rankfile) {
|
||||
if ((NULL != opal_hwloc_base_cpu_list && !OPAL_BIND_ORDERED_REQUESTED(opal_hwloc_binding_policy)) ||
|
||||
NULL != orte_rankfile) {
|
||||
if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
|
||||
/* if a non-default mapping is already specified, then we
|
||||
* have an error
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user