Define a new binding method and qualifier
Allow users to request that procs be bound to a cpu in a given cpu-list based on their corresponding local rank.
Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
родитель
151d13c248
Коммит
f17d47087a
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved
|
* Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved
|
||||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
|
||||||
* Copyright (c) 2016-2017 Research Organization for Information Science
|
* Copyright (c) 2016-2017 Research Organization for Information Science
|
||||||
* and Technology (RIST). All rights reserved.
|
* and Technology (RIST). All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
@ -119,9 +119,9 @@ static int opal_hwloc_base_register(mca_base_register_flag_t flags)
|
|||||||
opal_hwloc_base_binding_policy = NULL;
|
opal_hwloc_base_binding_policy = NULL;
|
||||||
(void) mca_base_var_register("opal", "hwloc", "base", "binding_policy",
|
(void) mca_base_var_register("opal", "hwloc", "base", "binding_policy",
|
||||||
"Policy for binding processes. Allowed values: none, hwthread, core, l1cache, l2cache, "
|
"Policy for binding processes. Allowed values: none, hwthread, core, l1cache, l2cache, "
|
||||||
"l3cache, socket, numa, board (\"none\" is the default when oversubscribed, \"core\" is "
|
"l3cache, socket, numa, board, cpuset (\"none\" is the default when oversubscribed, \"core\" is "
|
||||||
"the default when np<=2, and \"numa\" is the default when np>2). Allowed qualifiers: "
|
"the default when np<=2, and \"numa\" is the default when np>2). Allowed qualifiers: "
|
||||||
"overload-allowed, if-supported",
|
"overload-allowed, if-supported, ordered",
|
||||||
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9,
|
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9,
|
||||||
MCA_BASE_VAR_SCOPE_READONLY, &opal_hwloc_base_binding_policy);
|
MCA_BASE_VAR_SCOPE_READONLY, &opal_hwloc_base_binding_policy);
|
||||||
|
|
||||||
@ -504,6 +504,8 @@ int opal_hwloc_base_set_binding_policy(opal_binding_policy_t *policy, char *spec
|
|||||||
} else if (0 == strncasecmp(quals[i], "overload-allowed", strlen(quals[i])) ||
|
} else if (0 == strncasecmp(quals[i], "overload-allowed", strlen(quals[i])) ||
|
||||||
0 == strncasecmp(quals[i], "oversubscribe-allowed", strlen(quals[i]))) {
|
0 == strncasecmp(quals[i], "oversubscribe-allowed", strlen(quals[i]))) {
|
||||||
tmp |= OPAL_BIND_ALLOW_OVERLOAD;
|
tmp |= OPAL_BIND_ALLOW_OVERLOAD;
|
||||||
|
} else if (0 == strncasecmp(quals[i], "ordered", strlen(quals[i]))) {
|
||||||
|
tmp |= OPAL_BIND_ORDERED;
|
||||||
} else {
|
} else {
|
||||||
/* unknown option */
|
/* unknown option */
|
||||||
opal_output(0, "Unknown qualifier to binding policy: %s", spec);
|
opal_output(0, "Unknown qualifier to binding policy: %s", spec);
|
||||||
@ -534,6 +536,8 @@ int opal_hwloc_base_set_binding_policy(opal_binding_policy_t *policy, char *spec
|
|||||||
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_NUMA);
|
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_NUMA);
|
||||||
} else if (0 == strcasecmp(tmpvals[0], "board")) {
|
} else if (0 == strcasecmp(tmpvals[0], "board")) {
|
||||||
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_BOARD);
|
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_BOARD);
|
||||||
|
} else if (0 == strcasecmp(tmpvals[0], "cpuset")) {
|
||||||
|
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_CPUSET);
|
||||||
} else {
|
} else {
|
||||||
opal_show_help("help-opal-hwloc-base.txt", "invalid binding_policy", true, "binding", spec);
|
opal_show_help("help-opal-hwloc-base.txt", "invalid binding_policy", true, "binding", spec);
|
||||||
opal_argv_free(tmpvals);
|
opal_argv_free(tmpvals);
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
|
||||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
*
|
*
|
||||||
@ -172,8 +172,15 @@ typedef uint16_t opal_binding_policy_t;
|
|||||||
|
|
||||||
/* binding directives */
|
/* binding directives */
|
||||||
#define OPAL_BIND_IF_SUPPORTED 0x1000
|
#define OPAL_BIND_IF_SUPPORTED 0x1000
|
||||||
|
/* allow assignment of multiple procs to
|
||||||
|
* same cpu */
|
||||||
#define OPAL_BIND_ALLOW_OVERLOAD 0x2000
|
#define OPAL_BIND_ALLOW_OVERLOAD 0x2000
|
||||||
|
/* the binding policy was specified by the user */
|
||||||
#define OPAL_BIND_GIVEN 0x4000
|
#define OPAL_BIND_GIVEN 0x4000
|
||||||
|
/* bind each rank to the cpu in the given
|
||||||
|
* cpu list based on its node-local-rank */
|
||||||
|
#define OPAL_BIND_ORDERED 0x8000
|
||||||
|
|
||||||
/* binding policies - any changes in these
|
/* binding policies - any changes in these
|
||||||
* values must be reflected in orte/mca/rmaps/rmaps.h
|
* values must be reflected in orte/mca/rmaps/rmaps.h
|
||||||
*/
|
*/
|
||||||
@ -190,7 +197,7 @@ typedef uint16_t opal_binding_policy_t;
|
|||||||
#define OPAL_GET_BINDING_POLICY(pol) \
|
#define OPAL_GET_BINDING_POLICY(pol) \
|
||||||
((pol) & 0x0fff)
|
((pol) & 0x0fff)
|
||||||
#define OPAL_SET_BINDING_POLICY(target, pol) \
|
#define OPAL_SET_BINDING_POLICY(target, pol) \
|
||||||
(target) = (pol) | (((target) & 0x2000) | OPAL_BIND_GIVEN)
|
(target) = (pol) | (((target) & 0xf000) | OPAL_BIND_GIVEN)
|
||||||
#define OPAL_SET_DEFAULT_BINDING_POLICY(target, pol) \
|
#define OPAL_SET_DEFAULT_BINDING_POLICY(target, pol) \
|
||||||
do { \
|
do { \
|
||||||
if (!OPAL_BINDING_POLICY_IS_SET((target))) { \
|
if (!OPAL_BINDING_POLICY_IS_SET((target))) { \
|
||||||
@ -208,6 +215,8 @@ typedef uint16_t opal_binding_policy_t;
|
|||||||
/* macro to detect if binding is forced */
|
/* macro to detect if binding is forced */
|
||||||
#define OPAL_BIND_OVERLOAD_ALLOWED(n) \
|
#define OPAL_BIND_OVERLOAD_ALLOWED(n) \
|
||||||
(OPAL_BIND_ALLOW_OVERLOAD & (n))
|
(OPAL_BIND_ALLOW_OVERLOAD & (n))
|
||||||
|
#define OPAL_BIND_ORDERED_REQUESTED(n) \
|
||||||
|
(OPAL_BIND_ORDERED & (n))
|
||||||
|
|
||||||
/* some global values */
|
/* some global values */
|
||||||
OPAL_DECLSPEC extern hwloc_topology_t opal_hwloc_topology;
|
OPAL_DECLSPEC extern hwloc_topology_t opal_hwloc_topology;
|
||||||
|
@ -13,7 +13,7 @@
|
|||||||
# Copyright (c) 2011-2015 Cisco Systems, Inc. All rights reserved.
|
# Copyright (c) 2011-2015 Cisco Systems, Inc. All rights reserved.
|
||||||
# Copyright (c) 2011 Los Alamos National Security, LLC.
|
# Copyright (c) 2011 Los Alamos National Security, LLC.
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
# Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
# Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
|
||||||
# $COPYRIGHT$
|
# $COPYRIGHT$
|
||||||
#
|
#
|
||||||
# Additional copyrights may follow
|
# Additional copyrights may follow
|
||||||
@ -420,3 +420,16 @@ compute node failed:
|
|||||||
|
|
||||||
We cannot continue - please check that the policy is in
|
We cannot continue - please check that the policy is in
|
||||||
accordance with the actual available hardware.
|
accordance with the actual available hardware.
|
||||||
|
#
|
||||||
|
[rmaps:insufficient-cpus]
|
||||||
|
The request to bind processes to cpus in a provided list
|
||||||
|
of logical IDs based on their local rank on a node cannot
|
||||||
|
be met due to there being more processes on a node than
|
||||||
|
available cpus:
|
||||||
|
|
||||||
|
Node: %s
|
||||||
|
Local rank: %d
|
||||||
|
Cpu list: %s
|
||||||
|
|
||||||
|
Please adjust either the number of processes per node or
|
||||||
|
the list of cpus.
|
||||||
|
@ -505,6 +505,9 @@ static int bind_to_cpuset(orte_job_t *jdata)
|
|||||||
opal_hwloc_topo_data_t *sum;
|
opal_hwloc_topo_data_t *sum;
|
||||||
hwloc_obj_t root;
|
hwloc_obj_t root;
|
||||||
char *cpu_bitmap;
|
char *cpu_bitmap;
|
||||||
|
unsigned id;
|
||||||
|
orte_local_rank_t lrank;
|
||||||
|
hwloc_bitmap_t mycpuset;
|
||||||
|
|
||||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||||
"mca:rmaps: bind job %s to cpus %s",
|
"mca:rmaps: bind job %s to cpus %s",
|
||||||
@ -512,6 +515,7 @@ static int bind_to_cpuset(orte_job_t *jdata)
|
|||||||
opal_hwloc_base_cpu_list);
|
opal_hwloc_base_cpu_list);
|
||||||
/* initialize */
|
/* initialize */
|
||||||
map = jdata->map;
|
map = jdata->map;
|
||||||
|
mycpuset = hwloc_bitmap_alloc();
|
||||||
|
|
||||||
for (i=0; i < map->nodes->size; i++) {
|
for (i=0; i < map->nodes->size; i++) {
|
||||||
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
|
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
|
||||||
@ -569,6 +573,8 @@ static int bind_to_cpuset(orte_job_t *jdata)
|
|||||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||||
return ORTE_ERR_NOT_FOUND;
|
return ORTE_ERR_NOT_FOUND;
|
||||||
}
|
}
|
||||||
|
/* the cpu list in sum->available has already been filtered
|
||||||
|
* to include _only_ the cpus defined by the user */
|
||||||
for (j=0; j < node->procs->size; j++) {
|
for (j=0; j < node->procs->size; j++) {
|
||||||
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
|
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
|
||||||
continue;
|
continue;
|
||||||
@ -577,13 +583,38 @@ static int bind_to_cpuset(orte_job_t *jdata)
|
|||||||
if (proc->name.jobid != jdata->jobid) {
|
if (proc->name.jobid != jdata->jobid) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
hwloc_bitmap_list_asprintf(&cpu_bitmap, sum->available);
|
if (OPAL_BIND_ORDERED_REQUESTED(jdata->map->binding)) {
|
||||||
|
/* assign each proc, in local rank order, to
|
||||||
|
* the corresponding cpu in the list */
|
||||||
|
id = hwloc_bitmap_first(sum->available);
|
||||||
|
lrank = 0;
|
||||||
|
while (lrank != proc->local_rank) {
|
||||||
|
id = hwloc_bitmap_next(sum->available, id);
|
||||||
|
if ((unsigned)-1 == id) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
++lrank;
|
||||||
|
}
|
||||||
|
if ((unsigned)-1 ==id) {
|
||||||
|
/* ran out of cpus - that's an error */
|
||||||
|
orte_show_help("help-orte-rmaps-base.txt", "rmaps:insufficient-cpus", true,
|
||||||
|
node->name, (int)proc->local_rank, opal_hwloc_base_cpu_list);
|
||||||
|
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||||
|
}
|
||||||
|
/* set the bit of interest */
|
||||||
|
hwloc_bitmap_only(mycpuset, id);
|
||||||
|
} else {
|
||||||
|
/* bind the proc to all assigned cpus */
|
||||||
|
mycpuset = sum->available;
|
||||||
|
}
|
||||||
|
hwloc_bitmap_list_asprintf(&cpu_bitmap, mycpuset);
|
||||||
orte_set_attribute(&proc->attributes, ORTE_PROC_CPU_BITMAP, ORTE_ATTR_GLOBAL, cpu_bitmap, OPAL_STRING);
|
orte_set_attribute(&proc->attributes, ORTE_PROC_CPU_BITMAP, ORTE_ATTR_GLOBAL, cpu_bitmap, OPAL_STRING);
|
||||||
if (NULL != cpu_bitmap) {
|
if (NULL != cpu_bitmap) {
|
||||||
free(cpu_bitmap);
|
free(cpu_bitmap);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
hwloc_bitmap_free(mycpuset);
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -280,6 +280,7 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
|
|||||||
return ORTE_ERR_SILENT;
|
return ORTE_ERR_SILENT;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (0 < orte_rmaps_base.cpus_per_rank) {
|
if (0 < orte_rmaps_base.cpus_per_rank) {
|
||||||
orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
|
orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
|
||||||
"--cpus-per-proc, -cpus-per-proc, --cpus-per-rank, -cpus-per-rank",
|
"--cpus-per-proc, -cpus-per-proc, --cpus-per-rank, -cpus-per-rank",
|
||||||
|
@ -199,6 +199,7 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* check for oversubscribe directives */
|
/* check for oversubscribe directives */
|
||||||
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping))) {
|
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping))) {
|
||||||
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
|
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
|
||||||
@ -212,12 +213,14 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* check for no-use-local directive */
|
/* check for no-use-local directive */
|
||||||
if (!(ORTE_MAPPING_LOCAL_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping))) {
|
if (!(ORTE_MAPPING_LOCAL_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping))) {
|
||||||
if (ORTE_MAPPING_NO_USE_LOCAL & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
|
if (ORTE_MAPPING_NO_USE_LOCAL & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
|
||||||
ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_USE_LOCAL);
|
ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_USE_LOCAL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ditto for rank policy */
|
/* ditto for rank policy */
|
||||||
if (!ORTE_RANKING_POLICY_IS_SET(jdata->map->ranking)) {
|
if (!ORTE_RANKING_POLICY_IS_SET(jdata->map->ranking)) {
|
||||||
jdata->map->ranking = orte_rmaps_base.ranking;
|
jdata->map->ranking = orte_rmaps_base.ranking;
|
||||||
|
@ -14,7 +14,7 @@
|
|||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2008 Voltaire. All rights reserved
|
* Copyright (c) 2008 Voltaire. All rights reserved
|
||||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
|
||||||
* Copyright (c) 2015 Research Organization for Information Science
|
* Copyright (c) 2015 Research Organization for Information Science
|
||||||
* and Technology (RIST). All rights reserved.
|
* and Technology (RIST). All rights reserved.
|
||||||
* Copyright (c) 2016 IBM Corporation. All rights reserved.
|
* Copyright (c) 2016 IBM Corporation. All rights reserved.
|
||||||
@ -110,6 +110,10 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
|
|||||||
/* NOT FOR US */
|
/* NOT FOR US */
|
||||||
return ORTE_ERR_TAKE_NEXT_OPTION;
|
return ORTE_ERR_TAKE_NEXT_OPTION;
|
||||||
}
|
}
|
||||||
|
if (OPAL_BIND_ORDERED_REQUESTED(jdata->map->binding)) {
|
||||||
|
/* NOT FOR US */
|
||||||
|
return ORTE_ERR_TAKE_NEXT_OPTION;
|
||||||
|
}
|
||||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||||
"mca:rmaps:rank_file: mapping job %s",
|
"mca:rmaps:rank_file: mapping job %s",
|
||||||
ORTE_JOBID_PRINT(jdata->jobid));
|
ORTE_JOBID_PRINT(jdata->jobid));
|
||||||
|
@ -12,7 +12,7 @@
|
|||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2008 Voltaire. All rights reserved
|
* Copyright (c) 2008 Voltaire. All rights reserved
|
||||||
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
|
||||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
@ -106,7 +106,8 @@ static int orte_rmaps_rank_file_register(void)
|
|||||||
static int orte_rmaps_rank_file_open(void)
|
static int orte_rmaps_rank_file_open(void)
|
||||||
{
|
{
|
||||||
/* ensure we flag mapping by user */
|
/* ensure we flag mapping by user */
|
||||||
if (NULL != opal_hwloc_base_cpu_list || NULL != orte_rankfile) {
|
if ((NULL != opal_hwloc_base_cpu_list && !OPAL_BIND_ORDERED_REQUESTED(opal_hwloc_binding_policy)) ||
|
||||||
|
NULL != orte_rankfile) {
|
||||||
if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
|
if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
|
||||||
/* if a non-default mapping is already specified, then we
|
/* if a non-default mapping is already specified, then we
|
||||||
* have an error
|
* have an error
|
||||||
|
Загрузка…
Ссылка в новой задаче
Block a user