1
1

Define a new binding method and qualifier

Allow users to request that each proc be bound to the cpu in a given cpu-list that corresponds to its node-local rank.

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
Ralph Castain 2018-06-20 21:26:09 -07:00
родитель 151d13c248
Коммит f17d47087a
8 изменённых файлов: 76 добавлений и 10 удалений

Просмотреть файл

@ -1,6 +1,6 @@
/* /*
* Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2011-2017 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2016-2017 Research Organization for Information Science * Copyright (c) 2016-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
@ -119,9 +119,9 @@ static int opal_hwloc_base_register(mca_base_register_flag_t flags)
opal_hwloc_base_binding_policy = NULL; opal_hwloc_base_binding_policy = NULL;
(void) mca_base_var_register("opal", "hwloc", "base", "binding_policy", (void) mca_base_var_register("opal", "hwloc", "base", "binding_policy",
"Policy for binding processes. Allowed values: none, hwthread, core, l1cache, l2cache, " "Policy for binding processes. Allowed values: none, hwthread, core, l1cache, l2cache, "
"l3cache, socket, numa, board (\"none\" is the default when oversubscribed, \"core\" is " "l3cache, socket, numa, board, cpuset (\"none\" is the default when oversubscribed, \"core\" is "
"the default when np<=2, and \"numa\" is the default when np>2). Allowed qualifiers: " "the default when np<=2, and \"numa\" is the default when np>2). Allowed qualifiers: "
"overload-allowed, if-supported", "overload-allowed, if-supported, ordered",
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &opal_hwloc_base_binding_policy); MCA_BASE_VAR_SCOPE_READONLY, &opal_hwloc_base_binding_policy);
@ -504,6 +504,8 @@ int opal_hwloc_base_set_binding_policy(opal_binding_policy_t *policy, char *spec
} else if (0 == strncasecmp(quals[i], "overload-allowed", strlen(quals[i])) || } else if (0 == strncasecmp(quals[i], "overload-allowed", strlen(quals[i])) ||
0 == strncasecmp(quals[i], "oversubscribe-allowed", strlen(quals[i]))) { 0 == strncasecmp(quals[i], "oversubscribe-allowed", strlen(quals[i]))) {
tmp |= OPAL_BIND_ALLOW_OVERLOAD; tmp |= OPAL_BIND_ALLOW_OVERLOAD;
} else if (0 == strncasecmp(quals[i], "ordered", strlen(quals[i]))) {
tmp |= OPAL_BIND_ORDERED;
} else { } else {
/* unknown option */ /* unknown option */
opal_output(0, "Unknown qualifier to binding policy: %s", spec); opal_output(0, "Unknown qualifier to binding policy: %s", spec);
@ -534,6 +536,8 @@ int opal_hwloc_base_set_binding_policy(opal_binding_policy_t *policy, char *spec
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_NUMA); OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_NUMA);
} else if (0 == strcasecmp(tmpvals[0], "board")) { } else if (0 == strcasecmp(tmpvals[0], "board")) {
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_BOARD); OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_BOARD);
} else if (0 == strcasecmp(tmpvals[0], "cpuset")) {
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_CPUSET);
} else { } else {
opal_show_help("help-opal-hwloc-base.txt", "invalid binding_policy", true, "binding", spec); opal_show_help("help-opal-hwloc-base.txt", "invalid binding_policy", true, "binding", spec);
opal_argv_free(tmpvals); opal_argv_free(tmpvals);

Просмотреть файл

@ -1,7 +1,7 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights * Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* *
@ -172,8 +172,15 @@ typedef uint16_t opal_binding_policy_t;
/* binding directives */ /* binding directives */
#define OPAL_BIND_IF_SUPPORTED 0x1000 #define OPAL_BIND_IF_SUPPORTED 0x1000
/* allow assignment of multiple procs to
* same cpu */
#define OPAL_BIND_ALLOW_OVERLOAD 0x2000 #define OPAL_BIND_ALLOW_OVERLOAD 0x2000
/* the binding policy was specified by the user */
#define OPAL_BIND_GIVEN 0x4000 #define OPAL_BIND_GIVEN 0x4000
/* bind each rank to the cpu in the given
* cpu list based on its node-local-rank */
#define OPAL_BIND_ORDERED 0x8000
/* binding policies - any changes in these /* binding policies - any changes in these
* values must be reflected in orte/mca/rmaps/rmaps.h * values must be reflected in orte/mca/rmaps/rmaps.h
*/ */
@ -190,7 +197,7 @@ typedef uint16_t opal_binding_policy_t;
#define OPAL_GET_BINDING_POLICY(pol) \ #define OPAL_GET_BINDING_POLICY(pol) \
((pol) & 0x0fff) ((pol) & 0x0fff)
#define OPAL_SET_BINDING_POLICY(target, pol) \ #define OPAL_SET_BINDING_POLICY(target, pol) \
(target) = (pol) | (((target) & 0x2000) | OPAL_BIND_GIVEN) (target) = (pol) | (((target) & 0xf000) | OPAL_BIND_GIVEN)
#define OPAL_SET_DEFAULT_BINDING_POLICY(target, pol) \ #define OPAL_SET_DEFAULT_BINDING_POLICY(target, pol) \
do { \ do { \
if (!OPAL_BINDING_POLICY_IS_SET((target))) { \ if (!OPAL_BINDING_POLICY_IS_SET((target))) { \
@ -208,6 +215,8 @@ typedef uint16_t opal_binding_policy_t;
/* macro to detect if binding is forced */ /* macro to detect if binding is forced */
#define OPAL_BIND_OVERLOAD_ALLOWED(n) \ #define OPAL_BIND_OVERLOAD_ALLOWED(n) \
(OPAL_BIND_ALLOW_OVERLOAD & (n)) (OPAL_BIND_ALLOW_OVERLOAD & (n))
#define OPAL_BIND_ORDERED_REQUESTED(n) \
(OPAL_BIND_ORDERED & (n))
/* some global values */ /* some global values */
OPAL_DECLSPEC extern hwloc_topology_t opal_hwloc_topology; OPAL_DECLSPEC extern hwloc_topology_t opal_hwloc_topology;

Просмотреть файл

@ -13,7 +13,7 @@
# Copyright (c) 2011-2015 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2011-2015 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2011 Los Alamos National Security, LLC. # Copyright (c) 2011 Los Alamos National Security, LLC.
# All rights reserved. # All rights reserved.
# Copyright (c) 2014-2017 Intel, Inc. All rights reserved. # Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
# $COPYRIGHT$ # $COPYRIGHT$
# #
# Additional copyrights may follow # Additional copyrights may follow
@ -420,3 +420,16 @@ compute node failed:
We cannot continue - please check that the policy is in We cannot continue - please check that the policy is in
accordance with the actual available hardware. accordance with the actual available hardware.
#
[rmaps:insufficient-cpus]
The request to bind processes to cpus in a provided list
of logical IDs based on their local rank on a node cannot
be met due to there being more processes on a node than
available cpus:
Node: %s
Local rank: %d
Cpu list: %s
Please adjust either the number of processes per node or
the list of cpus.

Просмотреть файл

@ -505,6 +505,9 @@ static int bind_to_cpuset(orte_job_t *jdata)
opal_hwloc_topo_data_t *sum; opal_hwloc_topo_data_t *sum;
hwloc_obj_t root; hwloc_obj_t root;
char *cpu_bitmap; char *cpu_bitmap;
unsigned id;
orte_local_rank_t lrank;
hwloc_bitmap_t mycpuset;
opal_output_verbose(5, orte_rmaps_base_framework.framework_output, opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"mca:rmaps: bind job %s to cpus %s", "mca:rmaps: bind job %s to cpus %s",
@ -512,6 +515,7 @@ static int bind_to_cpuset(orte_job_t *jdata)
opal_hwloc_base_cpu_list); opal_hwloc_base_cpu_list);
/* initialize */ /* initialize */
map = jdata->map; map = jdata->map;
mycpuset = hwloc_bitmap_alloc();
for (i=0; i < map->nodes->size; i++) { for (i=0; i < map->nodes->size; i++) {
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) { if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
@ -569,6 +573,8 @@ static int bind_to_cpuset(orte_job_t *jdata)
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND; return ORTE_ERR_NOT_FOUND;
} }
/* the cpu list in sum->available has already been filtered
* to include _only_ the cpus defined by the user */
for (j=0; j < node->procs->size; j++) { for (j=0; j < node->procs->size; j++) {
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) { if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
continue; continue;
@ -577,13 +583,38 @@ static int bind_to_cpuset(orte_job_t *jdata)
if (proc->name.jobid != jdata->jobid) { if (proc->name.jobid != jdata->jobid) {
continue; continue;
} }
hwloc_bitmap_list_asprintf(&cpu_bitmap, sum->available); if (OPAL_BIND_ORDERED_REQUESTED(jdata->map->binding)) {
/* assign each proc, in local rank order, to
* the corresponding cpu in the list */
id = hwloc_bitmap_first(sum->available);
lrank = 0;
while (lrank != proc->local_rank) {
id = hwloc_bitmap_next(sum->available, id);
if ((unsigned)-1 == id) {
break;
}
++lrank;
}
if ((unsigned)-1 ==id) {
/* ran out of cpus - that's an error */
orte_show_help("help-orte-rmaps-base.txt", "rmaps:insufficient-cpus", true,
node->name, (int)proc->local_rank, opal_hwloc_base_cpu_list);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* set the bit of interest */
hwloc_bitmap_only(mycpuset, id);
} else {
/* bind the proc to all assigned cpus */
mycpuset = sum->available;
}
hwloc_bitmap_list_asprintf(&cpu_bitmap, mycpuset);
orte_set_attribute(&proc->attributes, ORTE_PROC_CPU_BITMAP, ORTE_ATTR_GLOBAL, cpu_bitmap, OPAL_STRING); orte_set_attribute(&proc->attributes, ORTE_PROC_CPU_BITMAP, ORTE_ATTR_GLOBAL, cpu_bitmap, OPAL_STRING);
if (NULL != cpu_bitmap) { if (NULL != cpu_bitmap) {
free(cpu_bitmap); free(cpu_bitmap);
} }
} }
} }
hwloc_bitmap_free(mycpuset);
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }

Просмотреть файл

@ -280,6 +280,7 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
return ORTE_ERR_SILENT; return ORTE_ERR_SILENT;
} }
} }
if (0 < orte_rmaps_base.cpus_per_rank) { if (0 < orte_rmaps_base.cpus_per_rank) {
orte_show_help("help-orte-rmaps-base.txt", "deprecated", true, orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
"--cpus-per-proc, -cpus-per-proc, --cpus-per-rank, -cpus-per-rank", "--cpus-per-proc, -cpus-per-proc, --cpus-per-rank, -cpus-per-rank",

Просмотреть файл

@ -199,6 +199,7 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
} }
} }
} }
/* check for oversubscribe directives */ /* check for oversubscribe directives */
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping))) { if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping))) {
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) { if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
@ -212,12 +213,14 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
} }
} }
} }
/* check for no-use-local directive */ /* check for no-use-local directive */
if (!(ORTE_MAPPING_LOCAL_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping))) { if (!(ORTE_MAPPING_LOCAL_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping))) {
if (ORTE_MAPPING_NO_USE_LOCAL & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) { if (ORTE_MAPPING_NO_USE_LOCAL & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_USE_LOCAL); ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_NO_USE_LOCAL);
} }
} }
/* ditto for rank policy */ /* ditto for rank policy */
if (!ORTE_RANKING_POLICY_IS_SET(jdata->map->ranking)) { if (!ORTE_RANKING_POLICY_IS_SET(jdata->map->ranking)) {
jdata->map->ranking = orte_rmaps_base.ranking; jdata->map->ranking = orte_rmaps_base.ranking;

Просмотреть файл

@ -14,7 +14,7 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2008 Voltaire. All rights reserved * Copyright (c) 2008 Voltaire. All rights reserved
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science * Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
* Copyright (c) 2016 IBM Corporation. All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved.
@ -110,6 +110,10 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
/* NOT FOR US */ /* NOT FOR US */
return ORTE_ERR_TAKE_NEXT_OPTION; return ORTE_ERR_TAKE_NEXT_OPTION;
} }
if (OPAL_BIND_ORDERED_REQUESTED(jdata->map->binding)) {
/* NOT FOR US */
return ORTE_ERR_TAKE_NEXT_OPTION;
}
opal_output_verbose(5, orte_rmaps_base_framework.framework_output, opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"mca:rmaps:rank_file: mapping job %s", "mca:rmaps:rank_file: mapping job %s",
ORTE_JOBID_PRINT(jdata->jobid)); ORTE_JOBID_PRINT(jdata->jobid));

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2008 Voltaire. All rights reserved * Copyright (c) 2008 Voltaire. All rights reserved
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights * Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* $COPYRIGHT$ * $COPYRIGHT$
@ -106,7 +106,8 @@ static int orte_rmaps_rank_file_register(void)
static int orte_rmaps_rank_file_open(void) static int orte_rmaps_rank_file_open(void)
{ {
/* ensure we flag mapping by user */ /* ensure we flag mapping by user */
if (NULL != opal_hwloc_base_cpu_list || NULL != orte_rankfile) { if ((NULL != opal_hwloc_base_cpu_list && !OPAL_BIND_ORDERED_REQUESTED(opal_hwloc_binding_policy)) ||
NULL != orte_rankfile) {
if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) { if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
/* if a non-default mapping is already specified, then we /* if a non-default mapping is already specified, then we
* have an error * have an error