
Deprecate the --slot-list parameter in favor of --cpu-list. Remove the --cpu-set param (mark it as deprecated) and use --cpu-list instead, as having the two params was confusing. The --cpu-list param defines the cpus to be used by procs of this job, and the binding policy will be overlaid on top of it.

Note: since the discovered cpus are filtered against this list, #slots will be set to the #cpus in the list if no slot values are given in a -host or -hostname specification.
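
For example (an illustrative invocation; the cpu numbers, binding policy, and app name are hypothetical, not part of this commit):

    # restrict this job's procs to logical cpus 0-3, then overlay a per-core binding
    mpirun -np 4 --cpu-list 0,1,2,3 --bind-to core ./app

    # the old MCA parameter names remain usable as deprecated synonyms of hwloc_base_cpu_list
    mpirun -np 4 --mca hwloc_base_slot_list 0,1,2,3 ./app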

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
This commit is contained in:
Ralph Castain 2017-01-24 11:43:14 -08:00
parent 0bfdc0057a
commit ef86707fbe
14 changed files with 50 additions and 69 deletions

View file

@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
+ * Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -57,8 +57,7 @@ opal_hwloc_print_buffers_t *opal_hwloc_get_print_buffer(void);
 extern char* opal_hwloc_print_null;
 OPAL_DECLSPEC char* opal_hwloc_base_print_locality(opal_hwloc_locality_t locality);
-OPAL_DECLSPEC extern char *opal_hwloc_base_slot_list;
-OPAL_DECLSPEC extern char *opal_hwloc_base_cpu_set;
+OPAL_DECLSPEC extern char *opal_hwloc_base_cpu_list;
 OPAL_DECLSPEC extern hwloc_cpuset_t opal_hwloc_base_given_cpus;
 OPAL_DECLSPEC extern char *opal_hwloc_base_topo_file;
@@ -192,7 +191,7 @@ OPAL_DECLSPEC bool opal_hwloc_base_single_cpu(hwloc_cpuset_t cpuset);
  * Provide a utility to parse a slot list against the local
  * cpus of given type, and produce a cpuset for the described binding
  */
-OPAL_DECLSPEC int opal_hwloc_base_slot_list_parse(const char *slot_str,
+OPAL_DECLSPEC int opal_hwloc_base_cpu_list_parse(const char *slot_str,
                                                  hwloc_topology_t topo,
                                                  opal_hwloc_resource_type_t rtype,
                                                  hwloc_cpuset_t cpumask);

View file

@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
+ * Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
  * Copyright (c) 2016      Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
  * $COPYRIGHT$
@@ -44,8 +44,7 @@ hwloc_cpuset_t opal_hwloc_base_given_cpus=NULL;
 opal_hwloc_base_map_t opal_hwloc_base_map = OPAL_HWLOC_BASE_MAP_NONE;
 opal_hwloc_base_mbfa_t opal_hwloc_base_mbfa = OPAL_HWLOC_BASE_MBFA_WARN;
 opal_binding_policy_t opal_hwloc_binding_policy=0;
-char *opal_hwloc_base_slot_list=NULL;
-char *opal_hwloc_base_cpu_set=NULL;
+char *opal_hwloc_base_cpu_list=NULL;
 bool opal_hwloc_report_bindings=false;
 hwloc_obj_type_t opal_hwloc_levels[] = {
     HWLOC_OBJ_MACHINE,
@@ -87,7 +86,7 @@ static bool opal_hwloc_base_bind_to_socket = false;
 static int opal_hwloc_base_register(mca_base_register_flag_t flags)
 {
     mca_base_var_enum_t *new_enum;
-    int ret;
+    int ret, varid;

     /* hwloc_base_mbind_policy */
@@ -142,17 +141,13 @@ static int opal_hwloc_base_register(mca_base_register_flag_t flags)
                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, OPAL_INFO_LVL_9,
                                  MCA_BASE_VAR_SCOPE_READONLY, &opal_hwloc_report_bindings);

-    opal_hwloc_base_slot_list = NULL;
-    (void) mca_base_var_register("opal", "hwloc", "base", "slot_list",
-                                 "List of processor IDs to bind processes to [default=NULL]",
-                                 MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9,
-                                 MCA_BASE_VAR_SCOPE_READONLY, &opal_hwloc_base_slot_list);
-
-    opal_hwloc_base_cpu_set = NULL;
-    (void) mca_base_var_register("opal", "hwloc", "base", "cpu_set",
-                                 "Comma-separated list of ranges specifying logical cpus allocated to this job [default: none]",
-                                 MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9,
-                                 MCA_BASE_VAR_SCOPE_READONLY, &opal_hwloc_base_cpu_set);
+    opal_hwloc_base_cpu_list = NULL;
+    varid = mca_base_var_register("opal", "hwloc", "base", "cpu_list",
+                                  "Comma-separated list of ranges specifying logical cpus to be used by these processes [default: none]",
+                                  MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9,
+                                  MCA_BASE_VAR_SCOPE_READONLY, &opal_hwloc_base_cpu_list);
+    mca_base_var_register_synonym (varid, "opal", "hwloc", "base", "slot_list", MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
+    mca_base_var_register_synonym (varid, "opal", "hwloc", "base", "cpu_set", MCA_BASE_VAR_SYN_FLAG_DEPRECATED);

     /* declare hwthreads as independent cpus */
     opal_hwloc_use_hwthreads_as_cpus = false;
@@ -217,27 +212,14 @@ static int opal_hwloc_base_open(mca_base_open_flag_t flags)
     }

     /* did the user provide a slot list? */
-    if (NULL != opal_hwloc_base_slot_list) {
-        /* if we already were given a policy, then this is an error */
-        if (OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy)) {
-            opal_show_help("help-opal-hwloc-base.txt", "redefining-policy", true,
-                           "socket", opal_hwloc_base_print_binding(opal_hwloc_binding_policy));
-            return OPAL_ERR_SILENT;
-        }
+    if (NULL != opal_hwloc_base_cpu_list) {
+        /* it is okay if a binding policy was already given - just ensure that
+         * we do bind to the given cpus if provided, otherwise this would be
+         * ignored if someone didn't also specify a binding policy
+         */
         OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_CPUSET);
     }

-    /* cpu allocation specification */
-    if (NULL != opal_hwloc_base_cpu_set) {
-        if (!OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy)) {
-            /* it is okay if a binding policy was already given - just ensure that
-             * we do bind to the given cpus if provided, otherwise this would be
-             * ignored if someone didn't also specify a binding policy
-             */
-            OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_CPUSET);
-        }
-    }
-
     /* if we are binding to hwthreads, then we must use hwthreads as cpus */
     if (OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy) == OPAL_BIND_TO_HWTHREAD) {
         opal_hwloc_use_hwthreads_as_cpus = true;

View file

@@ -13,7 +13,7 @@
  * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved.
  * Copyright (c) 2012-2015 Los Alamos National Security, LLC.
  *                         All rights reserved.
- * Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
+ * Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
  * Copyright (c) 2015-2016 Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
  * $COPYRIGHT$
@@ -85,7 +85,7 @@ hwloc_obj_t opal_hwloc_base_get_pu(hwloc_topology_t topo,
         OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output,
                              "physical cpu %d %s found in cpuset %s",
                              lid, (NULL == obj) ? "not" : "is",
-                             (NULL == opal_hwloc_base_cpu_set) ? "None" : opal_hwloc_base_cpu_set));
+                             (NULL == opal_hwloc_base_cpu_list) ? "None" : opal_hwloc_base_cpu_list));

     /* we now need to shift upward to the core including this PU */
     if (NULL != obj && HWLOC_OBJ_CORE == obj_type) {
         obj = obj->parent;
@@ -101,7 +101,7 @@ hwloc_obj_t opal_hwloc_base_get_pu(hwloc_topology_t topo,
     OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output,
                          "logical cpu %d %s found in cpuset %s",
                          lid, (NULL == obj) ? "not" : "is",
-                         (NULL == opal_hwloc_base_cpu_set) ? "None" : opal_hwloc_base_cpu_set));
+                         (NULL == opal_hwloc_base_cpu_list) ? "None" : opal_hwloc_base_cpu_list));

     /* Found the right core (or PU). Return the object */
     return obj;
@@ -132,7 +132,7 @@ int opal_hwloc_base_filter_cpus(hwloc_topology_t topo)
     }

     /* process any specified default cpu set against this topology */
-    if (NULL == opal_hwloc_base_cpu_set) {
+    if (NULL == opal_hwloc_base_cpu_list) {
         /* get the root available cpuset */
         avail = hwloc_bitmap_alloc();
         hwloc_bitmap_and(avail, root->online_cpuset, root->allowed_cpuset);
@@ -142,7 +142,7 @@ int opal_hwloc_base_filter_cpus(hwloc_topology_t topo)
         OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output,
                              "hwloc:base: filtering cpuset"));
         /* find the specified logical cpus */
-        ranges = opal_argv_split(opal_hwloc_base_cpu_set, ',');
+        ranges = opal_argv_split(opal_hwloc_base_cpu_list, ',');
         avail = hwloc_bitmap_alloc();
         hwloc_bitmap_zero(avail);
         res = hwloc_bitmap_alloc();
@@ -550,7 +550,7 @@ static void df_search_cores(hwloc_obj_t obj, unsigned int *cnt)
             data = OBJ_NEW(opal_hwloc_obj_data_t);
             obj->userdata = (void*)data;
         }
-        if (NULL == opal_hwloc_base_cpu_set) {
+        if (NULL == opal_hwloc_base_cpu_list) {
            if (!hwloc_bitmap_intersects(obj->cpuset, obj->allowed_cpuset)) {
                /*
                 * do not count not allowed cores (e.g. cores with zero allowed PU)
@@ -1250,7 +1250,7 @@ static int socket_core_to_cpu_set(char *socket_core_list,
     return rc;
 }

-int opal_hwloc_base_slot_list_parse(const char *slot_str,
+int opal_hwloc_base_cpu_list_parse(const char *slot_str,
                                    hwloc_topology_t topo,
                                    opal_hwloc_resource_type_t rtype,
                                    hwloc_cpuset_t cpumask)

View file

@@ -12,7 +12,7 @@
  * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved.
  * Copyright (c) 2011-2012 Los Alamos National Security, LLC.
  *                         All rights reserved.
- * Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
+ * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
  * Copyright (c) 2014      Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
  * $COPYRIGHT$
@@ -149,7 +149,7 @@ int orte_ess_base_proc_binding(void)
              */
             hwloc_bitmap_zero(cpus);
             if (OPAL_BIND_TO_CPUSET == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
-                if (OPAL_SUCCESS != (ret = opal_hwloc_base_slot_list_parse(opal_hwloc_base_slot_list,
+                if (OPAL_SUCCESS != (ret = opal_hwloc_base_cpu_list_parse(opal_hwloc_base_cpu_list,
                                                                           opal_hwloc_topology,
                                                                           OPAL_HWLOC_LOGICAL, cpus))) {
                     error = "Setting processor affinity failed";

View file

@@ -585,7 +585,7 @@ static int bind_in_place(orte_job_t *jdata,

 static int bind_to_cpuset(orte_job_t *jdata)
 {
-    /* bind each process to opal_hwloc_base_cpu_set */
+    /* bind each process to opal_hwloc_base_cpu_list */
     int i, j;
     orte_job_map_t *map;
     orte_node_t *node;
@@ -596,9 +596,9 @@ static int bind_to_cpuset(orte_job_t *jdata)
     char *cpu_bitmap;

     opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
-                        "mca:rmaps: bind job %s to cpuset %s",
+                        "mca:rmaps: bind job %s to cpus %s",
                         ORTE_JOBID_PRINT(jdata->jobid),
-                        opal_hwloc_base_cpu_set);
+                        opal_hwloc_base_cpu_list);

     /* initialize */
     map = jdata->map;

View file

@@ -199,8 +199,8 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
         /* get the rankfile entry for this rank */
         if (NULL == (rfmap = (orte_rmaps_rank_file_map_t*)opal_pointer_array_get_item(&rankmap, rank))) {
             /* if we were give a default slot-list, then use it */
-            if (NULL != opal_hwloc_base_slot_list) {
-                slots = opal_hwloc_base_slot_list;
+            if (NULL != opal_hwloc_base_cpu_list) {
+                slots = opal_hwloc_base_cpu_list;
                 /* take the next node off of the available list */
                 node = NULL;
                 OPAL_LIST_FOREACH(nd, &node_list, orte_node_t) {
@@ -318,7 +318,7 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
         }
         bitmap = hwloc_bitmap_alloc();
         /* parse the slot_list to find the socket and core */
-        if (ORTE_SUCCESS != (rc = opal_hwloc_base_slot_list_parse(slots, node->topology->topo, rtype, bitmap))) {
+        if (ORTE_SUCCESS != (rc = opal_hwloc_base_cpu_list_parse(slots, node->topology->topo, rtype, bitmap))) {
             ORTE_ERROR_LOG(rc);
             hwloc_bitmap_free(bitmap);
             goto error;

View file

@@ -12,7 +12,7 @@
  *                         All rights reserved.
  * Copyright (c) 2008      Voltaire. All rights reserved
  * Copyright (c) 2011      Cisco Systems, Inc. All rights reserved.
- * Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
+ * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
  * Copyright (c) 2015      Los Alamos National Security, LLC. All rights
  *                         reserved.
  * $COPYRIGHT$
@@ -106,7 +106,7 @@ static int orte_rmaps_rank_file_register(void)
 static int orte_rmaps_rank_file_open(void)
 {
     /* ensure we flag mapping by user */
-    if (NULL != opal_hwloc_base_slot_list || NULL != orte_rankfile) {
+    if (NULL != opal_hwloc_base_cpu_list || NULL != orte_rankfile) {
         if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
             /* if a non-default mapping is already specified, then we
              * have an error

View file

@@ -460,7 +460,7 @@ static int orte_rmaps_seq_map(orte_job_t *jdata)
             /* setup the bitmap */
             bitmap = hwloc_bitmap_alloc();
             /* parse the slot_list to find the socket and core */
-            if (ORTE_SUCCESS != (rc = opal_hwloc_base_slot_list_parse(sq->cpuset, node->topology->topo, rtype, bitmap))) {
+            if (ORTE_SUCCESS != (rc = opal_hwloc_base_cpu_list_parse(sq->cpuset, node->topology->topo, rtype, bitmap))) {
                 ORTE_ERROR_LOG(rc);
                 hwloc_bitmap_free(bitmap);
                 goto error;

View file

@@ -317,8 +317,8 @@ static opal_cmd_line_init_t cmd_line_init[] = {
       "Whether to report process bindings to stderr" },

     /* slot list option */
-    { "hwloc_base_slot_list", '\0', "slot-list", "slot-list", 1,
-      &orte_cmd_options.slot_list, OPAL_CMD_LINE_TYPE_STRING,
+    { "hwloc_base_cpu_list", '\0', "cpu-list", "cpu-list", 1,
+      &orte_cmd_options.cpu_list, OPAL_CMD_LINE_TYPE_STRING,
       "List of processor IDs to bind processes to [default=NULL]"},

     /* generalized pattern mapping option */

View file

@@ -880,8 +880,8 @@ int orte_submit_job(char *argv[], int *index,
     if (orte_cmd_options.report_bindings) {
         orte_set_attribute(&jdata->attributes, ORTE_JOB_REPORT_BINDINGS, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
     }
-    if (orte_cmd_options.slot_list) {
-        orte_set_attribute(&jdata->attributes, ORTE_JOB_SLOT_LIST, ORTE_ATTR_GLOBAL, orte_cmd_options.slot_list, OPAL_STRING);
+    if (orte_cmd_options.cpu_list) {
+        orte_set_attribute(&jdata->attributes, ORTE_JOB_CPU_LIST, ORTE_ATTR_GLOBAL, orte_cmd_options.cpu_list, OPAL_STRING);
     }

     /* if recovery was enabled on the cmd line, do so */
@@ -1158,10 +1158,10 @@ static int init_globals(void)
         free(orte_cmd_options.report_uri);
         orte_cmd_options.report_uri = NULL;
     }
-    if (NULL != orte_cmd_options.slot_list) {
-        free(orte_cmd_options.slot_list);
-        orte_cmd_options.slot_list= NULL;
-    }
+    if (NULL != orte_cmd_options.cpu_list) {
+        free(orte_cmd_options.cpu_list);
+        orte_cmd_options.cpu_list= NULL;
+    }
     orte_cmd_options.preload_binaries = false;
     if (NULL != orte_cmd_options.preload_files) {
         free(orte_cmd_options.preload_files);

View file

@@ -84,7 +84,7 @@ struct orte_cmd_options_t {
     char *ranking_policy;
     char *binding_policy;
     bool report_bindings;
-    char *slot_list;
+    char *cpu_list;
     bool debug;
     bool tag_output;
     bool timestamp_output;

View file

@@ -677,7 +677,7 @@ int orte_dt_print_map(char **output, char *prefix, orte_job_map_t *src, opal_dat
                         orte_rmaps_base_print_mapping(src->mapping),
                         orte_rmaps_base_print_ranking(src->ranking),
                         pfx2, opal_hwloc_base_print_binding(src->binding),
-                        (NULL == opal_hwloc_base_cpu_set) ? "NULL" : opal_hwloc_base_cpu_set,
+                        (NULL == opal_hwloc_base_cpu_list) ? "NULL" : opal_hwloc_base_cpu_list,
                         (NULL == src->ppr) ? "NULL" : src->ppr,
                         (int)src->cpus_per_rank);

View file

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2016 Intel, Inc. All rights reserved
+ * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
  * Copyright (c) 2014-2017 Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
  * $COPYRIGHT$
@@ -254,8 +254,8 @@ const char *orte_attr_key_to_str(orte_attribute_key_t key)
         return "JOB-LAUNCHED-DAEMONS";
     case ORTE_JOB_REPORT_BINDINGS:
         return "JOB-REPORT-BINDINGS";
-    case ORTE_JOB_SLOT_LIST:
-        return "JOB-SLOT-LIST";
+    case ORTE_JOB_CPU_LIST:
+        return "JOB-CPU-LIST";
     case ORTE_JOB_NOTIFICATIONS:
         return "JOB-NOTIFICATIONS";
     case ORTE_JOB_ROOM_NUM:

View file

@@ -128,7 +128,7 @@ typedef uint16_t orte_job_flags_t;
 #define ORTE_JOB_PHYSICAL_CPUIDS    (ORTE_JOB_START_KEY + 34)   // bool - Hostfile contains physical jobids in cpuset
 #define ORTE_JOB_LAUNCHED_DAEMONS   (ORTE_JOB_START_KEY + 35)   // bool - Job caused new daemons to be spawned
 #define ORTE_JOB_REPORT_BINDINGS    (ORTE_JOB_START_KEY + 36)   // bool - Report process bindings
-#define ORTE_JOB_SLOT_LIST          (ORTE_JOB_START_KEY + 37)   // string - constraints on cores to use
+#define ORTE_JOB_CPU_LIST           (ORTE_JOB_START_KEY + 37)   // string - cpus to which procs are to be bound
 #define ORTE_JOB_NOTIFICATIONS      (ORTE_JOB_START_KEY + 38)   // string - comma-separated list of desired notifications+methods
 #define ORTE_JOB_ROOM_NUM           (ORTE_JOB_START_KEY + 39)   // int - number of remote request's hotel room
 #define ORTE_JOB_LAUNCH_PROXY       (ORTE_JOB_START_KEY + 40)   // opal_process_name_t - name of spawn requestor