1
1

Fix rank_file operations. We changed the syntax to use semicolons between multiple slot assignments so that the comma could be used to separate specific cores, but the flex definitions were never updated to accept the semicolon character. We also incorrectly zeroed the bitmap between slot assignment sections, so when multiple slot assignments were given, only the last one in the list took effect.

This commit was SVN r27908.
Этот коммит содержится в:
Ralph Castain 2013-01-25 18:33:25 +00:00
родитель 2504da1ac9
Коммит f6b4db0b79
4 изменённых файлов: 56 добавлений и 33 удалений

Просмотреть файл

@ -10,7 +10,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC. * Copyright (c) 2012-2013 Los Alamos National Security, LLC.
* All rights reserved. * All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
@ -941,7 +941,7 @@ static int socket_to_cpu_set(char *cpus,
int lower_range, upper_range; int lower_range, upper_range;
int socket_id; int socket_id;
hwloc_obj_t obj; hwloc_obj_t obj;
hwloc_bitmap_t avail, res; hwloc_bitmap_t res;
if ('*' == cpus[0]) { if ('*' == cpus[0]) {
/* requesting cpumask for ALL sockets */ /* requesting cpumask for ALL sockets */
@ -950,7 +950,7 @@ static int socket_to_cpu_set(char *cpus,
* this specification equates to unbound * this specification equates to unbound
*/ */
res = opal_hwloc_base_get_available_cpus(topo, obj); res = opal_hwloc_base_get_available_cpus(topo, obj);
hwloc_bitmap_copy(cpumask, res); hwloc_bitmap_or(cpumask, cpumask, res);
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
@ -962,25 +962,20 @@ static int socket_to_cpu_set(char *cpus,
obj = opal_hwloc_base_get_obj_by_type(topo, HWLOC_OBJ_SOCKET, 0, socket_id, OPAL_HWLOC_LOGICAL); obj = opal_hwloc_base_get_obj_by_type(topo, HWLOC_OBJ_SOCKET, 0, socket_id, OPAL_HWLOC_LOGICAL);
/* get the available logical cpus for this socket */ /* get the available logical cpus for this socket */
res = opal_hwloc_base_get_available_cpus(topo, obj); res = opal_hwloc_base_get_available_cpus(topo, obj);
hwloc_bitmap_copy(cpumask, res); hwloc_bitmap_or(cpumask, cpumask, res);
break; break;
case 2: /* range of sockets was given */ case 2: /* range of sockets was given */
lower_range = atoi(range[0]); lower_range = atoi(range[0]);
upper_range = atoi(range[1]); upper_range = atoi(range[1]);
/* zero the bitmask */
hwloc_bitmap_zero(cpumask);
avail = hwloc_bitmap_alloc();
/* cycle across the range of sockets */ /* cycle across the range of sockets */
for (socket_id=lower_range; socket_id<=upper_range; socket_id++) { for (socket_id=lower_range; socket_id<=upper_range; socket_id++) {
obj = opal_hwloc_base_get_obj_by_type(topo, HWLOC_OBJ_SOCKET, 0, socket_id, OPAL_HWLOC_LOGICAL); obj = opal_hwloc_base_get_obj_by_type(topo, HWLOC_OBJ_SOCKET, 0, socket_id, OPAL_HWLOC_LOGICAL);
/* get the available logical cpus for this socket */ /* get the available logical cpus for this socket */
res = opal_hwloc_base_get_available_cpus(topo, obj); res = opal_hwloc_base_get_available_cpus(topo, obj);
/* set the corresponding bits in the bitmask */ /* set the corresponding bits in the bitmask */
hwloc_bitmap_or(avail, cpumask, res); hwloc_bitmap_or(cpumask, cpumask, res);
hwloc_bitmap_copy(cpumask, avail);
} }
hwloc_bitmap_free(avail);
break; break;
default: default:
opal_argv_free(range); opal_argv_free(range);
@ -1002,7 +997,7 @@ static int socket_core_to_cpu_set(char *socket_core_list,
int lower_range, upper_range; int lower_range, upper_range;
int socket_id, core_id; int socket_id, core_id;
hwloc_obj_t socket, core; hwloc_obj_t socket, core;
hwloc_cpuset_t res, avail; hwloc_cpuset_t res;
unsigned int idx; unsigned int idx;
hwloc_obj_type_t obj_type = HWLOC_OBJ_CORE; hwloc_obj_type_t obj_type = HWLOC_OBJ_CORE;
@ -1034,7 +1029,7 @@ static int socket_core_to_cpu_set(char *socket_core_list,
if ('*' == corestr[0]) { if ('*' == corestr[0]) {
/* set to all available logical cpus on this socket */ /* set to all available logical cpus on this socket */
res = opal_hwloc_base_get_available_cpus(topo, socket); res = opal_hwloc_base_get_available_cpus(topo, socket);
hwloc_bitmap_copy(cpumask, res); hwloc_bitmap_or(cpumask, cpumask, res);
/* we are done - already assigned all cores! */ /* we are done - already assigned all cores! */
rc = OPAL_SUCCESS; rc = OPAL_SUCCESS;
break; break;
@ -1054,14 +1049,15 @@ static int socket_core_to_cpu_set(char *socket_core_list,
} }
/* get the cpus */ /* get the cpus */
res = opal_hwloc_base_get_available_cpus(topo, core); res = opal_hwloc_base_get_available_cpus(topo, core);
hwloc_bitmap_copy(cpumask, res); hwloc_bitmap_or(cpumask, cpumask, res);
break; break;
case 2: /* range of core id's was given */ case 2: /* range of core id's was given */
opal_output_verbose(5, opal_hwloc_base_output,
"range of cores given: start %s stop %s",
range[0], range[1]);
lower_range = atoi(range[0]); lower_range = atoi(range[0]);
upper_range = atoi(range[1]); upper_range = atoi(range[1]);
hwloc_bitmap_zero(cpumask);
avail = hwloc_bitmap_alloc();
for (core_id=lower_range; core_id <= upper_range; core_id++) { for (core_id=lower_range; core_id <= upper_range; core_id++) {
/* get that object */ /* get that object */
idx = 0; idx = 0;
@ -1073,10 +1069,8 @@ static int socket_core_to_cpu_set(char *socket_core_list,
/* get the cpus */ /* get the cpus */
res = opal_hwloc_base_get_available_cpus(topo, core); res = opal_hwloc_base_get_available_cpus(topo, core);
/* add them into the result */ /* add them into the result */
hwloc_bitmap_or(avail, cpumask, res); hwloc_bitmap_or(cpumask, cpumask, res);
hwloc_bitmap_copy(cpumask, avail);
} }
hwloc_bitmap_free(avail);
break; break;
default: default:
@ -1117,12 +1111,15 @@ int opal_hwloc_base_slot_list_parse(const char *slot_str,
slot_str); slot_str);
/* split at ';' */ /* split at ';' */
item = opal_argv_split (slot_str, ';'); item = opal_argv_split(slot_str, ';');
/* start with a clean mask */ /* start with a clean mask */
hwloc_bitmap_zero(cpumask); hwloc_bitmap_zero(cpumask);
/* loop across the items and accumulate the mask */ /* loop across the items and accumulate the mask */
for (i=0; NULL != item[i]; i++) { for (i=0; NULL != item[i]; i++) {
opal_output_verbose(5, opal_hwloc_base_output,
"working assignment %s",
item[i]);
/* if they specified "socket" by starting with an S/s, /* if they specified "socket" by starting with an S/s,
* or if they use socket:core notation, then parse the * or if they use socket:core notation, then parse the
* socket/core info * socket/core info
@ -1161,7 +1158,6 @@ int opal_hwloc_base_slot_list_parse(const char *slot_str,
/* just a core specification - see if one or a range was given */ /* just a core specification - see if one or a range was given */
range = opal_argv_split(item[i], '-'); range = opal_argv_split(item[i], '-');
range_cnt = opal_argv_count(range); range_cnt = opal_argv_count(range);
hwloc_bitmap_zero(cpumask);
/* see if a range was set or not */ /* see if a range was set or not */
switch (range_cnt) { switch (range_cnt) {
case 1: /* only one core specified */ case 1: /* only one core specified */
@ -1175,13 +1171,12 @@ int opal_hwloc_base_slot_list_parse(const char *slot_str,
/* get the available cpus for that object */ /* get the available cpus for that object */
pucpus = opal_hwloc_base_get_available_cpus(topo, pu); pucpus = opal_hwloc_base_get_available_cpus(topo, pu);
/* set that in the mask */ /* set that in the mask */
hwloc_bitmap_copy(cpumask, pucpus); hwloc_bitmap_or(cpumask, cpumask, pucpus);
break; break;
case 2: /* range of core id's was given */ case 2: /* range of core id's was given */
lower_range = atoi(range[0]); lower_range = atoi(range[0]);
upper_range = atoi(range[1]); upper_range = atoi(range[1]);
hwloc_bitmap_zero(cpumask);
for (core_id=lower_range; core_id <= upper_range; core_id++) { for (core_id=lower_range; core_id <= upper_range; core_id++) {
/* find the specified logical available cpu */ /* find the specified logical available cpu */
if (NULL == (pu = get_pu(topo, core_id))) { if (NULL == (pu = get_pu(topo, core_id))) {

Просмотреть файл

@ -1,6 +1,8 @@
# Copyright (c) 2004-2005 The Regents of the University of California. # Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved. # All rights reserved.
# Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2013 Los Alamos National Security, LLC.
# All rights reserved.
# $COPYRIGHT$ # $COPYRIGHT$
# #
# Additional copyrights may follow # Additional copyrights may follow
@ -27,30 +29,47 @@ Check to make sure the path and filename are correct.
usage: mpirun -mca rmaps_rankfile_path rankfile ./app usage: mpirun -mca rmaps_rankfile_path rankfile ./app
example: cat hostfile Examples of proper syntax include:
cat hostfile
host1 host1
host2 host2
host3 host3
host4 host4
cat rankfile cat rankfile
rank 1=host1 slot=1:0,1 rank 1=host1 slot=1:0,1
rank 0=host2 slot=0:* rank 0=host2 slot=0:*
rank 2=host4 slot=1-2 rank 2=host4 slot=1-2
rank 3=host3 slot=0:1,1:0-2 rank 3=host3 slot=0:1;1:0-2
# #
[parse_error_string] [parse_error_string]
Open RTE detected a parse error in the rankfile (%s) Open RTE detected a parse error in the rankfile (%s)
It occured on line number %d on token %d: It occured on line number %d on token %d:
%s %s
Examples of proper syntax include:
rank 1=host1 slot=1:0,1
rank 0=host2 slot=0:*
rank 2=host4 slot=1-2
rank 3=host3 slot=0:1;1:0-2
# #
[parse_error_int] [parse_error_int]
Open RTE detected a parse error in the rankfile (%s) Open RTE detected a parse error in the rankfile (%s)
It occured on line number %d on token %d: It occured on line number %d on token %d:
%d %d
Examples of proper syntax include:
rank 1=host1 slot=1:0,1
rank 0=host2 slot=0:*
rank 2=host4 slot=1-2
rank 3=host3 slot=0:1;1:0-2
# #
[parse_error] [parse_error]
Open RTE detected a parse error in the rankfile (%s) Open RTE detected a parse error in the rankfile (%s)
It occured on line number %d on token %d. It occured on line number %d on token %d. Examples of
proper syntax include:
rank 1=host1 slot=1:0,1
rank 0=host2 slot=0:*
rank 2=host4 slot=1-2
rank 3=host3 slot=0:1;1:0-2
# #
[not-all-mapped-alloc] [not-all-mapped-alloc]
Some of the requested ranks are not included in the current allocation. Some of the requested ranks are not included in the current allocation.
@ -89,7 +108,11 @@ Error, rank %d is already assigned to %s, check %s
Error, invalid syntax in the rankfile (%s) Error, invalid syntax in the rankfile (%s)
syntax must be the fallowing syntax must be the fallowing
rank i=host_i slot=string rank i=host_i slot=string
ex: rank 1=host1 slot=1:0,1 Examples of proper syntax include:
rank 1=host1 slot=1:0,1
rank 0=host2 slot=0:*
rank 2=host4 slot=1-2
rank 3=host3 slot=0:1;1:0-2
# #
[orte-rmaps-rf:multi-apps-and-zero-np] [orte-rmaps-rf:multi-apps-and-zero-np]
RMAPS found multiple applications to be launched, with RMAPS found multiple applications to be launched, with
@ -100,7 +123,7 @@ of each to launch via the -np argument.
[missing-rank] [missing-rank]
A rank is missing its location specification: A rank is missing its location specification:
Rank: %d Rank: %d
Rank file: %s Rank file: %s
All processes must have their location specified in the rank file. Either All processes must have their location specified in the rank file. Either

Просмотреть файл

@ -10,7 +10,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2006-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Los Alamos National Security, LLC. * Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved. * All rights reserved.
* Copyright (c) 2008 Voltaire. All rights reserved * Copyright (c) 2008 Voltaire. All rights reserved
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
@ -382,7 +382,10 @@ static int orte_rmaps_rank_file_parse(const char *rankfile)
switch (token) { switch (token) {
case ORTE_RANKFILE_ERROR: case ORTE_RANKFILE_ERROR:
opal_output(0, "Got an error!"); orte_show_help("help-rmaps_rank_file.txt", "bad-syntax", true, rankfile);
rc = ORTE_ERR_BAD_PARAM;
ORTE_ERROR_LOG(rc);
goto unlock;
break; break;
case ORTE_RANKFILE_QUOTED_STRING: case ORTE_RANKFILE_QUOTED_STRING:
orte_show_help("help-rmaps_rank_file.txt", "not-supported-rankfile", true, "QUOTED_STRING", rankfile); orte_show_help("help-rmaps_rank_file.txt", "not-supported-rankfile", true, "QUOTED_STRING", rankfile);

Просмотреть файл

@ -13,6 +13,8 @@
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2008 Voltaire. All rights reserved * Copyright (c) 2008 Voltaire. All rights reserved
* Copyright (c) 2013 Los Alamos National Security, LLC.
* All rights reserved.
* *
* $COPYRIGHT$ * $COPYRIGHT$
* *
@ -96,8 +98,8 @@ username { orte_rmaps_rank_file_value.sval = yytext;
*/ */
%} %}
[A-za-z0-9_\-,:*@]* { orte_rmaps_rank_file_value.sval = yytext; [A-za-z0-9_\-,\;:*@]* { orte_rmaps_rank_file_value.sval = yytext;
return ORTE_RANKFILE_STRING; } return ORTE_RANKFILE_STRING; }
([A-Za-z0-9][A-Za-z0-9_\-]*"@")?([0-9]{1,3}"."){3}[0-9]{1,3} { ([A-Za-z0-9][A-Za-z0-9_\-]*"@")?([0-9]{1,3}"."){3}[0-9]{1,3} {
orte_rmaps_rank_file_value.sval = yytext; orte_rmaps_rank_file_value.sval = yytext;