Fix the sequential mapper: it was out of sync with the hostfile changes, and we missed the "seq" policy when parsing the --map-by option. Thanks to Bill Chen for reporting it.
cmr=v1.8.1:reviewer=jsquyres This commit was SVN r31333.
Этот коммит содержится в:
родитель
b12ee27b3d
Коммит
61d94fcee2
@ -663,6 +663,7 @@ int orte_rmaps_base_compute_bindings(orte_job_t *jdata)
|
|||||||
switch (map) {
|
switch (map) {
|
||||||
case ORTE_MAPPING_BYNODE:
|
case ORTE_MAPPING_BYNODE:
|
||||||
case ORTE_MAPPING_BYSLOT:
|
case ORTE_MAPPING_BYSLOT:
|
||||||
|
case ORTE_MAPPING_SEQ:
|
||||||
hwm = HWLOC_OBJ_MACHINE;
|
hwm = HWLOC_OBJ_MACHINE;
|
||||||
break;
|
break;
|
||||||
case ORTE_MAPPING_BYDIST:
|
case ORTE_MAPPING_BYDIST:
|
||||||
|
@ -688,6 +688,8 @@ int orte_rmaps_base_set_mapping_policy(orte_mapping_policy_t *policy,
|
|||||||
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSLOT);
|
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSLOT);
|
||||||
} else if (0 == strncasecmp(spec, "node", len)) {
|
} else if (0 == strncasecmp(spec, "node", len)) {
|
||||||
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYNODE);
|
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYNODE);
|
||||||
|
} else if (0 == strncasecmp(spec, "seq", len)) {
|
||||||
|
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_SEQ);
|
||||||
#if OPAL_HAVE_HWLOC
|
#if OPAL_HAVE_HWLOC
|
||||||
} else if (0 == strncasecmp(spec, "core", len)) {
|
} else if (0 == strncasecmp(spec, "core", len)) {
|
||||||
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYCORE);
|
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYCORE);
|
||||||
|
@ -256,6 +256,10 @@ static int orte_rmaps_seq_map(orte_job_t *jdata)
|
|||||||
}
|
}
|
||||||
/* move to next node */
|
/* move to next node */
|
||||||
nd = (orte_node_t*)opal_list_get_next((opal_list_item_t*)nd);
|
nd = (orte_node_t*)opal_list_get_next((opal_list_item_t*)nd);
|
||||||
|
if (NULL == nd) {
|
||||||
|
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||||
|
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** track the total number of processes we mapped */
|
/** track the total number of processes we mapped */
|
||||||
|
@ -12,7 +12,7 @@
|
|||||||
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
|
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2013 Intel, Inc. All rights reserved.
|
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@ -109,17 +109,14 @@ static char *hostfile_parse_string(void)
|
|||||||
return strdup(orte_util_hostfile_value.sval);
|
return strdup(orte_util_hostfile_value.sval);
|
||||||
}
|
}
|
||||||
|
|
||||||
static orte_node_t* hostfile_lookup(opal_list_t* nodes, const char* name, bool keep)
|
static orte_node_t* hostfile_lookup(opal_list_t* nodes, const char* name)
|
||||||
{
|
{
|
||||||
opal_list_item_t* item;
|
opal_list_item_t* item;
|
||||||
for(item = opal_list_get_first(nodes);
|
for(item = opal_list_get_first(nodes);
|
||||||
item != opal_list_get_end(nodes);
|
item != opal_list_get_end(nodes);
|
||||||
item = opal_list_get_next(item)) {
|
item = opal_list_get_next(item)) {
|
||||||
orte_node_t* node = (orte_node_t*)item;
|
orte_node_t* node = (orte_node_t*)item;
|
||||||
if(strcmp(node->name, name) == 0) {
|
if (strcmp(node->name, name) == 0) {
|
||||||
if (!keep) {
|
|
||||||
opal_list_remove_item(nodes, item);
|
|
||||||
}
|
|
||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -195,18 +192,14 @@ static int hostfile_parse_line(int token, opal_list_t* updates,
|
|||||||
|
|
||||||
/* Do we need to make a new node object? First check to see
|
/* Do we need to make a new node object? First check to see
|
||||||
if it's already in the exclude list */
|
if it's already in the exclude list */
|
||||||
if (NULL == (node = hostfile_lookup(exclude, node_name, keep_all))) {
|
if (NULL == (node = hostfile_lookup(exclude, node_name))) {
|
||||||
node = OBJ_NEW(orte_node_t);
|
node = OBJ_NEW(orte_node_t);
|
||||||
node->name = node_name;
|
node->name = node_name;
|
||||||
if (NULL != username) {
|
if (NULL != username) {
|
||||||
node->username = strdup(username);
|
node->username = strdup(username);
|
||||||
}
|
}
|
||||||
|
opal_list_append(exclude, &node->super);
|
||||||
}
|
}
|
||||||
/* Note that we need to add this back to the exclude list.
|
|
||||||
If it was found, we just removed it (in hostfile_lookup()),
|
|
||||||
so this puts it back. If it was not found, then we have to
|
|
||||||
add it to the exclude list anyway. */
|
|
||||||
opal_list_append(exclude, &node->super);
|
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -229,13 +222,14 @@ static int hostfile_parse_line(int token, opal_list_t* updates,
|
|||||||
keep_all ? "TRUE" : "FALSE"));
|
keep_all ? "TRUE" : "FALSE"));
|
||||||
|
|
||||||
/* Do we need to make a new node object? */
|
/* Do we need to make a new node object? */
|
||||||
if (NULL == (node = hostfile_lookup(updates, node_name, keep_all))) {
|
if (keep_all || NULL == (node = hostfile_lookup(updates, node_name))) {
|
||||||
node = OBJ_NEW(orte_node_t);
|
node = OBJ_NEW(orte_node_t);
|
||||||
node->name = node_name;
|
node->name = node_name;
|
||||||
node->slots = 1;
|
node->slots = 1;
|
||||||
if (NULL != username) {
|
if (NULL != username) {
|
||||||
node->username = strdup(username);
|
node->username = strdup(username);
|
||||||
}
|
}
|
||||||
|
opal_list_append(updates, &node->super);
|
||||||
} else {
|
} else {
|
||||||
/* this node was already found once - add a slot and mark slots as "given" */
|
/* this node was already found once - add a slot and mark slots as "given" */
|
||||||
node->slots++;
|
node->slots++;
|
||||||
@ -254,6 +248,7 @@ static int hostfile_parse_line(int token, opal_list_t* updates,
|
|||||||
if (NULL != username) {
|
if (NULL != username) {
|
||||||
node->username = strdup(username);
|
node->username = strdup(username);
|
||||||
}
|
}
|
||||||
|
opal_list_append(updates, &node->super);
|
||||||
} else if (ORTE_HOSTFILE_RANK == token) {
|
} else if (ORTE_HOSTFILE_RANK == token) {
|
||||||
/* we can ignore the rank, but we need to extract the node name. we
|
/* we can ignore the rank, but we need to extract the node name. we
|
||||||
* first need to shift over to the other side of the equal sign as
|
* first need to shift over to the other side of the equal sign as
|
||||||
@ -289,13 +284,14 @@ static int hostfile_parse_line(int token, opal_list_t* updates,
|
|||||||
}
|
}
|
||||||
opal_argv_free (argv);
|
opal_argv_free (argv);
|
||||||
/* Do we need to make a new node object? */
|
/* Do we need to make a new node object? */
|
||||||
if (NULL == (node = hostfile_lookup(updates, node_name, keep_all))) {
|
if (NULL == (node = hostfile_lookup(updates, node_name))) {
|
||||||
node = OBJ_NEW(orte_node_t);
|
node = OBJ_NEW(orte_node_t);
|
||||||
node->name = node_name;
|
node->name = node_name;
|
||||||
node->slots = 1;
|
node->slots = 1;
|
||||||
if (NULL != username) {
|
if (NULL != username) {
|
||||||
node->username = strdup(username);
|
node->username = strdup(username);
|
||||||
}
|
}
|
||||||
|
opal_list_append(updates, &node->super);
|
||||||
} else {
|
} else {
|
||||||
/* add a slot */
|
/* add a slot */
|
||||||
node->slots++;
|
node->slots++;
|
||||||
@ -312,7 +308,6 @@ static int hostfile_parse_line(int token, opal_list_t* updates,
|
|||||||
ORTE_HOSTFILE_NEWLINE != token) {
|
ORTE_HOSTFILE_NEWLINE != token) {
|
||||||
token = orte_util_hostfile_lex();
|
token = orte_util_hostfile_lex();
|
||||||
}
|
}
|
||||||
opal_list_append(updates, &node->super);
|
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
} else {
|
} else {
|
||||||
hostfile_parse_error(token);
|
hostfile_parse_error(token);
|
||||||
@ -405,7 +400,6 @@ static int hostfile_parse_line(int token, opal_list_t* updates,
|
|||||||
node->slots = node->slots_max;
|
node->slots = node->slots_max;
|
||||||
node->slots_given = true;
|
node->slots_given = true;
|
||||||
}
|
}
|
||||||
opal_list_append(updates, &node->super);
|
|
||||||
|
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user