1
1

Error out if we are filtering a hostfile and encounter a node that is not in the resource-managed allocation, giving an error message identifying the file and the node. Don't filter managed allocations through a default hostfile, as this can lead to "hidden" errors.

Don't use dash-host info on managed allocations if we are using soft locations

This commit was SVN r27245.
Этот коммит содержится в:
Ralph Castain 2012-09-05 19:42:00 +00:00
родитель fa4c2af9ed
Коммит efa50346c8
3 изменённых файлов: 31 добавлений и 34 удалений

Просмотреть файл

@ -87,7 +87,7 @@ int orte_rmaps_base_filter_nodes(orte_app_context_t *app,
}
}
/* now filter the list through any -host specification */
if (NULL != app->dash_host) {
if (!orte_soft_locations && NULL != app->dash_host) {
if (ORTE_SUCCESS != (rc = orte_util_filter_dash_host_nodes(nodes, app->dash_host, remove))) {
ORTE_ERROR_LOG(rc);
return rc;
@ -407,38 +407,6 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
return ORTE_ERR_SILENT;
}
/* is there a default hostfile? */
if (NULL != orte_default_hostfile) {
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base.rmaps_output,
"%s Filtering thru default hostfile",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/* yes - filter the node list through the file, removing
* any nodes not in the file -or- excluded via ^
*/
if (ORTE_SUCCESS != (rc = orte_util_filter_hostfile_nodes(allocated_nodes,
orte_default_hostfile,
true)) &&
ORTE_ERR_TAKE_NEXT_OPTION != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base.rmaps_output,
"%s Resulted in %d nodes in list",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(int)opal_list_get_size(allocated_nodes)));
/** check that anything is here */
if (0 == opal_list_get_size(allocated_nodes)) {
if (!silent) {
orte_show_help("help-orte-rmaps-base.txt",
"orte-rmaps-base:no-available-resources",
true);
}
return ORTE_ERR_SILENT;
}
}
/* filter the nodes thru any hostfile and dash-host options */
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base.rmaps_output,
"%s Filtering thru apps",

Просмотреть файл

@ -10,6 +10,8 @@
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2012 Los Alamos National Security, LLC
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -109,3 +111,17 @@ Open RTE detected a bad parameter in the hostfile:
The cores parameter is less than 0:
cores=%d
#
[hostfile:extra-node-not-found]
A hostfile was provided that contains at least one node not
present in the allocation:
hostfile: %s
node: %s
If you are operating in a resource-managed environment, then only
nodes that are in the allocation can be used in the hostfile. You
may find relative node syntax to be a useful alternative to
specifying absolute node names; see the orte_hosts man page for
further information.

Просмотреть файл

@ -565,7 +565,8 @@ int orte_util_filter_hostfile_nodes(opal_list_t *nodes,
int num_empty, nodeidx;
bool want_all_empty = false;
opal_list_t keep;
bool found;
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
"%s hostfile: filtering nodes through hostfile %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostfile));
@ -717,6 +718,7 @@ int orte_util_filter_hostfile_nodes(opal_list_t *nodes,
* search the provided list of nodes to see if this
* one is found
*/
found = false;
for (item1 = opal_list_get_first(nodes);
item1 != opal_list_get_end(nodes);
item1 = opal_list_get_next(item1)) {
@ -747,9 +749,20 @@ int orte_util_filter_hostfile_nodes(opal_list_t *nodes,
/* mark as included */
node_from_list->mapped = true;
}
found = true;
break;
}
}
/* if the host in the newnode list wasn't found,
* then that is an error we need to report to the
* user and abort
*/
if (!found) {
orte_show_help("help-hostfile.txt", "hostfile:extra-node-not-found",
true, hostfile, node_from_file->name);
rc = ORTE_ERR_SILENT;
goto cleanup;
}
}
/* cleanup the newnode list */
OBJ_RELEASE(item2);