Error out if we are filtering a hostfile and encounter a node that is not in the resource-managed allocation, giving an error message identifying the file and the node. Don't filter managed allocations thru a default hostfile as this can lead to "hidden" errors.
Don't use dash-host info on managed allocations if we are using soft locations. This commit was SVN r27245.
Этот коммит содержится в:
родитель
fa4c2af9ed
Коммит
efa50346c8
@ -87,7 +87,7 @@ int orte_rmaps_base_filter_nodes(orte_app_context_t *app,
|
||||
}
|
||||
}
|
||||
/* now filter the list through any -host specification */
|
||||
if (NULL != app->dash_host) {
|
||||
if (!orte_soft_locations && NULL != app->dash_host) {
|
||||
if (ORTE_SUCCESS != (rc = orte_util_filter_dash_host_nodes(nodes, app->dash_host, remove))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
@ -407,38 +407,6 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
|
||||
/* is there a default hostfile? */
|
||||
if (NULL != orte_default_hostfile) {
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base.rmaps_output,
|
||||
"%s Filtering thru default hostfile",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* yes - filter the node list through the file, removing
|
||||
* any nodes not in the file -or- excluded via ^
|
||||
*/
|
||||
if (ORTE_SUCCESS != (rc = orte_util_filter_hostfile_nodes(allocated_nodes,
|
||||
orte_default_hostfile,
|
||||
true)) &&
|
||||
ORTE_ERR_TAKE_NEXT_OPTION != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base.rmaps_output,
|
||||
"%s Resulted in %d nodes in list",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(int)opal_list_get_size(allocated_nodes)));
|
||||
|
||||
/** check that anything is here */
|
||||
if (0 == opal_list_get_size(allocated_nodes)) {
|
||||
if (!silent) {
|
||||
orte_show_help("help-orte-rmaps-base.txt",
|
||||
"orte-rmaps-base:no-available-resources",
|
||||
true);
|
||||
}
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
}
|
||||
|
||||
/* filter the nodes thru any hostfile and dash-host options */
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base.rmaps_output,
|
||||
"%s Filtering thru apps",
|
||||
|
@ -10,6 +10,8 @@
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2012 Los Alamos National Security, LLC
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -109,3 +111,17 @@ Open RTE detected a bad parameter in the hostfile:
|
||||
The cores parameter is less than 0:
|
||||
cores=%d
|
||||
|
||||
#
|
||||
[hostfile:extra-node-not-found]
|
||||
A hostfile was provided that contains at least one node not
|
||||
present in the allocation:
|
||||
|
||||
hostfile: %s
|
||||
node: %s
|
||||
|
||||
If you are operating in a resource-managed environment, then only
|
||||
nodes that are in the allocation can be used in the hostfile. You
|
||||
may find relative node syntax to be a useful alternative to
|
||||
specifying absolute node names; see the orte_hosts man page for
|
||||
further information.
|
||||
|
||||
|
@ -565,7 +565,8 @@ int orte_util_filter_hostfile_nodes(opal_list_t *nodes,
|
||||
int num_empty, nodeidx;
|
||||
bool want_all_empty = false;
|
||||
opal_list_t keep;
|
||||
|
||||
bool found;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
|
||||
"%s hostfile: filtering nodes through hostfile %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostfile));
|
||||
@ -717,6 +718,7 @@ int orte_util_filter_hostfile_nodes(opal_list_t *nodes,
|
||||
* search the provided list of nodes to see if this
|
||||
* one is found
|
||||
*/
|
||||
found = false;
|
||||
for (item1 = opal_list_get_first(nodes);
|
||||
item1 != opal_list_get_end(nodes);
|
||||
item1 = opal_list_get_next(item1)) {
|
||||
@ -747,9 +749,20 @@ int orte_util_filter_hostfile_nodes(opal_list_t *nodes,
|
||||
/* mark as included */
|
||||
node_from_list->mapped = true;
|
||||
}
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* if the host in the newnode list wasn't found,
|
||||
* then that is an error we need to report to the
|
||||
* user and abort
|
||||
*/
|
||||
if (!found) {
|
||||
orte_show_help("help-hostfile.txt", "hostfile:extra-node-not-found",
|
||||
true, hostfile, node_from_file->name);
|
||||
rc = ORTE_ERR_SILENT;
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
/* cleanup the newnode list */
|
||||
OBJ_RELEASE(item2);
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user