Error out if we are filtering a hostfile and encounter a node that is not in the resource-managed allocation, giving an error message identifying the file and the node. Don't filter managed allocations thru a default hostfile as this can lead to "hidden" errors.
Don't use dash-host info on managed allocations if we are using soft locations. This commit was SVN r27245.
Этот коммит содержится в:
родитель
fa4c2af9ed
Коммит
efa50346c8
@ -87,7 +87,7 @@ int orte_rmaps_base_filter_nodes(orte_app_context_t *app,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* now filter the list through any -host specification */
|
/* now filter the list through any -host specification */
|
||||||
if (NULL != app->dash_host) {
|
if (!orte_soft_locations && NULL != app->dash_host) {
|
||||||
if (ORTE_SUCCESS != (rc = orte_util_filter_dash_host_nodes(nodes, app->dash_host, remove))) {
|
if (ORTE_SUCCESS != (rc = orte_util_filter_dash_host_nodes(nodes, app->dash_host, remove))) {
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
return rc;
|
return rc;
|
||||||
@ -407,38 +407,6 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
|
|||||||
return ORTE_ERR_SILENT;
|
return ORTE_ERR_SILENT;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* is there a default hostfile? */
|
|
||||||
if (NULL != orte_default_hostfile) {
|
|
||||||
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base.rmaps_output,
|
|
||||||
"%s Filtering thru default hostfile",
|
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
|
||||||
|
|
||||||
/* yes - filter the node list through the file, removing
|
|
||||||
* any nodes not in the file -or- excluded via ^
|
|
||||||
*/
|
|
||||||
if (ORTE_SUCCESS != (rc = orte_util_filter_hostfile_nodes(allocated_nodes,
|
|
||||||
orte_default_hostfile,
|
|
||||||
true)) &&
|
|
||||||
ORTE_ERR_TAKE_NEXT_OPTION != rc) {
|
|
||||||
ORTE_ERROR_LOG(rc);
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base.rmaps_output,
|
|
||||||
"%s Resulted in %d nodes in list",
|
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
|
||||||
(int)opal_list_get_size(allocated_nodes)));
|
|
||||||
|
|
||||||
/** check that anything is here */
|
|
||||||
if (0 == opal_list_get_size(allocated_nodes)) {
|
|
||||||
if (!silent) {
|
|
||||||
orte_show_help("help-orte-rmaps-base.txt",
|
|
||||||
"orte-rmaps-base:no-available-resources",
|
|
||||||
true);
|
|
||||||
}
|
|
||||||
return ORTE_ERR_SILENT;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* filter the nodes thru any hostfile and dash-host options */
|
/* filter the nodes thru any hostfile and dash-host options */
|
||||||
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base.rmaps_output,
|
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base.rmaps_output,
|
||||||
"%s Filtering thru apps",
|
"%s Filtering thru apps",
|
||||||
|
@ -10,6 +10,8 @@
|
|||||||
# University of Stuttgart. All rights reserved.
|
# University of Stuttgart. All rights reserved.
|
||||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
# Copyright (c) 2012 Los Alamos National Security, LLC
|
||||||
|
# All rights reserved.
|
||||||
# $COPYRIGHT$
|
# $COPYRIGHT$
|
||||||
#
|
#
|
||||||
# Additional copyrights may follow
|
# Additional copyrights may follow
|
||||||
@ -109,3 +111,17 @@ Open RTE detected a bad parameter in the hostfile:
|
|||||||
The cores parameter is less than 0:
|
The cores parameter is less than 0:
|
||||||
cores=%d
|
cores=%d
|
||||||
|
|
||||||
|
#
|
||||||
|
[hostfile:extra-node-not-found]
|
||||||
|
A hostfile was provided that contains at least one node not
|
||||||
|
present in the allocation:
|
||||||
|
|
||||||
|
hostfile: %s
|
||||||
|
node: %s
|
||||||
|
|
||||||
|
If you are operating in a resource-managed environment, then only
|
||||||
|
nodes that are in the allocation can be used in the hostfile. You
|
||||||
|
may find relative node syntax to be a useful alternative to
|
||||||
|
specifying absolute node names; see the orte_hosts man page for
|
||||||
|
further information.
|
||||||
|
|
||||||
|
@ -565,7 +565,8 @@ int orte_util_filter_hostfile_nodes(opal_list_t *nodes,
|
|||||||
int num_empty, nodeidx;
|
int num_empty, nodeidx;
|
||||||
bool want_all_empty = false;
|
bool want_all_empty = false;
|
||||||
opal_list_t keep;
|
opal_list_t keep;
|
||||||
|
bool found;
|
||||||
|
|
||||||
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
|
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
|
||||||
"%s hostfile: filtering nodes through hostfile %s",
|
"%s hostfile: filtering nodes through hostfile %s",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostfile));
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostfile));
|
||||||
@ -717,6 +718,7 @@ int orte_util_filter_hostfile_nodes(opal_list_t *nodes,
|
|||||||
* search the provided list of nodes to see if this
|
* search the provided list of nodes to see if this
|
||||||
* one is found
|
* one is found
|
||||||
*/
|
*/
|
||||||
|
found = false;
|
||||||
for (item1 = opal_list_get_first(nodes);
|
for (item1 = opal_list_get_first(nodes);
|
||||||
item1 != opal_list_get_end(nodes);
|
item1 != opal_list_get_end(nodes);
|
||||||
item1 = opal_list_get_next(item1)) {
|
item1 = opal_list_get_next(item1)) {
|
||||||
@ -747,9 +749,20 @@ int orte_util_filter_hostfile_nodes(opal_list_t *nodes,
|
|||||||
/* mark as included */
|
/* mark as included */
|
||||||
node_from_list->mapped = true;
|
node_from_list->mapped = true;
|
||||||
}
|
}
|
||||||
|
found = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/* if the host in the newnode list wasn't found,
|
||||||
|
* then that is an error we need to report to the
|
||||||
|
* user and abort
|
||||||
|
*/
|
||||||
|
if (!found) {
|
||||||
|
orte_show_help("help-hostfile.txt", "hostfile:extra-node-not-found",
|
||||||
|
true, hostfile, node_from_file->name);
|
||||||
|
rc = ORTE_ERR_SILENT;
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
/* cleanup the newnode list */
|
/* cleanup the newnode list */
|
||||||
OBJ_RELEASE(item2);
|
OBJ_RELEASE(item2);
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user