1
1

Merge pull request #1084 from rhc54/topic/dashhost

Fix relative node syntax for dash-host option
Этот коммит содержится в:
rhc54 2015-10-31 21:24:39 -07:00
родитель b23f1f3578 8bfbe7f16c
Коммит 1fe27bf1dd
10 изменённых файлов: 115 добавлений и 27 удалений

Просмотреть файл

@ -1714,7 +1714,7 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata)
OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
"%s using dash_host",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
if (ORTE_SUCCESS != (rc = orte_util_add_dash_host_nodes(&tnodes, hosts))) {
if (ORTE_SUCCESS != (rc = orte_util_add_dash_host_nodes(&tnodes, hosts, false))) {
ORTE_ERROR_LOG(rc);
free(hosts);
return rc;

Просмотреть файл

@ -312,7 +312,7 @@ void orte_ras_base_allocate(int fd, short args, void *cbdata)
OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
"%s ras:base:allocate adding dash_hosts",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
if (ORTE_SUCCESS != (rc = orte_util_add_dash_host_nodes(&nodes, hosts))) {
if (ORTE_SUCCESS != (rc = orte_util_add_dash_host_nodes(&nodes, hosts, true))) {
free(hosts);
OBJ_DESTRUCT(&nodes);
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
@ -511,7 +511,7 @@ int orte_ras_base_add_hosts(orte_job_t *jdata)
opal_output_verbose(5, orte_ras_base_framework.framework_output,
"%s ras:base:add_hosts checking add-host %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hosts);
if (ORTE_SUCCESS != (rc = orte_util_add_dash_host_nodes(&nodes, hosts))) {
if (ORTE_SUCCESS != (rc = orte_util_add_dash_host_nodes(&nodes, hosts, true))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&nodes);
free(hosts);

Просмотреть файл

@ -171,7 +171,7 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
"%s using dash_host %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hosts));
if (ORTE_SUCCESS != (rc = orte_util_add_dash_host_nodes(&nodes, hosts))) {
if (ORTE_SUCCESS != (rc = orte_util_add_dash_host_nodes(&nodes, hosts, false))) {
ORTE_ERROR_LOG(rc);
free(hosts);
return rc;

Просмотреть файл

@ -155,6 +155,7 @@ bool orte_default_hostfile_given = false;
char *orte_rankfile = NULL;
int orte_num_allocated_nodes = 0;
char *orte_node_regex = NULL;
char *orte_default_dash_host = NULL;
/* tool communication controls */
bool orte_report_events = false;

Просмотреть файл

@ -540,6 +540,7 @@ ORTE_DECLSPEC extern bool orte_default_hostfile_given;
ORTE_DECLSPEC extern char *orte_rankfile;
ORTE_DECLSPEC extern int orte_num_allocated_nodes;
ORTE_DECLSPEC extern char *orte_node_regex;
ORTE_DECLSPEC extern char *orte_default_dash_host;
/* PMI version control */
ORTE_DECLSPEC extern int orted_pmi_version;

Просмотреть файл

@ -362,6 +362,14 @@ int orte_register_params(void)
orte_default_hostfile_given = true;
}
/* default dash-host */
orte_default_dash_host = NULL;
(void) mca_base_var_register ("orte", "orte", NULL, "default_dash_host",
"Default -host setting, \"none\" to ignore environmental or default MCA param setting)",
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&orte_default_dash_host);
/* regex of nodes in system */
orte_node_regex = NULL;
(void) mca_base_var_register ("orte", "orte", NULL, "node_regex",

Просмотреть файл

@ -1169,6 +1169,9 @@ static int create_app(int argc, char* argv[],
orte_set_attribute(&app->attributes, ORTE_APP_DASH_HOST, ORTE_ATTR_GLOBAL, tval, OPAL_STRING);
opal_argv_free(targ);
free(tval);
} else if (NULL != orte_default_dash_host) {
orte_set_attribute(&app->attributes, ORTE_APP_DASH_HOST, ORTE_ATTR_LOCAL,
orte_default_dash_host, OPAL_STRING);
}
/* check for bozo error */

Просмотреть файл

@ -1586,6 +1586,9 @@ static int create_app(int argc, char* argv[],
orte_set_attribute(&app->attributes, ORTE_APP_DASH_HOST, ORTE_ATTR_LOCAL, tval, OPAL_STRING);
opal_argv_free(targ);
free(tval);
} else if (NULL != orte_default_dash_host) {
orte_set_attribute(&app->attributes, ORTE_APP_DASH_HOST, ORTE_ATTR_LOCAL,
orte_default_dash_host, OPAL_STRING);
}
/* check for bozo error */

Просмотреть файл

@ -44,11 +44,11 @@
* relative node syntax should generate an immediate error
*/
int orte_util_add_dash_host_nodes(opal_list_t *nodes,
char *hosts)
char *hosts, bool allocating)
{
opal_list_item_t *item, *itm;
orte_std_cntr_t i, j, k;
int rc;
int rc, nodeidx;
char **host_argv=NULL;
char **mapped_nodes = NULL, **mini_map, *ndname;
orte_node_t *node, *nd;
@ -59,8 +59,8 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
char *cptr;
OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
"%s dashhost: parsing args",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
"%s dashhost: parsing args %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hosts));
OBJ_CONSTRUCT(&adds, opal_list_t);
host_argv = opal_argv_split(hosts, ',');
@ -85,6 +85,7 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
}
}
opal_argv_free(host_argv);
mini_map = NULL;
/* Did we find anything? If not, then do nothing */
if (NULL == mapped_nodes) {
@ -92,23 +93,96 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
goto cleanup;
}
for (i = 0; NULL != mapped_nodes[i]; ++i) {
/* a leading '+' marks relative node syntax - skip the entry when
* allocating, otherwise resolve it against the global node pool
*/
if ('+' == mapped_nodes[i][0]) {
if (!allocating) {
if ('e' == mapped_nodes[i][1] ||
'E' == mapped_nodes[i][1]) {
/* request for empty nodes - do they want
* all of them?
*/
if (NULL != (cptr = strchr(mapped_nodes[i], ':'))) {
/* the colon indicates a specific # are requested */
++cptr;
j = strtoul(cptr, NULL, 10);
} else if ('\0' != mapped_nodes[0][2]) {
j = strtoul(&mapped_nodes[0][2], NULL, 10);
} else {
/* add them all */
j = orte_node_pool->size;
}
for (k=0; 0 < j && k < orte_node_pool->size; k++) {
if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, k))) {
if (0 == node->num_procs) {
opal_argv_append_nosize(&mini_map, node->name);
--j;
}
}
}
} else if ('n' == mapped_nodes[i][1] ||
'N' == mapped_nodes[i][1]) {
/* they want a specific relative node #, so
* look it up on global pool
*/
nodeidx = strtol(&mapped_nodes[i][2], NULL, 10);
if (nodeidx < 0 ||
nodeidx > (int)orte_node_pool->size) {
/* this is an error */
orte_show_help("help-dash-host.txt", "dash-host:relative-node-out-of-bounds",
true, nodeidx, mapped_nodes[i]);
rc = ORTE_ERR_SILENT;
goto cleanup;
}
/* if the HNP is not allocated, then we need to
* adjust the index as the node pool is offset
* by one
*/
if (!orte_hnp_is_allocated) {
nodeidx++;
}
/* see if that location is filled */
if (NULL == (node = (orte_node_t *) opal_pointer_array_get_item(orte_node_pool, nodeidx))) {
/* this is an error */
orte_show_help("help-dash-host.txt", "dash-host:relative-node-not-found",
true, nodeidx, mapped_nodes[i]);
rc = ORTE_ERR_SILENT;
goto cleanup;
}
/* add this node to the list */
opal_argv_append_nosize(&mini_map, node->name);
} else {
/* invalid relative node syntax */
orte_show_help("help-dash-host.txt", "dash-host:invalid-relative-node-syntax",
true, mapped_nodes[i]);
rc = ORTE_ERR_SILENT;
goto cleanup;
}
}
} else {
/* not relative node syntax - use the host name as given */
opal_argv_append_nosize(&mini_map, mapped_nodes[i]);
}
}
if (NULL == mini_map) {
rc = ORTE_SUCCESS;
goto cleanup;
}
/* go through the names found and
add them to the host list. If they're not unique, then
bump the slots count for each duplicate */
for (i=0; NULL != mini_map[i]; i++) {
OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
"%s dashhost: working node %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), mini_map[i]));
for (i = 0; NULL != mapped_nodes[i]; ++i) {
/* if the specified node contains a relative node syntax,
* this is an error
*/
if ('+' == mapped_nodes[i][0]) {
orte_show_help("help-dash-host.txt", "dash-host:relative-syntax",
true, mapped_nodes[i]);
rc = ORTE_ERR_SILENT;
goto cleanup;
}
/* see if the node contains the number of slots */
slots_given = false;
if (NULL != (cptr = strchr(mapped_nodes[i], ':'))) {
if (NULL != (cptr = strchr(mini_map[i], ':'))) {
*cptr = '\0';
++cptr;
if ('*' == *cptr) {
@ -119,15 +193,11 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
slots_given = true;
}
OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
"%s dashhost: working node %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), mapped_nodes[i]));
/* check for local name */
if (orte_ifislocal(mapped_nodes[i])) {
if (orte_ifislocal(mini_map[i])) {
ndname = orte_process_info.nodename;
} else {
ndname = mapped_nodes[i];
ndname = mini_map[i];
}
/* see if the node is already on the list */
@ -177,6 +247,7 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
opal_list_append(&adds, &node->super);
}
}
opal_argv_free(mini_map);
/* transfer across all unique nodes */
while (NULL != (item = opal_list_remove_first(&adds))) {

Просмотреть файл

@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -31,7 +31,8 @@
BEGIN_C_DECLS
ORTE_DECLSPEC int orte_util_add_dash_host_nodes(opal_list_t *nodes,
char *hosts);
char *hosts,
bool allocating);
ORTE_DECLSPEC int orte_util_filter_dash_host_nodes(opal_list_t *nodes,
char *hosts,