1
1

Merge pull request #1084 from rhc54/topic/dashhost

Fix relative node syntax for dash-host option
Этот коммит содержится в:
rhc54 2015-10-31 21:24:39 -07:00
родитель b23f1f3578 8bfbe7f16c
Коммит 1fe27bf1dd
10 изменённых файлов: 115 добавлений и 27 удалений

Просмотреть файл

@ -1714,7 +1714,7 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata)
OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
"%s using dash_host", "%s using dash_host",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
if (ORTE_SUCCESS != (rc = orte_util_add_dash_host_nodes(&tnodes, hosts))) { if (ORTE_SUCCESS != (rc = orte_util_add_dash_host_nodes(&tnodes, hosts, false))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
free(hosts); free(hosts);
return rc; return rc;

Просмотреть файл

@ -312,7 +312,7 @@ void orte_ras_base_allocate(int fd, short args, void *cbdata)
OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
"%s ras:base:allocate adding dash_hosts", "%s ras:base:allocate adding dash_hosts",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
if (ORTE_SUCCESS != (rc = orte_util_add_dash_host_nodes(&nodes, hosts))) { if (ORTE_SUCCESS != (rc = orte_util_add_dash_host_nodes(&nodes, hosts, true))) {
free(hosts); free(hosts);
OBJ_DESTRUCT(&nodes); OBJ_DESTRUCT(&nodes);
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
@ -511,7 +511,7 @@ int orte_ras_base_add_hosts(orte_job_t *jdata)
opal_output_verbose(5, orte_ras_base_framework.framework_output, opal_output_verbose(5, orte_ras_base_framework.framework_output,
"%s ras:base:add_hosts checking add-host %s", "%s ras:base:add_hosts checking add-host %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hosts); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hosts);
if (ORTE_SUCCESS != (rc = orte_util_add_dash_host_nodes(&nodes, hosts))) { if (ORTE_SUCCESS != (rc = orte_util_add_dash_host_nodes(&nodes, hosts, true))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&nodes); OBJ_DESTRUCT(&nodes);
free(hosts); free(hosts);

Просмотреть файл

@ -171,7 +171,7 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
"%s using dash_host %s", "%s using dash_host %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hosts)); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hosts));
if (ORTE_SUCCESS != (rc = orte_util_add_dash_host_nodes(&nodes, hosts))) { if (ORTE_SUCCESS != (rc = orte_util_add_dash_host_nodes(&nodes, hosts, false))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
free(hosts); free(hosts);
return rc; return rc;

Просмотреть файл

@ -155,6 +155,7 @@ bool orte_default_hostfile_given = false;
char *orte_rankfile = NULL; char *orte_rankfile = NULL;
int orte_num_allocated_nodes = 0; int orte_num_allocated_nodes = 0;
char *orte_node_regex = NULL; char *orte_node_regex = NULL;
char *orte_default_dash_host = NULL;
/* tool communication controls */ /* tool communication controls */
bool orte_report_events = false; bool orte_report_events = false;

Просмотреть файл

@ -540,6 +540,7 @@ ORTE_DECLSPEC extern bool orte_default_hostfile_given;
ORTE_DECLSPEC extern char *orte_rankfile; ORTE_DECLSPEC extern char *orte_rankfile;
ORTE_DECLSPEC extern int orte_num_allocated_nodes; ORTE_DECLSPEC extern int orte_num_allocated_nodes;
ORTE_DECLSPEC extern char *orte_node_regex; ORTE_DECLSPEC extern char *orte_node_regex;
ORTE_DECLSPEC extern char *orte_default_dash_host;
/* PMI version control */ /* PMI version control */
ORTE_DECLSPEC extern int orted_pmi_version; ORTE_DECLSPEC extern int orted_pmi_version;

Просмотреть файл

@ -362,6 +362,14 @@ int orte_register_params(void)
orte_default_hostfile_given = true; orte_default_hostfile_given = true;
} }
/* default dash-host */
orte_default_dash_host = NULL;
(void) mca_base_var_register ("orte", "orte", NULL, "default_dash_host",
"Default -host setting, \"none\" to ignore environmental or default MCA param setting)",
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&orte_default_dash_host);
/* regex of nodes in system */ /* regex of nodes in system */
orte_node_regex = NULL; orte_node_regex = NULL;
(void) mca_base_var_register ("orte", "orte", NULL, "node_regex", (void) mca_base_var_register ("orte", "orte", NULL, "node_regex",

Просмотреть файл

@ -1169,6 +1169,9 @@ static int create_app(int argc, char* argv[],
orte_set_attribute(&app->attributes, ORTE_APP_DASH_HOST, ORTE_ATTR_GLOBAL, tval, OPAL_STRING); orte_set_attribute(&app->attributes, ORTE_APP_DASH_HOST, ORTE_ATTR_GLOBAL, tval, OPAL_STRING);
opal_argv_free(targ); opal_argv_free(targ);
free(tval); free(tval);
} else if (NULL != orte_default_dash_host) {
orte_set_attribute(&app->attributes, ORTE_APP_DASH_HOST, ORTE_ATTR_LOCAL,
orte_default_dash_host, OPAL_STRING);
} }
/* check for bozo error */ /* check for bozo error */

Просмотреть файл

@ -1586,6 +1586,9 @@ static int create_app(int argc, char* argv[],
orte_set_attribute(&app->attributes, ORTE_APP_DASH_HOST, ORTE_ATTR_LOCAL, tval, OPAL_STRING); orte_set_attribute(&app->attributes, ORTE_APP_DASH_HOST, ORTE_ATTR_LOCAL, tval, OPAL_STRING);
opal_argv_free(targ); opal_argv_free(targ);
free(tval); free(tval);
} else if (NULL != orte_default_dash_host) {
orte_set_attribute(&app->attributes, ORTE_APP_DASH_HOST, ORTE_ATTR_LOCAL,
orte_default_dash_host, OPAL_STRING);
} }
/* check for bozo error */ /* check for bozo error */

Просмотреть файл

@ -44,11 +44,11 @@
* relative node syntax should generate an immediate error * relative node syntax should generate an immediate error
*/ */
int orte_util_add_dash_host_nodes(opal_list_t *nodes, int orte_util_add_dash_host_nodes(opal_list_t *nodes,
char *hosts) char *hosts, bool allocating)
{ {
opal_list_item_t *item, *itm; opal_list_item_t *item, *itm;
orte_std_cntr_t i, j, k; orte_std_cntr_t i, j, k;
int rc; int rc, nodeidx;
char **host_argv=NULL; char **host_argv=NULL;
char **mapped_nodes = NULL, **mini_map, *ndname; char **mapped_nodes = NULL, **mini_map, *ndname;
orte_node_t *node, *nd; orte_node_t *node, *nd;
@ -59,8 +59,8 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
char *cptr; char *cptr;
OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
"%s dashhost: parsing args", "%s dashhost: parsing args %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hosts));
OBJ_CONSTRUCT(&adds, opal_list_t); OBJ_CONSTRUCT(&adds, opal_list_t);
host_argv = opal_argv_split(hosts, ','); host_argv = opal_argv_split(hosts, ',');
@ -85,6 +85,7 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
} }
} }
opal_argv_free(host_argv); opal_argv_free(host_argv);
mini_map = NULL;
/* Did we find anything? If not, then do nothing */ /* Did we find anything? If not, then do nothing */
if (NULL == mapped_nodes) { if (NULL == mapped_nodes) {
@ -92,23 +93,96 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
goto cleanup; goto cleanup;
} }
/* Expand every requested entry into mini_map.
 *
 * Plain entries are copied through unchanged. Entries that start with '+'
 * use relative node syntax and are resolved against orte_node_pool:
 *   +e / +E          every node in the pool that has no procs on it
 *   +e:N  or  +eN    at most N such nodes
 *   +nK / +NK        the node at relative index K
 * When "allocating" is true, relative entries are skipped entirely.
 * Any malformed or unresolvable relative entry reports a help message,
 * sets rc to ORTE_ERR_SILENT and jumps to cleanup.
 */
for (i = 0; NULL != mapped_nodes[i]; ++i) {
    if ('+' != mapped_nodes[i][0]) {
        /* just one node was given */
        opal_argv_append_nosize(&mini_map, mapped_nodes[i]);
        continue;
    }

    /* relative node syntax is ignored while allocating */
    if (allocating) {
        continue;
    }

    if ('e' == mapped_nodes[i][1] ||
        'E' == mapped_nodes[i][1]) {
        /* request for empty nodes - do they want
         * all of them?
         */
        if (NULL != (cptr = strchr(mapped_nodes[i], ':'))) {
            /* the colon indicates a specific # are requested */
            ++cptr;
            j = strtoul(cptr, NULL, 10);
        } else if ('\0' != mapped_nodes[i][2]) {
            /* the count directly follows the 'e' of THIS entry;
             * it must be read from entry i, not from the first
             * entry in the list
             */
            j = strtoul(&mapped_nodes[i][2], NULL, 10);
        } else {
            /* add them all */
            j = orte_node_pool->size;
        }
        /* walk the pool, taking unused nodes until the request is met */
        for (k=0; 0 < j && k < orte_node_pool->size; k++) {
            if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, k))) {
                if (0 == node->num_procs) {
                    opal_argv_append_nosize(&mini_map, node->name);
                    --j;
                }
            }
        }
    } else if ('n' == mapped_nodes[i][1] ||
               'N' == mapped_nodes[i][1]) {
        /* they want a specific relative node #, so
         * look it up on global pool
         */
        nodeidx = strtol(&mapped_nodes[i][2], NULL, 10);
        if (nodeidx < 0 ||
            nodeidx > (int)orte_node_pool->size) {
            /* this is an error */
            orte_show_help("help-dash-host.txt", "dash-host:relative-node-out-of-bounds",
                           true, nodeidx, mapped_nodes[i]);
            rc = ORTE_ERR_SILENT;
            goto cleanup;
        }
        /* if the HNP is not allocated, then we need to
         * adjust the index as the node pool is offset
         * by one
         */
        if (!orte_hnp_is_allocated) {
            nodeidx++;
        }
        /* see if that location is filled */
        if (NULL == (node = (orte_node_t *) opal_pointer_array_get_item(orte_node_pool, nodeidx))) {
            /* this is an error */
            orte_show_help("help-dash-host.txt", "dash-host:relative-node-not-found",
                           true, nodeidx, mapped_nodes[i]);
            rc = ORTE_ERR_SILENT;
            goto cleanup;
        }
        /* add this node to the list */
        opal_argv_append_nosize(&mini_map, node->name);
    } else {
        /* invalid relative node syntax */
        orte_show_help("help-dash-host.txt", "dash-host:invalid-relative-node-syntax",
                       true, mapped_nodes[i]);
        rc = ORTE_ERR_SILENT;
        goto cleanup;
    }
}
if (NULL == mini_map) {
rc = ORTE_SUCCESS;
goto cleanup;
}
/* go through the names found and /* go through the names found and
add them to the host list. If they're not unique, then add them to the host list. If they're not unique, then
bump the slots count for each duplicate */ bump the slots count for each duplicate */
for (i=0; NULL != mini_map[i]; i++) {
OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
"%s dashhost: working node %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), mini_map[i]));
for (i = 0; NULL != mapped_nodes[i]; ++i) {
/* if the specified node contains a relative node syntax,
* this is an error
*/
if ('+' == mapped_nodes[i][0]) {
orte_show_help("help-dash-host.txt", "dash-host:relative-syntax",
true, mapped_nodes[i]);
rc = ORTE_ERR_SILENT;
goto cleanup;
}
/* see if the node contains the number of slots */ /* see if the node contains the number of slots */
slots_given = false; slots_given = false;
if (NULL != (cptr = strchr(mapped_nodes[i], ':'))) { if (NULL != (cptr = strchr(mini_map[i], ':'))) {
*cptr = '\0'; *cptr = '\0';
++cptr; ++cptr;
if ('*' == *cptr) { if ('*' == *cptr) {
@ -119,15 +193,11 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
slots_given = true; slots_given = true;
} }
OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
"%s dashhost: working node %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), mapped_nodes[i]));
/* check for local name */ /* check for local name */
if (orte_ifislocal(mapped_nodes[i])) { if (orte_ifislocal(mini_map[i])) {
ndname = orte_process_info.nodename; ndname = orte_process_info.nodename;
} else { } else {
ndname = mapped_nodes[i]; ndname = mini_map[i];
} }
/* see if the node is already on the list */ /* see if the node is already on the list */
@ -177,6 +247,7 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
opal_list_append(&adds, &node->super); opal_list_append(&adds, &node->super);
} }
} }
opal_argv_free(mini_map);
/* transfer across all unique nodes */ /* transfer across all unique nodes */
while (NULL != (item = opal_list_remove_first(&adds))) { while (NULL != (item = opal_list_remove_first(&adds))) {

Просмотреть файл

@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved. * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -31,7 +31,8 @@
BEGIN_C_DECLS BEGIN_C_DECLS
ORTE_DECLSPEC int orte_util_add_dash_host_nodes(opal_list_t *nodes, ORTE_DECLSPEC int orte_util_add_dash_host_nodes(opal_list_t *nodes,
char *hosts); char *hosts,
bool allocating);
ORTE_DECLSPEC int orte_util_filter_dash_host_nodes(opal_list_t *nodes, ORTE_DECLSPEC int orte_util_filter_dash_host_nodes(opal_list_t *nodes,
char *hosts, char *hosts,