1
1

Revise the way we output resolved hostnames to make life easier for the Eclipse folks. Store aliases for individual nodes (only when requested to show resolved hostnames) and then report them out as part of the display-map option.

This commit was SVN r20284.
Этот коммит содержится в:
Ralph Castain 2009-01-15 18:11:50 +00:00
родитель 253a54df12
Коммит 88a0af9726
8 изменённых файлов: 108 добавлений и 33 удалений

Просмотреть файл

@ -90,6 +90,29 @@ int opal_argv_append_nosize(char ***argv, const char *arg)
return OPAL_SUCCESS;
}
/*
 * Append arg to the NULL-terminated argv-style array, unless an entry
 * that compares equal (via strcmp) is already stored in it.
 *
 * Returns OPAL_SUCCESS when arg is already present; otherwise returns
 * the result of opal_argv_append_nosize().
 */
int opal_argv_append_unique_nosize(char ***argv, const char *arg)
{
    char **entry;

    /* a NULL array cannot contain arg, so only scan when one exists */
    if (NULL != *argv) {
        for (entry = *argv; NULL != *entry; ++entry) {
            if (0 == strcmp(arg, *entry)) {
                /* duplicate found - leave the array untouched */
                return OPAL_SUCCESS;
            }
        }
    }

    /* either there was no array yet or arg was not found in it */
    return opal_argv_append_nosize(argv, arg);
}
/*
* Free a NULL-terminated argv array.
*/

Просмотреть файл

@ -85,7 +85,23 @@ OPAL_DECLSPEC int opal_argv_append(int *argc, char ***argv, const char *arg) __
*/
OPAL_DECLSPEC int opal_argv_append_nosize(char ***argv, const char *arg);
/**
/**
 * Append to an argv-style array, but only if the provided argument
 * doesn't already exist somewhere in the array. Ignore the size of the array.
 *
 * @param argv Pointer to an argv array.
 * @param arg Pointer to the string to append.
 *
 * @retval OPAL_SUCCESS On success, including the case where the argument
 *                      was already present and nothing was appended
 * @retval OPAL_ERROR On failure
 *
 * This function is identical to the opal_argv_append_nosize() function
 * except that it only appends the provided argument if it does not already
 * exist in the provided array. Existing entries are compared against the
 * argument with strcmp(), so the match is exact and case-sensitive.
 */
OPAL_DECLSPEC int opal_argv_append_unique_nosize(char ***argv, const char *arg);
/**
* Free a NULL-terminated argv array.
*
* @param argv Argv array to free.

Просмотреть файл

@ -64,7 +64,7 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
{
opal_list_item_t* item;
orte_std_cntr_t num_nodes;
int rc;
int rc, i;
orte_node_t *node, *hnp_node;
/* get the number of nodes */
@ -116,18 +116,22 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
*/
hnp_node->slots_alloc = node->slots;
/* use the local name for our node - don't trust what
* we got from an RM. If requested, display the resolved
* we got from an RM. If requested, store the resolved
* nodename info
*/
if (orte_show_resolved_nodenames &&
0 != strcmp(node->name, hnp_node->name)) {
if (orte_xml_output) {
opal_output(orte_clean_output, "<noderesolve name=\"%s\" resolved=\"%s\"/>", node->name, hnp_node->name);
} else {
opal_output(orte_clean_output, "node name %s resolved to %s", node->name, hnp_node->name);
if (orte_show_resolved_nodenames) {
/* if the node name is different, store it as an alias */
if (0 != strcmp(node->name, hnp_node->name)) {
/* add to list of aliases for this node - only add if unique */
opal_argv_append_unique_nosize(&hnp_node->alias, node->name);
}
if (NULL != node->alias) {
/* now copy over any aliases that are unique */
for (i=0; NULL != node->alias[i]; i++) {
opal_argv_append_unique_nosize(&hnp_node->alias, node->alias[i]);
}
}
}
/* update the total slots in the job */
jdata->total_slots_alloc += hnp_node->slots;
/* don't keep duplicate copy */

Просмотреть файл

@ -294,9 +294,18 @@ int orte_dt_print_node(char **output, char *prefix, orte_node_t *src, opal_data_
if (orte_xml_output) {
/* need to create the output in XML format */
asprintf(output, "%s<host name=\"%s\" slots=\"%d\" max_slots=\"%d\">\n", pfx2,
asprintf(&tmp, "%s<host name=\"%s\" slots=\"%d\" max_slots=\"%d\">\n", pfx2,
(NULL == src->name) ? "UNKNOWN" : src->name,
(int)src->slots, (int)src->slots_max);
/* does this node have any aliases? */
if (NULL != src->alias) {
for (i=0; NULL != src->alias[i]; i++) {
asprintf(&tmp2, "%s%s\t<noderesolve resolved=\"%s\"/>\n", tmp, pfx2, src->alias[i]);
free(tmp);
tmp = tmp2;
}
}
*output = tmp;
free(pfx2);
return ORTE_SUCCESS;
}
@ -308,6 +317,14 @@ int orte_dt_print_node(char **output, char *prefix, orte_node_t *src, opal_data_
asprintf(&tmp, "\n%sData for node: Name: %s\tNum slots: %ld\tMax slots: %ld",
pfx2, (NULL == src->name) ? "UNKNOWN" : src->name,
(long)src->slots, (long)src->slots_max);
/* does this node have any aliases? */
if (NULL != src->alias) {
for (i=0; NULL != src->alias[i]; i++) {
asprintf(&tmp2, "%s\n%s\tresolved from %s", tmp, pfx2, src->alias[i]);
free(tmp);
tmp = tmp2;
}
}
free(pfx2);
*output = tmp;
return ORTE_SUCCESS;
@ -315,6 +332,14 @@ int orte_dt_print_node(char **output, char *prefix, orte_node_t *src, opal_data_
asprintf(&tmp, "\n%sData for node: Name: %s\tNum procs: %ld",
pfx2, (NULL == src->name) ? "UNKNOWN" : src->name,
(long)src->num_procs);
/* does this node have any aliases? */
if (NULL != src->alias) {
for (i=0; NULL != src->alias[i]; i++) {
asprintf(&tmp2, "%s\n%s\tresolved from %s", tmp, pfx2, src->alias[i]);
free(tmp);
tmp = tmp2;
}
}
goto PRINT_PROCS;
}
@ -322,6 +347,14 @@ int orte_dt_print_node(char **output, char *prefix, orte_node_t *src, opal_data_
pfx2, (NULL == src->name) ? "UNKNOWN" : src->name,
pfx2, (long)src->launch_id,
src->arch, src->state);
/* does this node have any aliases? */
if (NULL != src->alias) {
for (i=0; NULL != src->alias[i]; i++) {
asprintf(&tmp2, "%s\n%s\tresolved from %s", tmp, pfx2, src->alias[i]);
free(tmp);
tmp = tmp2;
}
}
if (NULL == src->daemon) {
asprintf(&tmp2, "%s\n%s\tDaemon: %s\tDaemon launched: %s", tmp, pfx2,

Просмотреть файл

@ -587,6 +587,7 @@ OBJ_CLASS_INSTANCE(orte_job_t,
static void orte_node_construct(orte_node_t* node)
{
node->name = NULL;
node->alias = NULL;
node->index = -1;
node->daemon = NULL;
node->daemon_launched = false;
@ -619,6 +620,10 @@ static void orte_node_destruct(orte_node_t* node)
free(node->name);
}
if (NULL != node->alias) {
opal_argv_free(node->alias);
}
if (NULL != node->daemon) OBJ_RELEASE(node->daemon);
for (i=0; i < node->num_procs; i++) {

Просмотреть файл

@ -189,6 +189,8 @@ typedef struct {
orte_std_cntr_t index;
/** String node name */
char *name;
/* argv-like array of aliases for this node */
char **alias;
/* daemon on this node */
struct orte_proc_t *daemon;
/* whether or not this daemon has been launched */

Просмотреть файл

@ -32,15 +32,6 @@
#include "dash_host.h"
/*
 * Report on the orte_clean_output stream that the given node name was
 * resolved to another name, as an XML element when orte_xml_output is
 * set and as a plain-text line otherwise.
 */
static void show_resolved_hostname(char *name, char *resolved)
{
    /* plain-text report is the non-XML case - handle it and leave */
    if (!orte_xml_output) {
        opal_output(orte_clean_output, "node name %s resolved to %s", name, resolved);
        return;
    }

    opal_output(orte_clean_output, "<noderesolve name=\"%s\" resolved=\"%s\"/>", name, resolved);
}
/* we can only enter this routine if no other allocation
* was found, so we only need to know that finding any
* relative node syntax should generate an immediate error
@ -121,7 +112,8 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
*/
if (orte_show_resolved_nodenames &&
0 != strcmp(mapped_nodes[i], orte_process_info.nodename)) {
show_resolved_hostname(mapped_nodes[i], orte_process_info.nodename);
/* add to list of aliases for this node - only add if unique */
opal_argv_append_unique_nosize(&node->alias, mapped_nodes[i]);
}
node->name = strdup(orte_process_info.nodename);
} else {

Просмотреть файл

@ -49,15 +49,6 @@
static const char *cur_hostfile_name = NULL;
/*
 * Report on the orte_clean_output stream that the given node name was
 * resolved to another name, as an XML element when orte_xml_output is
 * set and as a plain-text line otherwise.
 */
static void show_resolved_hostname(char *name, char *resolved)
{
    /* plain-text report is the non-XML case - handle it and leave */
    if (!orte_xml_output) {
        opal_output(orte_clean_output, "node name %s resolved to %s", name, resolved);
        return;
    }

    opal_output(orte_clean_output, "<noderesolve name=\"%s\" resolved=\"%s\"/>", name, resolved);
}
static void hostfile_parse_error(int token)
{
switch (token) {
@ -141,6 +132,7 @@ static int hostfile_parse_line(int token, opal_list_t* updates, opal_list_t* exc
char* value;
char** argv;
char* node_name = NULL;
char* node_alias = NULL;
char* username = NULL;
int cnt;
int number_of_slots = 0;
@ -192,7 +184,7 @@ static int hostfile_parse_line(int token, opal_list_t* updates, opal_list_t* exc
/* Nodename has been allocated, that is for sure */
if (orte_show_resolved_nodenames &&
0 != strcmp(node_name, orte_process_info.nodename)) {
show_resolved_hostname(node_name, orte_process_info.nodename);
node_alias = strdup(node_name);
}
free (node_name);
node_name = strdup(orte_process_info.nodename);
@ -212,12 +204,14 @@ static int hostfile_parse_line(int token, opal_list_t* updates, opal_list_t* exc
return ORTE_SUCCESS;
}
/* convert this into something globally unique */
/* this is not a node to be excluded, so we need to process it and
* add it to the "include" list. See if this host is actually us.
*/
if (strcmp(node_name, "localhost") == 0 || opal_ifislocal(node_name)) {
/* Nodename has been allocated, that is for sure */
if (orte_show_resolved_nodenames &&
0 != strcmp(node_name, orte_process_info.nodename)) {
show_resolved_hostname(node_name, orte_process_info.nodename);
node_alias = strdup(node_name);
}
free (node_name);
node_name = strdup(orte_process_info.nodename);
@ -238,6 +232,12 @@ static int hostfile_parse_line(int token, opal_list_t* updates, opal_list_t* exc
node = OBJ_NEW(orte_node_t);
node->name = node_name;
}
/* do we need to record an alias for this node? */
if (NULL != node_alias) {
/* add to list of aliases for this node - only add if unique */
opal_argv_append_unique_nosize(&node->alias, node_alias);
free(node_alias);
}
} else if (ORTE_HOSTFILE_RELATIVE == token) {
/* store this for later processing */
node = OBJ_NEW(orte_node_t);