1
1

To connect properly to debuggers such as STAT, the MPIR_proctab must contain the unstripped hostname. Unfortunately, Cray's ALPS launcher needs the stripped version. So, when stripping the hostname prefix, retain the full hostname as an alias, and add a parameter (orte_hostname_alias_index) that selects which alias to use in the proctab.

This commit was SVN r27863.
This commit is contained in:
Ralph Castain 2013-01-18 05:00:05 +00:00
parent f63c88701f
commit c96cc2d5a0
7 changed files with 43 additions and 4 deletions

View File

@ -82,6 +82,10 @@ orte_allocation_required = 1
## Deal with the allocator
orte_strip_prefix_from_node_names = 1
orte_retain_aliases = 1
# Alias index is 1-based: the 1st alias entry is the stripped node name,
# the 2nd is the unstripped one
orte_hostname_alias_index = 2
## MPI behavior
## Do NOT specify mpi_leave_pinned so system

View File

@ -82,6 +82,10 @@ orte_allocation_required = 1
## Deal with the allocator
orte_strip_prefix_from_node_names = 1
orte_retain_aliases = 1
# Alias index is 1-based: the 1st alias entry is the stripped node name,
# the 2nd is the unstripped one
orte_hostname_alias_index = 2
## MPI behavior
## Do NOT specify mpi_leave_pinned so system

View File

@ -10,7 +10,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007-2012 Los Alamos National Security, LLC. All rights
* Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2009 Institut National de Recherche en Informatique
* et Automatique. All rights reserved.
@ -53,6 +53,7 @@
#include "opal/util/output.h"
#include "opal/util/cmd_line.h"
#include "opal/util/if.h"
#include "opal/util/net.h"
#include "opal/util/opal_environ.h"
#include "opal/util/os_path.h"
#include "opal/util/printf.h"
@ -704,11 +705,26 @@ int orte_daemon(int argc, char *argv[])
/* include our node name */
opal_dss.pack(buffer, &orte_process_info.nodename, 1, OPAL_STRING);
/* if requested, include any non-loopback aliases for this node */
if (orte_retain_aliases) {
char **aliases=NULL;
uint8_t naliases, ni;
char hostname[ORTE_MAX_HOSTNAME_SIZE];
char *ptr;
/* if we stripped the prefix, include full hostname as an alias */
if (orte_process_info.strip_prefix_from_node_names) {
gethostname(hostname, ORTE_MAX_HOSTNAME_SIZE);
/* if the hostname is an IP address, leave it alone */
if (!opal_net_isaddr(hostname) && !orte_keep_fqdn_hostnames) {
/* not an IP address, so remove any domain info */
if (NULL != (ptr = strchr(hostname, '.'))) {
*ptr = '\0';
}
}
opal_argv_append_nosize(&aliases, hostname);
}
opal_ifgetaliases(&aliases);
naliases = opal_argv_count(aliases);
if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &naliases, 1, OPAL_UINT8))) {

View File

@ -81,6 +81,7 @@ bool orte_keep_fqdn_hostnames = false;
bool orte_have_fqdn_allocation = false;
bool orte_show_resolved_nodenames;
bool orte_retain_aliases;
int orte_use_hostname_alias;
int orted_debug_failure;
int orted_debug_failure_delay;

View File

@ -11,7 +11,7 @@
* All rights reserved.
* Copyright (c) 2007-2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
@ -609,6 +609,7 @@ ORTE_DECLSPEC extern bool orte_keep_fqdn_hostnames;
ORTE_DECLSPEC extern bool orte_have_fqdn_allocation;
ORTE_DECLSPEC extern bool orte_show_resolved_nodenames;
ORTE_DECLSPEC extern bool orte_retain_aliases;
ORTE_DECLSPEC extern int orte_use_hostname_alias;
/* debug flags */
ORTE_DECLSPEC extern int orted_debug_failure;

View File

@ -351,6 +351,11 @@ int orte_register_params(void)
false, false, (int)false, &value);
orte_retain_aliases = OPAL_INT_TO_BOOL(value);
/* which alias to use in MPIR_proctab */
mca_base_param_reg_int_name("orte", "hostname_alias_index",
"If hostname aliases are being retained, which one to use for the debugger proc table [default: 1st alias]",
false, false, 1, &orte_use_hostname_alias);
/* whether to tag output */
mca_base_param_reg_int_name("orte", "tag_output",
"Tag all output with [job,rank] (default: false)",

View File

@ -2822,7 +2822,15 @@ void orte_debugger_init_after_spawn(int fd, short event, void *cbdata)
continue;
}
MPIR_proctable[i].host_name = strdup(proc->node->name);
/* take the indicated alias as the hostname, if aliases exist */
if (orte_retain_aliases &&
orte_use_hostname_alias <= opal_argv_count(proc->node->alias)) {
MPIR_proctable[i].host_name = strdup(proc->node->alias[orte_use_hostname_alias-1]);
} else {
/* just use the default name */
MPIR_proctable[i].host_name = strdup(proc->node->name);
}
if ( 0 == strncmp(appctx->app, OPAL_PATH_SEP, 1 )) {
MPIR_proctable[i].executable_name =
opal_os_path( false, appctx->app, NULL );