1
1

Attempt to reduce the RARP traffic during definition of allocations

Этот коммит содержится в:
Ralph Castain 2015-03-16 16:25:01 -07:00
родитель 1196069815
Коммит 5ae42c816e
10 изменённых файлов: 104 добавлений и 115 удалений

Просмотреть файл

@ -1,7 +1,7 @@
/*
* Copyright (c) 2012-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -33,6 +33,7 @@
#include "orte/util/error_strings.h"
#include "orte/util/name_fns.h"
#include "orte/util/proc_info.h"
#include "orte/util/show_help.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/errmgr/errmgr.h"
@ -537,9 +538,7 @@ static void process_opens(int fd, short args, void *cbdata)
}
/* if the host is our own, then treat it as a local file */
if (0 == strcmp(host, orte_process_info.nodename) ||
0 == strcmp(host, "localhost") ||
opal_ifislocal(host)) {
if (orte_ifislocal(host)) {
opal_output_verbose(1, orte_dfs_base_framework.framework_output,
"%s file %s on local host",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),

Просмотреть файл

@ -1,7 +1,8 @@
/*
* Copyright (c) 2012-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -329,10 +330,7 @@ static void process_opens(int fd, short args, void *cbdata)
goto complete;
}
/* if the host is our own, then treat it as a local file */
if (NULL == host ||
0 == strcmp(host, orte_process_info.nodename) ||
0 == strcmp(host, "localhost") ||
opal_ifislocal(host)) {
if (NULL == host || orte_ifislocal(host)) {
opal_output_verbose(1, orte_dfs_base_framework.framework_output,
"%s file %s on local host",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),

Просмотреть файл

@ -11,7 +11,7 @@
* All rights reserved.
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -34,6 +34,7 @@
#include "opal/util/output.h"
#include "opal/dss/dss.h"
#include "opal/util/argv.h"
#include "opal/mca/if/if.h"
#include "orte/util/show_help.h"
#include "orte/mca/errmgr/errmgr.h"
@ -195,6 +196,11 @@ void orte_ras_base_allocate(int fd, short args, void *cbdata)
}
/* If something came back, save it and we are done */
if (!opal_list_is_empty(&nodes)) {
/* flag that the allocation is managed */
orte_managed_allocation = true;
/* since it is managed, we do not attempt to resolve
* the nodenames */
opal_if_do_not_resolve = true;
/* store the results in the global resource pool - this removes the
* list items
*/
@ -210,8 +216,6 @@ void orte_ras_base_allocate(int fd, short args, void *cbdata)
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
}
/* flag that the allocation is managed */
orte_managed_allocation = true;
goto DISPLAY;
} else if (orte_allocation_required) {
/* if nothing was found, and an allocation is

Просмотреть файл

@ -11,7 +11,7 @@
* All rights reserved.
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -103,10 +103,7 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
* first position since it is the first one entered. We need to check to see
* if this node is the same as the HNP's node so we don't double-enter it
*/
if (NULL != hnp_node &&
(0 == strcmp(node->name, hnp_node->name) ||
0 == strcmp(node->name, "localhost") ||
opal_ifislocal(node->name))) {
if (NULL != hnp_node && orte_ifislocal(node->name)) {
OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
"%s ras:base:node_insert updating HNP [%s] info to %ld slots",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),

Просмотреть файл

@ -14,7 +14,7 @@
* All rights reserved.
* Copyright (c) 2008 Voltaire. All rights reserved
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
*
* $COPYRIGHT$
*
@ -484,8 +484,7 @@ static int orte_rmaps_rank_file_parse(const char *rankfile)
goto unlock;
}
/* check if this is the local node */
if (0 == strcmp(node_name, hnp_node->name) ||
opal_ifislocal(node_name)) {
if (orte_ifislocal(node_name)) {
rfmap->node_name = strdup(hnp_node->name);
} else {
rfmap->node_name = strdup(node_name);

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2006-2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -298,7 +298,7 @@ static int orte_rmaps_seq_map(orte_job_t *jdata)
* hostfile may not have been FQDN, while name returned
* by gethostname may have been (or vice versa)
*/
if (opal_ifislocal(seq->hostname)) {
if (orte_ifislocal(seq->hostname)) {
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"mca:rmaps:seq: removing head node %s", seq->hostname);
opal_list_remove_item(seq_list, item);

Просмотреть файл

@ -10,7 +10,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -52,11 +52,10 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
orte_std_cntr_t i, j, k;
int rc;
char **host_argv=NULL;
char **mapped_nodes = NULL, **mini_map;
char **mapped_nodes = NULL, **mini_map, *ndname;
orte_node_t *node, *nd;
opal_list_t adds;
bool found;
char **aliases, *aptr;
OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
"%s dashhost: parsing args",
@ -111,12 +110,17 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
"%s dashhost: working node %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), mapped_nodes[i]));
/* check for local name */
if (orte_ifislocal(mapped_nodes[i])) {
ndname = orte_process_info.nodename;
} else {
ndname = mapped_nodes[i];
}
/* see if the node is already on the list */
found = false;
OPAL_LIST_FOREACH(node, &adds, orte_node_t) {
if (0 == strcmp(node->name, mapped_nodes[i]) ||
(0 == strcmp(node->name, orte_process_info.nodename) &&
(0 == strcmp(mapped_nodes[i], "localhost") || opal_ifislocal(mapped_nodes[i])))) {
if (0 == strcmp(node->name, ndname)) {
found = true;
++node->slots;
OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
@ -134,33 +138,7 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
if (NULL == node) {
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* check to see if this is a local name */
if (0 == strcmp(mapped_nodes[i], "localhost") ||
opal_ifislocal(mapped_nodes[i])) {
/* it is local, so use the local nodename to avoid
* later confusion
*/
if (orte_show_resolved_nodenames &&
0 != strcmp(mapped_nodes[i], orte_process_info.nodename)) {
/* add to list of aliases for this node - only add if unique */
aptr = NULL;
aliases = NULL;
orte_get_attribute(&node->attributes, ORTE_NODE_ALIAS, (void**)&aptr, OPAL_STRING);
if (NULL != aptr) {
aliases = opal_argv_split(aptr, ',');
free(aptr);
}
opal_argv_append_unique_nosize(&aliases, mapped_nodes[i], false);
aptr = opal_argv_join(aliases, ',');
opal_argv_free(aliases);
orte_set_attribute(&node->attributes, ORTE_NODE_ALIAS, ORTE_ATTR_LOCAL, aptr, OPAL_STRING);
free(aptr);
}
node->name = strdup(orte_process_info.nodename);
} else {
/* not local - use the given name */
node->name = strdup(mapped_nodes[i]);
}
node->name = strdup(ndname);
OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
"%s dashhost: added node %s to list",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name));
@ -287,9 +265,11 @@ static int parse_dash_host(char ***mapped_nodes, char *hosts)
goto cleanup;
}
} else { /* non-relative syntax - add to list */
if (OPAL_SUCCESS != (rc = opal_argv_append_nosize(mapped_nodes,
mini_map[k]))) {
goto cleanup;
/* check for local alias */
if (orte_ifislocal(mini_map[k])) {
opal_argv_append_nosize(mapped_nodes, orte_process_info.nodename);
} else {
opal_argv_append_nosize(mapped_nodes, mini_map[k]);
}
}
}
@ -316,7 +296,7 @@ int orte_util_filter_dash_host_nodes(opal_list_t *nodes,
orte_std_cntr_t i, j, len_mapped_node=0;
int rc;
char **mapped_nodes = NULL;
orte_node_t *node, *hnp_node;
orte_node_t *node;
int num_empty=0;
opal_list_t keep;
bool want_all_empty=false;
@ -342,9 +322,6 @@ int orte_util_filter_dash_host_nodes(opal_list_t *nodes,
* nodes list ONCE.
*/
/* get the hnp node's info */
hnp_node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);
len_mapped_node = opal_argv_count(mapped_nodes);
/* setup a working list so we can put the final list
* of nodes in order. This way, if the user specifies a
@ -399,21 +376,15 @@ int orte_util_filter_dash_host_nodes(opal_list_t *nodes,
item = next;
}
} else {
/* we are looking for a specific node on the list
* we have a match if one of two conditions is met:
* 1. the node_name and mapped_nodes directly match
* 2. the node_name is the local system name AND
* either the mapped_node is "localhost" OR it
* is a local interface as found by opal_ifislocal
*/
/* we are looking for a specific node on the list. The
* parser will have substituted our local name for any
* alias, so we only have to do a strcmp here */
item = opal_list_get_first(nodes);
while (item != opal_list_get_end(nodes)) {
next = opal_list_get_next(item); /* save this position */
node = (orte_node_t*)item;
/* search -host list to see if this one is found */
if (0 == strcmp(node->name, mapped_nodes[i]) ||
(0 == strcmp(node->name, hnp_node->name) &&
(0 == strcasecmp(mapped_nodes[i], "localhost") || opal_ifislocal(mapped_nodes[i])))) {
if (0 == strcmp(node->name, mapped_nodes[i])) {
if (remove) {
/* remove item from list */
opal_list_remove_item(nodes, item);

Просмотреть файл

@ -132,9 +132,8 @@ static int hostfile_parse_line(int token, opal_list_t* updates,
orte_node_t* node;
bool got_max = false;
char* value;
char **argv, **aliases, *aptr;
char **argv;
char* node_name = NULL;
char* node_alias = NULL;
char* username = NULL;
int cnt;
int number_of_slots = 0;
@ -181,13 +180,9 @@ static int hostfile_parse_line(int token, opal_list_t* updates,
"%s hostfile: node %s is being excluded",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node_name));
/* convert this into something globally unique */
if (strcmp(node_name, "localhost") == 0 || opal_ifislocal(node_name)) {
/* see if this is another name for us */
if (orte_ifislocal(node_name)) {
/* Nodename has been allocated, that is for sure */
if (orte_show_resolved_nodenames &&
0 != strcmp(node_name, orte_process_info.nodename)) {
node_alias = strdup(node_name);
}
free (node_name);
node_name = strdup(orte_process_info.nodename);
}
@ -208,12 +203,8 @@ static int hostfile_parse_line(int token, opal_list_t* updates,
/* this is not a node to be excluded, so we need to process it and
* add it to the "include" list. See if this host is actually us.
*/
if (strcmp(node_name, "localhost") == 0 || opal_ifislocal(node_name)) {
if (orte_ifislocal(node_name)) {
/* Nodename has been allocated, that is for sure */
if (orte_show_resolved_nodenames &&
0 != strcmp(node_name, orte_process_info.nodename)) {
node_alias = strdup(node_name);
}
free (node_name);
node_name = strdup(orte_process_info.nodename);
}
@ -238,23 +229,6 @@ static int hostfile_parse_line(int token, opal_list_t* updates,
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_SLOTS_GIVEN);
free(node_name);
}
/* do we need to record an alias for this node? */
if (NULL != node_alias) {
/* add to list of aliases for this node - only add if unique */
aptr = NULL;
aliases = NULL;
orte_get_attribute(&node->attributes, ORTE_NODE_ALIAS, (void**)&aptr, OPAL_STRING);
if (NULL != aptr) {
aliases = opal_argv_split(aptr, ',');
free(aptr);
}
opal_argv_append_unique_nosize(&aliases, node_alias, false);
free(node_alias);
aptr = opal_argv_join(aliases, ',');
opal_argv_free(aliases);
orte_set_attribute(&node->attributes, ORTE_NODE_ALIAS, ORTE_ATTR_LOCAL, aptr, OPAL_STRING);
free(aptr);
}
} else if (ORTE_HOSTFILE_RELATIVE == token) {
/* store this for later processing */
node = OBJ_NEW(orte_node_t);
@ -779,15 +753,11 @@ int orte_util_filter_hostfile_nodes(opal_list_t *nodes,
item1 != opal_list_get_end(nodes);
item1 = opal_list_get_next(item1)) {
node_from_list = (orte_node_t*)item1;
/* since the name in the hostfile might not match
* our local name, and yet still be intended to match,
* we have to check for local interfaces
*/
if (0 == strcmp(node_from_file->name, node_from_list->name) ||
(0 == strcmp(node_from_file->name, "localhost") &&
0 == strcmp(node_from_list->name, orte_process_info.nodename)) ||
(opal_ifislocal(node_from_list->name) &&
opal_ifislocal(node_from_file->name))) {
/* we have converted all aliases for ourself
* to our own detected nodename, so no need
* to check for interfaces again - a simple
* strcmp will suffice */
if (0 == strcmp(node_from_file->name, node_from_list->name)) {
/* if the slot count here is less than the
* total slots avail on this node, set it
* to the specified count - this allows people

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -32,15 +32,21 @@
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#if HAVE_ARPA_INET_H
#include <arpa/inet.h>
#endif
#include <ctype.h>
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_var.h"
#include "opal/util/argv.h"
#include "opal/util/if.h"
#include "opal/util/net.h"
#include "opal/util/output.h"
#include "opal/util/proc.h"
#include "orte/util/attr.h"
#include "orte/util/proc_info.h"
/* provide a connection to a reqd variable */
@ -63,6 +69,7 @@ ORTE_DECLSPEC orte_proc_info_t orte_process_info = {
.num_daemons = 1,
.num_nodes = 1,
.nodename = NULL,
.aliases = NULL,
.pid = 0,
.proc_type = ORTE_PROC_TYPE_NONE,
.sync_buf = NULL,
@ -97,6 +104,7 @@ int orte_proc_info(void)
char hostname[ORTE_MAX_HOSTNAME_SIZE];
char **prefixes;
bool match;
struct in_addr buf;
if (init) {
return ORTE_SUCCESS;
@ -165,7 +173,8 @@ int orte_proc_info(void)
gethostname(hostname, ORTE_MAX_HOSTNAME_SIZE);
if (!orte_keep_fqdn_hostnames) {
/* if the nodename is an IP address, do not mess with it! */
if (!opal_net_isaddr(hostname)) {
if (0 != inet_pton(AF_INET, hostname, &buf) &&
0 != inet_pton(AF_INET6, hostname, &buf)) {
/* not an IP address, so remove any domain info */
if (NULL != (ptr = strchr(hostname, '.'))) {
*ptr = '\0';
@ -314,6 +323,45 @@ int orte_proc_info_finalize(void)
OBJ_DESTRUCT(&orte_process_info.super);
opal_argv_free(orte_process_info.aliases);
init = false;
return ORTE_SUCCESS;
}
/**
 * Report whether a hostname refers to the node this process is running on.
 *
 * The checks run from cheapest to most expensive: the recorded nodename,
 * the literal "localhost", the cached alias list, and finally
 * opal_ifislocal(). A name accepted by opal_ifislocal() is appended to
 * orte_process_info.aliases so later calls with the same name are
 * answered by a string compare.
 *
 * @param hostname  NUL-terminated name to test; NULL is treated as
 *                  "not local".
 * @return true if the name matched one of the checks above, else false.
 */
bool orte_ifislocal(const char *hostname)
{
    int i;

    /* a missing name cannot identify this node, and handing NULL to
     * strcmp below would be undefined behavior */
    if (NULL == hostname) {
        return false;
    }

    /* see if this matches our known hostname */
    if (NULL != orte_process_info.nodename &&
        0 == strcmp(hostname, orte_process_info.nodename)) {
        return true;
    }

    /* see if it matches the generic "localhost" */
    if (0 == strcmp(hostname, "localhost")) {
        return true;
    }

    /* see if it matches any of our known aliases */
    if (NULL != orte_process_info.aliases) {
        for (i = 0; NULL != orte_process_info.aliases[i]; i++) {
            if (0 == strcmp(hostname, orte_process_info.aliases[i])) {
                return true;
            }
        }
    }

    /* okay, have to resolve the address - the opal_ifislocal
     * function will not attempt to resolve the address if
     * told not to do so */
    if (opal_ifislocal(hostname)) {
        /* add this to our known aliases; the append result is not
         * checked, so a failed append only means the name is not
         * cached - the answer returned here is still "local" */
        opal_argv_append_nosize(&orte_process_info.aliases, hostname);
        return true;
    }

    /* not me */
    return false;
}

Просмотреть файл

@ -11,7 +11,7 @@
* All rights reserved.
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -103,6 +103,7 @@ struct orte_proc_info_t {
orte_vpid_t num_daemons; /**< number of daemons in system */
int num_nodes; /**< number of nodes in the job */
char *nodename; /**< string name for this node */
char **aliases; /**< aliases for this node */
pid_t pid; /**< Local process ID for this process */
orte_proc_type_t proc_type; /**< Type of process */
opal_buffer_t *sync_buf; /**< buffer to store sync response */
@ -166,6 +167,8 @@ ORTE_DECLSPEC int orte_proc_info(void);
ORTE_DECLSPEC int orte_proc_info_finalize(void);
ORTE_DECLSPEC bool orte_ifislocal(const char *name);
END_C_DECLS
#endif