Attempt to reduce the RARP traffic during definition of allocations
Этот коммит содержится в:
родитель
1196069815
Коммит
5ae42c816e
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2012-2013 Los Alamos National Security, LLC.
|
* Copyright (c) 2012-2013 Los Alamos National Security, LLC.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2014 Intel, Inc. All rights reserved
|
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved
|
||||||
* Copyright (c) 2014 Research Organization for Information Science
|
* Copyright (c) 2014 Research Organization for Information Science
|
||||||
* and Technology (RIST). All rights reserved.
|
* and Technology (RIST). All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
@ -33,6 +33,7 @@
|
|||||||
|
|
||||||
#include "orte/util/error_strings.h"
|
#include "orte/util/error_strings.h"
|
||||||
#include "orte/util/name_fns.h"
|
#include "orte/util/name_fns.h"
|
||||||
|
#include "orte/util/proc_info.h"
|
||||||
#include "orte/util/show_help.h"
|
#include "orte/util/show_help.h"
|
||||||
#include "orte/runtime/orte_globals.h"
|
#include "orte/runtime/orte_globals.h"
|
||||||
#include "orte/mca/errmgr/errmgr.h"
|
#include "orte/mca/errmgr/errmgr.h"
|
||||||
@ -537,9 +538,7 @@ static void process_opens(int fd, short args, void *cbdata)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* if the host is our own, then treat it as a local file */
|
/* if the host is our own, then treat it as a local file */
|
||||||
if (0 == strcmp(host, orte_process_info.nodename) ||
|
if (orte_ifislocal(host)) {
|
||||||
0 == strcmp(host, "localhost") ||
|
|
||||||
opal_ifislocal(host)) {
|
|
||||||
opal_output_verbose(1, orte_dfs_base_framework.framework_output,
|
opal_output_verbose(1, orte_dfs_base_framework.framework_output,
|
||||||
"%s file %s on local host",
|
"%s file %s on local host",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2012-2013 Los Alamos National Security, LLC.
|
* Copyright (c) 2012-2013 Los Alamos National Security, LLC.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
|
||||||
|
* Copyright (c) 2015 Intel, Inc. All rights reserved
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@ -329,10 +330,7 @@ static void process_opens(int fd, short args, void *cbdata)
|
|||||||
goto complete;
|
goto complete;
|
||||||
}
|
}
|
||||||
/* if the host is our own, then treat it as a local file */
|
/* if the host is our own, then treat it as a local file */
|
||||||
if (NULL == host ||
|
if (NULL == host || orte_ifislocal(host)) {
|
||||||
0 == strcmp(host, orte_process_info.nodename) ||
|
|
||||||
0 == strcmp(host, "localhost") ||
|
|
||||||
opal_ifislocal(host)) {
|
|
||||||
opal_output_verbose(1, orte_dfs_base_framework.framework_output,
|
opal_output_verbose(1, orte_dfs_base_framework.framework_output,
|
||||||
"%s file %s on local host",
|
"%s file %s on local host",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||||
|
@ -11,7 +11,7 @@
|
|||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@ -34,6 +34,7 @@
|
|||||||
#include "opal/util/output.h"
|
#include "opal/util/output.h"
|
||||||
#include "opal/dss/dss.h"
|
#include "opal/dss/dss.h"
|
||||||
#include "opal/util/argv.h"
|
#include "opal/util/argv.h"
|
||||||
|
#include "opal/mca/if/if.h"
|
||||||
|
|
||||||
#include "orte/util/show_help.h"
|
#include "orte/util/show_help.h"
|
||||||
#include "orte/mca/errmgr/errmgr.h"
|
#include "orte/mca/errmgr/errmgr.h"
|
||||||
@ -195,6 +196,11 @@ void orte_ras_base_allocate(int fd, short args, void *cbdata)
|
|||||||
}
|
}
|
||||||
/* If something came back, save it and we are done */
|
/* If something came back, save it and we are done */
|
||||||
if (!opal_list_is_empty(&nodes)) {
|
if (!opal_list_is_empty(&nodes)) {
|
||||||
|
/* flag that the allocation is managed */
|
||||||
|
orte_managed_allocation = true;
|
||||||
|
/* since it is managed, we do not attempt to resolve
|
||||||
|
* the nodenames */
|
||||||
|
opal_if_do_not_resolve = true;
|
||||||
/* store the results in the global resource pool - this removes the
|
/* store the results in the global resource pool - this removes the
|
||||||
* list items
|
* list items
|
||||||
*/
|
*/
|
||||||
@ -210,8 +216,6 @@ void orte_ras_base_allocate(int fd, short args, void *cbdata)
|
|||||||
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
|
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
|
||||||
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
|
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
|
||||||
}
|
}
|
||||||
/* flag that the allocation is managed */
|
|
||||||
orte_managed_allocation = true;
|
|
||||||
goto DISPLAY;
|
goto DISPLAY;
|
||||||
} else if (orte_allocation_required) {
|
} else if (orte_allocation_required) {
|
||||||
/* if nothing was found, and an allocation is
|
/* if nothing was found, and an allocation is
|
||||||
|
@ -11,7 +11,7 @@
|
|||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@ -103,10 +103,7 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
|
|||||||
* first position since it is the first one entered. We need to check to see
|
* first position since it is the first one entered. We need to check to see
|
||||||
* if this node is the same as the HNP's node so we don't double-enter it
|
* if this node is the same as the HNP's node so we don't double-enter it
|
||||||
*/
|
*/
|
||||||
if (NULL != hnp_node &&
|
if (NULL != hnp_node && orte_ifislocal(node->name)) {
|
||||||
(0 == strcmp(node->name, hnp_node->name) ||
|
|
||||||
0 == strcmp(node->name, "localhost") ||
|
|
||||||
opal_ifislocal(node->name))) {
|
|
||||||
OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
|
||||||
"%s ras:base:node_insert updating HNP [%s] info to %ld slots",
|
"%s ras:base:node_insert updating HNP [%s] info to %ld slots",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||||
|
@ -14,7 +14,7 @@
|
|||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2008 Voltaire. All rights reserved
|
* Copyright (c) 2008 Voltaire. All rights reserved
|
||||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||||
*
|
*
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
@ -484,8 +484,7 @@ static int orte_rmaps_rank_file_parse(const char *rankfile)
|
|||||||
goto unlock;
|
goto unlock;
|
||||||
}
|
}
|
||||||
/* check if this is the local node */
|
/* check if this is the local node */
|
||||||
if (0 == strcmp(node_name, hnp_node->name) ||
|
if (orte_ifislocal(node_name)) {
|
||||||
opal_ifislocal(node_name)) {
|
|
||||||
rfmap->node_name = strdup(hnp_node->name);
|
rfmap->node_name = strdup(hnp_node->name);
|
||||||
} else {
|
} else {
|
||||||
rfmap->node_name = strdup(node_name);
|
rfmap->node_name = strdup(node_name);
|
||||||
|
@ -12,7 +12,7 @@
|
|||||||
* Copyright (c) 2006-2011 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2006-2011 Cisco Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2011 Los Alamos National Security, LLC.
|
* Copyright (c) 2011 Los Alamos National Security, LLC.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||||
* Copyright (c) 2015 Research Organization for Information Science
|
* Copyright (c) 2015 Research Organization for Information Science
|
||||||
* and Technology (RIST). All rights reserved.
|
* and Technology (RIST). All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
@ -298,7 +298,7 @@ static int orte_rmaps_seq_map(orte_job_t *jdata)
|
|||||||
* hostfile may not have been FQDN, while name returned
|
* hostfile may not have been FQDN, while name returned
|
||||||
* by gethostname may have been (or vice versa)
|
* by gethostname may have been (or vice versa)
|
||||||
*/
|
*/
|
||||||
if (opal_ifislocal(seq->hostname)) {
|
if (orte_ifislocal(seq->hostname)) {
|
||||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||||
"mca:rmaps:seq: removing head node %s", seq->hostname);
|
"mca:rmaps:seq: removing head node %s", seq->hostname);
|
||||||
opal_list_remove_item(seq_list, item);
|
opal_list_remove_item(seq_list, item);
|
||||||
|
@ -10,7 +10,7 @@
|
|||||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||||
* Copyright (c) 2015 Research Organization for Information Science
|
* Copyright (c) 2015 Research Organization for Information Science
|
||||||
* and Technology (RIST). All rights reserved.
|
* and Technology (RIST). All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
@ -52,11 +52,10 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
|
|||||||
orte_std_cntr_t i, j, k;
|
orte_std_cntr_t i, j, k;
|
||||||
int rc;
|
int rc;
|
||||||
char **host_argv=NULL;
|
char **host_argv=NULL;
|
||||||
char **mapped_nodes = NULL, **mini_map;
|
char **mapped_nodes = NULL, **mini_map, *ndname;
|
||||||
orte_node_t *node, *nd;
|
orte_node_t *node, *nd;
|
||||||
opal_list_t adds;
|
opal_list_t adds;
|
||||||
bool found;
|
bool found;
|
||||||
char **aliases, *aptr;
|
|
||||||
|
|
||||||
OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
|
||||||
"%s dashhost: parsing args",
|
"%s dashhost: parsing args",
|
||||||
@ -111,12 +110,17 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
|
|||||||
"%s dashhost: working node %s",
|
"%s dashhost: working node %s",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), mapped_nodes[i]));
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), mapped_nodes[i]));
|
||||||
|
|
||||||
|
/* check for local name */
|
||||||
|
if (orte_ifislocal(mapped_nodes[i])) {
|
||||||
|
ndname = orte_process_info.nodename;
|
||||||
|
} else {
|
||||||
|
ndname = mapped_nodes[i];
|
||||||
|
}
|
||||||
|
|
||||||
/* see if the node is already on the list */
|
/* see if the node is already on the list */
|
||||||
found = false;
|
found = false;
|
||||||
OPAL_LIST_FOREACH(node, &adds, orte_node_t) {
|
OPAL_LIST_FOREACH(node, &adds, orte_node_t) {
|
||||||
if (0 == strcmp(node->name, mapped_nodes[i]) ||
|
if (0 == strcmp(node->name, ndname)) {
|
||||||
(0 == strcmp(node->name, orte_process_info.nodename) &&
|
|
||||||
(0 == strcmp(mapped_nodes[i], "localhost") || opal_ifislocal(mapped_nodes[i])))) {
|
|
||||||
found = true;
|
found = true;
|
||||||
++node->slots;
|
++node->slots;
|
||||||
OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
|
||||||
@ -134,33 +138,7 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
|
|||||||
if (NULL == node) {
|
if (NULL == node) {
|
||||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||||
}
|
}
|
||||||
/* check to see if this is a local name */
|
node->name = strdup(ndname);
|
||||||
if (0 == strcmp(mapped_nodes[i], "localhost") ||
|
|
||||||
opal_ifislocal(mapped_nodes[i])) {
|
|
||||||
/* it is local, so use the local nodename to avoid
|
|
||||||
* later confusion
|
|
||||||
*/
|
|
||||||
if (orte_show_resolved_nodenames &&
|
|
||||||
0 != strcmp(mapped_nodes[i], orte_process_info.nodename)) {
|
|
||||||
/* add to list of aliases for this node - only add if unique */
|
|
||||||
aptr = NULL;
|
|
||||||
aliases = NULL;
|
|
||||||
orte_get_attribute(&node->attributes, ORTE_NODE_ALIAS, (void**)&aptr, OPAL_STRING);
|
|
||||||
if (NULL != aptr) {
|
|
||||||
aliases = opal_argv_split(aptr, ',');
|
|
||||||
free(aptr);
|
|
||||||
}
|
|
||||||
opal_argv_append_unique_nosize(&aliases, mapped_nodes[i], false);
|
|
||||||
aptr = opal_argv_join(aliases, ',');
|
|
||||||
opal_argv_free(aliases);
|
|
||||||
orte_set_attribute(&node->attributes, ORTE_NODE_ALIAS, ORTE_ATTR_LOCAL, aptr, OPAL_STRING);
|
|
||||||
free(aptr);
|
|
||||||
}
|
|
||||||
node->name = strdup(orte_process_info.nodename);
|
|
||||||
} else {
|
|
||||||
/* not local - use the given name */
|
|
||||||
node->name = strdup(mapped_nodes[i]);
|
|
||||||
}
|
|
||||||
OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
|
||||||
"%s dashhost: added node %s to list",
|
"%s dashhost: added node %s to list",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name));
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name));
|
||||||
@ -287,9 +265,11 @@ static int parse_dash_host(char ***mapped_nodes, char *hosts)
|
|||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
} else { /* non-relative syntax - add to list */
|
} else { /* non-relative syntax - add to list */
|
||||||
if (OPAL_SUCCESS != (rc = opal_argv_append_nosize(mapped_nodes,
|
/* check for local alias */
|
||||||
mini_map[k]))) {
|
if (orte_ifislocal(mini_map[k])) {
|
||||||
goto cleanup;
|
opal_argv_append_nosize(mapped_nodes, orte_process_info.nodename);
|
||||||
|
} else {
|
||||||
|
opal_argv_append_nosize(mapped_nodes, mini_map[k]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -316,7 +296,7 @@ int orte_util_filter_dash_host_nodes(opal_list_t *nodes,
|
|||||||
orte_std_cntr_t i, j, len_mapped_node=0;
|
orte_std_cntr_t i, j, len_mapped_node=0;
|
||||||
int rc;
|
int rc;
|
||||||
char **mapped_nodes = NULL;
|
char **mapped_nodes = NULL;
|
||||||
orte_node_t *node, *hnp_node;
|
orte_node_t *node;
|
||||||
int num_empty=0;
|
int num_empty=0;
|
||||||
opal_list_t keep;
|
opal_list_t keep;
|
||||||
bool want_all_empty=false;
|
bool want_all_empty=false;
|
||||||
@ -342,9 +322,6 @@ int orte_util_filter_dash_host_nodes(opal_list_t *nodes,
|
|||||||
* nodes list ONCE.
|
* nodes list ONCE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* get the hnp node's info */
|
|
||||||
hnp_node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);
|
|
||||||
|
|
||||||
len_mapped_node = opal_argv_count(mapped_nodes);
|
len_mapped_node = opal_argv_count(mapped_nodes);
|
||||||
/* setup a working list so we can put the final list
|
/* setup a working list so we can put the final list
|
||||||
* of nodes in order. This way, if the user specifies a
|
* of nodes in order. This way, if the user specifies a
|
||||||
@ -399,21 +376,15 @@ int orte_util_filter_dash_host_nodes(opal_list_t *nodes,
|
|||||||
item = next;
|
item = next;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
/* we are looking for a specific node on the list
|
/* we are looking for a specific node on the list. The
|
||||||
* we have a match if one of two conditions is met:
|
* parser will have substituted our local name for any
|
||||||
* 1. the node_name and mapped_nodes directly match
|
* alias, so we only have to do a strcmp here */
|
||||||
* 2. the node_name is the local system name AND
|
|
||||||
* either the mapped_node is "localhost" OR it
|
|
||||||
* is a local interface as found by opal_ifislocal
|
|
||||||
*/
|
|
||||||
item = opal_list_get_first(nodes);
|
item = opal_list_get_first(nodes);
|
||||||
while (item != opal_list_get_end(nodes)) {
|
while (item != opal_list_get_end(nodes)) {
|
||||||
next = opal_list_get_next(item); /* save this position */
|
next = opal_list_get_next(item); /* save this position */
|
||||||
node = (orte_node_t*)item;
|
node = (orte_node_t*)item;
|
||||||
/* search -host list to see if this one is found */
|
/* search -host list to see if this one is found */
|
||||||
if (0 == strcmp(node->name, mapped_nodes[i]) ||
|
if (0 == strcmp(node->name, mapped_nodes[i])) {
|
||||||
(0 == strcmp(node->name, hnp_node->name) &&
|
|
||||||
(0 == strcasecmp(mapped_nodes[i], "localhost") || opal_ifislocal(mapped_nodes[i])))) {
|
|
||||||
if (remove) {
|
if (remove) {
|
||||||
/* remove item from list */
|
/* remove item from list */
|
||||||
opal_list_remove_item(nodes, item);
|
opal_list_remove_item(nodes, item);
|
||||||
|
@ -132,9 +132,8 @@ static int hostfile_parse_line(int token, opal_list_t* updates,
|
|||||||
orte_node_t* node;
|
orte_node_t* node;
|
||||||
bool got_max = false;
|
bool got_max = false;
|
||||||
char* value;
|
char* value;
|
||||||
char **argv, **aliases, *aptr;
|
char **argv;
|
||||||
char* node_name = NULL;
|
char* node_name = NULL;
|
||||||
char* node_alias = NULL;
|
|
||||||
char* username = NULL;
|
char* username = NULL;
|
||||||
int cnt;
|
int cnt;
|
||||||
int number_of_slots = 0;
|
int number_of_slots = 0;
|
||||||
@ -181,13 +180,9 @@ static int hostfile_parse_line(int token, opal_list_t* updates,
|
|||||||
"%s hostfile: node %s is being excluded",
|
"%s hostfile: node %s is being excluded",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node_name));
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node_name));
|
||||||
|
|
||||||
/* convert this into something globally unique */
|
/* see if this is another name for us */
|
||||||
if (strcmp(node_name, "localhost") == 0 || opal_ifislocal(node_name)) {
|
if (orte_ifislocal(node_name)) {
|
||||||
/* Nodename has been allocated, that is for sure */
|
/* Nodename has been allocated, that is for sure */
|
||||||
if (orte_show_resolved_nodenames &&
|
|
||||||
0 != strcmp(node_name, orte_process_info.nodename)) {
|
|
||||||
node_alias = strdup(node_name);
|
|
||||||
}
|
|
||||||
free (node_name);
|
free (node_name);
|
||||||
node_name = strdup(orte_process_info.nodename);
|
node_name = strdup(orte_process_info.nodename);
|
||||||
}
|
}
|
||||||
@ -208,12 +203,8 @@ static int hostfile_parse_line(int token, opal_list_t* updates,
|
|||||||
/* this is not a node to be excluded, so we need to process it and
|
/* this is not a node to be excluded, so we need to process it and
|
||||||
* add it to the "include" list. See if this host is actually us.
|
* add it to the "include" list. See if this host is actually us.
|
||||||
*/
|
*/
|
||||||
if (strcmp(node_name, "localhost") == 0 || opal_ifislocal(node_name)) {
|
if (orte_ifislocal(node_name)) {
|
||||||
/* Nodename has been allocated, that is for sure */
|
/* Nodename has been allocated, that is for sure */
|
||||||
if (orte_show_resolved_nodenames &&
|
|
||||||
0 != strcmp(node_name, orte_process_info.nodename)) {
|
|
||||||
node_alias = strdup(node_name);
|
|
||||||
}
|
|
||||||
free (node_name);
|
free (node_name);
|
||||||
node_name = strdup(orte_process_info.nodename);
|
node_name = strdup(orte_process_info.nodename);
|
||||||
}
|
}
|
||||||
@ -238,23 +229,6 @@ static int hostfile_parse_line(int token, opal_list_t* updates,
|
|||||||
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_SLOTS_GIVEN);
|
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_SLOTS_GIVEN);
|
||||||
free(node_name);
|
free(node_name);
|
||||||
}
|
}
|
||||||
/* do we need to record an alias for this node? */
|
|
||||||
if (NULL != node_alias) {
|
|
||||||
/* add to list of aliases for this node - only add if unique */
|
|
||||||
aptr = NULL;
|
|
||||||
aliases = NULL;
|
|
||||||
orte_get_attribute(&node->attributes, ORTE_NODE_ALIAS, (void**)&aptr, OPAL_STRING);
|
|
||||||
if (NULL != aptr) {
|
|
||||||
aliases = opal_argv_split(aptr, ',');
|
|
||||||
free(aptr);
|
|
||||||
}
|
|
||||||
opal_argv_append_unique_nosize(&aliases, node_alias, false);
|
|
||||||
free(node_alias);
|
|
||||||
aptr = opal_argv_join(aliases, ',');
|
|
||||||
opal_argv_free(aliases);
|
|
||||||
orte_set_attribute(&node->attributes, ORTE_NODE_ALIAS, ORTE_ATTR_LOCAL, aptr, OPAL_STRING);
|
|
||||||
free(aptr);
|
|
||||||
}
|
|
||||||
} else if (ORTE_HOSTFILE_RELATIVE == token) {
|
} else if (ORTE_HOSTFILE_RELATIVE == token) {
|
||||||
/* store this for later processing */
|
/* store this for later processing */
|
||||||
node = OBJ_NEW(orte_node_t);
|
node = OBJ_NEW(orte_node_t);
|
||||||
@ -779,15 +753,11 @@ int orte_util_filter_hostfile_nodes(opal_list_t *nodes,
|
|||||||
item1 != opal_list_get_end(nodes);
|
item1 != opal_list_get_end(nodes);
|
||||||
item1 = opal_list_get_next(item1)) {
|
item1 = opal_list_get_next(item1)) {
|
||||||
node_from_list = (orte_node_t*)item1;
|
node_from_list = (orte_node_t*)item1;
|
||||||
/* since the name in the hostfile might not match
|
/* we have converted all aliases for ourself
|
||||||
* our local name, and yet still be intended to match,
|
* to our own detected nodename, so no need
|
||||||
* we have to check for local interfaces
|
* to check for interfaces again - a simple
|
||||||
*/
|
* strcmp will suffice */
|
||||||
if (0 == strcmp(node_from_file->name, node_from_list->name) ||
|
if (0 == strcmp(node_from_file->name, node_from_list->name)) {
|
||||||
(0 == strcmp(node_from_file->name, "localhost") &&
|
|
||||||
0 == strcmp(node_from_list->name, orte_process_info.nodename)) ||
|
|
||||||
(opal_ifislocal(node_from_list->name) &&
|
|
||||||
opal_ifislocal(node_from_file->name))) {
|
|
||||||
/* if the slot count here is less than the
|
/* if the slot count here is less than the
|
||||||
* total slots avail on this node, set it
|
* total slots avail on this node, set it
|
||||||
* to the specified count - this allows people
|
* to the specified count - this allows people
|
||||||
|
@ -12,7 +12,7 @@
|
|||||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2012 Los Alamos National Security, LLC.
|
* Copyright (c) 2012 Los Alamos National Security, LLC.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2014 Intel, Inc. All rights reserved
|
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@ -32,15 +32,21 @@
|
|||||||
#ifdef HAVE_SYS_TYPES_H
|
#ifdef HAVE_SYS_TYPES_H
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#endif
|
#endif
|
||||||
|
#if HAVE_ARPA_INET_H
|
||||||
|
#include <arpa/inet.h>
|
||||||
|
#endif
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
|
|
||||||
#include "opal/mca/base/base.h"
|
#include "opal/mca/base/base.h"
|
||||||
#include "opal/mca/base/mca_base_var.h"
|
#include "opal/mca/base/mca_base_var.h"
|
||||||
#include "opal/util/argv.h"
|
#include "opal/util/argv.h"
|
||||||
|
#include "opal/util/if.h"
|
||||||
#include "opal/util/net.h"
|
#include "opal/util/net.h"
|
||||||
#include "opal/util/output.h"
|
#include "opal/util/output.h"
|
||||||
#include "opal/util/proc.h"
|
#include "opal/util/proc.h"
|
||||||
|
|
||||||
|
#include "orte/util/attr.h"
|
||||||
|
|
||||||
#include "orte/util/proc_info.h"
|
#include "orte/util/proc_info.h"
|
||||||
|
|
||||||
/* provide a connection to a reqd variable */
|
/* provide a connection to a reqd variable */
|
||||||
@ -63,6 +69,7 @@ ORTE_DECLSPEC orte_proc_info_t orte_process_info = {
|
|||||||
.num_daemons = 1,
|
.num_daemons = 1,
|
||||||
.num_nodes = 1,
|
.num_nodes = 1,
|
||||||
.nodename = NULL,
|
.nodename = NULL,
|
||||||
|
.aliases = NULL,
|
||||||
.pid = 0,
|
.pid = 0,
|
||||||
.proc_type = ORTE_PROC_TYPE_NONE,
|
.proc_type = ORTE_PROC_TYPE_NONE,
|
||||||
.sync_buf = NULL,
|
.sync_buf = NULL,
|
||||||
@ -97,7 +104,8 @@ int orte_proc_info(void)
|
|||||||
char hostname[ORTE_MAX_HOSTNAME_SIZE];
|
char hostname[ORTE_MAX_HOSTNAME_SIZE];
|
||||||
char **prefixes;
|
char **prefixes;
|
||||||
bool match;
|
bool match;
|
||||||
|
struct in_addr buf;
|
||||||
|
|
||||||
if (init) {
|
if (init) {
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -165,7 +173,8 @@ int orte_proc_info(void)
|
|||||||
gethostname(hostname, ORTE_MAX_HOSTNAME_SIZE);
|
gethostname(hostname, ORTE_MAX_HOSTNAME_SIZE);
|
||||||
if (!orte_keep_fqdn_hostnames) {
|
if (!orte_keep_fqdn_hostnames) {
|
||||||
/* if the nodename is an IP address, do not mess with it! */
|
/* if the nodename is an IP address, do not mess with it! */
|
||||||
if (!opal_net_isaddr(hostname)) {
|
if (0 != inet_pton(AF_INET, hostname, &buf) &&
|
||||||
|
0 != inet_pton(AF_INET6, hostname, &buf)) {
|
||||||
/* not an IP address, so remove any domain info */
|
/* not an IP address, so remove any domain info */
|
||||||
if (NULL != (ptr = strchr(hostname, '.'))) {
|
if (NULL != (ptr = strchr(hostname, '.'))) {
|
||||||
*ptr = '\0';
|
*ptr = '\0';
|
||||||
@ -314,6 +323,45 @@ int orte_proc_info_finalize(void)
|
|||||||
|
|
||||||
OBJ_DESTRUCT(&orte_process_info.super);
|
OBJ_DESTRUCT(&orte_process_info.super);
|
||||||
|
|
||||||
|
opal_argv_free(orte_process_info.aliases);
|
||||||
|
|
||||||
init = false;
|
init = false;
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool orte_ifislocal(const char *hostname)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
/* see if this matches our known hostname */
|
||||||
|
if (NULL != orte_process_info.nodename &&
|
||||||
|
0 == strcmp(hostname, orte_process_info.nodename)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* see if it matches the generic "localhost" */
|
||||||
|
if (0 == strcmp(hostname, "localhost")) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* see if it matches any of our known aliases */
|
||||||
|
if (NULL != orte_process_info.aliases) {
|
||||||
|
for (i=0; NULL != orte_process_info.aliases[i]; i++) {
|
||||||
|
if (0 == strcmp(hostname, orte_process_info.aliases[i])) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* okay, have to resolve the address - the opal_ifislocal
|
||||||
|
* function will not attempt to resolve the address if
|
||||||
|
* told not to do so */
|
||||||
|
if (opal_ifislocal(hostname)) {
|
||||||
|
/* add this to our known aliases */
|
||||||
|
opal_argv_append_nosize(&orte_process_info.aliases, hostname);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* not me */
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
@ -11,7 +11,7 @@
|
|||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
|
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
|
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@ -103,6 +103,7 @@ struct orte_proc_info_t {
|
|||||||
orte_vpid_t num_daemons; /**< number of daemons in system */
|
orte_vpid_t num_daemons; /**< number of daemons in system */
|
||||||
int num_nodes; /**< number of nodes in the job */
|
int num_nodes; /**< number of nodes in the job */
|
||||||
char *nodename; /**< string name for this node */
|
char *nodename; /**< string name for this node */
|
||||||
|
char **aliases; /**< aliases for this node */
|
||||||
pid_t pid; /**< Local process ID for this process */
|
pid_t pid; /**< Local process ID for this process */
|
||||||
orte_proc_type_t proc_type; /**< Type of process */
|
orte_proc_type_t proc_type; /**< Type of process */
|
||||||
opal_buffer_t *sync_buf; /**< buffer to store sync response */
|
opal_buffer_t *sync_buf; /**< buffer to store sync response */
|
||||||
@ -166,6 +167,8 @@ ORTE_DECLSPEC int orte_proc_info(void);
|
|||||||
|
|
||||||
ORTE_DECLSPEC int orte_proc_info_finalize(void);
|
ORTE_DECLSPEC int orte_proc_info_finalize(void);
|
||||||
|
|
||||||
|
ORTE_DECLSPEC bool orte_ifislocal(const char *name);
|
||||||
|
|
||||||
END_C_DECLS
|
END_C_DECLS
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user