Merge pull request #479 from rhc54/topic/rarp
Attempt to reduce the RARP traffic during definition of allocations
Этот коммит содержится в:
Коммит
7f8fcb7fb7
@ -101,7 +101,6 @@ const opal_pmix_base_module_t opal_pmix_native_module = {
|
||||
// local variables
|
||||
static int init_cntr = 0;
|
||||
opal_process_name_t native_pname;
|
||||
static char *local_uri = NULL;
|
||||
static uint32_t sm_flag;
|
||||
|
||||
static void unpack_segment_info(opal_buffer_t *buf, opal_process_name_t *id, char** seg_info)
|
||||
@ -437,12 +436,6 @@ static int native_put(opal_pmix_scope_t scope,
|
||||
}
|
||||
}
|
||||
|
||||
/* if this is our uri, save it as we need to send it to our server
|
||||
* as a special, separate item */
|
||||
if (0 == strcmp(OPAL_DSTORE_URI, kv->key)) {
|
||||
local_uri = strdup(kv->data.string);
|
||||
}
|
||||
|
||||
/* have to save a copy locally as some of our components will
|
||||
* look for it */
|
||||
(void)opal_dstore.store(opal_dstore_internal, &OPAL_PROC_MY_NAME, kv);
|
||||
@ -493,17 +486,6 @@ static int native_fence(opal_process_name_t *procs, size_t nprocs)
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
/* provide our URI */
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &local_uri, 1, OPAL_STRING))) {
|
||||
OPAL_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(msg);
|
||||
return rc;
|
||||
}
|
||||
/* only do it once */
|
||||
if (NULL != local_uri) {
|
||||
free(local_uri);
|
||||
local_uri = NULL;
|
||||
}
|
||||
|
||||
/* pack 1 if we have sm dstore enabled, 0 otherwise */
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &sm_flag, 1, OPAL_UINT32))) {
|
||||
@ -757,17 +739,6 @@ static int native_fence_nb(opal_process_name_t *procs, size_t nprocs,
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
/* provide our URI */
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &local_uri, 1, OPAL_STRING))) {
|
||||
OPAL_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(msg);
|
||||
return rc;
|
||||
}
|
||||
/* only do it once */
|
||||
if (NULL != local_uri) {
|
||||
free(local_uri);
|
||||
local_uri = NULL;
|
||||
}
|
||||
|
||||
/* pack 1 if we have sm dstore enabled, 0 otherwise */
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &sm_flag, 1, OPAL_UINT32))) {
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -33,6 +33,7 @@
|
||||
|
||||
#include "orte/util/error_strings.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
@ -537,9 +538,7 @@ static void process_opens(int fd, short args, void *cbdata)
|
||||
}
|
||||
|
||||
/* if the host is our own, then treat it as a local file */
|
||||
if (0 == strcmp(host, orte_process_info.nodename) ||
|
||||
0 == strcmp(host, "localhost") ||
|
||||
opal_ifislocal(host)) {
|
||||
if (orte_ifislocal(host)) {
|
||||
opal_output_verbose(1, orte_dfs_base_framework.framework_output,
|
||||
"%s file %s on local host",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
|
@ -1,7 +1,8 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Intel, Inc. All rights reserved
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -329,10 +330,7 @@ static void process_opens(int fd, short args, void *cbdata)
|
||||
goto complete;
|
||||
}
|
||||
/* if the host is our own, then treat it as a local file */
|
||||
if (NULL == host ||
|
||||
0 == strcmp(host, orte_process_info.nodename) ||
|
||||
0 == strcmp(host, "localhost") ||
|
||||
opal_ifislocal(host)) {
|
||||
if (NULL == host || orte_ifislocal(host)) {
|
||||
opal_output_verbose(1, orte_dfs_base_framework.framework_output,
|
||||
"%s file %s on local host",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
|
@ -232,28 +232,31 @@ static int rte_init(void)
|
||||
|
||||
/*** PUSH DATA FOR OTHERS TO FIND ***/
|
||||
|
||||
/* if our URI was not provided by the system, then
|
||||
* push our URI so others can find us */
|
||||
OBJ_CONSTRUCT(&vals, opal_list_t);
|
||||
if (OPAL_SUCCESS != opal_dstore.fetch(opal_dstore_internal, &OPAL_PROC_MY_NAME,
|
||||
OPAL_DSTORE_URI, &vals)) {
|
||||
/* construct the RTE string */
|
||||
rmluri = orte_rml.get_contact_info();
|
||||
/* push it out for others to use */
|
||||
OBJ_CONSTRUCT(&kvn, opal_value_t);
|
||||
kvn.key = strdup(OPAL_DSTORE_URI);
|
||||
kvn.type = OPAL_STRING;
|
||||
kvn.data.string = strdup(rmluri);
|
||||
if (ORTE_SUCCESS != (ret = opal_pmix.put(PMIX_GLOBAL, &kvn))) {
|
||||
error = "db store uri";
|
||||
/* if we are direct launched, then push our RML URI - there
|
||||
* is no need to do so when launched by mpirun as all apps
|
||||
* communicate thru their local daemon */
|
||||
if (orte_standalone_operation) {
|
||||
OBJ_CONSTRUCT(&vals, opal_list_t);
|
||||
if (OPAL_SUCCESS != opal_dstore.fetch(opal_dstore_internal, &OPAL_PROC_MY_NAME,
|
||||
OPAL_DSTORE_URI, &vals)) {
|
||||
/* construct the RTE string */
|
||||
rmluri = orte_rml.get_contact_info();
|
||||
/* push it out for others to use */
|
||||
OBJ_CONSTRUCT(&kvn, opal_value_t);
|
||||
kvn.key = strdup(OPAL_DSTORE_URI);
|
||||
kvn.type = OPAL_STRING;
|
||||
kvn.data.string = strdup(rmluri);
|
||||
if (ORTE_SUCCESS != (ret = opal_pmix.put(PMIX_GLOBAL, &kvn))) {
|
||||
error = "db store uri";
|
||||
OBJ_DESTRUCT(&kvn);
|
||||
goto error;
|
||||
}
|
||||
OBJ_DESTRUCT(&kvn);
|
||||
goto error;
|
||||
free(rmluri);
|
||||
}
|
||||
OBJ_DESTRUCT(&kvn);
|
||||
free(rmluri);
|
||||
OPAL_LIST_DESTRUCT(&vals);
|
||||
}
|
||||
OPAL_LIST_DESTRUCT(&vals);
|
||||
|
||||
|
||||
/* push our hostname so others can find us, if they need to */
|
||||
OBJ_CONSTRUCT(&kvn, opal_value_t);
|
||||
kvn.key = strdup(OPAL_DSTORE_HOSTNAME);
|
||||
|
@ -216,16 +216,14 @@ static void accept_connection(const int accepted_fd,
|
||||
ORTE_ACTIVATE_TCP_ACCEPT_STATE(accepted_fd, addr, recv_handler);
|
||||
}
|
||||
|
||||
/* the host in this case is always in "dot" notation, and
|
||||
* thus we do not need to do a DNS lookup to convert it */
|
||||
static int parse_uri(const uint16_t af_family,
|
||||
const char* host,
|
||||
const char *port,
|
||||
struct sockaddr* inaddr)
|
||||
{
|
||||
struct sockaddr_in *in;
|
||||
#if OPAL_ENABLE_IPV6
|
||||
struct addrinfo hints, *res;
|
||||
int ret;
|
||||
#endif
|
||||
|
||||
if (AF_INET == af_family) {
|
||||
memset(inaddr, 0, sizeof(struct sockaddr_in));
|
||||
@ -239,21 +237,14 @@ static int parse_uri(const uint16_t af_family,
|
||||
}
|
||||
#if OPAL_ENABLE_IPV6
|
||||
else if (AF_INET6 == af_family) {
|
||||
size_t len;
|
||||
struct sockaddr_in6 *in6;
|
||||
memset(inaddr, 0, sizeof(struct sockaddr_in6));
|
||||
memset(&hints, 0, sizeof(hints));
|
||||
hints.ai_family = af_family;
|
||||
hints.ai_socktype = SOCK_STREAM;
|
||||
ret = getaddrinfo(host, NULL, &hints, &res);
|
||||
in6 = (struct sockaddr_in6*) inaddr;
|
||||
|
||||
if (ret) {
|
||||
opal_output (0, "oob_tcp_parse_uri: Could not resolve %s. [Error: %s]\n",
|
||||
host, gai_strerror (ret));
|
||||
if (0 == inet_pton(AF_INET6, host, (void*)&in6->sin6_addr)) {
|
||||
opal_output (0, "oob_tcp_parse_uri: Could not convert %s\n", host);
|
||||
return ORTE_ERR_BAD_PARAM;
|
||||
}
|
||||
len = (res->ai_addrlen < sizeof(struct sockaddr_in6)) ? res->ai_addrlen : sizeof(struct sockaddr_in6);
|
||||
memcpy(inaddr, res->ai_addr, len);
|
||||
freeaddrinfo(res);
|
||||
}
|
||||
#endif
|
||||
else {
|
||||
|
@ -451,6 +451,12 @@ static bool component_available(void)
|
||||
opal_output_verbose(5, orte_oob_base_framework.framework_output,
|
||||
"oob:tcp: component_available called");
|
||||
|
||||
/* if we are an APP and we are not direct launched,
|
||||
* then we don't want to be considered */
|
||||
if (ORTE_PROC_IS_APP && !orte_standalone_operation) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* if interface include was given, construct a list
|
||||
* of those interfaces which match the specifications - remember,
|
||||
* the includes could be given as named interfaces, IP addrs, or
|
||||
|
@ -11,7 +11,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -34,6 +34,7 @@
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/dss/dss.h"
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/mca/if/if.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
@ -195,6 +196,11 @@ void orte_ras_base_allocate(int fd, short args, void *cbdata)
|
||||
}
|
||||
/* If something came back, save it and we are done */
|
||||
if (!opal_list_is_empty(&nodes)) {
|
||||
/* flag that the allocation is managed */
|
||||
orte_managed_allocation = true;
|
||||
/* since it is managed, we do not attempt to resolve
|
||||
* the nodenames */
|
||||
opal_if_do_not_resolve = true;
|
||||
/* store the results in the global resource pool - this removes the
|
||||
* list items
|
||||
*/
|
||||
@ -210,8 +216,6 @@ void orte_ras_base_allocate(int fd, short args, void *cbdata)
|
||||
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
|
||||
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
|
||||
}
|
||||
/* flag that the allocation is managed */
|
||||
orte_managed_allocation = true;
|
||||
goto DISPLAY;
|
||||
} else if (orte_allocation_required) {
|
||||
/* if nothing was found, and an allocation is
|
||||
|
@ -11,7 +11,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -103,10 +103,7 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
|
||||
* first position since it is the first one entered. We need to check to see
|
||||
* if this node is the same as the HNP's node so we don't double-enter it
|
||||
*/
|
||||
if (NULL != hnp_node &&
|
||||
(0 == strcmp(node->name, hnp_node->name) ||
|
||||
0 == strcmp(node->name, "localhost") ||
|
||||
opal_ifislocal(node->name))) {
|
||||
if (NULL != hnp_node && orte_ifislocal(node->name)) {
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
|
||||
"%s ras:base:node_insert updating HNP [%s] info to %ld slots",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
|
@ -14,7 +14,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008 Voltaire. All rights reserved
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -484,8 +484,7 @@ static int orte_rmaps_rank_file_parse(const char *rankfile)
|
||||
goto unlock;
|
||||
}
|
||||
/* check if this is the local node */
|
||||
if (0 == strcmp(node_name, hnp_node->name) ||
|
||||
opal_ifislocal(node_name)) {
|
||||
if (orte_ifislocal(node_name)) {
|
||||
rfmap->node_name = strdup(hnp_node->name);
|
||||
} else {
|
||||
rfmap->node_name = strdup(node_name);
|
||||
|
@ -12,7 +12,7 @@
|
||||
* Copyright (c) 2006-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -298,7 +298,7 @@ static int orte_rmaps_seq_map(orte_job_t *jdata)
|
||||
* hostfile may not have been FQDN, while name returned
|
||||
* by gethostname may have been (or vice versa)
|
||||
*/
|
||||
if (opal_ifislocal(seq->hostname)) {
|
||||
if (orte_ifislocal(seq->hostname)) {
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps:seq: removing head node %s", seq->hostname);
|
||||
opal_list_remove_item(seq_list, item);
|
||||
|
@ -100,7 +100,6 @@ void pmix_server_process_message(pmix_server_peer_t *peer)
|
||||
pmix_server_dmx_req_t *req, *nextreq;
|
||||
bool found;
|
||||
orte_grpcomm_signature_t *sig;
|
||||
char *local_uri;
|
||||
uint32_t sm_flag;
|
||||
|
||||
/* xfer the message to a buffer for unpacking */
|
||||
@ -207,19 +206,6 @@ void pmix_server_process_message(pmix_server_peer_t *peer)
|
||||
(PMIX_FENCENB_CMD == cmd) ? "FENCE_NB" : "FENCE", tmp);
|
||||
free(tmp);
|
||||
}
|
||||
/* get the URI for this process */
|
||||
cnt = 1;
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.unpack(&xfer, &local_uri, &cnt, OPAL_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(sig);
|
||||
goto reply_fence;
|
||||
}
|
||||
/* if not NULL, then update our connection info as we might need
|
||||
* to send this proc a message at some point */
|
||||
if (NULL != local_uri) {
|
||||
orte_rml.set_contact_info(local_uri);
|
||||
free(local_uri);
|
||||
}
|
||||
/* unpack flag if sm dstore is supported by the client */
|
||||
cnt = 1;
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.unpack(&xfer, &sm_flag, &cnt, OPAL_UINT32))) {
|
||||
|
@ -10,7 +10,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -52,11 +52,10 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
|
||||
orte_std_cntr_t i, j, k;
|
||||
int rc;
|
||||
char **host_argv=NULL;
|
||||
char **mapped_nodes = NULL, **mini_map;
|
||||
char **mapped_nodes = NULL, **mini_map, *ndname;
|
||||
orte_node_t *node, *nd;
|
||||
opal_list_t adds;
|
||||
bool found;
|
||||
char **aliases, *aptr;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
|
||||
"%s dashhost: parsing args",
|
||||
@ -111,12 +110,17 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
|
||||
"%s dashhost: working node %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), mapped_nodes[i]));
|
||||
|
||||
/* check for local name */
|
||||
if (orte_ifislocal(mapped_nodes[i])) {
|
||||
ndname = orte_process_info.nodename;
|
||||
} else {
|
||||
ndname = mapped_nodes[i];
|
||||
}
|
||||
|
||||
/* see if the node is already on the list */
|
||||
found = false;
|
||||
OPAL_LIST_FOREACH(node, &adds, orte_node_t) {
|
||||
if (0 == strcmp(node->name, mapped_nodes[i]) ||
|
||||
(0 == strcmp(node->name, orte_process_info.nodename) &&
|
||||
(0 == strcmp(mapped_nodes[i], "localhost") || opal_ifislocal(mapped_nodes[i])))) {
|
||||
if (0 == strcmp(node->name, ndname)) {
|
||||
found = true;
|
||||
++node->slots;
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
|
||||
@ -134,33 +138,7 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
|
||||
if (NULL == node) {
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
/* check to see if this is a local name */
|
||||
if (0 == strcmp(mapped_nodes[i], "localhost") ||
|
||||
opal_ifislocal(mapped_nodes[i])) {
|
||||
/* it is local, so use the local nodename to avoid
|
||||
* later confusion
|
||||
*/
|
||||
if (orte_show_resolved_nodenames &&
|
||||
0 != strcmp(mapped_nodes[i], orte_process_info.nodename)) {
|
||||
/* add to list of aliases for this node - only add if unique */
|
||||
aptr = NULL;
|
||||
aliases = NULL;
|
||||
orte_get_attribute(&node->attributes, ORTE_NODE_ALIAS, (void**)&aptr, OPAL_STRING);
|
||||
if (NULL != aptr) {
|
||||
aliases = opal_argv_split(aptr, ',');
|
||||
free(aptr);
|
||||
}
|
||||
opal_argv_append_unique_nosize(&aliases, mapped_nodes[i], false);
|
||||
aptr = opal_argv_join(aliases, ',');
|
||||
opal_argv_free(aliases);
|
||||
orte_set_attribute(&node->attributes, ORTE_NODE_ALIAS, ORTE_ATTR_LOCAL, aptr, OPAL_STRING);
|
||||
free(aptr);
|
||||
}
|
||||
node->name = strdup(orte_process_info.nodename);
|
||||
} else {
|
||||
/* not local - use the given name */
|
||||
node->name = strdup(mapped_nodes[i]);
|
||||
}
|
||||
node->name = strdup(ndname);
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
|
||||
"%s dashhost: added node %s to list",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name));
|
||||
@ -287,9 +265,11 @@ static int parse_dash_host(char ***mapped_nodes, char *hosts)
|
||||
goto cleanup;
|
||||
}
|
||||
} else { /* non-relative syntax - add to list */
|
||||
if (OPAL_SUCCESS != (rc = opal_argv_append_nosize(mapped_nodes,
|
||||
mini_map[k]))) {
|
||||
goto cleanup;
|
||||
/* check for local alias */
|
||||
if (orte_ifislocal(mini_map[k])) {
|
||||
opal_argv_append_nosize(mapped_nodes, orte_process_info.nodename);
|
||||
} else {
|
||||
opal_argv_append_nosize(mapped_nodes, mini_map[k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -316,7 +296,7 @@ int orte_util_filter_dash_host_nodes(opal_list_t *nodes,
|
||||
orte_std_cntr_t i, j, len_mapped_node=0;
|
||||
int rc;
|
||||
char **mapped_nodes = NULL;
|
||||
orte_node_t *node, *hnp_node;
|
||||
orte_node_t *node;
|
||||
int num_empty=0;
|
||||
opal_list_t keep;
|
||||
bool want_all_empty=false;
|
||||
@ -342,9 +322,6 @@ int orte_util_filter_dash_host_nodes(opal_list_t *nodes,
|
||||
* nodes list ONCE.
|
||||
*/
|
||||
|
||||
/* get the hnp node's info */
|
||||
hnp_node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);
|
||||
|
||||
len_mapped_node = opal_argv_count(mapped_nodes);
|
||||
/* setup a working list so we can put the final list
|
||||
* of nodes in order. This way, if the user specifies a
|
||||
@ -399,21 +376,15 @@ int orte_util_filter_dash_host_nodes(opal_list_t *nodes,
|
||||
item = next;
|
||||
}
|
||||
} else {
|
||||
/* we are looking for a specific node on the list
|
||||
* we have a match if one of two conditions is met:
|
||||
* 1. the node_name and mapped_nodes directly match
|
||||
* 2. the node_name is the local system name AND
|
||||
* either the mapped_node is "localhost" OR it
|
||||
* is a local interface as found by opal_ifislocal
|
||||
*/
|
||||
/* we are looking for a specific node on the list. The
|
||||
* parser will have substituted our local name for any
|
||||
* alias, so we only have to do a strcmp here */
|
||||
item = opal_list_get_first(nodes);
|
||||
while (item != opal_list_get_end(nodes)) {
|
||||
next = opal_list_get_next(item); /* save this position */
|
||||
node = (orte_node_t*)item;
|
||||
/* search -host list to see if this one is found */
|
||||
if (0 == strcmp(node->name, mapped_nodes[i]) ||
|
||||
(0 == strcmp(node->name, hnp_node->name) &&
|
||||
(0 == strcasecmp(mapped_nodes[i], "localhost") || opal_ifislocal(mapped_nodes[i])))) {
|
||||
if (0 == strcmp(node->name, mapped_nodes[i])) {
|
||||
if (remove) {
|
||||
/* remove item from list */
|
||||
opal_list_remove_item(nodes, item);
|
||||
|
@ -132,9 +132,8 @@ static int hostfile_parse_line(int token, opal_list_t* updates,
|
||||
orte_node_t* node;
|
||||
bool got_max = false;
|
||||
char* value;
|
||||
char **argv, **aliases, *aptr;
|
||||
char **argv;
|
||||
char* node_name = NULL;
|
||||
char* node_alias = NULL;
|
||||
char* username = NULL;
|
||||
int cnt;
|
||||
int number_of_slots = 0;
|
||||
@ -181,13 +180,9 @@ static int hostfile_parse_line(int token, opal_list_t* updates,
|
||||
"%s hostfile: node %s is being excluded",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node_name));
|
||||
|
||||
/* convert this into something globally unique */
|
||||
if (strcmp(node_name, "localhost") == 0 || opal_ifislocal(node_name)) {
|
||||
/* see if this is another name for us */
|
||||
if (orte_ifislocal(node_name)) {
|
||||
/* Nodename has been allocated, that is for sure */
|
||||
if (orte_show_resolved_nodenames &&
|
||||
0 != strcmp(node_name, orte_process_info.nodename)) {
|
||||
node_alias = strdup(node_name);
|
||||
}
|
||||
free (node_name);
|
||||
node_name = strdup(orte_process_info.nodename);
|
||||
}
|
||||
@ -208,12 +203,8 @@ static int hostfile_parse_line(int token, opal_list_t* updates,
|
||||
/* this is not a node to be excluded, so we need to process it and
|
||||
* add it to the "include" list. See if this host is actually us.
|
||||
*/
|
||||
if (strcmp(node_name, "localhost") == 0 || opal_ifislocal(node_name)) {
|
||||
if (orte_ifislocal(node_name)) {
|
||||
/* Nodename has been allocated, that is for sure */
|
||||
if (orte_show_resolved_nodenames &&
|
||||
0 != strcmp(node_name, orte_process_info.nodename)) {
|
||||
node_alias = strdup(node_name);
|
||||
}
|
||||
free (node_name);
|
||||
node_name = strdup(orte_process_info.nodename);
|
||||
}
|
||||
@ -238,23 +229,6 @@ static int hostfile_parse_line(int token, opal_list_t* updates,
|
||||
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_SLOTS_GIVEN);
|
||||
free(node_name);
|
||||
}
|
||||
/* do we need to record an alias for this node? */
|
||||
if (NULL != node_alias) {
|
||||
/* add to list of aliases for this node - only add if unique */
|
||||
aptr = NULL;
|
||||
aliases = NULL;
|
||||
orte_get_attribute(&node->attributes, ORTE_NODE_ALIAS, (void**)&aptr, OPAL_STRING);
|
||||
if (NULL != aptr) {
|
||||
aliases = opal_argv_split(aptr, ',');
|
||||
free(aptr);
|
||||
}
|
||||
opal_argv_append_unique_nosize(&aliases, node_alias, false);
|
||||
free(node_alias);
|
||||
aptr = opal_argv_join(aliases, ',');
|
||||
opal_argv_free(aliases);
|
||||
orte_set_attribute(&node->attributes, ORTE_NODE_ALIAS, ORTE_ATTR_LOCAL, aptr, OPAL_STRING);
|
||||
free(aptr);
|
||||
}
|
||||
} else if (ORTE_HOSTFILE_RELATIVE == token) {
|
||||
/* store this for later processing */
|
||||
node = OBJ_NEW(orte_node_t);
|
||||
@ -779,15 +753,11 @@ int orte_util_filter_hostfile_nodes(opal_list_t *nodes,
|
||||
item1 != opal_list_get_end(nodes);
|
||||
item1 = opal_list_get_next(item1)) {
|
||||
node_from_list = (orte_node_t*)item1;
|
||||
/* since the name in the hostfile might not match
|
||||
* our local name, and yet still be intended to match,
|
||||
* we have to check for local interfaces
|
||||
*/
|
||||
if (0 == strcmp(node_from_file->name, node_from_list->name) ||
|
||||
(0 == strcmp(node_from_file->name, "localhost") &&
|
||||
0 == strcmp(node_from_list->name, orte_process_info.nodename)) ||
|
||||
(opal_ifislocal(node_from_list->name) &&
|
||||
opal_ifislocal(node_from_file->name))) {
|
||||
/* we have converted all aliases for ourself
|
||||
* to our own detected nodename, so no need
|
||||
* to check for interfaces again - a simple
|
||||
* strcmp will suffice */
|
||||
if (0 == strcmp(node_from_file->name, node_from_list->name)) {
|
||||
/* if the slot count here is less than the
|
||||
* total slots avail on this node, set it
|
||||
* to the specified count - this allows people
|
||||
|
@ -12,7 +12,7 @@
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -32,15 +32,21 @@
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#if HAVE_ARPA_INET_H
|
||||
#include <arpa/inet.h>
|
||||
#endif
|
||||
#include <ctype.h>
|
||||
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_var.h"
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/if.h"
|
||||
#include "opal/util/net.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/proc.h"
|
||||
|
||||
#include "orte/util/attr.h"
|
||||
|
||||
#include "orte/util/proc_info.h"
|
||||
|
||||
/* provide a connection to a reqd variable */
|
||||
@ -63,6 +69,7 @@ ORTE_DECLSPEC orte_proc_info_t orte_process_info = {
|
||||
.num_daemons = 1,
|
||||
.num_nodes = 1,
|
||||
.nodename = NULL,
|
||||
.aliases = NULL,
|
||||
.pid = 0,
|
||||
.proc_type = ORTE_PROC_TYPE_NONE,
|
||||
.sync_buf = NULL,
|
||||
@ -97,7 +104,8 @@ int orte_proc_info(void)
|
||||
char hostname[ORTE_MAX_HOSTNAME_SIZE];
|
||||
char **prefixes;
|
||||
bool match;
|
||||
|
||||
struct in_addr buf;
|
||||
|
||||
if (init) {
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
@ -163,12 +171,18 @@ int orte_proc_info(void)
|
||||
|
||||
/* get the nodename */
|
||||
gethostname(hostname, ORTE_MAX_HOSTNAME_SIZE);
|
||||
/* add this to our list of aliases */
|
||||
opal_argv_append_nosize(&orte_process_info.aliases, hostname);
|
||||
|
||||
if (!orte_keep_fqdn_hostnames) {
|
||||
/* if the nodename is an IP address, do not mess with it! */
|
||||
if (!opal_net_isaddr(hostname)) {
|
||||
if (0 == inet_pton(AF_INET, hostname, &buf) &&
|
||||
0 == inet_pton(AF_INET6, hostname, &buf)) {
|
||||
/* not an IP address, so remove any domain info */
|
||||
if (NULL != (ptr = strchr(hostname, '.'))) {
|
||||
*ptr = '\0';
|
||||
/* add this to our list of aliases */
|
||||
opal_argv_append_nosize(&orte_process_info.aliases, hostname);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -200,6 +214,8 @@ int orte_proc_info(void)
|
||||
} else {
|
||||
orte_process_info.nodename = strdup(&hostname[idx]);
|
||||
}
|
||||
/* add this to our list of aliases */
|
||||
opal_argv_append_nosize(&orte_process_info.aliases, orte_process_info.nodename);
|
||||
match = true;
|
||||
break;
|
||||
}
|
||||
@ -213,6 +229,9 @@ int orte_proc_info(void)
|
||||
orte_process_info.nodename = strdup(hostname);
|
||||
}
|
||||
|
||||
/* add "localhost" to our list of aliases */
|
||||
opal_argv_append_nosize(&orte_process_info.aliases, "localhost");
|
||||
|
||||
/* get the number of nodes in the job */
|
||||
orte_process_info.num_nodes = 1;
|
||||
(void) mca_base_var_register ("orte", "orte", NULL, "num_nodes",
|
||||
@ -314,6 +333,34 @@ int orte_proc_info_finalize(void)
|
||||
|
||||
OBJ_DESTRUCT(&orte_process_info.super);
|
||||
|
||||
opal_argv_free(orte_process_info.aliases);
|
||||
|
||||
init = false;
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
bool orte_ifislocal(const char *hostname)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* see if it matches any of our known aliases */
|
||||
if (NULL != orte_process_info.aliases) {
|
||||
for (i=0; NULL != orte_process_info.aliases[i]; i++) {
|
||||
if (0 == strcmp(hostname, orte_process_info.aliases[i])) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* okay, have to resolve the address - the opal_ifislocal
|
||||
* function will not attempt to resolve the address if
|
||||
* told not to do so */
|
||||
if (opal_ifislocal(hostname)) {
|
||||
/* add this to our known aliases */
|
||||
opal_argv_append_nosize(&orte_process_info.aliases, hostname);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* not me */
|
||||
return false;
|
||||
}
|
||||
|
@ -11,7 +11,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -103,6 +103,7 @@ struct orte_proc_info_t {
|
||||
orte_vpid_t num_daemons; /**< number of daemons in system */
|
||||
int num_nodes; /**< number of nodes in the job */
|
||||
char *nodename; /**< string name for this node */
|
||||
char **aliases; /**< aliases for this node */
|
||||
pid_t pid; /**< Local process ID for this process */
|
||||
orte_proc_type_t proc_type; /**< Type of process */
|
||||
opal_buffer_t *sync_buf; /**< buffer to store sync response */
|
||||
@ -166,6 +167,8 @@ ORTE_DECLSPEC int orte_proc_info(void);
|
||||
|
||||
ORTE_DECLSPEC int orte_proc_info_finalize(void);
|
||||
|
||||
/**
 * Check whether the given name refers to the local node.
 *
 * The name is compared against the entries of
 * orte_process_info.aliases; if none matches, opal_ifislocal() is
 * asked, and a name it reports as local is added to the alias list.
 *
 * @param name  host name to test
 * @return true if the name is an alias for this node, false otherwise
 */
ORTE_DECLSPEC bool orte_ifislocal(const char *name);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user