1
1

Merge pull request #479 from rhc54/topic/rarp

Attempt to reduce the RARP traffic during definition of allocations
Этот коммит содержится в:
rhc54 2015-03-17 07:40:35 -07:00
родитель 1196069815 b01e8c1063
Коммит 7f8fcb7fb7
15 изменённых файлов: 137 добавлений и 192 удалений

Просмотреть файл

@ -101,7 +101,6 @@ const opal_pmix_base_module_t opal_pmix_native_module = {
// local variables
static int init_cntr = 0;
opal_process_name_t native_pname;
static char *local_uri = NULL;
static uint32_t sm_flag;
static void unpack_segment_info(opal_buffer_t *buf, opal_process_name_t *id, char** seg_info)
@ -437,12 +436,6 @@ static int native_put(opal_pmix_scope_t scope,
}
}
/* if this is our uri, save it as we need to send it to our server
* as a special, separate item */
if (0 == strcmp(OPAL_DSTORE_URI, kv->key)) {
local_uri = strdup(kv->data.string);
}
/* have to save a copy locally as some of our components will
* look for it */
(void)opal_dstore.store(opal_dstore_internal, &OPAL_PROC_MY_NAME, kv);
@ -493,17 +486,6 @@ static int native_fence(opal_process_name_t *procs, size_t nprocs)
return rc;
}
}
/* provide our URI */
if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &local_uri, 1, OPAL_STRING))) {
OPAL_ERROR_LOG(rc);
OBJ_RELEASE(msg);
return rc;
}
/* only do it once */
if (NULL != local_uri) {
free(local_uri);
local_uri = NULL;
}
/* pack 1 if we have sm dstore enabled, 0 otherwise */
if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &sm_flag, 1, OPAL_UINT32))) {
@ -757,17 +739,6 @@ static int native_fence_nb(opal_process_name_t *procs, size_t nprocs,
return rc;
}
}
/* provide our URI */
if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &local_uri, 1, OPAL_STRING))) {
OPAL_ERROR_LOG(rc);
OBJ_RELEASE(msg);
return rc;
}
/* only do it once */
if (NULL != local_uri) {
free(local_uri);
local_uri = NULL;
}
/* pack 1 if we have sm dstore enabled, 0 otherwise */
if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &sm_flag, 1, OPAL_UINT32))) {

Просмотреть файл

@ -1,7 +1,7 @@
/*
* Copyright (c) 2012-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -33,6 +33,7 @@
#include "orte/util/error_strings.h"
#include "orte/util/name_fns.h"
#include "orte/util/proc_info.h"
#include "orte/util/show_help.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/errmgr/errmgr.h"
@ -537,9 +538,7 @@ static void process_opens(int fd, short args, void *cbdata)
}
/* if the host is our own, then treat it as a local file */
if (0 == strcmp(host, orte_process_info.nodename) ||
0 == strcmp(host, "localhost") ||
opal_ifislocal(host)) {
if (orte_ifislocal(host)) {
opal_output_verbose(1, orte_dfs_base_framework.framework_output,
"%s file %s on local host",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),

Просмотреть файл

@ -1,7 +1,8 @@
/*
* Copyright (c) 2012-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -329,10 +330,7 @@ static void process_opens(int fd, short args, void *cbdata)
goto complete;
}
/* if the host is our own, then treat it as a local file */
if (NULL == host ||
0 == strcmp(host, orte_process_info.nodename) ||
0 == strcmp(host, "localhost") ||
opal_ifislocal(host)) {
if (NULL == host || orte_ifislocal(host)) {
opal_output_verbose(1, orte_dfs_base_framework.framework_output,
"%s file %s on local host",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),

Просмотреть файл

@ -232,28 +232,31 @@ static int rte_init(void)
/*** PUSH DATA FOR OTHERS TO FIND ***/
/* if our URI was not provided by the system, then
* push our URI so others can find us */
OBJ_CONSTRUCT(&vals, opal_list_t);
if (OPAL_SUCCESS != opal_dstore.fetch(opal_dstore_internal, &OPAL_PROC_MY_NAME,
OPAL_DSTORE_URI, &vals)) {
/* construct the RTE string */
rmluri = orte_rml.get_contact_info();
/* push it out for others to use */
OBJ_CONSTRUCT(&kvn, opal_value_t);
kvn.key = strdup(OPAL_DSTORE_URI);
kvn.type = OPAL_STRING;
kvn.data.string = strdup(rmluri);
if (ORTE_SUCCESS != (ret = opal_pmix.put(PMIX_GLOBAL, &kvn))) {
error = "db store uri";
/* if we are direct launched, then push our RML URI - there
* is no need to do so when launched by mpirun as all apps
* communicate thru their local daemon */
if (orte_standalone_operation) {
OBJ_CONSTRUCT(&vals, opal_list_t);
if (OPAL_SUCCESS != opal_dstore.fetch(opal_dstore_internal, &OPAL_PROC_MY_NAME,
OPAL_DSTORE_URI, &vals)) {
/* construct the RTE string */
rmluri = orte_rml.get_contact_info();
/* push it out for others to use */
OBJ_CONSTRUCT(&kvn, opal_value_t);
kvn.key = strdup(OPAL_DSTORE_URI);
kvn.type = OPAL_STRING;
kvn.data.string = strdup(rmluri);
if (ORTE_SUCCESS != (ret = opal_pmix.put(PMIX_GLOBAL, &kvn))) {
error = "db store uri";
OBJ_DESTRUCT(&kvn);
goto error;
}
OBJ_DESTRUCT(&kvn);
goto error;
free(rmluri);
}
OBJ_DESTRUCT(&kvn);
free(rmluri);
OPAL_LIST_DESTRUCT(&vals);
}
OPAL_LIST_DESTRUCT(&vals);
/* push our hostname so others can find us, if they need to */
OBJ_CONSTRUCT(&kvn, opal_value_t);
kvn.key = strdup(OPAL_DSTORE_HOSTNAME);

Просмотреть файл

@ -216,16 +216,14 @@ static void accept_connection(const int accepted_fd,
ORTE_ACTIVATE_TCP_ACCEPT_STATE(accepted_fd, addr, recv_handler);
}
/* the host in this case is always in "dot" notation, and
* thus we do not need to do a DNS lookup to convert it */
static int parse_uri(const uint16_t af_family,
const char* host,
const char *port,
struct sockaddr* inaddr)
{
struct sockaddr_in *in;
#if OPAL_ENABLE_IPV6
struct addrinfo hints, *res;
int ret;
#endif
if (AF_INET == af_family) {
memset(inaddr, 0, sizeof(struct sockaddr_in));
@ -239,21 +237,14 @@ static int parse_uri(const uint16_t af_family,
}
#if OPAL_ENABLE_IPV6
else if (AF_INET6 == af_family) {
size_t len;
struct sockaddr_in6 *in6;
memset(inaddr, 0, sizeof(struct sockaddr_in6));
memset(&hints, 0, sizeof(hints));
hints.ai_family = af_family;
hints.ai_socktype = SOCK_STREAM;
ret = getaddrinfo(host, NULL, &hints, &res);
in6 = (struct sockaddr_in6*) inaddr;
if (ret) {
opal_output (0, "oob_tcp_parse_uri: Could not resolve %s. [Error: %s]\n",
host, gai_strerror (ret));
if (0 == inet_pton(AF_INET6, host, (void*)&in6->sin6_addr)) {
opal_output (0, "oob_tcp_parse_uri: Could not convert %s\n", host);
return ORTE_ERR_BAD_PARAM;
}
len = (res->ai_addrlen < sizeof(struct sockaddr_in6)) ? res->ai_addrlen : sizeof(struct sockaddr_in6);
memcpy(inaddr, res->ai_addr, len);
freeaddrinfo(res);
}
#endif
else {

Просмотреть файл

@ -451,6 +451,12 @@ static bool component_available(void)
opal_output_verbose(5, orte_oob_base_framework.framework_output,
"oob:tcp: component_available called");
/* if we are an APP and we are not direct launched,
* then we don't want to be considered */
if (ORTE_PROC_IS_APP && !orte_standalone_operation) {
return false;
}
/* if interface include was given, construct a list
* of those interfaces which match the specifications - remember,
* the includes could be given as named interfaces, IP addrs, or

Просмотреть файл

@ -11,7 +11,7 @@
* All rights reserved.
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -34,6 +34,7 @@
#include "opal/util/output.h"
#include "opal/dss/dss.h"
#include "opal/util/argv.h"
#include "opal/mca/if/if.h"
#include "orte/util/show_help.h"
#include "orte/mca/errmgr/errmgr.h"
@ -195,6 +196,11 @@ void orte_ras_base_allocate(int fd, short args, void *cbdata)
}
/* If something came back, save it and we are done */
if (!opal_list_is_empty(&nodes)) {
/* flag that the allocation is managed */
orte_managed_allocation = true;
/* since it is managed, we do not attempt to resolve
* the nodenames */
opal_if_do_not_resolve = true;
/* store the results in the global resource pool - this removes the
* list items
*/
@ -210,8 +216,6 @@ void orte_ras_base_allocate(int fd, short args, void *cbdata)
if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
}
/* flag that the allocation is managed */
orte_managed_allocation = true;
goto DISPLAY;
} else if (orte_allocation_required) {
/* if nothing was found, and an allocation is

Просмотреть файл

@ -11,7 +11,7 @@
* All rights reserved.
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -103,10 +103,7 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
* first position since it is the first one entered. We need to check to see
* if this node is the same as the HNP's node so we don't double-enter it
*/
if (NULL != hnp_node &&
(0 == strcmp(node->name, hnp_node->name) ||
0 == strcmp(node->name, "localhost") ||
opal_ifislocal(node->name))) {
if (NULL != hnp_node && orte_ifislocal(node->name)) {
OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
"%s ras:base:node_insert updating HNP [%s] info to %ld slots",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),

Просмотреть файл

@ -14,7 +14,7 @@
* All rights reserved.
* Copyright (c) 2008 Voltaire. All rights reserved
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
*
* $COPYRIGHT$
*
@ -484,8 +484,7 @@ static int orte_rmaps_rank_file_parse(const char *rankfile)
goto unlock;
}
/* check if this is the local node */
if (0 == strcmp(node_name, hnp_node->name) ||
opal_ifislocal(node_name)) {
if (orte_ifislocal(node_name)) {
rfmap->node_name = strdup(hnp_node->name);
} else {
rfmap->node_name = strdup(node_name);

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2006-2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -298,7 +298,7 @@ static int orte_rmaps_seq_map(orte_job_t *jdata)
* hostfile may not have been FQDN, while name returned
* by gethostname may have been (or vice versa)
*/
if (opal_ifislocal(seq->hostname)) {
if (orte_ifislocal(seq->hostname)) {
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"mca:rmaps:seq: removing head node %s", seq->hostname);
opal_list_remove_item(seq_list, item);

Просмотреть файл

@ -100,7 +100,6 @@ void pmix_server_process_message(pmix_server_peer_t *peer)
pmix_server_dmx_req_t *req, *nextreq;
bool found;
orte_grpcomm_signature_t *sig;
char *local_uri;
uint32_t sm_flag;
/* xfer the message to a buffer for unpacking */
@ -207,19 +206,6 @@ void pmix_server_process_message(pmix_server_peer_t *peer)
(PMIX_FENCENB_CMD == cmd) ? "FENCE_NB" : "FENCE", tmp);
free(tmp);
}
/* get the URI for this process */
cnt = 1;
if (OPAL_SUCCESS != (rc = opal_dss.unpack(&xfer, &local_uri, &cnt, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(sig);
goto reply_fence;
}
/* if not NULL, then update our connection info as we might need
* to send this proc a message at some point */
if (NULL != local_uri) {
orte_rml.set_contact_info(local_uri);
free(local_uri);
}
/* unpack flag if sm dstore is supported by the client */
cnt = 1;
if (OPAL_SUCCESS != (rc = opal_dss.unpack(&xfer, &sm_flag, &cnt, OPAL_UINT32))) {

Просмотреть файл

@ -10,7 +10,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -52,11 +52,10 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
orte_std_cntr_t i, j, k;
int rc;
char **host_argv=NULL;
char **mapped_nodes = NULL, **mini_map;
char **mapped_nodes = NULL, **mini_map, *ndname;
orte_node_t *node, *nd;
opal_list_t adds;
bool found;
char **aliases, *aptr;
OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
"%s dashhost: parsing args",
@ -111,12 +110,17 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
"%s dashhost: working node %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), mapped_nodes[i]));
/* check for local name */
if (orte_ifislocal(mapped_nodes[i])) {
ndname = orte_process_info.nodename;
} else {
ndname = mapped_nodes[i];
}
/* see if the node is already on the list */
found = false;
OPAL_LIST_FOREACH(node, &adds, orte_node_t) {
if (0 == strcmp(node->name, mapped_nodes[i]) ||
(0 == strcmp(node->name, orte_process_info.nodename) &&
(0 == strcmp(mapped_nodes[i], "localhost") || opal_ifislocal(mapped_nodes[i])))) {
if (0 == strcmp(node->name, ndname)) {
found = true;
++node->slots;
OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
@ -134,33 +138,7 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
if (NULL == node) {
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* check to see if this is a local name */
if (0 == strcmp(mapped_nodes[i], "localhost") ||
opal_ifislocal(mapped_nodes[i])) {
/* it is local, so use the local nodename to avoid
* later confusion
*/
if (orte_show_resolved_nodenames &&
0 != strcmp(mapped_nodes[i], orte_process_info.nodename)) {
/* add to list of aliases for this node - only add if unique */
aptr = NULL;
aliases = NULL;
orte_get_attribute(&node->attributes, ORTE_NODE_ALIAS, (void**)&aptr, OPAL_STRING);
if (NULL != aptr) {
aliases = opal_argv_split(aptr, ',');
free(aptr);
}
opal_argv_append_unique_nosize(&aliases, mapped_nodes[i], false);
aptr = opal_argv_join(aliases, ',');
opal_argv_free(aliases);
orte_set_attribute(&node->attributes, ORTE_NODE_ALIAS, ORTE_ATTR_LOCAL, aptr, OPAL_STRING);
free(aptr);
}
node->name = strdup(orte_process_info.nodename);
} else {
/* not local - use the given name */
node->name = strdup(mapped_nodes[i]);
}
node->name = strdup(ndname);
OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
"%s dashhost: added node %s to list",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name));
@ -287,9 +265,11 @@ static int parse_dash_host(char ***mapped_nodes, char *hosts)
goto cleanup;
}
} else { /* non-relative syntax - add to list */
if (OPAL_SUCCESS != (rc = opal_argv_append_nosize(mapped_nodes,
mini_map[k]))) {
goto cleanup;
/* check for local alias */
if (orte_ifislocal(mini_map[k])) {
opal_argv_append_nosize(mapped_nodes, orte_process_info.nodename);
} else {
opal_argv_append_nosize(mapped_nodes, mini_map[k]);
}
}
}
@ -316,7 +296,7 @@ int orte_util_filter_dash_host_nodes(opal_list_t *nodes,
orte_std_cntr_t i, j, len_mapped_node=0;
int rc;
char **mapped_nodes = NULL;
orte_node_t *node, *hnp_node;
orte_node_t *node;
int num_empty=0;
opal_list_t keep;
bool want_all_empty=false;
@ -342,9 +322,6 @@ int orte_util_filter_dash_host_nodes(opal_list_t *nodes,
* nodes list ONCE.
*/
/* get the hnp node's info */
hnp_node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);
len_mapped_node = opal_argv_count(mapped_nodes);
/* setup a working list so we can put the final list
* of nodes in order. This way, if the user specifies a
@ -399,21 +376,15 @@ int orte_util_filter_dash_host_nodes(opal_list_t *nodes,
item = next;
}
} else {
/* we are looking for a specific node on the list
* we have a match if one of two conditions is met:
* 1. the node_name and mapped_nodes directly match
* 2. the node_name is the local system name AND
* either the mapped_node is "localhost" OR it
* is a local interface as found by opal_ifislocal
*/
/* we are looking for a specific node on the list. The
* parser will have substituted our local name for any
* alias, so we only have to do a strcmp here */
item = opal_list_get_first(nodes);
while (item != opal_list_get_end(nodes)) {
next = opal_list_get_next(item); /* save this position */
node = (orte_node_t*)item;
/* search -host list to see if this one is found */
if (0 == strcmp(node->name, mapped_nodes[i]) ||
(0 == strcmp(node->name, hnp_node->name) &&
(0 == strcasecmp(mapped_nodes[i], "localhost") || opal_ifislocal(mapped_nodes[i])))) {
if (0 == strcmp(node->name, mapped_nodes[i])) {
if (remove) {
/* remove item from list */
opal_list_remove_item(nodes, item);

Просмотреть файл

@ -132,9 +132,8 @@ static int hostfile_parse_line(int token, opal_list_t* updates,
orte_node_t* node;
bool got_max = false;
char* value;
char **argv, **aliases, *aptr;
char **argv;
char* node_name = NULL;
char* node_alias = NULL;
char* username = NULL;
int cnt;
int number_of_slots = 0;
@ -181,13 +180,9 @@ static int hostfile_parse_line(int token, opal_list_t* updates,
"%s hostfile: node %s is being excluded",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node_name));
/* convert this into something globally unique */
if (strcmp(node_name, "localhost") == 0 || opal_ifislocal(node_name)) {
/* see if this is another name for us */
if (orte_ifislocal(node_name)) {
/* Nodename has been allocated, that is for sure */
if (orte_show_resolved_nodenames &&
0 != strcmp(node_name, orte_process_info.nodename)) {
node_alias = strdup(node_name);
}
free (node_name);
node_name = strdup(orte_process_info.nodename);
}
@ -208,12 +203,8 @@ static int hostfile_parse_line(int token, opal_list_t* updates,
/* this is not a node to be excluded, so we need to process it and
* add it to the "include" list. See if this host is actually us.
*/
if (strcmp(node_name, "localhost") == 0 || opal_ifislocal(node_name)) {
if (orte_ifislocal(node_name)) {
/* Nodename has been allocated, that is for sure */
if (orte_show_resolved_nodenames &&
0 != strcmp(node_name, orte_process_info.nodename)) {
node_alias = strdup(node_name);
}
free (node_name);
node_name = strdup(orte_process_info.nodename);
}
@ -238,23 +229,6 @@ static int hostfile_parse_line(int token, opal_list_t* updates,
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_SLOTS_GIVEN);
free(node_name);
}
/* do we need to record an alias for this node? */
if (NULL != node_alias) {
/* add to list of aliases for this node - only add if unique */
aptr = NULL;
aliases = NULL;
orte_get_attribute(&node->attributes, ORTE_NODE_ALIAS, (void**)&aptr, OPAL_STRING);
if (NULL != aptr) {
aliases = opal_argv_split(aptr, ',');
free(aptr);
}
opal_argv_append_unique_nosize(&aliases, node_alias, false);
free(node_alias);
aptr = opal_argv_join(aliases, ',');
opal_argv_free(aliases);
orte_set_attribute(&node->attributes, ORTE_NODE_ALIAS, ORTE_ATTR_LOCAL, aptr, OPAL_STRING);
free(aptr);
}
} else if (ORTE_HOSTFILE_RELATIVE == token) {
/* store this for later processing */
node = OBJ_NEW(orte_node_t);
@ -779,15 +753,11 @@ int orte_util_filter_hostfile_nodes(opal_list_t *nodes,
item1 != opal_list_get_end(nodes);
item1 = opal_list_get_next(item1)) {
node_from_list = (orte_node_t*)item1;
/* since the name in the hostfile might not match
* our local name, and yet still be intended to match,
* we have to check for local interfaces
*/
if (0 == strcmp(node_from_file->name, node_from_list->name) ||
(0 == strcmp(node_from_file->name, "localhost") &&
0 == strcmp(node_from_list->name, orte_process_info.nodename)) ||
(opal_ifislocal(node_from_list->name) &&
opal_ifislocal(node_from_file->name))) {
/* we have converted all aliases for ourself
* to our own detected nodename, so no need
* to check for interfaces again - a simple
* strcmp will suffice */
if (0 == strcmp(node_from_file->name, node_from_list->name)) {
/* if the slot count here is less than the
* total slots avail on this node, set it
* to the specified count - this allows people

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -32,15 +32,21 @@
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#if HAVE_ARPA_INET_H
#include <arpa/inet.h>
#endif
#include <ctype.h>
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_var.h"
#include "opal/util/argv.h"
#include "opal/util/if.h"
#include "opal/util/net.h"
#include "opal/util/output.h"
#include "opal/util/proc.h"
#include "orte/util/attr.h"
#include "orte/util/proc_info.h"
/* provide a connection to a required variable */
@ -63,6 +69,7 @@ ORTE_DECLSPEC orte_proc_info_t orte_process_info = {
.num_daemons = 1,
.num_nodes = 1,
.nodename = NULL,
.aliases = NULL,
.pid = 0,
.proc_type = ORTE_PROC_TYPE_NONE,
.sync_buf = NULL,
@ -97,7 +104,8 @@ int orte_proc_info(void)
char hostname[ORTE_MAX_HOSTNAME_SIZE];
char **prefixes;
bool match;
struct in_addr buf;
if (init) {
return ORTE_SUCCESS;
}
@ -163,12 +171,18 @@ int orte_proc_info(void)
/* get the nodename */
gethostname(hostname, ORTE_MAX_HOSTNAME_SIZE);
/* add this to our list of aliases */
opal_argv_append_nosize(&orte_process_info.aliases, hostname);
if (!orte_keep_fqdn_hostnames) {
/* if the nodename is an IP address, do not mess with it! */
if (!opal_net_isaddr(hostname)) {
if (0 == inet_pton(AF_INET, hostname, &buf) &&
0 == inet_pton(AF_INET6, hostname, &buf)) {
/* not an IP address, so remove any domain info */
if (NULL != (ptr = strchr(hostname, '.'))) {
*ptr = '\0';
/* add this to our list of aliases */
opal_argv_append_nosize(&orte_process_info.aliases, hostname);
}
}
}
@ -200,6 +214,8 @@ int orte_proc_info(void)
} else {
orte_process_info.nodename = strdup(&hostname[idx]);
}
/* add this to our list of aliases */
opal_argv_append_nosize(&orte_process_info.aliases, orte_process_info.nodename);
match = true;
break;
}
@ -213,6 +229,9 @@ int orte_proc_info(void)
orte_process_info.nodename = strdup(hostname);
}
/* add "localhost" to our list of aliases */
opal_argv_append_nosize(&orte_process_info.aliases, "localhost");
/* get the number of nodes in the job */
orte_process_info.num_nodes = 1;
(void) mca_base_var_register ("orte", "orte", NULL, "num_nodes",
@ -314,6 +333,34 @@ int orte_proc_info_finalize(void)
OBJ_DESTRUCT(&orte_process_info.super);
opal_argv_free(orte_process_info.aliases);
init = false;
return ORTE_SUCCESS;
}
/**
 * Determine whether a hostname refers to the local node.
 *
 * The name is first compared against the entries cached in
 * orte_process_info.aliases.  On a miss it is handed to
 * opal_ifislocal(); a name accepted there is appended to the alias
 * list so that later queries for the same name are answered by the
 * cache.
 *
 * @param hostname  name to test; a NULL pointer is reported as not local
 * @return true if the name is a known or newly confirmed alias of this
 *         node, false otherwise
 */
bool orte_ifislocal(const char *hostname)
{
    int i;

    /* a missing name cannot match anything and must never be
     * handed to strcmp() */
    if (NULL == hostname) {
        return false;
    }

    /* see if it matches any of our known aliases */
    if (NULL != orte_process_info.aliases) {
        for (i = 0; NULL != orte_process_info.aliases[i]; i++) {
            if (0 == strcmp(hostname, orte_process_info.aliases[i])) {
                return true;
            }
        }
    }

    /* okay, have to resolve the address - the opal_ifislocal
     * function will not attempt to resolve the address if
     * told not to do so */
    if (opal_ifislocal(hostname)) {
        /* add this to our known aliases - the result of the append
         * is not checked, the name is reported as local either way */
        opal_argv_append_nosize(&orte_process_info.aliases, hostname);
        return true;
    }

    /* not me */
    return false;
}

Просмотреть файл

@ -11,7 +11,7 @@
* All rights reserved.
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -103,6 +103,7 @@ struct orte_proc_info_t {
orte_vpid_t num_daemons; /**< number of daemons in system */
int num_nodes; /**< number of nodes in the job */
char *nodename; /**< string name for this node */
char **aliases; /**< aliases for this node */
pid_t pid; /**< Local process ID for this process */
orte_proc_type_t proc_type; /**< Type of process */
opal_buffer_t *sync_buf; /**< buffer to store sync response */
@ -166,6 +167,8 @@ ORTE_DECLSPEC int orte_proc_info(void);
ORTE_DECLSPEC int orte_proc_info_finalize(void);
ORTE_DECLSPEC bool orte_ifislocal(const char *name);
END_C_DECLS
#endif