Add an MCA param to retain all IP-address-based aliases for node names so that procs can look nodes up by interface, if desired. If the param is set, the aliases are passed around to all daemons and procs for local use.
This commit was SVN r27619.
Этот коммит содержится в:
родитель
a52071a17d
Коммит
e11f32038a
@ -504,10 +504,9 @@ static void process_opens(int fd, short args, void *cbdata)
|
|||||||
orte_dfs_request_t *dfs = (orte_dfs_request_t*)cbdata;
|
orte_dfs_request_t *dfs = (orte_dfs_request_t*)cbdata;
|
||||||
int rc;
|
int rc;
|
||||||
opal_buffer_t *buffer;
|
opal_buffer_t *buffer;
|
||||||
char *scheme, *host, *filename, *hostname;
|
char *scheme, *host, *filename;
|
||||||
orte_process_name_t daemon;
|
orte_process_name_t daemon;
|
||||||
bool found;
|
orte_vpid_t *v;
|
||||||
orte_vpid_t v;
|
|
||||||
|
|
||||||
/* get the scheme to determine if we can process locally or not */
|
/* get the scheme to determine if we can process locally or not */
|
||||||
if (NULL == (scheme = opal_uri_get_scheme(dfs->uri))) {
|
if (NULL == (scheme = opal_uri_get_scheme(dfs->uri))) {
|
||||||
@ -538,8 +537,7 @@ static void process_opens(int fd, short args, void *cbdata)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* if the host is our own, then treat it as a local file */
|
/* if the host is our own, then treat it as a local file */
|
||||||
if (NULL == host ||
|
if (0 == strcmp(host, orte_process_info.nodename) ||
|
||||||
0 == strcmp(host, orte_process_info.nodename) ||
|
|
||||||
0 == strcmp(host, "localhost") ||
|
0 == strcmp(host, "localhost") ||
|
||||||
opal_ifislocal(host)) {
|
opal_ifislocal(host)) {
|
||||||
opal_output_verbose(1, orte_dfs_base.output,
|
opal_output_verbose(1, orte_dfs_base.output,
|
||||||
@ -554,24 +552,16 @@ static void process_opens(int fd, short args, void *cbdata)
|
|||||||
|
|
||||||
/* ident the daemon on that host */
|
/* ident the daemon on that host */
|
||||||
daemon.jobid = ORTE_PROC_MY_DAEMON->jobid;
|
daemon.jobid = ORTE_PROC_MY_DAEMON->jobid;
|
||||||
found = false;
|
/* fetch the daemon for this hostname */
|
||||||
for (v=0; v < orte_process_info.num_daemons; v++) {
|
opal_output_verbose(1, orte_dfs_base.output,
|
||||||
daemon.vpid = v;
|
"%s looking for daemon on host %s",
|
||||||
/* fetch the hostname where this daemon is located */
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), host);
|
||||||
if (ORTE_SUCCESS != (rc = orte_db.fetch_pointer(&daemon, ORTE_DB_HOSTNAME, (void**)&hostname, OPAL_STRING))) {
|
v = &daemon.vpid;
|
||||||
ORTE_ERROR_LOG(rc);
|
if (ORTE_SUCCESS != (rc = orte_db.fetch(ORTE_NAME_WILDCARD, host, (void**)&v, ORTE_VPID))) {
|
||||||
goto complete;
|
ORTE_ERROR_LOG(rc);
|
||||||
}
|
|
||||||
opal_output(0, "%s GOT HOST %s HOSTNAME %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), host, hostname);
|
|
||||||
if (0 == strcmp(host, hostname)) {
|
|
||||||
found = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!found) {
|
|
||||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
|
||||||
goto complete;
|
goto complete;
|
||||||
}
|
}
|
||||||
|
|
||||||
opal_output_verbose(1, orte_dfs_base.output,
|
opal_output_verbose(1, orte_dfs_base.output,
|
||||||
"%s file %s on host %s daemon %s",
|
"%s file %s on host %s daemon %s",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||||
|
@ -550,6 +550,11 @@ static int rte_init(void)
|
|||||||
node->daemon_launched = true;
|
node->daemon_launched = true;
|
||||||
node->state = ORTE_NODE_STATE_UP;
|
node->state = ORTE_NODE_STATE_UP;
|
||||||
|
|
||||||
|
/* if we are to retain aliases, get ours */
|
||||||
|
if (orte_retain_aliases) {
|
||||||
|
opal_ifgetaliases(&node->alias);
|
||||||
|
}
|
||||||
|
|
||||||
/* record that the daemon job is running */
|
/* record that the daemon job is running */
|
||||||
jdata->num_procs = 1;
|
jdata->num_procs = 1;
|
||||||
jdata->state = ORTE_JOB_STATE_RUNNING;
|
jdata->state = ORTE_JOB_STATE_RUNNING;
|
||||||
|
@ -674,6 +674,15 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
|
|||||||
orted_failed_launch = true;
|
orted_failed_launch = true;
|
||||||
goto CLEANUP;
|
goto CLEANUP;
|
||||||
}
|
}
|
||||||
|
if (!orte_have_fqdn_allocation) {
|
||||||
|
/* remove any domain info */
|
||||||
|
if (NULL != (ptr = strchr(nodename, '.'))) {
|
||||||
|
*ptr = '\0';
|
||||||
|
ptr = strdup(nodename);
|
||||||
|
free(nodename);
|
||||||
|
nodename = ptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||||
"%s plm:base:orted_report_launch from daemon %s on node %s",
|
"%s plm:base:orted_report_launch from daemon %s on node %s",
|
||||||
@ -682,15 +691,6 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
|
|||||||
|
|
||||||
/* look this node up, if necessary */
|
/* look this node up, if necessary */
|
||||||
if (!orte_plm_globals.daemon_nodes_assigned_at_launch) {
|
if (!orte_plm_globals.daemon_nodes_assigned_at_launch) {
|
||||||
if (!orte_have_fqdn_allocation) {
|
|
||||||
/* remove any domain info */
|
|
||||||
if (NULL != (ptr = strchr(nodename, '.'))) {
|
|
||||||
*ptr = '\0';
|
|
||||||
ptr = strdup(nodename);
|
|
||||||
free(nodename);
|
|
||||||
nodename = ptr;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||||
"%s plm:base:orted_report_launch attempting to assign daemon %s to node %s",
|
"%s plm:base:orted_report_launch attempting to assign daemon %s to node %s",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||||
@ -732,7 +732,48 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
node = daemon->node;
|
||||||
|
if (NULL == node) {
|
||||||
|
/* this shouldn't happen - it indicates an error in the
|
||||||
|
* prior node matching logic, so report it and error out
|
||||||
|
*/
|
||||||
|
orte_show_help("help-plm-base.txt", "daemon-no-assigned-node", true,
|
||||||
|
ORTE_NAME_PRINT(&daemon->name), nodename);
|
||||||
|
orted_failed_launch = true;
|
||||||
|
goto CLEANUP;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (orte_retain_aliases) {
|
||||||
|
char *alias;
|
||||||
|
uint8_t naliases, ni;
|
||||||
|
/* first, store the nodename itself as an alias. We do
|
||||||
|
* this in case the nodename isn't the same as what we
|
||||||
|
* were given by the allocation. For example, a hostfile
|
||||||
|
* might contain an IP address instead of the value returned
|
||||||
|
* by gethostname, yet the daemon will have returned the latter
|
||||||
|
* and apps may refer to the host by that name
|
||||||
|
*/
|
||||||
|
opal_argv_append_nosize(&node->alias, nodename);
|
||||||
|
/* unpack and store the provided aliases */
|
||||||
|
idx = 1;
|
||||||
|
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &naliases, &idx, OPAL_UINT8))) {
|
||||||
|
ORTE_ERROR_LOG(rc);
|
||||||
|
orted_failed_launch = true;
|
||||||
|
goto CLEANUP;
|
||||||
|
}
|
||||||
|
for (ni=0; ni < naliases; ni++) {
|
||||||
|
idx = 1;
|
||||||
|
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &alias, &idx, OPAL_STRING))) {
|
||||||
|
ORTE_ERROR_LOG(rc);
|
||||||
|
orted_failed_launch = true;
|
||||||
|
goto CLEANUP;
|
||||||
|
}
|
||||||
|
opal_argv_append_nosize(&node->alias, alias);
|
||||||
|
free(alias);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#if OPAL_HAVE_HWLOC
|
#if OPAL_HAVE_HWLOC
|
||||||
/* store the local resources for that node */
|
/* store the local resources for that node */
|
||||||
if (1 == dname.vpid || orte_hetero_nodes) {
|
if (1 == dname.vpid || orte_hetero_nodes) {
|
||||||
@ -741,16 +782,6 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
|
|||||||
bool found;
|
bool found;
|
||||||
|
|
||||||
idx=1;
|
idx=1;
|
||||||
node = daemon->node;
|
|
||||||
if (NULL == node) {
|
|
||||||
/* this shouldn't happen - it indicates an error in the
|
|
||||||
* prior node matching logic, so report it and error out
|
|
||||||
*/
|
|
||||||
orte_show_help("help-plm-base.txt", "daemon-no-assigned-node", true,
|
|
||||||
ORTE_NAME_PRINT(&daemon->name), nodename);
|
|
||||||
orted_failed_launch = true;
|
|
||||||
goto CLEANUP;
|
|
||||||
}
|
|
||||||
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &topo, &idx, OPAL_HWLOC_TOPO))) {
|
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &topo, &idx, OPAL_HWLOC_TOPO))) {
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
orted_failed_launch = true;
|
orted_failed_launch = true;
|
||||||
|
@ -303,6 +303,15 @@ int orte_routed_base_register_sync(bool setup)
|
|||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* setup to receive the response */
|
||||||
|
sync_waiting = true;
|
||||||
|
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_SYNC,
|
||||||
|
ORTE_RML_NON_PERSISTENT, report_sync, NULL);
|
||||||
|
if (rc != ORTE_SUCCESS && rc != ORTE_ERR_NOT_IMPLEMENTED) {
|
||||||
|
ORTE_ERROR_LOG(rc);
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
/* send the sync command to our daemon */
|
/* send the sync command to our daemon */
|
||||||
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_DAEMON, buffer,
|
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_DAEMON, buffer,
|
||||||
ORTE_RML_TAG_DAEMON, 0,
|
ORTE_RML_TAG_DAEMON, 0,
|
||||||
@ -311,28 +320,20 @@ int orte_routed_base_register_sync(bool setup)
|
|||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
OPAL_OUTPUT_VERBOSE((5, orte_routed_base_output,
|
||||||
|
"%s registering sync waiting for ack",
|
||||||
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||||
|
|
||||||
|
|
||||||
/* get the ack - need this to ensure that the sync communication
|
/* get the ack - need this to ensure that the sync communication
|
||||||
* gets serviced by the event library on the orted prior to the
|
* gets serviced by the event library on the orted prior to the
|
||||||
* process exiting
|
* process exiting
|
||||||
*/
|
*/
|
||||||
OPAL_OUTPUT_VERBOSE((5, orte_routed_base_output,
|
|
||||||
"%s registering sync waiting for ack",
|
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
|
||||||
sync_waiting = true;
|
|
||||||
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_SYNC,
|
|
||||||
ORTE_RML_NON_PERSISTENT, report_sync, NULL);
|
|
||||||
if (rc != ORTE_SUCCESS && rc != ORTE_ERR_NOT_IMPLEMENTED) {
|
|
||||||
ORTE_ERROR_LOG(rc);
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* it is okay to block here as we are -not- in an event */
|
|
||||||
ORTE_WAIT_FOR_COMPLETION(sync_waiting);
|
ORTE_WAIT_FOR_COMPLETION(sync_waiting);
|
||||||
|
|
||||||
OPAL_OUTPUT_VERBOSE((5, orte_routed_base_output,
|
OPAL_OUTPUT_VERBOSE((5, orte_routed_base_output,
|
||||||
"%s registering sync ack recvd",
|
"%s registering sync ack recvd",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||||
|
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -52,6 +52,7 @@
|
|||||||
#include "opal/mca/base/base.h"
|
#include "opal/mca/base/base.h"
|
||||||
#include "opal/util/output.h"
|
#include "opal/util/output.h"
|
||||||
#include "opal/util/cmd_line.h"
|
#include "opal/util/cmd_line.h"
|
||||||
|
#include "opal/util/if.h"
|
||||||
#include "opal/util/opal_environ.h"
|
#include "opal/util/opal_environ.h"
|
||||||
#include "opal/util/os_path.h"
|
#include "opal/util/os_path.h"
|
||||||
#include "opal/util/printf.h"
|
#include "opal/util/printf.h"
|
||||||
@ -704,6 +705,27 @@ int orte_daemon(int argc, char *argv[])
|
|||||||
/* include our node name */
|
/* include our node name */
|
||||||
opal_dss.pack(buffer, &orte_process_info.nodename, 1, OPAL_STRING);
|
opal_dss.pack(buffer, &orte_process_info.nodename, 1, OPAL_STRING);
|
||||||
|
|
||||||
|
/* if requested, include any non-loopback aliases for this node */
|
||||||
|
if (orte_retain_aliases) {
|
||||||
|
char **aliases=NULL;
|
||||||
|
uint8_t naliases, ni;
|
||||||
|
opal_ifgetaliases(&aliases);
|
||||||
|
naliases = opal_argv_count(aliases);
|
||||||
|
if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &naliases, 1, OPAL_UINT8))) {
|
||||||
|
ORTE_ERROR_LOG(ret);
|
||||||
|
OBJ_RELEASE(buffer);
|
||||||
|
goto DONE;
|
||||||
|
}
|
||||||
|
for (ni=0; ni < naliases; ni++) {
|
||||||
|
if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &aliases[ni], 1, OPAL_STRING))) {
|
||||||
|
ORTE_ERROR_LOG(ret);
|
||||||
|
OBJ_RELEASE(buffer);
|
||||||
|
goto DONE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
opal_argv_free(aliases);
|
||||||
|
}
|
||||||
|
|
||||||
#if OPAL_HAVE_HWLOC
|
#if OPAL_HAVE_HWLOC
|
||||||
/* add the local topology */
|
/* add the local topology */
|
||||||
if (NULL != opal_hwloc_topology &&
|
if (NULL != opal_hwloc_topology &&
|
||||||
|
@ -80,6 +80,8 @@ bool orte_use_common_port = false;
|
|||||||
bool orte_keep_fqdn_hostnames = false;
|
bool orte_keep_fqdn_hostnames = false;
|
||||||
bool orte_have_fqdn_allocation = false;
|
bool orte_have_fqdn_allocation = false;
|
||||||
bool orte_show_resolved_nodenames;
|
bool orte_show_resolved_nodenames;
|
||||||
|
bool orte_retain_aliases;
|
||||||
|
|
||||||
int orted_debug_failure;
|
int orted_debug_failure;
|
||||||
int orted_debug_failure_delay;
|
int orted_debug_failure_delay;
|
||||||
bool orte_homogeneous_nodes = false;
|
bool orte_homogeneous_nodes = false;
|
||||||
|
@ -603,14 +603,21 @@ ORTE_DECLSPEC extern char *orte_oob_static_ports;
|
|||||||
ORTE_DECLSPEC extern bool orte_standalone_operation;
|
ORTE_DECLSPEC extern bool orte_standalone_operation;
|
||||||
ORTE_DECLSPEC extern bool orte_use_common_port;
|
ORTE_DECLSPEC extern bool orte_use_common_port;
|
||||||
|
|
||||||
|
/* nodename flags */
|
||||||
ORTE_DECLSPEC extern bool orte_keep_fqdn_hostnames;
|
ORTE_DECLSPEC extern bool orte_keep_fqdn_hostnames;
|
||||||
ORTE_DECLSPEC extern bool orte_have_fqdn_allocation;
|
ORTE_DECLSPEC extern bool orte_have_fqdn_allocation;
|
||||||
ORTE_DECLSPEC extern bool orte_show_resolved_nodenames;
|
ORTE_DECLSPEC extern bool orte_show_resolved_nodenames;
|
||||||
|
ORTE_DECLSPEC extern bool orte_retain_aliases;
|
||||||
|
|
||||||
|
/* debug flags */
|
||||||
ORTE_DECLSPEC extern int orted_debug_failure;
|
ORTE_DECLSPEC extern int orted_debug_failure;
|
||||||
ORTE_DECLSPEC extern int orted_debug_failure_delay;
|
ORTE_DECLSPEC extern int orted_debug_failure_delay;
|
||||||
|
|
||||||
|
/* homogeneity flags */
|
||||||
ORTE_DECLSPEC extern bool orte_homogeneous_nodes;
|
ORTE_DECLSPEC extern bool orte_homogeneous_nodes;
|
||||||
ORTE_DECLSPEC extern bool orte_hetero_apps;
|
ORTE_DECLSPEC extern bool orte_hetero_apps;
|
||||||
ORTE_DECLSPEC extern bool orte_hetero_nodes;
|
ORTE_DECLSPEC extern bool orte_hetero_nodes;
|
||||||
|
|
||||||
ORTE_DECLSPEC extern bool orte_never_launched;
|
ORTE_DECLSPEC extern bool orte_never_launched;
|
||||||
ORTE_DECLSPEC extern bool orte_devel_level_output;
|
ORTE_DECLSPEC extern bool orte_devel_level_output;
|
||||||
ORTE_DECLSPEC extern bool orte_display_topo_with_map;
|
ORTE_DECLSPEC extern bool orte_display_topo_with_map;
|
||||||
|
@ -345,6 +345,12 @@ int orte_register_params(void)
|
|||||||
false, false, (int)false, &value);
|
false, false, (int)false, &value);
|
||||||
orte_keep_fqdn_hostnames = OPAL_INT_TO_BOOL(value);
|
orte_keep_fqdn_hostnames = OPAL_INT_TO_BOOL(value);
|
||||||
|
|
||||||
|
/* whether or not to retain aliases of hostnames */
|
||||||
|
mca_base_param_reg_int_name("orte", "retain_aliases",
|
||||||
|
"Whether or not to keep aliases for host names [default: no]",
|
||||||
|
false, false, (int)false, &value);
|
||||||
|
orte_retain_aliases = OPAL_INT_TO_BOOL(value);
|
||||||
|
|
||||||
/* whether to tag output */
|
/* whether to tag output */
|
||||||
mca_base_param_reg_int_name("orte", "tag_output",
|
mca_base_param_reg_int_name("orte", "tag_output",
|
||||||
"Tag all output with [job,rank] (default: false)",
|
"Tag all output with [job,rank] (default: false)",
|
||||||
|
@ -16,36 +16,47 @@ int main(int argc, char* argv[])
|
|||||||
int rc, idx;
|
int rc, idx;
|
||||||
uint32_t addr, netmask, netaddr;
|
uint32_t addr, netmask, netaddr;
|
||||||
struct sockaddr_in inaddr;
|
struct sockaddr_in inaddr;
|
||||||
|
char **aliases=NULL;
|
||||||
|
|
||||||
if (0 > (rc = opal_init(&argc, &argv))) {
|
if (0 > (rc = opal_init(&argc, &argv))) {
|
||||||
fprintf(stderr, "orte_interface: couldn't init opal - error code %d\n", rc);
|
fprintf(stderr, "orte_interface: couldn't init opal - error code %d\n", rc);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
rc = opal_iftupletoaddr(argv[1], &netaddr, &netmask);
|
if (2 == argc) {
|
||||||
|
rc = opal_iftupletoaddr(argv[1], &netaddr, &netmask);
|
||||||
|
|
||||||
fprintf(stderr, "netaddr %03d.%03d.%03d.%03d netmask %03d.%03d.%03d.%03d rc %d\n",
|
fprintf(stderr, "netaddr %03d.%03d.%03d.%03d netmask %03d.%03d.%03d.%03d rc %d\n",
|
||||||
OPAL_IF_FORMAT_ADDR(netaddr), OPAL_IF_FORMAT_ADDR(netmask), rc);
|
OPAL_IF_FORMAT_ADDR(netaddr), OPAL_IF_FORMAT_ADDR(netmask), rc);
|
||||||
|
|
||||||
/* search for a matching interface - take the first one within the returned scope */
|
/* search for a matching interface - take the first one within the returned scope */
|
||||||
idx = opal_ifbegin();
|
idx = opal_ifbegin();
|
||||||
while (0 < idx) {
|
while (0 < idx) {
|
||||||
/* ignore the loopback interface */
|
/* ignore the loopback interface */
|
||||||
if (opal_ifisloopback(idx)) {
|
if (opal_ifisloopback(idx)) {
|
||||||
fprintf(stderr, "LOOPBACK IGNORED\n");
|
fprintf(stderr, "LOOPBACK IGNORED\n");
|
||||||
|
idx = opal_ifnext(idx);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (0 != (rc = opal_ifindextoaddr(idx, (struct sockaddr*)&inaddr, sizeof(inaddr)))) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
addr = ntohl(inaddr.sin_addr.s_addr);
|
||||||
|
fprintf(stderr, "checking netaddr %03d.%03d.%03d.%03d addr %03d.%03d.%03d.%03d netmask %03d.%03d.%03d.%03d rc %d\n",
|
||||||
|
OPAL_IF_FORMAT_ADDR(netaddr), OPAL_IF_FORMAT_ADDR(addr), OPAL_IF_FORMAT_ADDR(netmask), rc);
|
||||||
|
if (netaddr == (addr & netmask)) {
|
||||||
|
fprintf(stderr, "MATCH FOUND\n");
|
||||||
|
}
|
||||||
idx = opal_ifnext(idx);
|
idx = opal_ifnext(idx);
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
if (0 != (rc = opal_ifindextoaddr(idx, (struct sockaddr*)&inaddr, sizeof(inaddr)))) {
|
}
|
||||||
break;
|
|
||||||
}
|
/* check the aliases */
|
||||||
addr = ntohl(inaddr.sin_addr.s_addr);
|
opal_ifgetaliases(&aliases);
|
||||||
fprintf(stderr, "checking netaddr %03d.%03d.%03d.%03d addr %03d.%03d.%03d.%03d netmask %03d.%03d.%03d.%03d rc %d\n",
|
idx = 0;
|
||||||
OPAL_IF_FORMAT_ADDR(netaddr), OPAL_IF_FORMAT_ADDR(addr), OPAL_IF_FORMAT_ADDR(netmask), rc);
|
while (NULL != aliases[idx]) {
|
||||||
if (netaddr == (addr & netmask)) {
|
fprintf(stderr, "alias: %s\n", aliases[idx]);
|
||||||
fprintf(stderr, "MATCH FOUND\n");
|
idx++;
|
||||||
}
|
|
||||||
idx = opal_ifnext(idx);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
opal_finalize();
|
opal_finalize();
|
||||||
|
@ -311,6 +311,22 @@ int orte_util_encode_nodemap(opal_byte_object_t *boptr, bool update)
|
|||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/* if requested, pack any aliases */
|
||||||
|
if (orte_retain_aliases) {
|
||||||
|
uint8_t naliases, ni;
|
||||||
|
naliases = opal_argv_count(node->alias);
|
||||||
|
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &naliases, 1, OPAL_UINT8))) {
|
||||||
|
ORTE_ERROR_LOG(rc);
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
for (ni=0; ni < naliases; ni++) {
|
||||||
|
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &node->alias[ni], 1, OPAL_STRING))) {
|
||||||
|
ORTE_ERROR_LOG(rc);
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* pack the oversubscribed flag */
|
/* pack the oversubscribed flag */
|
||||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &node->oversubscribed, 1, OPAL_UINT8))) {
|
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &node->oversubscribed, 1, OPAL_UINT8))) {
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
@ -366,10 +382,17 @@ int orte_util_decode_nodemap(opal_byte_object_t *bo)
|
|||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
/* now store a direct reference so we can quickly lookup the daemon from a hostname */
|
||||||
|
if (ORTE_SUCCESS != (rc = orte_db.store(ORTE_NAME_WILDCARD, nodename, &daemon.vpid, ORTE_VPID))) {
|
||||||
|
ORTE_ERROR_LOG(rc);
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
OPAL_OUTPUT_VERBOSE((2, orte_nidmap_output,
|
OPAL_OUTPUT_VERBOSE((2, orte_nidmap_output,
|
||||||
"%s orte:util:decode:nidmap daemon %s node %s",
|
"%s orte:util:decode:nidmap daemon %s node %s",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||||
ORTE_VPID_PRINT(daemon.vpid), nodename));
|
ORTE_VPID_PRINT(daemon.vpid), nodename));
|
||||||
|
|
||||||
/* if this is my daemon, then store the data for me too */
|
/* if this is my daemon, then store the data for me too */
|
||||||
if (daemon.vpid == ORTE_PROC_MY_DAEMON->vpid) {
|
if (daemon.vpid == ORTE_PROC_MY_DAEMON->vpid) {
|
||||||
if (ORTE_SUCCESS != (rc = orte_db.store(ORTE_PROC_MY_NAME, ORTE_DB_HOSTNAME, nodename, OPAL_STRING))) {
|
if (ORTE_SUCCESS != (rc = orte_db.store(ORTE_PROC_MY_NAME, ORTE_DB_HOSTNAME, nodename, OPAL_STRING))) {
|
||||||
@ -381,6 +404,31 @@ int orte_util_decode_nodemap(opal_byte_object_t *bo)
|
|||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* if requested, unpack any aliases */
|
||||||
|
if (orte_retain_aliases) {
|
||||||
|
char *alias;
|
||||||
|
uint8_t naliases, ni;
|
||||||
|
n=1;
|
||||||
|
if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &naliases, &n, OPAL_UINT8))) {
|
||||||
|
ORTE_ERROR_LOG(rc);
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
for (ni=0; ni < naliases; ni++) {
|
||||||
|
n=1;
|
||||||
|
if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &alias, &n, OPAL_STRING))) {
|
||||||
|
ORTE_ERROR_LOG(rc);
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
/* store a cross-reference to the daemon for this nodename */
|
||||||
|
if (ORTE_SUCCESS != (rc = orte_db.store(ORTE_NAME_WILDCARD, alias, &daemon.vpid, ORTE_VPID))) {
|
||||||
|
ORTE_ERROR_LOG(rc);
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
free(alias);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* unpack and discard the oversubscribed flag - procs don't need it */
|
/* unpack and discard the oversubscribed flag - procs don't need it */
|
||||||
n=1;
|
n=1;
|
||||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &oversub, &n, OPAL_UINT8))) {
|
if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &oversub, &n, OPAL_UINT8))) {
|
||||||
@ -447,6 +495,25 @@ int orte_util_decode_daemon_nodemap(opal_byte_object_t *bo)
|
|||||||
} else {
|
} else {
|
||||||
free(name);
|
free(name);
|
||||||
}
|
}
|
||||||
|
/* if requested, unpack any aliases */
|
||||||
|
if (orte_retain_aliases) {
|
||||||
|
char *alias;
|
||||||
|
uint8_t naliases, ni;
|
||||||
|
n=1;
|
||||||
|
if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &naliases, &n, OPAL_UINT8))) {
|
||||||
|
ORTE_ERROR_LOG(rc);
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
for (ni=0; ni < naliases; ni++) {
|
||||||
|
n=1;
|
||||||
|
if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &alias, &n, OPAL_STRING))) {
|
||||||
|
ORTE_ERROR_LOG(rc);
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
opal_argv_append_nosize(&node->alias, alias);
|
||||||
|
free(alias);
|
||||||
|
}
|
||||||
|
}
|
||||||
/* unpack the oversubscribed flag */
|
/* unpack the oversubscribed flag */
|
||||||
n=1;
|
n=1;
|
||||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &oversub, &n, OPAL_UINT8))) {
|
if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &oversub, &n, OPAL_UINT8))) {
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user