diff --git a/orte/mca/dfs/app/dfs_app.c b/orte/mca/dfs/app/dfs_app.c index 7b88757a32..a9ebad7909 100644 --- a/orte/mca/dfs/app/dfs_app.c +++ b/orte/mca/dfs/app/dfs_app.c @@ -504,10 +504,9 @@ static void process_opens(int fd, short args, void *cbdata) orte_dfs_request_t *dfs = (orte_dfs_request_t*)cbdata; int rc; opal_buffer_t *buffer; - char *scheme, *host, *filename, *hostname; + char *scheme, *host, *filename; orte_process_name_t daemon; - bool found; - orte_vpid_t v; + orte_vpid_t *v; /* get the scheme to determine if we can process locally or not */ if (NULL == (scheme = opal_uri_get_scheme(dfs->uri))) { @@ -538,8 +537,7 @@ static void process_opens(int fd, short args, void *cbdata) } /* if the host is our own, then treat it as a local file */ - if (NULL == host || - 0 == strcmp(host, orte_process_info.nodename) || + if (0 == strcmp(host, orte_process_info.nodename) || 0 == strcmp(host, "localhost") || opal_ifislocal(host)) { opal_output_verbose(1, orte_dfs_base.output, @@ -554,24 +552,16 @@ static void process_opens(int fd, short args, void *cbdata) /* ident the daemon on that host */ daemon.jobid = ORTE_PROC_MY_DAEMON->jobid; - found = false; - for (v=0; v < orte_process_info.num_daemons; v++) { - daemon.vpid = v; - /* fetch the hostname where this daemon is located */ - if (ORTE_SUCCESS != (rc = orte_db.fetch_pointer(&daemon, ORTE_DB_HOSTNAME, (void**)&hostname, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - goto complete; - } - opal_output(0, "%s GOT HOST %s HOSTNAME %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), host, hostname); - if (0 == strcmp(host, hostname)) { - found = true; - break; - } - } - if (!found) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + /* fetch the daemon for this hostname */ + opal_output_verbose(1, orte_dfs_base.output, + "%s looking for daemon on host %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), host); + v = &daemon.vpid; + if (ORTE_SUCCESS != (rc = orte_db.fetch(ORTE_NAME_WILDCARD, host, (void**)&v, ORTE_VPID))) { + ORTE_ERROR_LOG(rc); goto 
complete; } + opal_output_verbose(1, orte_dfs_base.output, "%s file %s on host %s daemon %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), diff --git a/orte/mca/ess/hnp/ess_hnp_module.c b/orte/mca/ess/hnp/ess_hnp_module.c index 869f08a8ba..c6a21d04c9 100644 --- a/orte/mca/ess/hnp/ess_hnp_module.c +++ b/orte/mca/ess/hnp/ess_hnp_module.c @@ -550,6 +550,11 @@ static int rte_init(void) node->daemon_launched = true; node->state = ORTE_NODE_STATE_UP; + /* if we are to retain aliases, get ours */ + if (orte_retain_aliases) { + opal_ifgetaliases(&node->alias); + } + /* record that the daemon job is running */ jdata->num_procs = 1; jdata->state = ORTE_JOB_STATE_RUNNING; diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index 6f3cb7d271..7cdf456551 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -674,6 +674,15 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, orted_failed_launch = true; goto CLEANUP; } + if (!orte_have_fqdn_allocation) { + /* remove any domain info */ + if (NULL != (ptr = strchr(nodename, '.'))) { + *ptr = '\0'; + ptr = strdup(nodename); + free(nodename); + nodename = ptr; + } + } OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output, "%s plm:base:orted_report_launch from daemon %s on node %s", @@ -682,15 +691,6 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, /* look this node up, if necessary */ if (!orte_plm_globals.daemon_nodes_assigned_at_launch) { - if (!orte_have_fqdn_allocation) { - /* remove any domain info */ - if (NULL != (ptr = strchr(nodename, '.'))) { - *ptr = '\0'; - ptr = strdup(nodename); - free(nodename); - nodename = ptr; - } - } OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output, "%s plm:base:orted_report_launch attempting to assign daemon %s to node %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -732,7 +732,48 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, 
} } } + + node = daemon->node; + if (NULL == node) { + /* this shouldn't happen - it indicates an error in the + * prior node matching logic, so report it and error out + */ + orte_show_help("help-plm-base.txt", "daemon-no-assigned-node", true, + ORTE_NAME_PRINT(&daemon->name), nodename); + orted_failed_launch = true; + goto CLEANUP; + } + if (orte_retain_aliases) { + char *alias; + uint8_t naliases, ni; + /* first, store the nodename itself as an alias. We do + * this in case the nodename isn't the same as what we + * were given by the allocation. For example, a hostfile + * might contain an IP address instead of the value returned + * by gethostname, yet the daemon will have returned the latter + * and apps may refer to the host by that name + */ + opal_argv_append_nosize(&node->alias, nodename); + /* unpack and store the provided aliases */ + idx = 1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &naliases, &idx, OPAL_UINT8))) { + ORTE_ERROR_LOG(rc); + orted_failed_launch = true; + goto CLEANUP; + } + for (ni=0; ni < naliases; ni++) { + idx = 1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &alias, &idx, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + orted_failed_launch = true; + goto CLEANUP; + } + opal_argv_append_nosize(&node->alias, alias); + free(alias); + } + } + #if OPAL_HAVE_HWLOC /* store the local resources for that node */ if (1 == dname.vpid || orte_hetero_nodes) { @@ -741,16 +782,6 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, bool found; idx=1; - node = daemon->node; - if (NULL == node) { - /* this shouldn't happen - it indicates an error in the - * prior node matching logic, so report it and error out - */ - orte_show_help("help-plm-base.txt", "daemon-no-assigned-node", true, - ORTE_NAME_PRINT(&daemon->name), nodename); - orted_failed_launch = true; - goto CLEANUP; - } if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &topo, &idx, OPAL_HWLOC_TOPO))) { ORTE_ERROR_LOG(rc); orted_failed_launch = true; diff --git 
a/orte/mca/routed/base/routed_base_fns.c b/orte/mca/routed/base/routed_base_fns.c index c1bd2b208d..08c712ffab 100644 --- a/orte/mca/routed/base/routed_base_fns.c +++ b/orte/mca/routed/base/routed_base_fns.c @@ -303,6 +303,15 @@ int orte_routed_base_register_sync(bool setup) return rc; } + /* setup to receive the response */ + sync_waiting = true; + rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_SYNC, + ORTE_RML_NON_PERSISTENT, report_sync, NULL); + if (rc != ORTE_SUCCESS && rc != ORTE_ERR_NOT_IMPLEMENTED) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* send the sync command to our daemon */ if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_DAEMON, buffer, ORTE_RML_TAG_DAEMON, 0, @@ -311,28 +320,20 @@ int orte_routed_base_register_sync(bool setup) return rc; } + OPAL_OUTPUT_VERBOSE((5, orte_routed_base_output, + "%s registering sync waiting for ack", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + + /* get the ack - need this to ensure that the sync communication * gets serviced by the event library on the orted prior to the * process exiting */ - OPAL_OUTPUT_VERBOSE((5, orte_routed_base_output, - "%s registering sync waiting for ack", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - sync_waiting = true; - rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_SYNC, - ORTE_RML_NON_PERSISTENT, report_sync, NULL); - if (rc != ORTE_SUCCESS && rc != ORTE_ERR_NOT_IMPLEMENTED) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* it is okay to block here as we are -not- in an event */ ORTE_WAIT_FOR_COMPLETION(sync_waiting); - OPAL_OUTPUT_VERBOSE((5, orte_routed_base_output, "%s registering sync ack recvd", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - + return ORTE_SUCCESS; } diff --git a/orte/orted/orted_main.c b/orte/orted/orted_main.c index e58f6acbda..a0d067e067 100644 --- a/orte/orted/orted_main.c +++ b/orte/orted/orted_main.c @@ -52,6 +52,7 @@ #include "opal/mca/base/base.h" #include "opal/util/output.h" #include "opal/util/cmd_line.h" +#include "opal/util/if.h" 
#include "opal/util/opal_environ.h" #include "opal/util/os_path.h" #include "opal/util/printf.h" @@ -704,6 +705,27 @@ int orte_daemon(int argc, char *argv[]) /* include our node name */ opal_dss.pack(buffer, &orte_process_info.nodename, 1, OPAL_STRING); + /* if requested, include any non-loopback aliases for this node */ + if (orte_retain_aliases) { + char **aliases=NULL; + uint8_t naliases, ni; + opal_ifgetaliases(&aliases); + naliases = opal_argv_count(aliases); + if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &naliases, 1, OPAL_UINT8))) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(buffer); + goto DONE; + } + for (ni=0; ni < naliases; ni++) { + if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &aliases[ni], 1, OPAL_STRING))) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(buffer); + goto DONE; + } + } + opal_argv_free(aliases); + } + #if OPAL_HAVE_HWLOC /* add the local topology */ if (NULL != opal_hwloc_topology && diff --git a/orte/runtime/orte_globals.c b/orte/runtime/orte_globals.c index d6d66d89d2..e8c8d07efa 100644 --- a/orte/runtime/orte_globals.c +++ b/orte/runtime/orte_globals.c @@ -80,6 +80,8 @@ bool orte_use_common_port = false; bool orte_keep_fqdn_hostnames = false; bool orte_have_fqdn_allocation = false; bool orte_show_resolved_nodenames; +bool orte_retain_aliases; + int orted_debug_failure; int orted_debug_failure_delay; bool orte_homogeneous_nodes = false; diff --git a/orte/runtime/orte_globals.h b/orte/runtime/orte_globals.h index 9a62503c64..3e31c3dd13 100644 --- a/orte/runtime/orte_globals.h +++ b/orte/runtime/orte_globals.h @@ -603,14 +603,21 @@ ORTE_DECLSPEC extern char *orte_oob_static_ports; ORTE_DECLSPEC extern bool orte_standalone_operation; ORTE_DECLSPEC extern bool orte_use_common_port; +/* nodename flags */ ORTE_DECLSPEC extern bool orte_keep_fqdn_hostnames; ORTE_DECLSPEC extern bool orte_have_fqdn_allocation; ORTE_DECLSPEC extern bool orte_show_resolved_nodenames; +ORTE_DECLSPEC extern bool orte_retain_aliases; + +/* debug flags */ ORTE_DECLSPEC 
extern int orted_debug_failure; ORTE_DECLSPEC extern int orted_debug_failure_delay; + +/* homogeneity flags */ ORTE_DECLSPEC extern bool orte_homogeneous_nodes; ORTE_DECLSPEC extern bool orte_hetero_apps; ORTE_DECLSPEC extern bool orte_hetero_nodes; + ORTE_DECLSPEC extern bool orte_never_launched; ORTE_DECLSPEC extern bool orte_devel_level_output; ORTE_DECLSPEC extern bool orte_display_topo_with_map; diff --git a/orte/runtime/orte_mca_params.c b/orte/runtime/orte_mca_params.c index 3bb2a1bfbd..eaa50dab25 100644 --- a/orte/runtime/orte_mca_params.c +++ b/orte/runtime/orte_mca_params.c @@ -345,6 +345,12 @@ int orte_register_params(void) false, false, (int)false, &value); orte_keep_fqdn_hostnames = OPAL_INT_TO_BOOL(value); + /* whether or not to retain aliases of hostnames */ + mca_base_param_reg_int_name("orte", "retain_aliases", + "Whether or not to keep aliases for host names [default: no]", + false, false, (int)false, &value); + orte_retain_aliases = OPAL_INT_TO_BOOL(value); + /* whether to tag output */ mca_base_param_reg_int_name("orte", "tag_output", "Tag all output with [job,rank] (default: false)", diff --git a/orte/test/system/opal_interface.c b/orte/test/system/opal_interface.c index 081439432c..0627edca9e 100644 --- a/orte/test/system/opal_interface.c +++ b/orte/test/system/opal_interface.c @@ -16,36 +16,47 @@ int main(int argc, char* argv[]) int rc, idx; uint32_t addr, netmask, netaddr; struct sockaddr_in inaddr; + char **aliases=NULL; if (0 > (rc = opal_init(&argc, &argv))) { fprintf(stderr, "orte_interface: couldn't init opal - error code %d\n", rc); return rc; } - rc = opal_iftupletoaddr(argv[1], &netaddr, &netmask); + if (2 == argc) { + rc = opal_iftupletoaddr(argv[1], &netaddr, &netmask); - fprintf(stderr, "netaddr %03d.%03d.%03d.%03d netmask %03d.%03d.%03d.%03d rc %d\n", - OPAL_IF_FORMAT_ADDR(netaddr), OPAL_IF_FORMAT_ADDR(netmask), rc); + fprintf(stderr, "netaddr %03d.%03d.%03d.%03d netmask %03d.%03d.%03d.%03d rc %d\n", +
OPAL_IF_FORMAT_ADDR(netaddr), OPAL_IF_FORMAT_ADDR(netmask), rc); - /* search for a matching interface - take the first one within the returned scope */ - idx = opal_ifbegin(); - while (0 < idx) { - /* ignore the loopback interface */ - if (opal_ifisloopback(idx)) { - fprintf(stderr, "LOOPBACK IGNORED\n"); + /* search for a matching interface - take the first one within the returned scope */ + idx = opal_ifbegin(); + while (0 < idx) { + /* ignore the loopback interface */ + if (opal_ifisloopback(idx)) { + fprintf(stderr, "LOOPBACK IGNORED\n"); + idx = opal_ifnext(idx); + continue; + } + if (0 != (rc = opal_ifindextoaddr(idx, (struct sockaddr*)&inaddr, sizeof(inaddr)))) { + break; + } + addr = ntohl(inaddr.sin_addr.s_addr); + fprintf(stderr, "checking netaddr %03d.%03d.%03d.%03d addr %03d.%03d.%03d.%03d netmask %03d.%03d.%03d.%03d rc %d\n", + OPAL_IF_FORMAT_ADDR(netaddr), OPAL_IF_FORMAT_ADDR(addr), OPAL_IF_FORMAT_ADDR(netmask), rc); + if (netaddr == (addr & netmask)) { + fprintf(stderr, "MATCH FOUND\n"); + } idx = opal_ifnext(idx); - continue; } - if (0 != (rc = opal_ifindextoaddr(idx, (struct sockaddr*)&inaddr, sizeof(inaddr)))) { - break; - } - addr = ntohl(inaddr.sin_addr.s_addr); - fprintf(stderr, "checking netaddr %03d.%03d.%03d.%03d addr %03d.%03d.%03d.%03d netmask %03d.%03d.%03d.%03d rc %d\n", - OPAL_IF_FORMAT_ADDR(netaddr), OPAL_IF_FORMAT_ADDR(addr), OPAL_IF_FORMAT_ADDR(netmask), rc); - if (netaddr == (addr & netmask)) { - fprintf(stderr, "MATCH FOUND\n"); - } - idx = opal_ifnext(idx); + } + + /* check the aliases */ + opal_ifgetaliases(&aliases); + idx = 0; + while (NULL != aliases[idx]) { + fprintf(stderr, "alias: %s\n", aliases[idx]); + idx++; } opal_finalize(); diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c index 597b1bbb45..3b8bf508c3 100644 --- a/orte/util/nidmap.c +++ b/orte/util/nidmap.c @@ -311,6 +311,22 @@ int orte_util_encode_nodemap(opal_byte_object_t *boptr, bool update) return rc; } } + /* if requested, pack any aliases */ + if 
(orte_retain_aliases) { + uint8_t naliases, ni; + naliases = opal_argv_count(node->alias); + if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &naliases, 1, OPAL_UINT8))) { + ORTE_ERROR_LOG(rc); + return rc; + } + for (ni=0; ni < naliases; ni++) { + if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &node->alias[ni], 1, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + return rc; + } + } + } + /* pack the oversubscribed flag */ if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &node->oversubscribed, 1, OPAL_UINT8))) { ORTE_ERROR_LOG(rc); @@ -366,10 +382,17 @@ int orte_util_decode_nodemap(opal_byte_object_t *bo) ORTE_ERROR_LOG(rc); return rc; } + /* now store a direct reference so we can quickly lookup the daemon from a hostname */ + if (ORTE_SUCCESS != (rc = orte_db.store(ORTE_NAME_WILDCARD, nodename, &daemon.vpid, ORTE_VPID))) { + ORTE_ERROR_LOG(rc); + return rc; + } + OPAL_OUTPUT_VERBOSE((2, orte_nidmap_output, "%s orte:util:decode:nidmap daemon %s node %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_VPID_PRINT(daemon.vpid), nodename)); + /* if this is my daemon, then store the data for me too */ if (daemon.vpid == ORTE_PROC_MY_DAEMON->vpid) { if (ORTE_SUCCESS != (rc = orte_db.store(ORTE_PROC_MY_NAME, ORTE_DB_HOSTNAME, nodename, OPAL_STRING))) { @@ -381,6 +404,31 @@ int orte_util_decode_nodemap(opal_byte_object_t *bo) return rc; } } + + /* if requested, unpack any aliases */ + if (orte_retain_aliases) { + char *alias; + uint8_t naliases, ni; + n=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &naliases, &n, OPAL_UINT8))) { + ORTE_ERROR_LOG(rc); + return rc; + } + for (ni=0; ni < naliases; ni++) { + n=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &alias, &n, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* store a cross-reference to the daemon for this nodename */ + if (ORTE_SUCCESS != (rc = orte_db.store(ORTE_NAME_WILDCARD, alias, &daemon.vpid, ORTE_VPID))) { + ORTE_ERROR_LOG(rc); + return rc; + } + free(alias); + } + } + /* unpack and discard the oversubscribed 
flag - procs don't need it */ n=1; if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &oversub, &n, OPAL_UINT8))) { @@ -447,6 +495,25 @@ int orte_util_decode_daemon_nodemap(opal_byte_object_t *bo) } else { free(name); } + /* if requested, unpack any aliases */ + if (orte_retain_aliases) { + char *alias; + uint8_t naliases, ni; + n=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &naliases, &n, OPAL_UINT8))) { + ORTE_ERROR_LOG(rc); + return rc; + } + for (ni=0; ni < naliases; ni++) { + n=1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &alias, &n, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + return rc; + } + opal_argv_append_nosize(&node->alias, alias); + free(alias); + } + } /* unpack the oversubscribed flag */ n=1; if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &oversub, &n, OPAL_UINT8))) {