From 368684bd63be5aed33ccfff4501e47ffc07e392d Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 17 Jan 2017 21:02:59 -0800 Subject: [PATCH] Revert e9bc293 and try a different approach for scalably dealing with hetero clusters. Have each orted send back its topo "signature". If mpirun detects that this signature has not been seen before, then ask for that daemon to send back its full topology description. This allows the system to only get the topology once for each unique topo in the cluster. Clean up a typo, and remove the no-longer-needed MCA params for hetero nodes and hetero apps. Hetero nodes will always be automatically detected. We don't support a mix of 32-bit and 64-bit apps. Modify the orte_node_t to use orte_topology_t instead of hwloc_topology_t, updating all the places that use it. Ensure that we properly update topology when we see a different one on a compute node. Signed-off-by: Ralph Castain --- orte/mca/ess/base/ess_base_std_orted.c | 8 +- orte/mca/ess/hnp/ess_hnp_module.c | 28 +- orte/mca/odls/odls_types.h | 5 +- orte/mca/plm/base/plm_base_launch_support.c | 385 +++++++++------- orte/mca/plm/base/plm_base_receive.c | 12 +- orte/mca/plm/base/plm_private.h | 4 + orte/mca/ras/simulator/ras_sim_module.c | 5 +- orte/mca/rmaps/base/rmaps_base_binding.c | 50 +- orte/mca/rmaps/base/rmaps_base_map_job.c | 6 +- orte/mca/rmaps/base/rmaps_base_ranking.c | 14 +- orte/mca/rmaps/mindist/rmaps_mindist_module.c | 30 +- orte/mca/rmaps/ppr/rmaps_ppr.c | 24 +- orte/mca/rmaps/rank_file/rmaps_rank_file.c | 6 +- orte/mca/rmaps/round_robin/rmaps_rr_mappers.c | 41 +- orte/mca/rmaps/seq/rmaps_seq.c | 18 +- orte/mca/rmaps/staged/Makefile.am | 35 -- orte/mca/rmaps/staged/owner.txt | 7 - orte/mca/rmaps/staged/rmaps_staged.c | 436 ------------------ orte/mca/rmaps/staged/rmaps_staged.h | 25 - .../mca/rmaps/staged/rmaps_staged_component.c | 73 --- orte/mca/rml/rml_types.h | 7 +- orte/mca/schizo/ompi/schizo_ompi.c | 11 +- orte/mca/state/novm/state_novm.c | 4 +- 
orte/orted/orted_comm.c | 44 +- orte/orted/orted_main.c | 50 +- .../data_type_support/orte_dt_print_fns.c | 13 +- orte/runtime/orte_globals.c | 3 - orte/runtime/orte_globals.h | 24 +- orte/runtime/orte_mca_params.c | 23 - 29 files changed, 417 insertions(+), 974 deletions(-) delete mode 100644 orte/mca/rmaps/staged/Makefile.am delete mode 100644 orte/mca/rmaps/staged/owner.txt delete mode 100644 orte/mca/rmaps/staged/rmaps_staged.c delete mode 100644 orte/mca/rmaps/staged/rmaps_staged.h delete mode 100644 orte/mca/rmaps/staged/rmaps_staged_component.c diff --git a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c index 374d1dbcec..6233032a34 100644 --- a/orte/mca/ess/base/ess_base_std_orted.c +++ b/orte/mca/ess/base/ess_base_std_orted.c @@ -14,7 +14,7 @@ * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -335,7 +335,11 @@ int orte_ess_base_orted_setup(char **hosts) node->name = strdup(orte_process_info.nodename); node->index = opal_pointer_array_set_item(orte_node_pool, ORTE_PROC_MY_NAME->vpid, node); /* point our topology to the one detected locally */ - node->topology = opal_hwloc_topology; + node->topology = OBJ_NEW(orte_topology_t); + node->topology->sig = strdup(orte_topo_signature); + node->topology->topo = opal_hwloc_topology; + /* add it to the array of known ones */ + opal_pointer_array_add(orte_node_topologies, node->topology); /* create and store a proc object for us */ proc = OBJ_NEW(orte_proc_t); diff --git a/orte/mca/ess/hnp/ess_hnp_module.c b/orte/mca/ess/hnp/ess_hnp_module.c index 66f15fea79..6327f242e5 100644 --- a/orte/mca/ess/hnp/ess_hnp_module.c +++ b/orte/mca/ess/hnp/ess_hnp_module.c @@ -215,14 +215,6 @@ static int rte_init(void) goto error; } } - /* generate the signature */ - orte_topo_signature = opal_hwloc_base_get_topo_signature(opal_hwloc_topology); - - if (15 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) { - opal_output(0, "%s Topology Info:", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - opal_dss.dump(0, opal_hwloc_topology, OPAL_HWLOC_TOPO); - } - /* if we are using xml for output, put an mpirun start tag */ if (orte_xml_output) { @@ -437,12 +429,6 @@ static int rte_init(void) node->name = strdup(orte_process_info.nodename); node->index = opal_pointer_array_set_item(orte_node_pool, 0, node); - /* add it to the array of known topologies */ - t = OBJ_NEW(orte_topology_t); - t->topo = opal_hwloc_topology; - t->sig = strdup(orte_topo_signature); - opal_pointer_array_add(orte_node_topologies, t); - /* create and store a proc object for us */ proc = OBJ_NEW(orte_proc_t); proc->name.jobid = ORTE_PROC_MY_NAME->jobid; @@ -521,7 +507,19 @@ static int rte_init(void) * will have reset our topology. 
Ensure we always get the right * one by setting our node topology afterwards */ - node->topology = opal_hwloc_topology; + /* add it to the array of known topologies */ + t = OBJ_NEW(orte_topology_t); + t->topo = opal_hwloc_topology; + /* generate the signature */ + orte_topo_signature = opal_hwloc_base_get_topo_signature(opal_hwloc_topology); + t->sig = strdup(orte_topo_signature); + opal_pointer_array_add(orte_node_topologies, t); + node->topology = t; + if (15 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) { + opal_output(0, "%s Topology Info:", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + opal_dss.dump(0, opal_hwloc_topology, OPAL_HWLOC_TOPO); + } + /* init the hash table, if necessary */ if (NULL == orte_coprocessors) { diff --git a/orte/mca/odls/odls_types.h b/orte/mca/odls/odls_types.h index 436fc31f75..82cef3ff4c 100644 --- a/orte/mca/odls/odls_types.h +++ b/orte/mca/odls/odls_types.h @@ -12,7 +12,7 @@ * Copyright (c) 2011-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -86,6 +86,9 @@ typedef uint8_t orte_daemon_cmd_flag_t; /* for memory profiling */ #define ORTE_DAEMON_GET_MEMPROFILE (orte_daemon_cmd_flag_t) 32 +/* request full topology string */ +#define ORTE_DAEMON_REPORT_TOPOLOGY_CMD (orte_daemon_cmd_flag_t) 33 + /* * Struct written up the pipe from the child to the parent. 
*/ diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index c415cb7323..07a45e667d 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -84,25 +84,25 @@ void orte_plm_base_set_slots(orte_node_t *node) { if (0 == strncmp(orte_set_slots, "cores", strlen(orte_set_slots))) { - node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology, + node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, HWLOC_OBJ_CORE, 0, OPAL_HWLOC_LOGICAL); } else if (0 == strncmp(orte_set_slots, "sockets", strlen(orte_set_slots))) { - if (0 == (node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology, + if (0 == (node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, HWLOC_OBJ_SOCKET, 0, OPAL_HWLOC_LOGICAL))) { /* some systems don't report sockets - in this case, * use numanodes */ - node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology, + node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, HWLOC_OBJ_NODE, 0, OPAL_HWLOC_LOGICAL); } } else if (0 == strncmp(orte_set_slots, "numas", strlen(orte_set_slots))) { - node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology, + node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, HWLOC_OBJ_NODE, 0, OPAL_HWLOC_LOGICAL); } else if (0 == strncmp(orte_set_slots, "hwthreads", strlen(orte_set_slots))) { - node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology, + node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, HWLOC_OBJ_PU, 0, OPAL_HWLOC_LOGICAL); } else { @@ -116,10 +116,8 @@ void orte_plm_base_set_slots(orte_node_t *node) void orte_plm_base_daemons_reported(int fd, short args, void *cbdata) { orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; - hwloc_topology_t t; - orte_job_t *jdata; + orte_topology_t *t; orte_node_t *node; - orte_proc_t *dmn1; int i; /* if we are not launching, then we just assume that all @@ -135,40 
+133,6 @@ void orte_plm_base_daemons_reported(int fd, short args, void *cbdata) node->topology = t; } } - } else if (1 < orte_process_info.num_procs) { - /* if we got back topology info from the first node, then we use - * it as the "standard" for all other nodes unless they sent - * back their own topology */ - - /* find daemon.vpid = 1 */ - jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); - if (NULL == (dmn1 = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, 1))) { - /* something is wrong */ - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - ORTE_FORCED_TERMINATE(ORTE_ERR_NOT_FOUND); - OBJ_RELEASE(caddy); - return; - } - if (NULL == (node = dmn1->node) || - NULL == (t = node->topology)) { - /* something is wrong */ - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - ORTE_FORCED_TERMINATE(ORTE_ERR_NOT_FOUND); - OBJ_RELEASE(caddy); - return; - } - - for (i=1; i < orte_node_pool->size; i++) { - if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { - continue; - } - if (NULL == node->topology) { - OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, - "%s plm:base:setting topo on node %s to that from node %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name, dmn1->node->name)); - node->topology = t; - } - } } /* if this is an unmanaged allocation, then set the default @@ -856,6 +820,164 @@ void orte_plm_base_registered(int fd, short args, void *cbdata) static bool orted_failed_launch; static orte_job_t *jdatorted=NULL; +/* callback for topology reports */ +void orte_plm_base_daemon_topology(int status, orte_process_name_t* sender, + opal_buffer_t *buffer, + orte_rml_tag_t tag, void *cbdata) +{ + hwloc_topology_t topo; + int rc, idx; + char *sig, *coprocessors, **sns; + orte_proc_t *daemon=NULL; + orte_topology_t *t, *t2; + int i; + uint32_t h; + orte_job_t *jdata; + + OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, + "%s plm:base:daemon_topology for daemon %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + 
ORTE_NAME_PRINT(sender))); + + /* get the daemon job, if necessary */ + if (NULL == jdatorted) { + jdatorted = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); + } + if (NULL == (daemon = (orte_proc_t*)opal_pointer_array_get_item(jdatorted->procs, sender->vpid))) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + orted_failed_launch = true; + goto CLEANUP; + } + + /* unpack the topology signature for this node */ + idx=1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &sig, &idx, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + orted_failed_launch = true; + goto CLEANUP; + } + /* find it in the array */ + t = NULL; + for (i=0; i < orte_node_topologies->size; i++) { + if (NULL == (t2 = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, i))) { + continue; + } + /* just check the signature */ + if (0 == strcmp(sig, t2->sig)) { + t = t2; + break; + } + } + if (NULL == t) { + /* should never happen */ + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + orted_failed_launch = true; + goto CLEANUP; + } + + /* unpack the topology */ + idx=1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &topo, &idx, OPAL_HWLOC_TOPO))) { + ORTE_ERROR_LOG(rc); + orted_failed_launch = true; + goto CLEANUP; + } + /* filter the topology as we'll need it that way later */ + opal_hwloc_base_filter_cpus(topo); + /* record the final topology */ + t->topo = topo; + + /* unpack any coprocessors */ + idx=1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &coprocessors, &idx, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + orted_failed_launch = true; + goto CLEANUP; + } + if (NULL != coprocessors) { + /* init the hash table, if necessary */ + if (NULL == orte_coprocessors) { + orte_coprocessors = OBJ_NEW(opal_hash_table_t); + opal_hash_table_init(orte_coprocessors, orte_process_info.num_procs); + } + /* separate the serial numbers of the coprocessors + * on this host + */ + sns = opal_argv_split(coprocessors, ','); + for (idx=0; NULL != sns[idx]; idx++) { + /* compute the hash */ + 
OPAL_HASH_STR(sns[idx], h); + /* mark that this coprocessor is hosted by this node */ + opal_hash_table_set_value_uint32(orte_coprocessors, h, (void*)&daemon->name.vpid); + } + opal_argv_free(sns); + free(coprocessors); + orte_coprocessors_detected = true; + } + /* see if this daemon is on a coprocessor */ + idx=1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &coprocessors, &idx, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + orted_failed_launch = true; + goto CLEANUP; + } + if (NULL != coprocessors) { + if (orte_get_attribute(&daemon->node->attributes, ORTE_NODE_SERIAL_NUMBER, NULL, OPAL_STRING)) { + /* this is not allowed - a coprocessor cannot be host + * to another coprocessor at this time + */ + ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED); + orted_failed_launch = true; + free(coprocessors); + goto CLEANUP; + } + orte_set_attribute(&daemon->node->attributes, ORTE_NODE_SERIAL_NUMBER, ORTE_ATTR_LOCAL, coprocessors, OPAL_STRING); + free(coprocessors); + orte_coprocessors_detected = true; + } + + CLEANUP: + OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, + "%s plm:base:orted_report_launch %s for daemon %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + orted_failed_launch ? 
"failed" : "completed", + ORTE_NAME_PRINT(sender))); + + if (orted_failed_launch) { + ORTE_ACTIVATE_JOB_STATE(jdatorted, ORTE_JOB_STATE_FAILED_TO_START); + return; + } else { + jdatorted->num_reported++; + OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, + "%s plm:base:orted_report_launch recvd %d of %d reported daemons", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + jdatorted->num_reported, jdatorted->num_procs)); + if (jdatorted->num_procs == jdatorted->num_reported) { + bool dvm = true; + uint32_t key; + void *nptr; + jdatorted->state = ORTE_JOB_STATE_DAEMONS_REPORTED; + /* activate the daemons_reported state for all jobs + * whose daemons were launched + */ + rc = opal_hash_table_get_first_key_uint32(orte_job_data, &key, (void **)&jdata, &nptr); + while (OPAL_SUCCESS == rc) { + if (ORTE_PROC_MY_NAME->jobid != jdata->jobid) { + dvm = false; + if (ORTE_JOB_STATE_DAEMONS_LAUNCHED == jdata->state) { + ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_REPORTED); + } + } + rc = opal_hash_table_get_next_key_uint32(orte_job_data, &key, (void **)&jdata, nptr, &nptr); + } + if (dvm) { + /* must be launching a DVM - activate the state */ + ORTE_ACTIVATE_JOB_STATE(jdatorted, ORTE_JOB_STATE_DAEMONS_REPORTED); + } + } + } +} + void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, opal_buffer_t *buffer, orte_rml_tag_t tag, void *cbdata) @@ -867,13 +989,11 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, orte_job_t *jdata; orte_process_name_t dname; opal_buffer_t *relay; - char *coprocessors, **sns, *sig; - uint32_t h; - hwloc_topology_t topo; + char *sig; orte_topology_t *t; int i; bool found; - uint8_t tflag; + orte_daemon_cmd_flag_t cmd = ORTE_DAEMON_REPORT_TOPOLOGY_CMD; /* get the daemon job, if necessary */ if (NULL == jdatorted) { @@ -1024,129 +1144,66 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, free(alias); } - /* store the local resources for that node */ + /* unpack 
the topology signature for that node */ idx=1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &tflag, &idx, OPAL_UINT8))) { + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &sig, &idx, OPAL_STRING))) { ORTE_ERROR_LOG(rc); orted_failed_launch = true; goto CLEANUP; } - if (1 == tflag) { - idx=1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &sig, &idx, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - orted_failed_launch = true; - goto CLEANUP; + OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, + "%s RECEIVED TOPOLOGY SIG %s FROM NODE %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), sig, nodename)); + /* do we already have this topology from some other node? */ + found = false; + for (i=0; i < orte_node_topologies->size; i++) { + if (NULL == (t = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, i))) { + continue; } - idx=1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &topo, &idx, OPAL_HWLOC_TOPO))) { - ORTE_ERROR_LOG(rc); - orted_failed_launch = true; - goto CLEANUP; - } - OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, - "%s RECEIVED TOPOLOGY SIG %s FROM NODE %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), sig, nodename)); - if (10 < opal_output_get_verbosity(orte_plm_base_framework.framework_output)) { - opal_dss.dump(0, topo, OPAL_HWLOC_TOPO); - } - if (1 == dname.vpid || orte_hetero_nodes) { - /* the user has told us that something is different, so just store it */ + /* just check the signature */ + if (0 == strcmp(sig, t->sig)) { OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, - "%s ADDING TOPOLOGY PER USER REQUEST TO NODE %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name)); - t = OBJ_NEW(orte_topology_t); - /* filter the topology as we'll need it that way later */ - opal_hwloc_base_filter_cpus(topo); - t->topo = topo; - t->sig = sig; - opal_pointer_array_add(orte_node_topologies, t); - node->topology = topo; - } else { - /* do we already have this topology from some other node? 
*/ - found = false; - for (i=0; i < orte_node_topologies->size; i++) { - if (NULL == (t = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, i))) { - continue; - } - /* just check the signature */ - if (0 == strcmp(sig, t->sig)) { - OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, - "%s TOPOLOGY ALREADY RECORDED", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - found = true; - node->topology = t->topo; - hwloc_topology_destroy(topo); - free(sig); - break; - } - } - if (!found) { - /* nope - add it */ - OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, - "%s NEW TOPOLOGY - ADDING", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - t = OBJ_NEW(orte_topology_t); - /* filter the topology as we'll need it that way later */ - opal_hwloc_base_filter_cpus(topo); - t->topo = topo; - t->sig = sig; - opal_pointer_array_add(orte_node_topologies, t); - node->topology = topo; - } + "%s TOPOLOGY ALREADY RECORDED", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + found = true; + node->topology = t; + free(sig); + break; } } - - /* unpack any coprocessors */ - idx=1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &coprocessors, &idx, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - orted_failed_launch = true; - goto CLEANUP; - } - if (NULL != coprocessors) { - /* init the hash table, if necessary */ - if (NULL == orte_coprocessors) { - orte_coprocessors = OBJ_NEW(opal_hash_table_t); - opal_hash_table_init(orte_coprocessors, orte_process_info.num_procs); - } - /* separate the serial numbers of the coprocessors - * on this host - */ - sns = opal_argv_split(coprocessors, ','); - for (idx=0; NULL != sns[idx]; idx++) { - /* compute the hash */ - OPAL_HASH_STR(sns[idx], h); - /* mark that this coprocessor is hosted by this node */ - opal_hash_table_set_value_uint32(orte_coprocessors, h, (void*)&node->daemon->name.vpid); - } - opal_argv_free(sns); - free(coprocessors); - orte_coprocessors_detected = true; - } - /* see if this daemon is on a coprocessor */ - 
idx=1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &coprocessors, &idx, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - orted_failed_launch = true; - goto CLEANUP; - } - if (NULL != coprocessors) { - if (orte_get_attribute(&node->attributes, ORTE_NODE_SERIAL_NUMBER, NULL, OPAL_STRING)) { - /* this is not allowed - a coprocessor cannot be host - * to another coprocessor at this time - */ - ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED); + if (!found) { + /* nope - save the signature and request the complete topology from that node */ + OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, + "%s NEW TOPOLOGY - ADDING", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + t = OBJ_NEW(orte_topology_t); + t->sig = sig; + opal_pointer_array_add(orte_node_topologies, t); + node->topology = t; + /* construct the request */ + relay = OBJ_NEW(opal_buffer_t); + if (OPAL_SUCCESS != (rc = opal_dss.pack(relay, &cmd, 1, ORTE_DAEMON_CMD))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(relay); orted_failed_launch = true; - free(coprocessors); goto CLEANUP; } - orte_set_attribute(&node->attributes, ORTE_NODE_SERIAL_NUMBER, ORTE_ATTR_LOCAL, coprocessors, OPAL_STRING); - free(coprocessors); - orte_coprocessors_detected = true; + /* send it */ + orte_rml.send_buffer_nb(orte_mgmt_conduit, + sender, relay, + ORTE_RML_TAG_DAEMON, + orte_rml_send_callback, NULL); + /* we will count this node as completed + * when we get the full topology back */ + if (NULL != nodename) { + free(nodename); + nodename = NULL; + } + idx = 1; + continue; } - CLEANUP: + CLEANUP: OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output, "%s plm:base:orted_report_launch %s for daemon %s at contact %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -1208,7 +1265,6 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, ORTE_RML_TAG_DAEMON, orte_rml_send_callback, NULL); } - } void orte_plm_base_daemon_failed(int st, orte_process_name_t* sender, @@ -1251,7 +1307,7 @@ void orte_plm_base_daemon_failed(int 
st, orte_process_name_t* sender, daemon->state = ORTE_PROC_STATE_FAILED_TO_START; daemon->exit_code = status; - finish: + finish: if (NULL == daemon) { ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); return; @@ -1331,21 +1387,6 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv, opal_argv_append(argc, argv, "orte_report_bindings"); opal_argv_append(argc, argv, "1"); } - /* pass our topology signature */ - opal_argv_append(argc, argv, "--hnp-topo-sig"); - opal_argv_append(argc, argv, orte_topo_signature); - - if (orte_hetero_nodes) { - opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); - opal_argv_append(argc, argv, "orte_hetero_nodes"); - opal_argv_append(argc, argv, "1"); - } - - if (orte_hnp_on_smgmt_node) { - opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); - opal_argv_append(argc, argv, "orte_hnp_on_smgmt_node"); - opal_argv_append(argc, argv, "1"); - } if (orte_map_stddiag_to_stderr) { opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); diff --git a/orte/mca/plm/base/plm_base_receive.c b/orte/mca/plm/base/plm_base_receive.c index 883fa34826..3a783693f4 100644 --- a/orte/mca/plm/base/plm_base_receive.c +++ b/orte/mca/plm/base/plm_base_receive.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2011 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -86,6 +86,10 @@ int orte_plm_base_comm_start(void) ORTE_RML_TAG_REPORT_REMOTE_LAUNCH, ORTE_RML_PERSISTENT, orte_plm_base_daemon_failed, NULL); + orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, + ORTE_RML_TAG_TOPOLOGY_REPORT, + ORTE_RML_PERSISTENT, + orte_plm_base_daemon_topology, NULL); } recv_issued = true; @@ -103,6 +107,12 @@ int orte_plm_base_comm_stop(void) "%s plm:base:receive stop comm", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_PLM); + if (ORTE_PROC_IS_HNP) { + orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ORTED_CALLBACK); + orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_REPORT_REMOTE_LAUNCH); + orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_TOPOLOGY_REPORT); + } recv_issued = false; return ORTE_SUCCESS; diff --git a/orte/mca/plm/base/plm_private.h b/orte/mca/plm/base/plm_private.h index 05d64aec1f..835c6de843 100644 --- a/orte/mca/plm/base/plm_private.h +++ b/orte/mca/plm/base/plm_private.h @@ -11,6 +11,7 @@ * All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -79,6 +80,9 @@ ORTE_DECLSPEC void orte_plm_base_daemon_callback(int status, orte_process_name_t ORTE_DECLSPEC void orte_plm_base_daemon_failed(int status, orte_process_name_t* sender, opal_buffer_t *buffer, orte_rml_tag_t tag, void *cbdata); +ORTE_DECLSPEC void orte_plm_base_daemon_topology(int status, orte_process_name_t* sender, + opal_buffer_t *buffer, + orte_rml_tag_t tag, void *cbdata); ORTE_DECLSPEC int orte_plm_base_create_jobid(orte_job_t *jdata); ORTE_DECLSPEC int orte_plm_base_set_hnp_name(void); diff --git a/orte/mca/ras/simulator/ras_sim_module.c b/orte/mca/ras/simulator/ras_sim_module.c index dcf41e09de..ddeb153a56 100644 --- a/orte/mca/ras/simulator/ras_sim_module.c +++ b/orte/mca/ras/simulator/ras_sim_module.c @@ -3,7 +3,7 @@ * Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Intel, Inc. All rights reserved + * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. * * $COPYRIGHT$ * @@ -266,7 +266,8 @@ static int allocate(orte_job_t *jdata, opal_list_t *nodes) obj = hwloc_get_root_obj(topo); node->slots = opal_hwloc_base_get_npus(topo, obj); } - node->topology = topo; + node->topology = OBJ_NEW(orte_topology_t); + node->topology->topo = topo; opal_output_verbose(1, orte_ras_base_framework.framework_output, "Created Node <%10s> [%3d : %3d]", node->name, node->slots, node->slots_max); diff --git a/orte/mca/rmaps/base/rmaps_base_binding.c b/orte/mca/rmaps/base/rmaps_base_binding.c index 6786da7720..5446e00d7b 100644 --- a/orte/mca/rmaps/base/rmaps_base_binding.c +++ b/orte/mca/rmaps/base/rmaps_base_binding.c @@ -12,7 +12,7 @@ * Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2016 Intel, Inc. 
All rights reserved + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -66,7 +66,7 @@ static void reset_usage(orte_node_t *node, orte_jobid_t jobid) node->name, node->num_procs); /* start by clearing any existing info */ - opal_hwloc_base_clear_usage(node->topology); + opal_hwloc_base_clear_usage(node->topology->topo); /* cycle thru the procs on the node and record * their usage in the topology @@ -176,7 +176,7 @@ static int bind_upwards(orte_job_t *jdata, continue; } /* get its index */ - if (UINT_MAX == (idx = opal_hwloc_base_get_obj_idx(node->topology, obj, OPAL_HWLOC_AVAILABLE))) { + if (UINT_MAX == (idx = opal_hwloc_base_get_obj_idx(node->topology->topo, obj, OPAL_HWLOC_AVAILABLE))) { ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); return ORTE_ERR_SILENT; } @@ -184,7 +184,7 @@ static int bind_upwards(orte_job_t *jdata, data = (opal_hwloc_obj_data_t*)obj->userdata; data->num_bound++; /* get the number of cpus under this location */ - if (0 == (ncpus = opal_hwloc_base_get_npus(node->topology, obj))) { + if (0 == (ncpus = opal_hwloc_base_get_npus(node->topology->topo, obj))) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-available-cpus", true, node->name); return ORTE_ERR_SILENT; } @@ -210,7 +210,7 @@ static int bind_upwards(orte_job_t *jdata, } } /* bind it here */ - cpus = opal_hwloc_base_get_available_cpus(node->topology, obj); + cpus = opal_hwloc_base_get_available_cpus(node->topology->topo, obj); hwloc_bitmap_list_asprintf(&cpu_bitmap, cpus); orte_set_attribute(&proc->attributes, ORTE_PROC_CPU_BITMAP, ORTE_ATTR_GLOBAL, cpu_bitmap, OPAL_STRING); /* record the location */ @@ -287,7 +287,7 @@ static int bind_downwards(orte_job_t *jdata, * or if it is some depth below it, so we have to conduct a bit * of a search. Let hwloc find the min usage one for us. 
*/ - trg_obj = opal_hwloc_base_find_min_bound_target_under_obj(node->topology, locale, + trg_obj = opal_hwloc_base_find_min_bound_target_under_obj(node->topology->topo, locale, target, cache_level); if (NULL == trg_obj) { /* there aren't any such targets under this object */ @@ -310,7 +310,7 @@ static int bind_downwards(orte_job_t *jdata, } trg_obj = nxt_obj; /* get the number of cpus under this location */ - ncpus = opal_hwloc_base_get_npus(node->topology, trg_obj); + ncpus = opal_hwloc_base_get_npus(node->topology->topo, trg_obj); opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "%s GOT %d CPUS", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ncpus); @@ -344,7 +344,7 @@ static int bind_downwards(orte_job_t *jdata, } } /* bind the proc here */ - cpus = opal_hwloc_base_get_available_cpus(node->topology, trg_obj); + cpus = opal_hwloc_base_get_available_cpus(node->topology->topo, trg_obj); hwloc_bitmap_or(totalcpuset, totalcpuset, cpus); /* track total #cpus */ total_cpus += ncpus; @@ -363,13 +363,13 @@ static int bind_downwards(orte_job_t *jdata, if (4 < opal_output_get_verbosity(orte_rmaps_base_framework.framework_output)) { char tmp1[1024], tmp2[1024]; if (OPAL_ERR_NOT_BOUND == opal_hwloc_base_cset2str(tmp1, sizeof(tmp1), - node->topology, totalcpuset)) { + node->topology->topo, totalcpuset)) { opal_output(orte_rmaps_base_framework.framework_output, "%s PROC %s ON %s IS NOT BOUND", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&proc->name), node->name); } else { - opal_hwloc_base_cset2mapstr(tmp2, sizeof(tmp2), node->topology, totalcpuset); + opal_hwloc_base_cset2mapstr(tmp2, sizeof(tmp2), node->topology->topo, totalcpuset); opal_output(orte_rmaps_base_framework.framework_output, "%s BOUND PROC %s[%s] TO %s: %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -418,7 +418,7 @@ static int bind_in_place(orte_job_t *jdata, /* if we don't want to launch, then we are just testing the system, * so ignore questions about support capabilities */ - support = (struct 
hwloc_topology_support*)hwloc_topology_get_support(node->topology); + support = (struct hwloc_topology_support*)hwloc_topology_get_support(node->topology->topo); /* check if topology supports cpubind - have to be careful here * as Linux doesn't currently support thread-level binding. This * may change in the future, though, and it isn't clear how hwloc @@ -461,7 +461,7 @@ static int bind_in_place(orte_job_t *jdata, * on this node, just silently skip it - we will not bind */ if (!OPAL_BINDING_POLICY_IS_SET(map->binding) && - HWLOC_TYPE_DEPTH_UNKNOWN == hwloc_get_type_depth(node->topology, HWLOC_OBJ_CORE)) { + HWLOC_TYPE_DEPTH_UNKNOWN == hwloc_get_type_depth(node->topology->topo, HWLOC_OBJ_CORE)) { opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "Unable to bind-to core by default on node %s as no cores detected", node->name); @@ -490,13 +490,13 @@ static int bind_in_place(orte_job_t *jdata, return ORTE_ERR_SILENT; } /* get the index of this location */ - if (UINT_MAX == (idx = opal_hwloc_base_get_obj_idx(node->topology, locale, OPAL_HWLOC_AVAILABLE))) { + if (UINT_MAX == (idx = opal_hwloc_base_get_obj_idx(node->topology->topo, locale, OPAL_HWLOC_AVAILABLE))) { ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); return ORTE_ERR_SILENT; } data = (opal_hwloc_obj_data_t*)locale->userdata; /* get the number of cpus under this location */ - if (0 == (ncpus = opal_hwloc_base_get_npus(node->topology, locale))) { + if (0 == (ncpus = opal_hwloc_base_get_npus(node->topology->topo, locale))) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-available-cpus", true, node->name); return ORTE_ERR_SILENT; } @@ -511,7 +511,7 @@ static int bind_in_place(orte_job_t *jdata, found = false; while (NULL != (sib = sib->next_cousin)) { data = (opal_hwloc_obj_data_t*)sib->userdata; - ncpus = opal_hwloc_base_get_npus(node->topology, sib); + ncpus = opal_hwloc_base_get_npus(node->topology->topo, sib); if (data->num_bound < ncpus) { found = true; locale = sib; @@ -526,7 +526,7 @@ static 
int bind_in_place(orte_job_t *jdata, sib = locale; while (NULL != (sib = sib->prev_cousin)) { data = (opal_hwloc_obj_data_t*)sib->userdata; - ncpus = opal_hwloc_base_get_npus(node->topology, sib); + ncpus = opal_hwloc_base_get_npus(node->topology->topo, sib); if (data->num_bound < ncpus) { found = true; locale = sib; @@ -563,7 +563,7 @@ static int bind_in_place(orte_job_t *jdata, ORTE_NAME_PRINT(&proc->name), hwloc_obj_type_string(locale->type), idx); /* bind the proc here */ - cpus = opal_hwloc_base_get_available_cpus(node->topology, locale); + cpus = opal_hwloc_base_get_available_cpus(node->topology->topo, locale); hwloc_bitmap_list_asprintf(&cpu_bitmap, cpus); orte_set_attribute(&proc->attributes, ORTE_PROC_CPU_BITMAP, ORTE_ATTR_GLOBAL, cpu_bitmap, OPAL_STRING); /* update the location, in case it changed */ @@ -610,7 +610,7 @@ static int bind_to_cpuset(orte_job_t *jdata) /* if we don't want to launch, then we are just testing the system, * so ignore questions about support capabilities */ - support = (struct hwloc_topology_support*)hwloc_topology_get_support(node->topology); + support = (struct hwloc_topology_support*)hwloc_topology_get_support(node->topology->topo); /* check if topology supports cpubind - have to be careful here * as Linux doesn't currently support thread-level binding. 
This * may change in the future, though, and it isn't clear how hwloc @@ -643,7 +643,7 @@ static int bind_to_cpuset(orte_job_t *jdata) } } } - root = hwloc_get_root_obj(node->topology); + root = hwloc_get_root_obj(node->topology->topo); if (NULL == root->userdata) { /* something went wrong */ ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); @@ -846,7 +846,7 @@ int orte_rmaps_base_compute_bindings(orte_job_t *jdata) /* if we don't want to launch, then we are just testing the system, * so ignore questions about support capabilities */ - support = (struct hwloc_topology_support*)hwloc_topology_get_support(node->topology); + support = (struct hwloc_topology_support*)hwloc_topology_get_support(node->topology->topo); /* check if topology supports cpubind - have to be careful here * as Linux doesn't currently support thread-level binding. This * may change in the future, though, and it isn't clear how hwloc @@ -889,7 +889,7 @@ int orte_rmaps_base_compute_bindings(orte_job_t *jdata) * on this node, just silently skip it - we will not bind */ if (!OPAL_BINDING_POLICY_IS_SET(jdata->map->binding) && - HWLOC_TYPE_DEPTH_UNKNOWN == hwloc_get_type_depth(node->topology, HWLOC_OBJ_CORE)) { + HWLOC_TYPE_DEPTH_UNKNOWN == hwloc_get_type_depth(node->topology->topo, HWLOC_OBJ_CORE)) { opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "Unable to bind-to core by default on node %s as no cores detected", node->name); @@ -913,9 +913,9 @@ int orte_rmaps_base_compute_bindings(orte_job_t *jdata) /* must use a unique function because blasted hwloc * just doesn't deal with caches very well...sigh */ - bind_depth = hwloc_get_cache_type_depth(node->topology, clvl, (hwloc_obj_cache_type_t)-1); + bind_depth = hwloc_get_cache_type_depth(node->topology->topo, clvl, (hwloc_obj_cache_type_t)-1); } else { - bind_depth = hwloc_get_type_depth(node->topology, hwb); + bind_depth = hwloc_get_type_depth(node->topology->topo, hwb); } if (0 > bind_depth) { /* didn't find such an object */ @@ -927,9 +927,9 @@ 
int orte_rmaps_base_compute_bindings(orte_job_t *jdata) /* must use a unique function because blasted hwloc * just doesn't deal with caches very well...sigh */ - map_depth = hwloc_get_cache_type_depth(node->topology, clvm, (hwloc_obj_cache_type_t)-1); + map_depth = hwloc_get_cache_type_depth(node->topology->topo, clvm, (hwloc_obj_cache_type_t)-1); } else { - map_depth = hwloc_get_type_depth(node->topology, hwm); + map_depth = hwloc_get_type_depth(node->topology->topo, hwm); } if (0 > map_depth) { /* didn't find such an object */ diff --git a/orte/mca/rmaps/base/rmaps_base_map_job.c b/orte/mca/rmaps/base/rmaps_base_map_job.c index 7a76ad468b..b7cfcd7cc7 100644 --- a/orte/mca/rmaps/base/rmaps_base_map_job.c +++ b/orte/mca/rmaps/base/rmaps_base_map_job.c @@ -324,7 +324,7 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata) */ if (orte_do_not_launch) { orte_node_t *node; - hwloc_topology_t t0; + orte_topology_t *t0; int i; if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0))) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); @@ -472,7 +472,7 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata) if (NULL == bd) { (void)strncpy(tmp1, "UNBOUND", strlen("UNBOUND")); } else { - if (OPAL_ERR_NOT_BOUND == opal_hwloc_base_cset2mapstr(tmp1, sizeof(tmp1), node->topology, bd->cpuset)) { + if (OPAL_ERR_NOT_BOUND == opal_hwloc_base_cset2mapstr(tmp1, sizeof(tmp1), node->topology->topo, bd->cpuset)) { (void)strncpy(tmp1, "UNBOUND", strlen("UNBOUND")); } } @@ -497,7 +497,7 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata) } procbitmap = NULL; orte_get_attribute(&proc->attributes, ORTE_PROC_CPU_BITMAP, (void**)&procbitmap, OPAL_STRING); - locality = opal_hwloc_base_get_relative_locality(node->topology, + locality = opal_hwloc_base_get_relative_locality(node->topology->topo, p0bitmap, procbitmap); opal_output(orte_clean_output, "\t\t", diff --git a/orte/mca/rmaps/base/rmaps_base_ranking.c 
b/orte/mca/rmaps/base/rmaps_base_ranking.c index f6dda8ece1..b297290a4d 100644 --- a/orte/mca/rmaps/base/rmaps_base_ranking.c +++ b/orte/mca/rmaps/base/rmaps_base_ranking.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -93,7 +93,7 @@ static int rank_span(orte_job_t *jdata, item = opal_list_get_next(item)) { node = (orte_node_t*)item; /* get the number of objects - only consider those we can actually use */ - num_objs = opal_hwloc_base_get_nbobjs_by_type(node->topology, target, + num_objs = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, target, cache_level, OPAL_HWLOC_AVAILABLE); opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:rank_span: found %d objects on node %s with %d procs", @@ -104,7 +104,7 @@ static int rank_span(orte_job_t *jdata, /* for each object */ for (i=0; i < num_objs && cnt < app->num_procs; i++) { - obj = opal_hwloc_base_get_obj_by_type(node->topology, target, + obj = opal_hwloc_base_get_obj_by_type(node->topology->topo, target, cache_level, i, OPAL_HWLOC_AVAILABLE); opal_output_verbose(5, orte_rmaps_base_framework.framework_output, @@ -206,7 +206,7 @@ static int rank_fill(orte_job_t *jdata, item = opal_list_get_next(item)) { node = (orte_node_t*)item; /* get the number of objects - only consider those we can actually use */ - num_objs = opal_hwloc_base_get_nbobjs_by_type(node->topology, target, + num_objs = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, target, cache_level, OPAL_HWLOC_AVAILABLE); opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:rank_fill: found %d objects on node %s with %d procs", @@ -217,7 +217,7 @@ static int rank_fill(orte_job_t *jdata, /* for each 
object */ for (i=0; i < num_objs && cnt < app->num_procs; i++) { - obj = opal_hwloc_base_get_obj_by_type(node->topology, target, + obj = opal_hwloc_base_get_obj_by_type(node->topology->topo, target, cache_level, i, OPAL_HWLOC_AVAILABLE); opal_output_verbose(5, orte_rmaps_base_framework.framework_output, @@ -327,7 +327,7 @@ static int rank_by(orte_job_t *jdata, item = opal_list_get_next(item)) { node = (orte_node_t*)item; /* get the number of objects - only consider those we can actually use */ - num_objs = opal_hwloc_base_get_nbobjs_by_type(node->topology, target, + num_objs = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, target, cache_level, OPAL_HWLOC_AVAILABLE); opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:rank_by: found %d objects on node %s with %d procs", @@ -337,7 +337,7 @@ static int rank_by(orte_job_t *jdata, } /* collect all the objects */ for (i=0; i < num_objs; i++) { - obj = opal_hwloc_base_get_obj_by_type(node->topology, target, + obj = opal_hwloc_base_get_obj_by_type(node->topology->topo, target, cache_level, i, OPAL_HWLOC_AVAILABLE); opal_pointer_array_set_item(&objs, i, obj); } diff --git a/orte/mca/rmaps/mindist/rmaps_mindist_module.c b/orte/mca/rmaps/mindist/rmaps_mindist_module.c index ca05cc26d9..53ce91f71a 100644 --- a/orte/mca/rmaps/mindist/rmaps_mindist_module.c +++ b/orte/mca/rmaps/mindist/rmaps_mindist_module.c @@ -12,7 +12,7 @@ * Copyright (c) 2006-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -196,7 +196,7 @@ static int mindist_map(orte_job_t *jdata) item = opal_list_get_next(item)) { node = (orte_node_t*)item; - if (NULL == node->topology) { + if (NULL == node->topology || NULL == node->topology->topo) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-topology", true, node->name); rc = ORTE_ERR_SILENT; @@ -205,7 +205,7 @@ static int mindist_map(orte_job_t *jdata) /* get the root object as we are not assigning * locale except at the node level */ - obj = hwloc_get_root_obj(node->topology); + obj = hwloc_get_root_obj(node->topology->topo); if (NULL == obj) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-topology", true, node->name); @@ -215,9 +215,9 @@ static int mindist_map(orte_job_t *jdata) /* get the number of available pus */ if (opal_hwloc_use_hwthreads_as_cpus) { - total_npus = opal_hwloc_base_get_nbobjs_by_type(node->topology, HWLOC_OBJ_PU, 0, OPAL_HWLOC_AVAILABLE); + total_npus = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, HWLOC_OBJ_PU, 0, OPAL_HWLOC_AVAILABLE); } else { - total_npus = opal_hwloc_base_get_nbobjs_by_type(node->topology, HWLOC_OBJ_CORE, 0, OPAL_HWLOC_AVAILABLE); + total_npus = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, HWLOC_OBJ_CORE, 0, OPAL_HWLOC_AVAILABLE); } if (bynode) { if (total_npus < num_procs_to_assign) { @@ -236,9 +236,9 @@ static int mindist_map(orte_job_t *jdata) } /* first we need to fill summary object for root with information about nodes * so we call opal_hwloc_base_get_nbobjs_by_type */ - opal_hwloc_base_get_nbobjs_by_type(node->topology, HWLOC_OBJ_NODE, 0, OPAL_HWLOC_AVAILABLE); + opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, HWLOC_OBJ_NODE, 0, OPAL_HWLOC_AVAILABLE); OBJ_CONSTRUCT(&numa_list, opal_list_t); - ret = opal_hwloc_get_sorted_numa_list(node->topology, orte_rmaps_base.device, &numa_list); + ret = opal_hwloc_get_sorted_numa_list(node->topology->topo, orte_rmaps_base.device, &numa_list); if (ret > 1) { 
orte_show_help("help-orte-rmaps-md.txt", "orte-rmaps-mindist:several-devices", true, orte_rmaps_base.device, ret, node->name); @@ -257,11 +257,11 @@ static int mindist_map(orte_job_t *jdata) required = 0; OPAL_LIST_FOREACH(numa, &numa_list, opal_rmaps_numa_node_t) { /* get the hwloc object for this numa */ - if (NULL == (obj = opal_hwloc_base_get_obj_by_type(node->topology, HWLOC_OBJ_NODE, 0, numa->index, OPAL_HWLOC_AVAILABLE))) { + if (NULL == (obj = opal_hwloc_base_get_obj_by_type(node->topology->topo, HWLOC_OBJ_NODE, 0, numa->index, OPAL_HWLOC_AVAILABLE))) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } - npus = opal_hwloc_base_get_npus(node->topology, obj); + npus = opal_hwloc_base_get_npus(node->topology->topo, obj); if (bynode) { required = ((num_procs_to_assign-j) > npus) ? (npus) : (num_procs_to_assign-j); } else { @@ -296,7 +296,7 @@ static int mindist_map(orte_job_t *jdata) j, node->name); } } else { - if (hwloc_get_nbobjs_by_type(node->topology, HWLOC_OBJ_SOCKET) > 1) { + if (hwloc_get_nbobjs_by_type(node->topology->topo, HWLOC_OBJ_SOCKET) > 1) { /* don't have info about pci locality */ orte_show_help("help-orte-rmaps-md.txt", "orte-rmaps-mindist:no-pci-locality-info", true, node->name); @@ -355,12 +355,12 @@ static int mindist_map(orte_job_t *jdata) "mca:rmaps:mindist: second pass assigning %d extra procs to node %s", (int)num_procs_to_assign, node->name); OBJ_CONSTRUCT(&numa_list, opal_list_t); - opal_hwloc_get_sorted_numa_list(node->topology, orte_rmaps_base.device, &numa_list); + opal_hwloc_get_sorted_numa_list(node->topology->topo, orte_rmaps_base.device, &numa_list); if (opal_list_get_size(&numa_list) > 0) { numa_item = opal_list_get_first(&numa_list); k = 0; - obj = hwloc_get_obj_by_type(node->topology, HWLOC_OBJ_NODE,((opal_rmaps_numa_node_t*)numa_item)->index); - npus = opal_hwloc_base_get_npus(node->topology, obj); + obj = hwloc_get_obj_by_type(node->topology->topo, HWLOC_OBJ_NODE,((opal_rmaps_numa_node_t*)numa_item)->index); 
+ npus = opal_hwloc_base_get_npus(node->topology->topo, obj); for (j = 0; j < (int)num_procs_to_assign && nprocs_mapped < (int)app->num_procs; j++) { if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, i))) { rc = ORTE_ERR_OUT_OF_RESOURCE; @@ -374,8 +374,8 @@ static int mindist_map(orte_job_t *jdata) if (numa_item == opal_list_get_end(&numa_list)) { numa_item = opal_list_get_first(&numa_list); } - obj = hwloc_get_obj_by_type(node->topology, HWLOC_OBJ_NODE,((opal_rmaps_numa_node_t*)numa_item)->index); - npus = opal_hwloc_base_get_npus(node->topology, obj); + obj = hwloc_get_obj_by_type(node->topology->topo, HWLOC_OBJ_NODE,((opal_rmaps_numa_node_t*)numa_item)->index); + npus = opal_hwloc_base_get_npus(node->topology->topo, obj); k = 0; } } diff --git a/orte/mca/rmaps/ppr/rmaps_ppr.c b/orte/mca/rmaps/ppr/rmaps_ppr.c index a0303effba..7af292d308 100644 --- a/orte/mca/rmaps/ppr/rmaps_ppr.c +++ b/orte/mca/rmaps/ppr/rmaps_ppr.c @@ -2,7 +2,7 @@ * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -265,7 +265,7 @@ static int ppr_mapper(orte_job_t *jdata) item = opal_list_get_next(item)) { node = (orte_node_t*)item; /* bozo check */ - if (NULL == node->topology) { + if (NULL == node->topology || NULL == node->topology->topo) { orte_show_help("help-orte-rmaps-ppr.txt", "ppr-topo-missing", true, node->name); rc = ORTE_ERR_SILENT; @@ -285,7 +285,7 @@ static int ppr_mapper(orte_job_t *jdata) * that many procs on this node */ if (OPAL_HWLOC_NODE_LEVEL == start) { - obj = hwloc_get_root_obj(node->topology); + obj = hwloc_get_root_obj(node->topology->topo); for (j=0; j < ppr[start] && nprocs_mapped < total_procs; j++) { if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, idx))) { rc = ORTE_ERR_OUT_OF_RESOURCE; @@ -296,7 +296,7 @@ static int ppr_mapper(orte_job_t *jdata) } } else { /* get the number of lowest resources on this node */ - nobjs = opal_hwloc_base_get_nbobjs_by_type(node->topology, + nobjs = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, lowest, cache_level, OPAL_HWLOC_AVAILABLE); @@ -304,7 +304,7 @@ static int ppr_mapper(orte_job_t *jdata) * recording the locale of each proc so we know its cpuset */ for (i=0; i < nobjs; i++) { - obj = opal_hwloc_base_get_obj_by_type(node->topology, + obj = opal_hwloc_base_get_obj_by_type(node->topology->topo, lowest, cache_level, i, OPAL_HWLOC_AVAILABLE); for (j=0; j < ppr[start] && nprocs_mapped < total_procs; j++) { @@ -486,7 +486,7 @@ static void prune(orte_jobid_t jobid, } /* get the number of resources at this level on this node */ - nobjs = opal_hwloc_base_get_nbobjs_by_type(node->topology, + nobjs = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, lvl, cache_level, OPAL_HWLOC_AVAILABLE); @@ -494,11 +494,11 @@ static void prune(orte_jobid_t jobid, * underneath it and check against the limit */ for (i=0; i < nobjs; i++) { - obj = opal_hwloc_base_get_obj_by_type(node->topology, + obj = opal_hwloc_base_get_obj_by_type(node->topology->topo, lvl, cache_level, i, 
OPAL_HWLOC_AVAILABLE); /* get the available cpuset */ - avail = opal_hwloc_base_get_available_cpus(node->topology, obj); + avail = opal_hwloc_base_get_available_cpus(node->topology->topo, obj); /* look at the intersection of this object's cpuset and that * of each proc in the job/app - if they intersect, then count this proc @@ -518,7 +518,7 @@ static void prune(orte_jobid_t jobid, ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return; } - cpus = opal_hwloc_base_get_available_cpus(node->topology, locale); + cpus = opal_hwloc_base_get_available_cpus(node->topology->topo, locale); if (hwloc_bitmap_intersects(avail, cpus)) { nprocs++; } @@ -544,7 +544,7 @@ static void prune(orte_jobid_t jobid, * have only one child, then return this * object */ - top = find_split(node->topology, obj); + top = find_split(node->topology->topo, obj); hwloc_obj_type_snprintf(dang, 64, top, 1); opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:ppr: SPLIT AT LEVEL %s", dang); @@ -556,7 +556,7 @@ static void prune(orte_jobid_t jobid, /* find the child with the most procs underneath it */ for (k=0; k < top->arity && limit < nprocs; k++) { /* get this object's available cpuset */ - childcpus = opal_hwloc_base_get_available_cpus(node->topology, top->children[k]); + childcpus = opal_hwloc_base_get_available_cpus(node->topology->topo, top->children[k]); nunder = 0; pptr = NULL; for (n=0; n < node->procs->size; n++) { @@ -572,7 +572,7 @@ static void prune(orte_jobid_t jobid, ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return; } - cpus = opal_hwloc_base_get_available_cpus(node->topology, locale); + cpus = opal_hwloc_base_get_available_cpus(node->topology->topo, locale); if (hwloc_bitmap_intersects(childcpus, cpus)) { nunder++; if (NULL == pptr) { diff --git a/orte/mca/rmaps/rank_file/rmaps_rank_file.c b/orte/mca/rmaps/rank_file/rmaps_rank_file.c index 1fe6c7dcc0..f5d2812699 100644 --- a/orte/mca/rmaps/rank_file/rmaps_rank_file.c +++ b/orte/mca/rmaps/rank_file/rmaps_rank_file.c @@ -14,7 
+14,7 @@ * All rights reserved. * Copyright (c) 2008 Voltaire. All rights reserved * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. @@ -308,7 +308,7 @@ static int orte_rmaps_rf_map(orte_job_t *jdata) /* setup the bitmap */ hwloc_cpuset_t bitmap; char *cpu_bitmap; - if (NULL == node->topology) { + if (NULL == node->topology || NULL == node->topology->topo) { /* not allowed - for rank-file, we must have * the topology info */ @@ -318,7 +318,7 @@ static int orte_rmaps_rf_map(orte_job_t *jdata) } bitmap = hwloc_bitmap_alloc(); /* parse the slot_list to find the socket and core */ - if (ORTE_SUCCESS != (rc = opal_hwloc_base_slot_list_parse(slots, node->topology, rtype, bitmap))) { + if (ORTE_SUCCESS != (rc = opal_hwloc_base_slot_list_parse(slots, node->topology->topo, rtype, bitmap))) { ORTE_ERROR_LOG(rc); hwloc_bitmap_free(bitmap); goto error; diff --git a/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c b/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c index b923d898ab..a978ef9eba 100644 --- a/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c +++ b/orte/mca/rmaps/round_robin/rmaps_rr_mappers.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -76,8 +76,8 @@ int orte_rmaps_rr_byslot(orte_job_t *jdata, /* get the root object as we are not assigning * locale here except at the node level */ - if (NULL != node->topology) { - obj = hwloc_get_root_obj(node->topology); + if (NULL != node->topology && NULL != node->topology->topo) { + obj = hwloc_get_root_obj(node->topology->topo); } if (node->slots <= node->slots_inuse) { opal_output_verbose(2, orte_rmaps_base_framework.framework_output, @@ -143,8 +143,8 @@ int orte_rmaps_rr_byslot(orte_job_t *jdata, /* get the root object as we are not assigning * locale except at the node level */ - if (NULL != node->topology) { - obj = hwloc_get_root_obj(node->topology); + if (NULL != node->topology && NULL != node->topology->topo) { + obj = hwloc_get_root_obj(node->topology->topo); } /* add this node to the map - do it only once */ @@ -288,8 +288,8 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata, /* get the root object as we are not assigning * locale except at the node level */ - if (NULL != node->topology) { - obj = hwloc_get_root_obj(node->topology); + if (NULL != node->topology && NULL != node->topology->topo) { + obj = hwloc_get_root_obj(node->topology->topo); } /* add this node to the map, but only do so once */ if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) { @@ -402,8 +402,8 @@ int orte_rmaps_rr_bynode(orte_job_t *jdata, /* get the root object as we are not assigning * locale except at the node level */ - if (NULL != node->topology) { - obj = hwloc_get_root_obj(node->topology); + if (NULL != node->topology && NULL != node->topology->topo) { + obj = hwloc_get_root_obj(node->topology->topo); } OPAL_OUTPUT_VERBOSE((20, orte_rmaps_base_framework.framework_output, @@ -507,14 +507,14 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata, do { add_one = false; OPAL_LIST_FOREACH(node, node_list, orte_node_t) { - if (NULL == node->topology) { + if (NULL == node->topology || NULL == node->topology->topo) { orte_show_help("help-orte-rmaps-ppr.txt", 
"ppr-topo-missing", true, node->name); return ORTE_ERR_SILENT; } start = 0; /* get the number of objects of this type on this node */ - nobjs = opal_hwloc_base_get_nbobjs_by_type(node->topology, target, cache_level, OPAL_HWLOC_AVAILABLE); + nobjs = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, target, cache_level, OPAL_HWLOC_AVAILABLE); if (0 == nobjs) { continue; } @@ -564,13 +564,13 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata, opal_output_verbose(20, orte_rmaps_base_framework.framework_output, "mca:rmaps:rr: assigning proc to object %d", (i+start) % nobjs); /* get the hwloc object */ - if (NULL == (obj = opal_hwloc_base_get_obj_by_type(node->topology, target, cache_level, (i+start) % nobjs, OPAL_HWLOC_AVAILABLE))) { + if (NULL == (obj = opal_hwloc_base_get_obj_by_type(node->topology->topo, target, cache_level, (i+start) % nobjs, OPAL_HWLOC_AVAILABLE))) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } - if (orte_rmaps_base.cpus_per_rank > (int)opal_hwloc_base_get_npus(node->topology, obj)) { + if (orte_rmaps_base.cpus_per_rank > (int)opal_hwloc_base_get_npus(node->topology->topo, obj)) { orte_show_help("help-orte-rmaps-base.txt", "mapping-too-low", true, - orte_rmaps_base.cpus_per_rank, opal_hwloc_base_get_npus(node->topology, obj), + orte_rmaps_base.cpus_per_rank, opal_hwloc_base_get_npus(node->topology->topo, obj), orte_rmaps_base_print_mapping(orte_rmaps_base.mapping)); return ORTE_ERR_SILENT; } @@ -663,13 +663,13 @@ static int byobj_span(orte_job_t *jdata, */ nobjs = 0; OPAL_LIST_FOREACH(node, node_list, orte_node_t) { - if (NULL == node->topology) { + if (NULL == node->topology || NULL == node->topology->topo) { orte_show_help("help-orte-rmaps-ppr.txt", "ppr-topo-missing", true, node->name); return ORTE_ERR_SILENT; } /* get the number of objects of this type on this node */ - nobjs += opal_hwloc_base_get_nbobjs_by_type(node->topology, target, cache_level, OPAL_HWLOC_AVAILABLE); + nobjs += 
opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, target, cache_level, OPAL_HWLOC_AVAILABLE); } if (0 == nobjs) { @@ -708,19 +708,19 @@ static int byobj_span(orte_job_t *jdata, ++(jdata->map->num_nodes); } /* get the number of objects of this type on this node */ - nobjs = opal_hwloc_base_get_nbobjs_by_type(node->topology, target, cache_level, OPAL_HWLOC_AVAILABLE); + nobjs = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, target, cache_level, OPAL_HWLOC_AVAILABLE); opal_output_verbose(2, orte_rmaps_base_framework.framework_output, "mca:rmaps:rr:byobj: found %d objs on node %s", nobjs, node->name); /* loop through the number of objects */ for (i=0; i < (int)nobjs && nprocs_mapped < (int)app->num_procs; i++) { /* get the hwloc object */ - if (NULL == (obj = opal_hwloc_base_get_obj_by_type(node->topology, target, cache_level, i, OPAL_HWLOC_AVAILABLE))) { + if (NULL == (obj = opal_hwloc_base_get_obj_by_type(node->topology->topo, target, cache_level, i, OPAL_HWLOC_AVAILABLE))) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } - if (orte_rmaps_base.cpus_per_rank > (int)opal_hwloc_base_get_npus(node->topology, obj)) { + if (orte_rmaps_base.cpus_per_rank > (int)opal_hwloc_base_get_npus(node->topology->topo, obj)) { orte_show_help("help-orte-rmaps-base.txt", "mapping-too-low", true, - orte_rmaps_base.cpus_per_rank, opal_hwloc_base_get_npus(node->topology, obj), + orte_rmaps_base.cpus_per_rank, opal_hwloc_base_get_npus(node->topology->topo, obj), orte_rmaps_base_print_mapping(orte_rmaps_base.mapping)); return ORTE_ERR_SILENT; } @@ -759,4 +759,3 @@ static int byobj_span(orte_job_t *jdata, return ORTE_SUCCESS; } - diff --git a/orte/mca/rmaps/seq/rmaps_seq.c b/orte/mca/rmaps/seq/rmaps_seq.c index d2509496fa..de4f3df213 100644 --- a/orte/mca/rmaps/seq/rmaps_seq.c +++ b/orte/mca/rmaps/seq/rmaps_seq.c @@ -12,7 +12,7 @@ * Copyright (c) 2006-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Los Alamos National Security, LLC. 
* All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. @@ -442,7 +442,7 @@ static int orte_rmaps_seq_map(orte_job_t *jdata) if (NULL != sq->cpuset) { hwloc_cpuset_t bitmap; char *cpu_bitmap; - if (NULL == node->topology) { + if (NULL == node->topology || NULL == node->topology->topo) { /* not allowed - for sequential cpusets, we must have * the topology info */ @@ -460,7 +460,7 @@ static int orte_rmaps_seq_map(orte_job_t *jdata) /* setup the bitmap */ bitmap = hwloc_bitmap_alloc(); /* parse the slot_list to find the socket and core */ - if (ORTE_SUCCESS != (rc = opal_hwloc_base_slot_list_parse(sq->cpuset, node->topology, rtype, bitmap))) { + if (ORTE_SUCCESS != (rc = opal_hwloc_base_slot_list_parse(sq->cpuset, node->topology->topo, rtype, bitmap))) { ORTE_ERROR_LOG(rc); hwloc_bitmap_free(bitmap); goto error; @@ -490,8 +490,8 @@ static int orte_rmaps_seq_map(orte_job_t *jdata) /* assign the locale - okay for the topo to be null as * it just means it wasn't returned */ - if (NULL != node->topology) { - locale = hwloc_get_root_obj(node->topology); + if (NULL != node->topology && NULL != node->topology->topo) { + locale = hwloc_get_root_obj(node->topology->topo); orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, ORTE_ATTR_LOCAL, locale, OPAL_PTR); } @@ -531,12 +531,10 @@ static char *orte_getline(FILE *fp) ret = fgets(input, 1024, fp); if (NULL != ret) { - input[strlen(input)-1] = '\0'; /* remove newline */ - buff = strdup(input); - return buff; + input[strlen(input)-1] = '\0'; /* remove newline */ + buff = strdup(input); + return buff; } return NULL; } - - diff --git a/orte/mca/rmaps/staged/Makefile.am b/orte/mca/rmaps/staged/Makefile.am deleted file mode 100644 index 0b1db45c5f..0000000000 --- 
a/orte/mca/rmaps/staged/Makefile.am +++ /dev/null @@ -1,35 +0,0 @@ -# -# Copyright (c) 2012 Los Alamos National Security, LLC. -# All rights reserved -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - rmaps_staged.c \ - rmaps_staged.h \ - rmaps_staged_component.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_orte_rmaps_staged_DSO -component_noinst = -component_install = mca_rmaps_staged.la -else -component_noinst = libmca_rmaps_staged.la -component_install = -endif - -mcacomponentdir = $(ortelibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_rmaps_staged_la_SOURCES = $(sources) -mca_rmaps_staged_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_rmaps_staged_la_SOURCES =$(sources) -libmca_rmaps_staged_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/rmaps/staged/owner.txt b/orte/mca/rmaps/staged/owner.txt deleted file mode 100644 index 4ad6f408ca..0000000000 --- a/orte/mca/rmaps/staged/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: INTEL -status: maintenance diff --git a/orte/mca/rmaps/staged/rmaps_staged.c b/orte/mca/rmaps/staged/rmaps_staged.c deleted file mode 100644 index 54519ff9b3..0000000000 --- a/orte/mca/rmaps/staged/rmaps_staged.c +++ /dev/null @@ -1,436 +0,0 @@ -/* - * Copyright (c) 2012 Los Alamos National Security, LLC. - * All rights reserved - * Copyright (c) 2014 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" -#include "orte/types.h" - -#include -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ -#include - -#include "opal/mca/hwloc/base/base.h" -#include "opal/dss/dss.h" -#include "opal/util/argv.h" -#include "opal/util/output.h" - -#include "orte/util/show_help.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/state/state.h" - -#include "orte/mca/rmaps/base/rmaps_private.h" -#include "orte/mca/rmaps/base/base.h" -#include "rmaps_staged.h" - -static int staged_mapper(orte_job_t *jdata); - -orte_rmaps_base_module_t orte_rmaps_staged_module = { - staged_mapper -}; - -static int staged_mapper(orte_job_t *jdata) -{ - mca_base_component_t *c=&mca_rmaps_staged_component.base_version; - int i, j, k, rc; - orte_app_context_t *app; - opal_list_t node_list, desired; - orte_std_cntr_t num_slots; - orte_proc_t *proc; - orte_node_t *node, *next; - bool work_to_do = false, first_pass = false; - opal_list_item_t *item, *it2; - char *cptr, **minimap, *hosts, **dash_host; - orte_vpid_t load; - orte_vpid_t max_ppn, *ppn; - - /* only use this mapper if it was specified */ - if (NULL == jdata->map->req_mapper || - 0 != strcasecmp(jdata->map->req_mapper, c->mca_component_name) || - ORTE_MAPPING_STAGED != ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { - /* I wasn't specified */ - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "mca:rmaps:staged: job %s not using staged mapper", - ORTE_JOBID_PRINT(jdata->jobid)); - return ORTE_ERR_TAKE_NEXT_OPTION; - } - - opal_output_verbose(2, orte_rmaps_base_framework.framework_output, - "%s mca:rmaps:staged: mapping job %s with %d procs", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_JOBID_PRINT(jdata->jobid), (int)jdata->num_procs); - - /* flag that I did the mapping */ - if (NULL != jdata->map->last_mapper) { - free(jdata->map->last_mapper); - } - jdata->map->last_mapper = 
strdup(c->mca_component_name); - - /* if there are no nodes in the map, then this is our first - * pass thru this job - */ - if (0 == jdata->map->num_nodes) { - first_pass = true; - } - - /* we assume that the app_contexts are in priority order, - * with the highest priority being the first entry in the - * job's app_context array. Loop across the app_contexts - * in order, looking for apps that have not been - * fully mapped - */ - for (i=0; i < jdata->apps->size; i++) { - if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) { - continue; - } - /* has it been fully mapped? */ - if (ORTE_APP_STATE_ALL_MAPPED <= app->state) { - continue; - } - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "%s mca:rmaps:staged: working app %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), app->app); - - /* find nodes that meet any constraints provided in the form of - * -hostfile or -host directives - */ - OBJ_CONSTRUCT(&node_list, opal_list_t); - /* get nodes based on a strict interpretation of the location hints */ - if (ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list, &num_slots, app, - jdata->map->mapping, false, true))) { - /* we were unable to get any nodes that match those - * specified in the app - */ - if (ORTE_ERR_RESOURCE_BUSY == rc) { - /* if the return is "busy", then at least one of the - * specified resources must exist, but no slots are - * currently available. 
This means there is at least - * a hope of eventually being able to map this app - * within its specified constraints, so continue working - */ - if (orte_soft_locations) { - /* if soft locations were given, then we know that - * none of the nodes in this allocation are available, - * so there is no point in continuing to check the - * remaining apps - */ - while (NULL != (item = opal_list_remove_first(&node_list))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&node_list); - goto complete; - } - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "%s mca:rmaps:staged: all nodes for this app are currently busy", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - OBJ_DESTRUCT(&node_list); - continue; - } else { - /* this indicates that there are no nodes that match - * the specified constraints, so there is no hope of - * ever being able to execute this app. This is an - * unrecoverable error - note that a return of - * "silent" means that the function already printed - * an error message, so the error_log will print nothing - */ - ORTE_ERROR_LOG(rc); - return rc; - } - } - - /* if a max number of procs/node was given for this - * app, remove all nodes from the list that exceed - * that limit - */ - ppn = &max_ppn; - if (orte_get_attribute(&app->attributes, ORTE_APP_MAX_PPN, (void**)&ppn, OPAL_UINT32)) { - item = opal_list_get_first(&node_list); - while (item != opal_list_get_end(&node_list)) { - it2 = opal_list_get_next(item); - node = (orte_node_t*)item; - if (max_ppn <= node->num_procs) { - opal_list_remove_item(&node_list, item); - OBJ_RELEASE(item); - } - item = it2; - } - } - - /* if we have no available nodes, then move on to next app */ - if (0 == opal_list_get_size(&node_list)) { - OBJ_DESTRUCT(&node_list); - continue; - } - - /* if the app specified locations, soft or not, search the list of nodes - * for those that match the requested locations and move those - * to the desired list so we use them first - */ - if (orte_get_attribute(&app->attributes, 
ORTE_APP_DASH_HOST, (void**)&hosts, OPAL_STRING)) { - OBJ_CONSTRUCT(&desired, opal_list_t); - dash_host = opal_argv_split(hosts, ','); - free(hosts); - /* no particular order is required */ - for (j=0; j < opal_argv_count(dash_host); j++) { - minimap = opal_argv_split(dash_host[j], ','); - for (k=0; k < opal_argv_count(minimap); k++) { - cptr = minimap[k]; - for (item = opal_list_get_first(&node_list); - item != opal_list_get_end(&node_list); - item = opal_list_get_next(item)) { - node = (orte_node_t*)item; - if (0 == strcmp(node->name, cptr) || - (0 == strcmp("localhost", cptr) && - 0 == strcmp(node->name, orte_process_info.nodename))) { - opal_list_remove_item(&node_list, item); - opal_list_append(&desired, item); - opal_output_verbose(10, orte_rmaps_base_framework.framework_output, - "%s mca:rmaps:staged: placing node %s on desired list", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - node->name); - break; - } - } - } - opal_argv_free(minimap); - } - opal_argv_free(dash_host); - /* if no nodes made the transition and the app specified soft - * locations, then we can skip to look at the non-desired list - */ - if (0 == opal_list_get_size(&desired)) { - OBJ_DESTRUCT(&desired); - if (orte_soft_locations) { - goto process; - } else { - /* move on to next app */ - continue; - } - } - /* cycle thru the procs for this app and attempt to map them - * to the desired nodes using a load-balancing algo - */ - for (j=0; j < app->procs.size; j++) { - if (NULL == (proc = opal_pointer_array_get_item(&app->procs, j))) { - continue; - } - if (ORTE_PROC_STATE_UNDEF != proc->state) { - /* this proc has already been mapped or executed */ - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "%s mca:rmaps:staged: proc %s has already been mapped", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&proc->name)); - continue; - } - /* flag that there is at least one proc still to - * be executed - */ - work_to_do = true; - /* track number mapped */ - jdata->num_mapped++; 
- /* find the lightest-loaded node on the desired list */ - node = NULL; - load = ORTE_VPID_MAX; - for (item = opal_list_get_first(&desired); - item != opal_list_get_end(&desired); - item = opal_list_get_next(item)) { - next = (orte_node_t*)item; - if (next->num_procs < load) { - node = next; - load = next->num_procs; - } - } - /* put the proc there */ - proc->node = node; - /* the local rank is the number of procs - * on this node from this job - we don't - * directly track this number, so it must - * be found by looping across the node->procs - * array and counting it each time. For now, - * since we don't use this value in this mode - * of operation, just set it to something arbitrary - */ - proc->local_rank = node->num_procs; - /* the node rank is simply the number of procs - * on the node at this time - */ - proc->node_rank = node->num_procs; - /* track number of procs on node and number of slots used */ - node->num_procs++; - node->slots_inuse++; - opal_output_verbose(10, orte_rmaps_base_framework.framework_output, - "%s Proc %s on node %s: slots %d inuse %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&proc->name), node->name, - (int)node->slots, (int)node->slots_inuse); - if (node->slots_inuse == node->slots) { - opal_list_remove_item(&desired, &node->super); - OBJ_RELEASE(node); - } - if (0 > (rc = opal_pointer_array_add(node->procs, (void*)proc))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(proc); - return rc; - } - /* retain the proc struct so that we correctly track its release */ - OBJ_RETAIN(proc); - proc->state = ORTE_PROC_STATE_INIT; - /* flag the proc as updated so it will be included - * in the next pidmap message - */ - ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_UPDATED); - /* add the node to the map, if needed */ - if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) { - if (ORTE_SUCCESS > (rc = opal_pointer_array_add(jdata->map->nodes, (void*)node))) { - ORTE_ERROR_LOG(rc); - return rc; - } - ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED); - 
OBJ_RETAIN(node); /* maintain accounting on object */ - jdata->map->num_nodes++; - } - if (0 == opal_list_get_size(&desired)) { - /* nothing more we can do */ - break; - } - } - /* clear the list */ - while (NULL != (item = opal_list_remove_first(&desired))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&desired); - } - - process: - for (j=0; j < app->procs.size; j++) { - if (NULL == (proc = opal_pointer_array_get_item(&app->procs, j))) { - continue; - } - if (ORTE_PROC_STATE_UNDEF != proc->state) { - /* this proc has already been mapped or executed */ - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "%s mca:rmaps:staged: proc %s has already been mapped", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&proc->name)); - continue; - } - /* find the lightest-loaded node on the node list */ - node = NULL; - load = ORTE_VPID_MAX; - for (item = opal_list_get_first(&node_list); - item != opal_list_get_end(&node_list); - item = opal_list_get_next(item)) { - next = (orte_node_t*)item; - if (next->num_procs < load) { - node = next; - load = next->num_procs; - } - } - /* flag that there is at least one proc still to - * be executed - */ - work_to_do = true; - /* track number mapped */ - jdata->num_mapped++; - /* map this proc to the first available slot */ - OBJ_RETAIN(node); /* maintain accounting on object */ - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "%s mca:rmaps:staged: assigning proc %s to node %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&proc->name), node->name); - proc->node = node; - /* the local rank is the number of procs - * on this node from this job - we don't - * directly track this number, so it must - * be found by looping across the node->procs - * array and counting it each time. 
For now, - * since we don't use this value in this mode - * of operation, just set it to something arbitrary - */ - proc->local_rank = node->num_procs; - /* the node rank is simply the number of procs - * on the node at this time - */ - proc->node_rank = node->num_procs; - /* track number of procs on node and number of slots used */ - node->num_procs++; - node->slots_inuse++; - opal_output_verbose(10, orte_rmaps_base_framework.framework_output, - "%s Proc %s on node %s: slots %d inuse %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&proc->name), node->name, - (int)node->slots, (int)node->slots_inuse); - if (node->slots_inuse == node->slots) { - opal_list_remove_item(&node_list, &node->super); - OBJ_RELEASE(node); - } - if (0 > (rc = opal_pointer_array_add(node->procs, (void*)proc))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(proc); - return rc; - } - /* retain the proc struct so that we correctly track its release */ - OBJ_RETAIN(proc); - proc->state = ORTE_PROC_STATE_INIT; - /* flag the proc as updated so it will be included - * in the next pidmap message - */ - ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_UPDATED); - /* add the node to the map, if needed */ - if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) { - if (ORTE_SUCCESS > (rc = opal_pointer_array_add(jdata->map->nodes, (void*)node))) { - ORTE_ERROR_LOG(rc); - return rc; - } - ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED); - OBJ_RETAIN(node); /* maintain accounting on object */ - jdata->map->num_nodes++; - } - if (0 == opal_list_get_size(&node_list)) { - /* nothing more we can do */ - break; - } - } - /* clear the list */ - while (NULL != (item = opal_list_remove_first(&node_list))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&node_list); - } - - complete: - /* if there isn't at least one proc that can be launched, - * then indicate that we don't need to proceed with the - * launch sequence - */ - if (!work_to_do) { - return ORTE_ERR_RESOURCE_BUSY; - } - - /* flag that the job was updated so it will be - * 
included in the pidmap message - */ - ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_UPDATED); - - /* if we successfully mapped ALL procs in the first pass, - * then this job is capable of supporting MPI procs - */ - if (first_pass && jdata->num_mapped == jdata->num_procs) { - opal_output_verbose(5, orte_rmaps_base_framework.framework_output, - "%s mca:rmaps:staged: job %s is MPI-capable", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_JOBID_PRINT(jdata->jobid)); - ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_GANG_LAUNCHED); - } - - return ORTE_SUCCESS; -} diff --git a/orte/mca/rmaps/staged/rmaps_staged.h b/orte/mca/rmaps/staged/rmaps_staged.h deleted file mode 100644 index 7ba4bf7826..0000000000 --- a/orte/mca/rmaps/staged/rmaps_staged.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (c) 2012 Los Alamos National Security, LLC. - * All rights reserved - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef ORTE_RMAPS_STAGED_H -#define ORTE_RMAPS_STAGED_H - -#include "orte_config.h" - -#include "orte/mca/rmaps/rmaps.h" - -BEGIN_C_DECLS - -ORTE_MODULE_DECLSPEC extern orte_rmaps_base_component_t mca_rmaps_staged_component; -extern orte_rmaps_base_module_t orte_rmaps_staged_module; - -END_C_DECLS - -#endif diff --git a/orte/mca/rmaps/staged/rmaps_staged_component.c b/orte/mca/rmaps/staged/rmaps_staged_component.c deleted file mode 100644 index 4cc1d9ca4b..0000000000 --- a/orte/mca/rmaps/staged/rmaps_staged_component.c +++ /dev/null @@ -1,73 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2012-2015 Los Alamos National Security, LLC. 
All rights - * reserved - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include "opal/mca/base/base.h" - -#include "orte/util/show_help.h" - -#include "orte/mca/rmaps/base/base.h" -#include "rmaps_staged.h" - -/* - * Local functions - */ - -static int orte_rmaps_staged_open(void); -static int orte_rmaps_staged_close(void); -static int orte_rmaps_staged_query(mca_base_module_t **module, int *priority); - -orte_rmaps_base_component_t mca_rmaps_staged_component = { - .base_version = { - ORTE_RMAPS_BASE_VERSION_2_0_0, - - .mca_component_name = "staged", - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), - .mca_open_component = orte_rmaps_staged_open, - .mca_close_component = orte_rmaps_staged_close, - .mca_query_component = orte_rmaps_staged_query, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, -}; - - -/** - * component open/close/init function - */ -static int orte_rmaps_staged_open(void) -{ - return ORTE_SUCCESS; -} - - -static int orte_rmaps_staged_query(mca_base_module_t **module, int *priority) -{ - *priority = 5; - *module = (mca_base_module_t *)&orte_rmaps_staged_module; - return ORTE_SUCCESS; -} - -/** - * Close all subsystems. - */ - -static int orte_rmaps_staged_close(void) -{ - return ORTE_SUCCESS; -} - - diff --git a/orte/mca/rml/rml_types.h b/orte/mca/rml/rml_types.h index 5a43431c1e..4d1a7f8d89 100644 --- a/orte/mca/rml/rml_types.h +++ b/orte/mca/rml/rml_types.h @@ -12,7 +12,7 @@ * Copyright (c) 2007-2012 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -169,6 +169,9 @@ BEGIN_C_DECLS /* memory profile */ #define ORTE_RML_TAG_MEMPROFILE 61 +/* topology report */ +#define ORTE_RML_TAG_TOPOLOGY_REPORT 62 + #define ORTE_RML_TAG_MAX 100 @@ -178,7 +181,7 @@ BEGIN_C_DECLS /*** length of the tag. change this when type of orte_rml_tag_t is changed ***/ /*** max valu in unit32_t is 0xFFFF_FFFF when converted to char this is 8 ** #define ORTE_RML_TAG_T_CHAR_LEN 8 -#define ORTE_RML_TAG_T_SPRINT "%8x" */ +#define ORTE_RML_TAG_T_SPRINT "%8x" */ /** * Message matching tag diff --git a/orte/mca/schizo/ompi/schizo_ompi.c b/orte/mca/schizo/ompi/schizo_ompi.c index 6e561dacb0..fbad86506a 100644 --- a/orte/mca/schizo/ompi/schizo_ompi.c +++ b/orte/mca/schizo/ompi/schizo_ompi.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -114,11 +114,6 @@ static opal_cmd_line_init_t cmd_line_init[] = { &orte_cmd_options.hnp, OPAL_CMD_LINE_TYPE_STRING, "Specify the URI of the HNP, or the name of the file (specified as file:filename) that contains that info" }, - /* hetero apps */ - { "orte_hetero_apps", '\0', NULL, "hetero-apps", 0, - NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Indicates that multiple app_contexts are being provided that are a mix of 32/64 bit binaries" }, - /* select XML output */ { "orte_xml_output", '\0', "xml", "xml", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, @@ -422,10 +417,6 @@ static opal_cmd_line_init_t cmd_line_init[] = { &orte_cmd_options.continuous, OPAL_CMD_LINE_TYPE_BOOL, "Job is to run until explicitly terminated" }, - { "orte_hetero_nodes", '\0', NULL, "hetero-nodes", 0, - NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Nodes in cluster may differ in topology, so send the topology back from each node [Default = false]" }, - #if OPAL_ENABLE_CRDEBUG == 1 { "opal_cr_enable_crdebug", '\0', "crdebug", "crdebug", 0, NULL, OPAL_CMD_LINE_TYPE_BOOL, diff --git a/orte/mca/state/novm/state_novm.c b/orte/mca/state/novm/state_novm.c index 49f5d6c348..0b40bd7614 100644 --- a/orte/mca/state/novm/state_novm.c +++ b/orte/mca/state/novm/state_novm.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -222,7 +222,7 @@ static void allocation_complete(int fd, short args, void *cbdata) if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { continue; } - node->topology = t->topo; + node->topology = t; } /* move to the map stage */ diff --git a/orte/orted/orted_comm.c b/orte/orted/orted_comm.c index 645152e3cd..662c9bc7fa 100644 --- a/orte/orted/orted_comm.c +++ b/orte/orted/orted_comm.c @@ -14,7 +14,7 @@ * reserved. 
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -121,6 +121,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, float pss; opal_pstats_t pstat; char *rtmod; + char *coprocessors; /* unpack the command */ n = 1; @@ -548,6 +549,47 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, } break; + /**** REPORT TOPOLOGY COMMAND ****/ + case ORTE_DAEMON_REPORT_TOPOLOGY_CMD: + answer = OBJ_NEW(opal_buffer_t); + /* pack the topology signature */ + if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &orte_topo_signature, 1, OPAL_STRING))) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(answer); + goto CLEANUP; + } + /* pack the topology */ + if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &opal_hwloc_topology, 1, OPAL_HWLOC_TOPO))) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(answer); + goto CLEANUP; + } + + /* detect and add any coprocessors */ + coprocessors = opal_hwloc_base_find_coprocessors(opal_hwloc_topology); + if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &coprocessors, 1, OPAL_STRING))) { + ORTE_ERROR_LOG(ret); + } + if (NULL != coprocessors) { + free(coprocessors); + } + /* see if I am on a coprocessor */ + coprocessors = opal_hwloc_base_check_on_coprocessor(); + if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &coprocessors, 1, OPAL_STRING))) { + ORTE_ERROR_LOG(ret); + } + if (NULL!= coprocessors) { + free(coprocessors); + } + /* send the data */ + if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, + sender, answer, ORTE_RML_TAG_TOPOLOGY_REPORT, + orte_rml_send_callback, NULL))) { + ORTE_ERROR_LOG(ret); + OBJ_RELEASE(answer); + } + break; + /**** CONTACT QUERY COMMAND ****/ case 
ORTE_DAEMON_CONTACT_QUERY_CMD: if (orte_debug_daemons_flag) { diff --git a/orte/orted/orted_main.c b/orte/orted/orted_main.c index daf9546755..8c57462c2e 100644 --- a/orte/orted/orted_main.c +++ b/orte/orted/orted_main.c @@ -127,9 +127,7 @@ static struct { int singleton_died_pipe; bool abort; bool tree_spawn; - char *hnp_topo_sig; bool test_suicide; - bool hnp_on_smgmt_node; } orted_globals; /* @@ -209,18 +207,6 @@ opal_cmd_line_init_t orte_cmd_line_opts[] = { NULL, OPAL_CMD_LINE_TYPE_STRING, "Regular expression defining nodes in system" }, - { "orte_hetero_nodes", '\0', NULL, "hetero-nodes", 0, - NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Nodes in cluster may differ in topology, so send the topology back from each node [Default = false]" }, - - { NULL, '\0', NULL, "hnp-topo-sig", 1, - &orted_globals.hnp_topo_sig, OPAL_CMD_LINE_TYPE_STRING, - "Topology signature of HNP" }, - - { "orte_hnp_on_smgmt_node", '\0', NULL, "hnp-on-smgmt-node", 0, - &orted_globals.hnp_on_smgmt_node, OPAL_CMD_LINE_TYPE_BOOL, - "Mpirun is executing on a system mgmt node whose topology is different from the compute nodes [Default = false]" }, - /* End of list */ { NULL, '\0', NULL, NULL, 0, NULL, OPAL_CMD_LINE_TYPE_NULL, NULL } @@ -237,8 +223,6 @@ int orte_daemon(int argc, char *argv[]) #if OPAL_ENABLE_FT_CR == 1 char *tmp_env_var = NULL; #endif - char *coprocessors; - uint8_t tflag; /* initialize the globals */ memset(&orted_globals, 0, sizeof(orted_globals)); @@ -769,39 +753,11 @@ int orte_daemon(int argc, char *argv[]) opal_argv_free(aliases); } - /* add the local topology, if different from the HNP's or user directed us to, - * but always if we are the first daemon to ensure we get a compute node */ - if (1 == ORTE_PROC_MY_NAME->vpid || orte_hetero_nodes || - (!orted_globals.hnp_on_smgmt_node && 0 != strcmp(orte_topo_signature, orted_globals.hnp_topo_sig))) { - tflag = 1; - if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &tflag, 1, OPAL_UINT8))) { - ORTE_ERROR_LOG(ret); - } - if (ORTE_SUCCESS != 
(ret = opal_dss.pack(buffer, &orte_topo_signature, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - } - if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &opal_hwloc_topology, 1, OPAL_HWLOC_TOPO))) { - ORTE_ERROR_LOG(ret); - } - } else { - tflag = 0; - if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &tflag, 1, OPAL_UINT8))) { - ORTE_ERROR_LOG(ret); - } - } - /* detect and add any coprocessors */ - coprocessors = opal_hwloc_base_find_coprocessors(opal_hwloc_topology); - if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &coprocessors, 1, OPAL_STRING))) { + /* always send back our topology signature - this is a small string + * and won't hurt anything */ + if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &orte_topo_signature, 1, OPAL_STRING))) { ORTE_ERROR_LOG(ret); } - /* see if I am on a coprocessor */ - coprocessors = opal_hwloc_base_check_on_coprocessor(); - if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &coprocessors, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - } - if (NULL!= coprocessors) { - free(coprocessors); - } /* send to the HNP's callback - will be routed if routes are available */ if (0 > (ret = orte_rml.send_buffer_nb(orte_coll_conduit, diff --git a/orte/runtime/data_type_support/orte_dt_print_fns.c b/orte/runtime/data_type_support/orte_dt_print_fns.c index 8faf4b55b8..53202030d1 100644 --- a/orte/runtime/data_type_support/orte_dt_print_fns.c +++ b/orte/runtime/data_type_support/orte_dt_print_fns.c @@ -13,7 +13,7 @@ * Copyright (c) 2011-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -476,13 +476,13 @@ int orte_dt_print_proc(char **output, char *prefix, orte_proc_t *src, opal_data_ char *str=NULL, *cpu_bitmap=NULL; if (orte_get_attribute(&src->attributes, ORTE_PROC_CPU_BITMAP, (void**)&cpu_bitmap, OPAL_STRING) && - NULL != src->node->topology) { + NULL != src->node->topology && NULL != src->node->topology->topo) { mycpus = hwloc_bitmap_alloc(); hwloc_bitmap_list_sscanf(mycpus, cpu_bitmap); - if (OPAL_ERR_NOT_BOUND == opal_hwloc_base_cset2str(tmp1, sizeof(tmp1), src->node->topology, mycpus)) { + if (OPAL_ERR_NOT_BOUND == opal_hwloc_base_cset2str(tmp1, sizeof(tmp1), src->node->topology->topo, mycpus)) { str = strdup("UNBOUND"); } else { - opal_hwloc_base_cset2mapstr(tmp2, sizeof(tmp2), src->node->topology, mycpus); + opal_hwloc_base_cset2mapstr(tmp2, sizeof(tmp2), src->node->topology->topo, mycpus); asprintf(&str, "%s:%s", tmp1, tmp2); } hwloc_bitmap_free(mycpus); @@ -517,7 +517,7 @@ int orte_dt_print_proc(char **output, char *prefix, orte_proc_t *src, opal_data_ if (orte_get_attribute(&src->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&loc, OPAL_PTR)) { if (NULL != loc) { - if (OPAL_ERR_NOT_BOUND == opal_hwloc_base_cset2mapstr(locale, sizeof(locale), src->node->topology, loc->cpuset)) { + if (OPAL_ERR_NOT_BOUND == opal_hwloc_base_cset2mapstr(locale, sizeof(locale), src->node->topology->topo, loc->cpuset)) { strcpy(locale, "NODE"); } } else { @@ -528,7 +528,7 @@ int orte_dt_print_proc(char **output, char *prefix, orte_proc_t *src, opal_data_ } if (orte_get_attribute(&src->attributes, ORTE_PROC_HWLOC_BOUND, (void**)&bd, OPAL_PTR)) { if (NULL != bd) { - if (OPAL_ERR_NOT_BOUND == opal_hwloc_base_cset2mapstr(bind, sizeof(bind), src->node->topology, bd->cpuset)) { + if (OPAL_ERR_NOT_BOUND == opal_hwloc_base_cset2mapstr(bind, sizeof(bind), src->node->topology->topo, bd->cpuset)) { strcpy(bind, "UNBOUND"); } } else { @@ -869,4 +869,3 @@ int orte_dt_print_sig(char **output, char *prefix, 
orte_grpcomm_signature_t *src *output = tmp; return ORTE_SUCCESS; } - diff --git a/orte/runtime/orte_globals.c b/orte/runtime/orte_globals.c index 0096f66cee..1090aa6a3a 100644 --- a/orte/runtime/orte_globals.c +++ b/orte/runtime/orte_globals.c @@ -90,9 +90,6 @@ int orte_hostname_cutoff = 1000; int orted_debug_failure = -1; int orted_debug_failure_delay = -1; -bool orte_hetero_apps = false; -bool orte_hetero_nodes = false; -bool orte_hnp_on_smgmt_node = false; bool orte_never_launched = false; bool orte_devel_level_output = false; bool orte_display_topo_with_map = false; diff --git a/orte/runtime/orte_globals.h b/orte/runtime/orte_globals.h index c3c6ea375f..a794610f61 100644 --- a/orte/runtime/orte_globals.h +++ b/orte/runtime/orte_globals.h @@ -215,6 +215,15 @@ struct orte_proc_t; struct orte_job_map_t; /************/ +/* define an object for storing node topologies; declared ahead of orte_node_t, whose topology field is now a pointer to this type */ +typedef struct { + opal_object_t super; /* OPAL object header; the type is declared as an OPAL class below */ + hwloc_topology_t topo; /* hwloc topology handle; may be NULL, so check it before handing it to hwloc helpers */ + char *sig; /* NOTE(review): presumably the topology signature string (cf. orte_topo_signature) - confirm */ +} orte_topology_t; +ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_topology_t); + + /** * Information about a specific application to be launched in the RTE. */ @@ -292,7 +301,7 @@ typedef struct { may want to allow up to four processes but no more.
*/ orte_std_cntr_t slots_max; /* system topology for this node */ - hwloc_topology_t topology; + orte_topology_t *topology; /* flags */ orte_node_flags_t flags; /* list of orte_attribute_t */ @@ -405,14 +414,6 @@ struct orte_proc_t { typedef struct orte_proc_t orte_proc_t; ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_proc_t); -/* define an object for storing node topologies */ -typedef struct { - opal_object_t super; - hwloc_topology_t topo; - char *sig; -} orte_topology_t; -ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_topology_t); - /** * Get a job data object * We cannot just reference a job data object with its jobid as @@ -472,11 +473,6 @@ ORTE_DECLSPEC extern int orte_hostname_cutoff; ORTE_DECLSPEC extern int orted_debug_failure; ORTE_DECLSPEC extern int orted_debug_failure_delay; -/* homegeneity flags */ -ORTE_DECLSPEC extern bool orte_hetero_apps; -ORTE_DECLSPEC extern bool orte_hetero_nodes; -ORTE_DECLSPEC extern bool orte_hnp_on_smgmt_node; - ORTE_DECLSPEC extern bool orte_never_launched; ORTE_DECLSPEC extern bool orte_devel_level_output; ORTE_DECLSPEC extern bool orte_display_topo_with_map; diff --git a/orte/runtime/orte_mca_params.c b/orte/runtime/orte_mca_params.c index 7d74c6d265..e637ad4fd3 100644 --- a/orte/runtime/orte_mca_params.c +++ b/orte/runtime/orte_mca_params.c @@ -510,21 +510,6 @@ int orte_register_params(void) OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &orte_show_resolved_nodenames); -#if 0 - /* XXX -- option doesn't appear to do anything */ - mca_base_param_reg_int_name("orte", "hetero_apps", - "Indicates that multiple app_contexts are being provided that are a mix of 32/64 bit binaries (default: false)", - false, false, (int) false, &value); - orte_hetero_apps = OPAL_INT_TO_BOOL(value); -#endif - - orte_hetero_nodes = false; - (void) mca_base_var_register ("orte", "orte", NULL, "hetero_nodes", - "Nodes in cluster may differ in topology, so send the topology back from each node [Default = false]", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - 
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, - &orte_hetero_nodes); - /* allow specification of the launch agent */ orte_launch_agent = "orted"; (void) mca_base_var_register ("orte", "orte", NULL, "launch_agent", @@ -767,13 +752,5 @@ int orte_register_params(void) MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &orte_mgmt_transport); - orte_hnp_on_smgmt_node = false; - (void) mca_base_var_register ("orte", "orte", NULL, "hnp_on_smgmt_node", - "Mpirun is executing on a system mgmt node whose topology is different from the compute nodes [Default = false]", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, - &orte_hnp_on_smgmt_node); - - return ORTE_SUCCESS; }