diff --git a/orte/mca/ras/simulator/ras_sim.h b/orte/mca/ras/simulator/ras_sim.h index ee48391af9..9987aa42fb 100644 --- a/orte/mca/ras/simulator/ras_sim.h +++ b/orte/mca/ras/simulator/ras_sim.h @@ -18,10 +18,12 @@ BEGIN_C_DECLS struct orte_ras_sim_component_t { orte_ras_base_component_t super; - int num_nodes; + char *num_nodes; int slots; int slots_max; - char *topofile; + char *topofiles; + bool have_cpubind; + bool have_membind; }; typedef struct orte_ras_sim_component_t orte_ras_sim_component_t; diff --git a/orte/mca/ras/simulator/ras_sim_component.c b/orte/mca/ras/simulator/ras_sim_component.c index b4768c81f3..ea73a1498f 100644 --- a/orte/mca/ras/simulator/ras_sim_component.c +++ b/orte/mca/ras/simulator/ras_sim_component.c @@ -65,10 +65,8 @@ orte_ras_sim_component_t mca_ras_simulator_component = { static int ras_sim_open(void) { - mca_base_param_reg_int(&mca_ras_simulator_component.super.base_version, - "num_nodes", - "Number of nodes to simulate", - false, false, 0, &mca_ras_simulator_component.num_nodes); + int tmp; + mca_base_param_reg_int(&mca_ras_simulator_component.super.base_version, "slots", "Number of slots on each node to simulate", @@ -79,18 +77,37 @@ static int ras_sim_open(void) false, false, 0, &mca_ras_simulator_component.slots_max); #if OPAL_HAVE_HWLOC mca_base_param_reg_string(&mca_ras_simulator_component.super.base_version, - "topo_file", - "File containing xml topology description for simulated nodes", - false, false, NULL, &mca_ras_simulator_component.topofile); + "num_nodes", + "Comma-separated list of number of nodes to simulate for each topology", + false, false, NULL, &mca_ras_simulator_component.num_nodes); + mca_base_param_reg_string(&mca_ras_simulator_component.super.base_version, + "topo_files", + "Comma-separated list of files containing xml topology descriptions for simulated nodes", + false, false, NULL, &mca_ras_simulator_component.topofiles); + mca_base_param_reg_int(&mca_ras_simulator_component.super.base_version, + "have_cpubind", + "Topology supports binding to cpus", + false, false, (int)true, &tmp); + mca_ras_simulator_component.have_cpubind = OPAL_INT_TO_BOOL(tmp); + mca_base_param_reg_int(&mca_ras_simulator_component.super.base_version, + "have_membind", + "Topology supports binding to memory", + false, false, (int)true, &tmp); + mca_ras_simulator_component.have_membind = OPAL_INT_TO_BOOL(tmp); +#else + mca_base_param_reg_string(&mca_ras_simulator_component.super.base_version, + "num_nodes", + "Number of nodes to simulate", + false, false, NULL, &mca_ras_simulator_component.num_nodes); #endif - + return ORTE_SUCCESS; } static int ras_sim_component_query(mca_base_module_t **module, int *priority) { - if (0 < mca_ras_simulator_component.num_nodes) { + if (NULL != mca_ras_simulator_component.num_nodes) { *module = (mca_base_module_t *) &orte_ras_sim_module; *priority = 1000; /* cannot launch simulated nodes or resolve their names to addresses */ diff --git a/orte/mca/ras/simulator/ras_sim_module.c b/orte/mca/ras/simulator/ras_sim_module.c index 06c48b18aa..60d06bcb22 100644 --- a/orte/mca/ras/simulator/ras_sim_module.c +++ b/orte/mca/ras/simulator/ras_sim_module.c @@ -17,6 +17,7 @@ #include "opal/class/opal_list.h" #include "opal/mca/hwloc/hwloc.h" +#include "orte/util/show_help.h" #include "orte/runtime/orte_globals.h" #include "ras_sim.h" @@ -39,83 +40,125 @@ orte_ras_base_module_t orte_ras_sim_module = { static int allocate(opal_list_t *nodes) { - int i, val, dig; + int i, n, val, dig, num_nodes; orte_node_t *node; #if OPAL_HAVE_HWLOC hwloc_topology_t topo; hwloc_obj_t obj; unsigned j, k; + struct hwloc_topology_support *support; + char **files=NULL; + bool use_local_topology = false; +#endif + char **node_cnt=NULL; + + node_cnt = opal_argv_split(mca_ras_simulator_component.num_nodes, ','); + +#if OPAL_HAVE_HWLOC + if (NULL == mca_ras_simulator_component.topofiles) { + /* use our topology */ + use_local_topology = true; + if (1 != opal_argv_count(node_cnt)) { + orte_show_help("help-ras-base.txt", "ras-sim:mismatch", true); + return ORTE_ERR_SILENT; + } + } else { + files = opal_argv_split(mca_ras_simulator_component.topofiles, ','); + if (opal_argv_count(files) != opal_argv_count(node_cnt)) { + orte_show_help("help-ras-base.txt", "ras-sim:mismatch", true); + return ORTE_ERR_SILENT; + } + } + #endif + /* count the total number of nodes */ + val = 0; + for (n=0; NULL != node_cnt[n]; n++) { + val += strtol(node_cnt[n], NULL, 10); + } /* get number of digits */ - val = mca_ras_simulator_component.num_nodes; for (dig=0; 0 != val; dig++) { val /= 10; } - /* check for topology */ + /* process the request */ + val = 0; + for (n=0; NULL != node_cnt[n]; n++) { + num_nodes = strtol(node_cnt[n], NULL, 10); + + /* check for topology */ #if OPAL_HAVE_HWLOC - if (NULL == mca_ras_simulator_component.topofile) { - /* use our topology */ - topo = opal_hwloc_topology; - } else { - if (0 != hwloc_topology_init(&topo)) { - return ORTE_ERROR; - } - if (0 != hwloc_topology_set_xml(topo, mca_ras_simulator_component.topofile)) { - hwloc_topology_destroy(topo); - return ORTE_ERROR; - } - /* since we are loading this from an external source, we have to - * explicitly set a flag so hwloc sets things up correctly - */ - if (0 != hwloc_topology_set_flags(topo, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM)) { - hwloc_topology_destroy(topo); - return ORTE_ERROR; - } - if (0 != hwloc_topology_load(topo)) { - hwloc_topology_destroy(topo); - return ORTE_ERROR; - } - /* remove the hostname from the topology. Unfortunately, hwloc - * decided to add the source hostname to the "topology", thus - * rendering it unusable as a pure topological description. So - * we remove that information here. - */ - obj = hwloc_get_root_obj(topo); - for (k=0; k < obj->infos_count; k++) { - if (NULL == obj->infos[k].name || - NULL == obj->infos[k].value) { - continue; + if (use_local_topology) { + /* use our topology */ + topo = opal_hwloc_topology; + } else { + if (0 != hwloc_topology_init(&topo)) { + return ORTE_ERROR; } - if (0 == strncmp(obj->infos[k].name, "HostName", strlen("HostName"))) { - free(obj->infos[k].name); - free(obj->infos[k].value); - /* left justify the array */ - for (j=k; j < obj->infos_count-1; j++) { - obj->infos[j] = obj->infos[j+1]; + if (0 != hwloc_topology_set_xml(topo, files[n])) { + hwloc_topology_destroy(topo); + return ORTE_ERROR; + } + /* since we are loading this from an external source, we have to + * explicitly set a flag so hwloc sets things up correctly + */ + if (0 != hwloc_topology_set_flags(topo, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM)) { + hwloc_topology_destroy(topo); + return ORTE_ERROR; + } + if (0 != hwloc_topology_load(topo)) { + hwloc_topology_destroy(topo); + return ORTE_ERROR; + } + /* remove the hostname from the topology. Unfortunately, hwloc + * decided to add the source hostname to the "topology", thus + * rendering it unusable as a pure topological description. So + * we remove that information here. + */ + obj = hwloc_get_root_obj(topo); + for (k=0; k < obj->infos_count; k++) { + if (NULL == obj->infos[k].name || + NULL == obj->infos[k].value) { + continue; + } + if (0 == strncmp(obj->infos[k].name, "HostName", strlen("HostName"))) { + free(obj->infos[k].name); + free(obj->infos[k].value); + /* left justify the array */ + for (j=k; j < obj->infos_count-1; j++) { + obj->infos[j] = obj->infos[j+1]; + } + obj->infos[obj->infos_count-1].name = NULL; + obj->infos[obj->infos_count-1].value = NULL; + obj->infos_count--; + break; } - obj->infos[obj->infos_count-1].name = NULL; - obj->infos[obj->infos_count-1].value = NULL; - obj->infos_count--; - break; } + /* unfortunately, hwloc does not include support info in its + * xml output :-(( To aid in debugging, we set it here + */ + support = (struct hwloc_topology_support*)hwloc_topology_get_support(topo); + support->cpubind->set_thisproc_cpubind = mca_ras_simulator_component.have_cpubind; + support->membind->set_thisproc_membind = mca_ras_simulator_component.have_membind; + /* add it to our array */ + opal_pointer_array_add(orte_node_topologies, topo); } - } #endif - for (i=0; i < mca_ras_simulator_component.num_nodes; i++) { - node = OBJ_NEW(orte_node_t); - asprintf(&node->name, "node%0*d", dig, i); - node->state = ORTE_NODE_STATE_UP; - node->slots_inuse = 0; - node->slots_max = mca_ras_simulator_component.slots_max; - node->slots = mca_ras_simulator_component.slots; + for (i=0; i < num_nodes; i++) { + node = OBJ_NEW(orte_node_t); + asprintf(&node->name, "node%0*d", dig, val++); + node->state = ORTE_NODE_STATE_UP; + node->slots_inuse = 0; + node->slots_max = mca_ras_simulator_component.slots_max; + node->slots = mca_ras_simulator_component.slots; #if OPAL_HAVE_HWLOC - node->topology = topo; + node->topology = topo; #endif - opal_list_append(nodes, &node->super); - } + opal_list_append(nodes, &node->super); + } + } /* record the number of allocated nodes */ orte_num_allocated_nodes = opal_list_get_size(nodes);