From d818c9d4075eb9214b9e70ce81acce8cde48c1ed Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 2 Aug 2012 04:57:13 +0000 Subject: [PATCH] Includes a patch from Jeff and Josh: update the simulator module to allow specification of multiple slot and max_slot counts for each node group (but don't require it). Remove the requirement that each node group provide its own topology. Adjust verbosities to allow showing some light debug output to see what nodes have been added without getting a bunch of other stuff. This commit was SVN r26936. --- orte/mca/ras/alps/ras_alps_component.c | 4 +-- orte/mca/ras/base/ras_base_allocate.c | 2 +- .../ras/gridengine/ras_gridengine_component.c | 4 +-- .../loadleveler/ras_loadleveler_component.c | 4 +-- orte/mca/ras/simulator/ras_sim.h | 4 +-- orte/mca/ras/simulator/ras_sim_component.c | 21 +++++++------ orte/mca/ras/simulator/ras_sim_module.c | 31 +++++++++++++++---- orte/mca/ras/slurm/ras_slurm_component.c | 4 +-- 8 files changed, 47 insertions(+), 27 deletions(-) diff --git a/orte/mca/ras/alps/ras_alps_component.c b/orte/mca/ras/alps/ras_alps_component.c index 09478e1d6a..996710ec51 100644 --- a/orte/mca/ras/alps/ras_alps_component.c +++ b/orte/mca/ras/alps/ras_alps_component.c @@ -179,7 +179,7 @@ orte_ras_alps_component_query(mca_base_module_t **module, } if (0 != orte_ras_alps_res_id) { mca_base_param_lookup_int(param_priority, priority); - opal_output_verbose(1, orte_ras_base.ras_output, + opal_output_verbose(2, orte_ras_base.ras_output, "ras:alps: available for selection"); *module = (mca_base_module_t *) &orte_ras_alps_module; return ORTE_SUCCESS; @@ -198,7 +198,7 @@ int orte_ras_alps_get_appinfo_attempts(int *attempts) { mca_base_param_lookup_int(param_read_attempts, attempts); - opal_output_verbose(1, orte_ras_base.ras_output, + opal_output_verbose(2, orte_ras_base.ras_output, "ras:alps:orte_ras_alps_get_appinfo_attempts: %d", *attempts); return ORTE_SUCCESS; diff --git a/orte/mca/ras/base/ras_base_allocate.c 
b/orte/mca/ras/base/ras_base_allocate.c index 1f6972e5a0..b5fe8c07b1 100644 --- a/orte/mca/ras/base/ras_base_allocate.c +++ b/orte/mca/ras/base/ras_base_allocate.c @@ -416,7 +416,7 @@ void orte_ras_base_allocate(int fd, short args, void *cbdata) DISPLAY: /* shall we display the results? */ - if (0 < opal_output_get_verbosity(orte_ras_base.ras_output) || orte_ras_base.display_alloc) { + if (4 < opal_output_get_verbosity(orte_ras_base.ras_output) || orte_ras_base.display_alloc) { display_alloc(); } diff --git a/orte/mca/ras/gridengine/ras_gridengine_component.c b/orte/mca/ras/gridengine/ras_gridengine_component.c index 192081a99b..88e018ebf4 100644 --- a/orte/mca/ras/gridengine/ras_gridengine_component.c +++ b/orte/mca/ras/gridengine/ras_gridengine_component.c @@ -101,13 +101,13 @@ static int orte_ras_gridengine_component_query(mca_base_module_t **module, int * if (NULL != getenv("SGE_ROOT") && NULL != getenv("ARC") && NULL != getenv("PE_HOSTFILE") && NULL != getenv("JOB_ID")) { - OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output, + OPAL_OUTPUT_VERBOSE((2, orte_ras_base.ras_output, "%s ras:gridengine: available for selection", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); *module = (mca_base_module_t *) &orte_ras_gridengine_module; return ORTE_SUCCESS; } - OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output, + OPAL_OUTPUT_VERBOSE((2, orte_ras_base.ras_output, "%s ras:gridengine: NOT available for selection", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); *module = NULL; diff --git a/orte/mca/ras/loadleveler/ras_loadleveler_component.c b/orte/mca/ras/loadleveler/ras_loadleveler_component.c index 0d416683a8..02d14f2c92 100644 --- a/orte/mca/ras/loadleveler/ras_loadleveler_component.c +++ b/orte/mca/ras/loadleveler/ras_loadleveler_component.c @@ -82,7 +82,7 @@ static int orte_ras_loadleveler_component_query(mca_base_module_t **module, int /* Are we running under a LOADLEVELER job? 
*/ if (NULL != getenv("LOADL_STEP_ID")) { mca_base_param_lookup_int(param_priority, priority); - OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output, + OPAL_OUTPUT_VERBOSE((2, orte_ras_base.ras_output, "%s ras:loadleveler: available for selection", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); *module = (mca_base_module_t *) &orte_ras_loadleveler_module; @@ -90,7 +90,7 @@ static int orte_ras_loadleveler_component_query(mca_base_module_t **module, int } /* Sadly, no */ - OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output, + OPAL_OUTPUT_VERBOSE((2, orte_ras_base.ras_output, "%s ras:loadleveler: NOT available for selection", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); *module = NULL; diff --git a/orte/mca/ras/simulator/ras_sim.h b/orte/mca/ras/simulator/ras_sim.h index 9987aa42fb..bece209553 100644 --- a/orte/mca/ras/simulator/ras_sim.h +++ b/orte/mca/ras/simulator/ras_sim.h @@ -19,8 +19,8 @@ BEGIN_C_DECLS struct orte_ras_sim_component_t { orte_ras_base_component_t super; char *num_nodes; - int slots; - int slots_max; + char * slots; + char * slots_max; char *topofiles; bool have_cpubind; bool have_membind; diff --git a/orte/mca/ras/simulator/ras_sim_component.c b/orte/mca/ras/simulator/ras_sim_component.c index 9eb7bc9d8b..944157adf4 100644 --- a/orte/mca/ras/simulator/ras_sim_component.c +++ b/orte/mca/ras/simulator/ras_sim_component.c @@ -32,7 +32,7 @@ /* * Local functions */ -static int ras_sim_open(void); +static int ras_sim_register(void); static int ras_sim_component_query(mca_base_module_t **module, int *priority); @@ -51,9 +51,10 @@ orte_ras_sim_component_t mca_ras_simulator_component = { ORTE_RELEASE_VERSION, /* Component open and close functions */ - ras_sim_open, NULL, - ras_sim_component_query + NULL, + ras_sim_component_query, + ras_sim_register }, { /* The component is checkpoint ready */ @@ -63,16 +64,16 @@ orte_ras_sim_component_t mca_ras_simulator_component = { }; -static int ras_sim_open(void) +static int ras_sim_register(void) { - 
mca_base_param_reg_int(&mca_ras_simulator_component.super.base_version, + mca_base_param_reg_string(&mca_ras_simulator_component.super.base_version, "slots", - "Number of slots on each node to simulate", - false, false, 1, &mca_ras_simulator_component.slots); - mca_base_param_reg_int(&mca_ras_simulator_component.super.base_version, + "Comma-separated list of number of slots on each node to simulate", + false, false, "1", &mca_ras_simulator_component.slots); + mca_base_param_reg_string(&mca_ras_simulator_component.super.base_version, "max_slots", - "Number of max slots on each node to simulate", - false, false, 0, &mca_ras_simulator_component.slots_max); + "Comma-separated list of number of max slots on each node to simulate", + false, false, "0", &mca_ras_simulator_component.slots_max); #if OPAL_HAVE_HWLOC { int tmp; diff --git a/orte/mca/ras/simulator/ras_sim_module.c b/orte/mca/ras/simulator/ras_sim_module.c index 62e4505c23..e72999470d 100644 --- a/orte/mca/ras/simulator/ras_sim_module.c +++ b/orte/mca/ras/simulator/ras_sim_module.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012 Los Alamos National Security, LLC. 
All rights reserved + * * $COPYRIGHT$ * * Additional copyrights may follow @@ -51,18 +53,32 @@ static int allocate(opal_list_t *nodes) bool use_local_topology = false; #endif char **node_cnt=NULL; + char **slot_cnt=NULL; + char **max_slot_cnt=NULL; + char *tmp; char prefix[6]; node_cnt = opal_argv_split(mca_ras_simulator_component.num_nodes, ','); + slot_cnt = opal_argv_split(mca_ras_simulator_component.slots, ','); + max_slot_cnt = opal_argv_split(mca_ras_simulator_component.slots_max, ','); + + /* backfill the slot_cnt as reqd so we don't have to + * specify slot_cnt for each set of nodes - we'll reuse + * the last value given for any remaining node groups */ + tmp = slot_cnt[opal_argv_count(slot_cnt)-1]; + for (n=opal_argv_count(slot_cnt); n < opal_argv_count(node_cnt); n++) { + opal_argv_append_nosize(&slot_cnt, tmp); + } + /* backfill the max_slot_cnt as reqd, again reusing the last value given */ + tmp = max_slot_cnt[opal_argv_count(max_slot_cnt)-1]; + for (n=opal_argv_count(max_slot_cnt); n < opal_argv_count(node_cnt); n++) { + opal_argv_append_nosize(&max_slot_cnt, tmp); + } #if OPAL_HAVE_HWLOC if (NULL == mca_ras_simulator_component.topofiles) { /* use our topology */ use_local_topology = true; - if (1 != opal_argv_count(node_cnt)) { - orte_show_help("help-ras-base.txt", "ras-sim:mismatch", true); - return ORTE_ERR_SILENT; - } } else { files = opal_argv_split(mca_ras_simulator_component.topofiles, ','); if (opal_argv_count(files) != opal_argv_count(node_cnt)) { @@ -153,11 +169,14 @@ static int allocate(opal_list_t *nodes) asprintf(&node->name, "%s%0*d", prefix, dig, i); node->state = ORTE_NODE_STATE_UP; node->slots_inuse = 0; - node->slots_max = mca_ras_simulator_component.slots_max; - node->slots = mca_ras_simulator_component.slots; + node->slots_max = (NULL == max_slot_cnt[n] ? 0 : atoi(max_slot_cnt[n])); + node->slots = (NULL == slot_cnt[n] ? 
0 : atoi(slot_cnt[n])); #if OPAL_HAVE_HWLOC node->topology = topo; #endif + opal_output_verbose(1, orte_ras_base.ras_output, + "Created Node <%10s> [%3d : %3d]", + node->name, node->slots, node->slots_max); opal_list_append(nodes, &node->super); } } diff --git a/orte/mca/ras/slurm/ras_slurm_component.c b/orte/mca/ras/slurm/ras_slurm_component.c index 4e0f0e24b9..2f2fae1250 100644 --- a/orte/mca/ras/slurm/ras_slurm_component.c +++ b/orte/mca/ras/slurm/ras_slurm_component.c @@ -85,7 +85,7 @@ static int orte_ras_slurm_component_query(mca_base_module_t **module, int *prior if (NULL != getenv("SLURM_JOBID")) { mca_base_param_lookup_int(param_priority, priority); - OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output, + OPAL_OUTPUT_VERBOSE((2, orte_ras_base.ras_output, "%s ras:slurm: available for selection", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); *module = (mca_base_module_t *) &orte_ras_slurm_module; @@ -94,7 +94,7 @@ static int orte_ras_slurm_component_query(mca_base_module_t **module, int *prior /* Sadly, no */ - OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output, + OPAL_OUTPUT_VERBOSE((2, orte_ras_base.ras_output, "%s ras:slurm: NOT available for selection", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); *module = NULL;