Includes a patch from Jeff and Josh: update the simulator module to allow specifying a comma-separated list of slot and max_slot counts, one per node group (but don't require it). Remove the requirement that each node group provide its own topology. Adjust verbosity levels so that light debug output shows which nodes have been added without a flood of other messages.
This commit was SVN r26936.
Этот коммит содержится в:
родитель
62c2ff7ee7
Коммит
d818c9d407
@ -179,7 +179,7 @@ orte_ras_alps_component_query(mca_base_module_t **module,
|
||||
}
|
||||
if (0 != orte_ras_alps_res_id) {
|
||||
mca_base_param_lookup_int(param_priority, priority);
|
||||
opal_output_verbose(1, orte_ras_base.ras_output,
|
||||
opal_output_verbose(2, orte_ras_base.ras_output,
|
||||
"ras:alps: available for selection");
|
||||
*module = (mca_base_module_t *) &orte_ras_alps_module;
|
||||
return ORTE_SUCCESS;
|
||||
@ -198,7 +198,7 @@ int
|
||||
orte_ras_alps_get_appinfo_attempts(int *attempts)
|
||||
{
|
||||
mca_base_param_lookup_int(param_read_attempts, attempts);
|
||||
opal_output_verbose(1, orte_ras_base.ras_output,
|
||||
opal_output_verbose(2, orte_ras_base.ras_output,
|
||||
"ras:alps:orte_ras_alps_get_appinfo_attempts: %d",
|
||||
*attempts);
|
||||
return ORTE_SUCCESS;
|
||||
|
@ -416,7 +416,7 @@ void orte_ras_base_allocate(int fd, short args, void *cbdata)
|
||||
|
||||
DISPLAY:
|
||||
/* shall we display the results? */
|
||||
if (0 < opal_output_get_verbosity(orte_ras_base.ras_output) || orte_ras_base.display_alloc) {
|
||||
if (4 < opal_output_get_verbosity(orte_ras_base.ras_output) || orte_ras_base.display_alloc) {
|
||||
display_alloc();
|
||||
}
|
||||
|
||||
|
@ -101,13 +101,13 @@ static int orte_ras_gridengine_component_query(mca_base_module_t **module, int *
|
||||
|
||||
if (NULL != getenv("SGE_ROOT") && NULL != getenv("ARC") &&
|
||||
NULL != getenv("PE_HOSTFILE") && NULL != getenv("JOB_ID")) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ras_base.ras_output,
|
||||
"%s ras:gridengine: available for selection",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
*module = (mca_base_module_t *) &orte_ras_gridengine_module;
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ras_base.ras_output,
|
||||
"%s ras:gridengine: NOT available for selection",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
*module = NULL;
|
||||
|
@ -82,7 +82,7 @@ static int orte_ras_loadleveler_component_query(mca_base_module_t **module, int
|
||||
/* Are we running under a LOADLEVELER job? */
|
||||
if (NULL != getenv("LOADL_STEP_ID")) {
|
||||
mca_base_param_lookup_int(param_priority, priority);
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ras_base.ras_output,
|
||||
"%s ras:loadleveler: available for selection",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
*module = (mca_base_module_t *) &orte_ras_loadleveler_module;
|
||||
@ -90,7 +90,7 @@ static int orte_ras_loadleveler_component_query(mca_base_module_t **module, int
|
||||
}
|
||||
|
||||
/* Sadly, no */
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ras_base.ras_output,
|
||||
"%s ras:loadleveler: NOT available for selection",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
*module = NULL;
|
||||
|
@ -19,8 +19,8 @@ BEGIN_C_DECLS
|
||||
struct orte_ras_sim_component_t {
|
||||
orte_ras_base_component_t super;
|
||||
char *num_nodes;
|
||||
int slots;
|
||||
int slots_max;
|
||||
char * slots;
|
||||
char * slots_max;
|
||||
char *topofiles;
|
||||
bool have_cpubind;
|
||||
bool have_membind;
|
||||
|
@ -32,7 +32,7 @@
|
||||
/*
|
||||
* Local functions
|
||||
*/
|
||||
static int ras_sim_open(void);
|
||||
static int ras_sim_register(void);
|
||||
static int ras_sim_component_query(mca_base_module_t **module, int *priority);
|
||||
|
||||
|
||||
@ -51,9 +51,10 @@ orte_ras_sim_component_t mca_ras_simulator_component = {
|
||||
ORTE_RELEASE_VERSION,
|
||||
|
||||
/* Component open and close functions */
|
||||
ras_sim_open,
|
||||
NULL,
|
||||
ras_sim_component_query
|
||||
NULL,
|
||||
ras_sim_component_query,
|
||||
ras_sim_register
|
||||
},
|
||||
{
|
||||
/* The component is checkpoint ready */
|
||||
@ -63,16 +64,16 @@ orte_ras_sim_component_t mca_ras_simulator_component = {
|
||||
};
|
||||
|
||||
|
||||
static int ras_sim_open(void)
|
||||
static int ras_sim_register(void)
|
||||
{
|
||||
mca_base_param_reg_int(&mca_ras_simulator_component.super.base_version,
|
||||
mca_base_param_reg_string(&mca_ras_simulator_component.super.base_version,
|
||||
"slots",
|
||||
"Number of slots on each node to simulate",
|
||||
false, false, 1, &mca_ras_simulator_component.slots);
|
||||
mca_base_param_reg_int(&mca_ras_simulator_component.super.base_version,
|
||||
"Comma-separated list of number of slots on each node to simulate",
|
||||
false, false, "1", &mca_ras_simulator_component.slots);
|
||||
mca_base_param_reg_string(&mca_ras_simulator_component.super.base_version,
|
||||
"max_slots",
|
||||
"Number of max slots on each node to simulate",
|
||||
false, false, 0, &mca_ras_simulator_component.slots_max);
|
||||
"Comma-separated list of number of max slots on each node to simulate",
|
||||
false, false, "0", &mca_ras_simulator_component.slots_max);
|
||||
#if OPAL_HAVE_HWLOC
|
||||
{
|
||||
int tmp;
|
||||
|
@ -1,5 +1,7 @@
|
||||
/*
|
||||
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -51,18 +53,32 @@ static int allocate(opal_list_t *nodes)
|
||||
bool use_local_topology = false;
|
||||
#endif
|
||||
char **node_cnt=NULL;
|
||||
char **slot_cnt=NULL;
|
||||
char **max_slot_cnt=NULL;
|
||||
char *tmp;
|
||||
char prefix[6];
|
||||
|
||||
node_cnt = opal_argv_split(mca_ras_simulator_component.num_nodes, ',');
|
||||
slot_cnt = opal_argv_split(mca_ras_simulator_component.slots, ',');
|
||||
max_slot_cnt = opal_argv_split(mca_ras_simulator_component.slots_max, ',');
|
||||
|
||||
/* backfill the slot_cnt as reqd so we don't have to
|
||||
* specify slot_cnt for each set of nodes - we'll reuse
|
||||
* the last specified value for the remaining node groups */
|
||||
tmp = slot_cnt[opal_argv_count(slot_cnt)-1];
|
||||
for (n=opal_argv_count(slot_cnt); n < opal_argv_count(node_cnt); n++) {
|
||||
opal_argv_append_nosize(&slot_cnt, tmp);
|
||||
}
|
||||
/* backfill the max_slot_cnt as reqd */
|
||||
tmp = max_slot_cnt[opal_argv_count(slot_cnt)-1];
|
||||
for (n=opal_argv_count(max_slot_cnt); n < opal_argv_count(max_slot_cnt); n++) {
|
||||
opal_argv_append_nosize(&max_slot_cnt, tmp);
|
||||
}
|
||||
|
||||
#if OPAL_HAVE_HWLOC
|
||||
if (NULL == mca_ras_simulator_component.topofiles) {
|
||||
/* use our topology */
|
||||
use_local_topology = true;
|
||||
if (1 != opal_argv_count(node_cnt)) {
|
||||
orte_show_help("help-ras-base.txt", "ras-sim:mismatch", true);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
} else {
|
||||
files = opal_argv_split(mca_ras_simulator_component.topofiles, ',');
|
||||
if (opal_argv_count(files) != opal_argv_count(node_cnt)) {
|
||||
@ -153,11 +169,14 @@ static int allocate(opal_list_t *nodes)
|
||||
asprintf(&node->name, "%s%0*d", prefix, dig, i);
|
||||
node->state = ORTE_NODE_STATE_UP;
|
||||
node->slots_inuse = 0;
|
||||
node->slots_max = mca_ras_simulator_component.slots_max;
|
||||
node->slots = mca_ras_simulator_component.slots;
|
||||
node->slots_max = (NULL == max_slot_cnt[n] ? 0 : atoi(max_slot_cnt[n]));
|
||||
node->slots = (NULL == slot_cnt[n] ? 0 : atoi(slot_cnt[n]));
|
||||
#if OPAL_HAVE_HWLOC
|
||||
node->topology = topo;
|
||||
#endif
|
||||
opal_output_verbose(1, orte_ras_base.ras_output,
|
||||
"Created Node <%10s> [%3d : %3d]",
|
||||
node->name, node->slots, node->slots_max);
|
||||
opal_list_append(nodes, &node->super);
|
||||
}
|
||||
}
|
||||
|
@ -85,7 +85,7 @@ static int orte_ras_slurm_component_query(mca_base_module_t **module, int *prior
|
||||
|
||||
if (NULL != getenv("SLURM_JOBID")) {
|
||||
mca_base_param_lookup_int(param_priority, priority);
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ras_base.ras_output,
|
||||
"%s ras:slurm: available for selection",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
*module = (mca_base_module_t *) &orte_ras_slurm_module;
|
||||
@ -94,7 +94,7 @@ static int orte_ras_slurm_component_query(mca_base_module_t **module, int *prior
|
||||
|
||||
/* Sadly, no */
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_ras_base.ras_output,
|
||||
"%s ras:slurm: NOT available for selection",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
*module = NULL;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user