1
1

Merge pull request #1728 from rhc54/topic/sim

Enable simulation of large-scale clusters
Этот коммит содержится в:
rhc54 2016-05-29 21:29:16 -07:00
родитель a93c01d4f4 3913595e10
Коммит 876257469e
10 изменённых файлов: 84 добавлений и 10 удалений

Просмотреть файл

@ -245,6 +245,17 @@ int orte_ess_base_orted_setup(char **hosts)
error = "orte_session_dir define";
goto error;
}
/* if we have multiple daemons/node, then add our pid to the name */
if (NULL != (param = getenv("OMPI_MCA_ras_base_multiplier")) &&
1 < strtol(param, NULL, 10)) {
if (0 > asprintf(&param, "%s.%lu", orte_process_info.top_session_dir, (unsigned long)orte_process_info.pid)) {
ret = ORTE_ERR_OUT_OF_RESOURCE;
error = "create top session dir";
goto error;
}
free(orte_process_info.top_session_dir);
orte_process_info.top_session_dir = param;
}
/* clear the session directory just in case there are
* stale directories lying around
*/

Просмотреть файл

@ -1528,6 +1528,7 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata)
bool default_hostfile_used;
char *hosts;
bool singleton=false;
bool multi_sim = false;
OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
"%s plm:base:setup_vm",
@ -1617,7 +1618,8 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata)
* look across all jobs and ensure that the "VM" contains
* all nodes with application procs on them
*/
if (orte_get_attribute(&daemons->attributes, ORTE_JOB_NO_VM, NULL, OPAL_BOOL)) {
multi_sim = orte_get_attribute(&jdata->attributes, ORTE_JOB_MULTI_DAEMON_SIM, NULL, OPAL_BOOL);
if (orte_get_attribute(&daemons->attributes, ORTE_JOB_NO_VM, NULL, OPAL_BOOL) || multi_sim) {
OBJ_CONSTRUCT(&nodes, opal_list_t);
/* loop across all nodes and include those that have
* num_procs > 0 && no daemon already on them
@ -1645,7 +1647,7 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata)
/* not to be used */
continue;
}
if (0 < node->num_procs) {
if (0 < node->num_procs || multi_sim) {
/* retain a copy for our use in case the item gets
* destructed along the way
*/
@ -1653,6 +1655,9 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata)
opal_list_append(&nodes, &node->super);
}
}
if (multi_sim) {
goto process;
}
/* see if anybody had procs */
if (0 == opal_list_get_size(&nodes)) {
/* if the HNP has some procs, then we are still good */

Просмотреть файл

@ -11,6 +11,7 @@
* All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -49,6 +50,7 @@ typedef struct orte_ras_base_t {
bool allocation_read;
orte_ras_base_module_t *active_module;
int total_slots_alloc;
int multiplier;
} orte_ras_base_t;
ORTE_DECLSPEC extern orte_ras_base_t orte_ras_base;

Просмотреть файл

@ -50,6 +50,18 @@
*/
orte_ras_base_t orte_ras_base = {0};
/*
 * Registration hook for the ras base framework.
 *
 * Seeds orte_ras_base.multiplier with its default of 1 and then hands
 * that storage to the MCA variable system under the "multiplier" name,
 * so a larger cluster can be simulated by launching N daemons per node.
 *
 * flags is not consulted. The result of the registration call is not
 * inspected; the function always reports ORTE_SUCCESS.
 */
static int ras_register(mca_base_register_flag_t flags)
{
    int *multiplier = &orte_ras_base.multiplier;

    (void) flags;

    /* the default has to be in place before the storage is registered */
    *multiplier = 1;

    (void) mca_base_var_register("orte", "ras", "base", "multiplier",
                                 "Simulate a larger cluster by launching N daemons/node",
                                 MCA_BASE_VAR_TYPE_INT,
                                 NULL, 0, 0,
                                 OPAL_INFO_LVL_9,
                                 MCA_BASE_VAR_SCOPE_READONLY,
                                 multiplier);

    return ORTE_SUCCESS;
}
static int orte_ras_base_close(void)
{
/* Close selected component */
@ -76,5 +88,5 @@ static int orte_ras_base_open(mca_base_open_flag_t flags)
}
/* Declare the ras framework; ras_register is passed ahead of the
 * open/close callbacks so the "multiplier" variable gets registered.
 * NOTE(review): the superseded argument line (NULL in place of
 * ras_register) was dropped, as keeping both lines handed the macro
 * too many arguments. */
MCA_BASE_FRAMEWORK_DECLARE(orte, ras, "ORTE Resource Allocation Subsystem",
                           ras_register, orte_ras_base_open, orte_ras_base_close,
                           mca_ras_base_static_components, 0);

Просмотреть файл

@ -44,7 +44,7 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
opal_list_item_t* item;
orte_std_cntr_t num_nodes;
int rc, i;
orte_node_t *node, *hnp_node;
orte_node_t *node, *hnp_node, *nptr;
char *ptr;
bool hnp_alone = true;
orte_attribute_t *kv;
@ -61,10 +61,16 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(long)num_nodes));
/* mark the job as being a large-cluster sim if that was requested */
if (1 < orte_ras_base.multiplier) {
orte_set_attribute(&jdata->attributes, ORTE_JOB_MULTI_DAEMON_SIM,
ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
}
/* set the size of the global array - this helps minimize time
* spent doing realloc's
*/
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_size(orte_node_pool, num_nodes))) {
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_size(orte_node_pool, num_nodes * orte_ras_base.multiplier))) {
ORTE_ERROR_LOG(rc);
return rc;
}
@ -139,6 +145,12 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
}
/* don't keep duplicate copy */
OBJ_RELEASE(node);
/* create copies, if required */
for (i=1; i < orte_ras_base.multiplier; i++) {
opal_dss.copy((void**)&node, hnp_node, ORTE_NODE);
ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED);
node->index = opal_pointer_array_add(orte_node_pool, node);
}
} else {
/* insert the object onto the orte_nodes global array */
OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
@ -166,6 +178,10 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
}
/* indicate the HNP is not alone */
hnp_alone = false;
for (i=1; i < orte_ras_base.multiplier; i++) {
opal_dss.copy((void**)&nptr, node, ORTE_NODE);
nptr->index = opal_pointer_array_add(orte_node_pool, nptr);
}
}
}

Просмотреть файл

@ -941,8 +941,9 @@ static int setup_fork(orte_job_t *jdata,
opal_setenv("OMPI_COMM_WORLD_LOCAL_SIZE", param, true, &app->env);
free(param);
/* forcibly set the local tmpdir base to match ours */
/* forcibly set the local tmpdir base and top session dir to match ours */
opal_setenv("OMPI_MCA_orte_tmpdir_base", orte_process_info.tmpdir_base, true, &app->env);
opal_setenv("OMPI_MCA_orte_top_session_dir", orte_process_info.top_session_dir, true, &app->env);
/* MPI-3 requires we provide some further info to the procs,
* so we pass them as envars to avoid introducing further

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -66,8 +66,18 @@ int orte_dt_copy_job(orte_job_t **dest, orte_job_t *src, opal_data_type_t type)
*/
/*
 * Build a separate node object from src and store it in *dest.
 *
 * The new object receives a duplicate of the name string plus the
 * state, slot counts, topology pointer and flags of src. The topology
 * pointer is copied as-is, so both nodes refer to the same topology.
 * No other fields of src are carried over.
 *
 * dest  - receives the newly created node
 * src   - node to copy from; it is neither modified nor retained
 * type  - not consulted
 *
 * Always returns ORTE_SUCCESS.
 */
int orte_dt_copy_node(orte_node_t **dest, orte_node_t *src, opal_data_type_t type)
{
    orte_node_t *node;

    (void) type;

    /* src is deliberately not aliased or retained here: doing so and then
     * handing back a fresh object would leave src with a reference that
     * nobody releases */
    node = OBJ_NEW(orte_node_t);
    /* NOTE(review): the result of OBJ_NEW is not checked, as before */

    /* strdup() must not be handed a NULL pointer */
    node->name = (NULL != src->name) ? strdup(src->name) : NULL;
    node->state = src->state;
    node->slots = src->slots;
    node->slots_inuse = src->slots_inuse;
    node->slots_max = src->slots_max;
    node->topology = src->topology;
    node->flags = src->flags;

    (*dest) = node;
    return ORTE_SUCCESS;
}

Просмотреть файл

@ -50,6 +50,7 @@ static char *orte_fork_agent_string = NULL;
static char *orte_tmpdir_base = NULL;
static char *orte_local_tmpdir_base = NULL;
static char *orte_remote_tmpdir_base = NULL;
static char *orte_top_session_dir = NULL;
int orte_register_params(void)
{
@ -150,6 +151,20 @@ int orte_register_params(void)
orte_process_info.tmpdir_base = strdup (orte_remote_tmpdir_base);
}
orte_top_session_dir = NULL;
(void) mca_base_var_register ("orte", "orte", NULL, "top_session_dir",
"Top of the session directory tree for applications",
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ,
&orte_top_session_dir);
if (NULL != orte_top_session_dir) {
if (NULL != orte_process_info.top_session_dir) {
free(orte_process_info.top_session_dir);
}
orte_process_info.top_session_dir = strdup(orte_top_session_dir);
}
orte_prohibited_session_dirs = NULL;
(void) mca_base_var_register ("orte", "orte", NULL, "no_session_dirs",
"Prohibited locations for session directories (multiple locations separated by ',', default=NULL)",

Просмотреть файл

@ -290,6 +290,7 @@ int orterun(int argc, char *argv[])
DONE:
/* cleanup and leave */
orte_submit_finalize();
orte_finalize();
if (orte_debug_flag) {
fprintf(stderr, "exiting with status %d\n", orte_exit_status);

Просмотреть файл

@ -138,6 +138,7 @@ typedef uint16_t orte_job_flags_t;
#define ORTE_JOB_MERGE_STDERR_STDOUT (ORTE_JOB_START_KEY + 46) // bool - merge stderr into stdout stream
#define ORTE_JOB_TAG_OUTPUT (ORTE_JOB_START_KEY + 47) // bool - tag stdout/stderr
#define ORTE_JOB_TIMESTAMP_OUTPUT (ORTE_JOB_START_KEY + 48) // bool - timestamp stdout/stderr
#define ORTE_JOB_MULTI_DAEMON_SIM (ORTE_JOB_START_KEY + 49) // bool - multiple daemons/node to simulate large cluster
#define ORTE_JOB_MAX_KEY 300