From be6ed9c2df0370d3fa51816314d40a331926fad0 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Sun, 27 May 2012 16:48:19 +0000 Subject: [PATCH] Allow partial use of allocations by specifying the max number of daemons (i.e., max VM size) for the job This commit was SVN r26499. --- orte/mca/plm/base/plm_base_launch_support.c | 20 +++++++++++++++++++- orte/runtime/orte_globals.c | 3 +++ orte/runtime/orte_globals.h | 3 +++ orte/runtime/orte_mca_params.c | 4 ++++ orte/tools/orterun/orterun.c | 5 +++++ orte/util/nidmap.c | 17 +++++++++-------- 6 files changed, 43 insertions(+), 9 deletions(-) diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index e86939a9bf..24a0826b21 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -1006,6 +1006,7 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata) opal_list_item_t *item, *next; orte_app_context_t *app; bool one_filter = false; + int num_nodes; OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output, "%s plm:base:setup_vm", @@ -1146,17 +1147,34 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata) /* cycle thru all available nodes and find those that do not already * have a daemon on them - no need to include our own as we are - * obviously already here! + * obviously already here! If a max vm size was given, then limit + * the overall number of active nodes to the given number. 
Only + * count the HNP's node if it was included in the allocation */ + if (orte_hnp_is_allocated) { + num_nodes = 1; + } else { + num_nodes = 0; + } while (NULL != (item = opal_list_remove_first(&nodes))) { + /* if a max size was given and we are there, then exit the loop */ + if (0 < orte_max_vm_size && num_nodes == orte_max_vm_size) { + /* maintain accounting */ + OBJ_RELEASE(item); + break; + } node = (orte_node_t*)item; /* if this node is already in the map, skip it */ if (NULL != node->daemon) { + num_nodes++; + /* maintain accounting */ + OBJ_RELEASE(item); continue; } /* add the node to the map */ opal_pointer_array_add(map->nodes, (void*)node); ++(map->num_nodes); + num_nodes++; /* create a new daemon object for this node */ proc = OBJ_NEW(orte_proc_t); if (NULL == proc) { diff --git a/orte/runtime/orte_globals.c b/orte/runtime/orte_globals.c index e5d8d4e68d..c6c57d1b76 100644 --- a/orte/runtime/orte_globals.c +++ b/orte/runtime/orte_globals.c @@ -187,6 +187,9 @@ bool orte_map_reduce = false; /* map stddiag output to stderr so it isn't forwarded to mpirun */ bool orte_map_stddiag_to_stderr = false; +/* maximum size of virtual machine - used to subdivide allocation */ +int orte_max_vm_size = -1; + /* progress thread */ #if ORTE_ENABLE_PROGRESS_THREADS opal_thread_t orte_progress_thread; diff --git a/orte/runtime/orte_globals.h b/orte/runtime/orte_globals.h index aa6d43fef4..7adfddccd8 100644 --- a/orte/runtime/orte_globals.h +++ b/orte/runtime/orte_globals.h @@ -714,6 +714,9 @@ ORTE_DECLSPEC extern bool orte_map_reduce; /* map stddiag output to stderr so it isn't forwarded to mpirun */ ORTE_DECLSPEC extern bool orte_map_stddiag_to_stderr; +/* maximum size of virtual machine - used to subdivide allocation */ +ORTE_DECLSPEC extern int orte_max_vm_size; + #endif /* ORTE_DISABLE_FULL_SUPPORT */ END_C_DECLS diff --git a/orte/runtime/orte_mca_params.c b/orte/runtime/orte_mca_params.c index 812f40f21b..d69a8c49ad 100644 --- a/orte/runtime/orte_mca_params.c +++ 
b/orte/runtime/orte_mca_params.c @@ -543,6 +543,10 @@ int orte_register_params(void) false, false, (int)false, &value); orte_preload_binaries = OPAL_INT_TO_BOOL(value); + mca_base_param_reg_int_name("orte", "max_vm_size", + "Maximum size of virtual machine - used to subdivide allocation", + false, false, -1, &orte_max_vm_size); + #endif /* ORTE_DISABLE_FULL_SUPPORT */ return ORTE_SUCCESS; diff --git a/orte/tools/orterun/orterun.c b/orte/tools/orterun/orterun.c index 672724e554..3f41d010f3 100644 --- a/orte/tools/orterun/orterun.c +++ b/orte/tools/orterun/orterun.c @@ -256,6 +256,11 @@ static opal_cmd_line_init_t cmd_line_init[] = { &orterun_globals.num_procs, OPAL_CMD_LINE_TYPE_INT, "Number of processes to run" }, + /* maximum size of VM - typically used to subdivide an allocation */ + { "orte", "max", "vm_size", '\0', "max-vm-size", "max-vm-size", 1, + NULL, OPAL_CMD_LINE_TYPE_INT, + "Maximum size of virtual machine - used to subdivide allocation" }, + /* Set a hostfile */ { NULL, NULL, NULL, '\0', "hostfile", "hostfile", 1, NULL, OPAL_CMD_LINE_TYPE_STRING, diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c index 203406a1ae..6cfbcc4ec1 100644 --- a/orte/util/nidmap.c +++ b/orte/util/nidmap.c @@ -529,7 +529,7 @@ int orte_util_decode_nodemap(opal_byte_object_t *bo) int orte_util_decode_daemon_nodemap(opal_byte_object_t *bo) { int n; - int32_t num_nodes, i, num_daemons; + int32_t num_nodes, i; orte_vpid_t *vpids; orte_node_t *node; opal_buffer_t buf; @@ -604,9 +604,12 @@ int orte_util_decode_daemon_nodemap(opal_byte_object_t *bo) /* transfer the data to the nodes, counting the number of * daemons in the system */ - num_daemons = 0; daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); for (i=0; i < num_nodes; i++) { + if (ORTE_VPID_INVALID == vpids[i]) { + /* no daemon on this node */ + continue; + } if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { /* this is an error */ ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); @@ -618,6 +621,7 @@ int 
orte_util_decode_daemon_nodemap(opal_byte_object_t *bo) dptr->name.jobid = ORTE_PROC_MY_NAME->jobid; dptr->name.vpid = vpids[i]; opal_pointer_array_set_item(daemons->procs, vpids[i], dptr); + daemons->num_procs++; } if (NULL != node->daemon) { OBJ_RELEASE(node->daemon); @@ -634,21 +638,18 @@ int orte_util_decode_daemon_nodemap(opal_byte_object_t *bo) } else { node->oversubscribed = true; } - if (ORTE_VPID_INVALID != vpids[i]) { - ++num_daemons; - } } free(vpids); free(oversub); - orte_process_info.num_procs = num_daemons; + orte_process_info.num_procs = daemons->num_procs; if (orte_process_info.max_procs < orte_process_info.num_procs) { orte_process_info.max_procs = orte_process_info.num_procs; } /* update num_daemons */ - orte_process_info.num_daemons = num_daemons; + orte_process_info.num_daemons = daemons->num_procs; if (0 < opal_output_get_verbosity(orte_debug_output)) { for (i=0; i < num_nodes; i++) { @@ -658,7 +659,7 @@ int orte_util_decode_daemon_nodemap(opal_byte_object_t *bo) opal_output(5, "%s node[%d].name %s daemon %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), i, (NULL == node->name) ? "NULL" : node->name, - ORTE_VPID_PRINT(node->daemon->name.vpid)); + (NULL == node->daemon) ? "NONE" : ORTE_VPID_PRINT(node->daemon->name.vpid)); } }