
Allow partial use of allocations by specifying the max number of daemons (i.e., max VM size) for the job

This commit was SVN r26499.
This commit is contained in:
Ralph Castain 2012-05-27 16:48:19 +00:00
parent c69a04e16b
commit be6ed9c2df
6 changed files with 43 additions and 9 deletions

View file

@@ -1006,6 +1006,7 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata)
     opal_list_item_t *item, *next;
     orte_app_context_t *app;
     bool one_filter = false;
+    int num_nodes;

     OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
                          "%s plm:base:setup_vm",
@@ -1146,17 +1147,34 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata)
     /* cycle thru all available nodes and find those that do not already
      * have a daemon on them - no need to include our own as we are
-     * obviously already here!
+     * obviously already here! If a max vm size was given, then limit
+     * the overall number of active nodes to the given number. Only
+     * count the HNP's node if it was included in the allocation
      */
+    if (orte_hnp_is_allocated) {
+        num_nodes = 1;
+    } else {
+        num_nodes = 0;
+    }
     while (NULL != (item = opal_list_remove_first(&nodes))) {
+        /* if a max size was given and we are there, then exit the loop */
+        if (0 < orte_max_vm_size && num_nodes == orte_max_vm_size) {
+            /* maintain accounting */
+            OBJ_RELEASE(item);
+            break;
+        }
         node = (orte_node_t*)item;
         /* if this node is already in the map, skip it */
         if (NULL != node->daemon) {
+            num_nodes++;
+            /* maintain accounting */
+            OBJ_RELEASE(item);
             continue;
         }
         /* add the node to the map */
         opal_pointer_array_add(map->nodes, (void*)node);
         ++(map->num_nodes);
+        num_nodes++;
         /* create a new daemon object for this node */
         proc = OBJ_NEW(orte_proc_t);
         if (NULL == proc) {
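
For context only, here is a minimal standalone sketch of the capping pattern this hunk introduces, assuming a made-up node list and limit (plain C, not the actual ORTE data structures): nodes that already host a daemon are counted against the limit, and the walk stops once the requested VM size is reached.

#include <stdbool.h>
#include <stdio.h>

struct node {
    const char *name;
    bool has_daemon;    /* node already hosts a daemon (e.g. the HNP's node) */
};

int main(void)
{
    /* hypothetical allocation of four nodes; n0 stands in for the HNP's node */
    struct node nodes[] = {
        { "n0", true  },
        { "n1", false },
        { "n2", false },
        { "n3", false },
    };
    const int max_vm_size = 3;   /* made-up limit; <= 0 would mean "no limit" */
    int num_nodes = 0;           /* nodes counted into the VM so far */
    size_t i;

    for (i = 0; i < sizeof(nodes) / sizeof(nodes[0]); i++) {
        /* if a max size was given and we are there, stop adding nodes */
        if (0 < max_vm_size && num_nodes == max_vm_size) {
            break;
        }
        if (nodes[i].has_daemon) {
            /* already part of the VM - count it, but launch nothing new */
            num_nodes++;
            continue;
        }
        /* the real code adds the node to the launch map and creates its daemon here */
        num_nodes++;
        printf("launch daemon on %s\n", nodes[i].name);
    }
    printf("resulting VM size: %d nodes\n", num_nodes);
    return 0;
}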

View file

@@ -187,6 +187,9 @@ bool orte_map_reduce = false;
 /* map stddiag output to stderr so it isn't forwarded to mpirun */
 bool orte_map_stddiag_to_stderr = false;
+
+/* maximum size of virtual machine - used to subdivide allocation */
+int orte_max_vm_size = -1;
 /* progress thread */
 #if ORTE_ENABLE_PROGRESS_THREADS
 opal_thread_t orte_progress_thread;

View file

@@ -714,6 +714,9 @@ ORTE_DECLSPEC extern bool orte_map_reduce;
 /* map stddiag output to stderr so it isn't forwarded to mpirun */
 ORTE_DECLSPEC extern bool orte_map_stddiag_to_stderr;
+
+/* maximum size of virtual machine - used to subdivide allocation */
+ORTE_DECLSPEC extern int orte_max_vm_size;
 #endif /* ORTE_DISABLE_FULL_SUPPORT */
 END_C_DECLS

View file

@@ -543,6 +543,10 @@ int orte_register_params(void)
                                 false, false, (int)false, &value);
     orte_preload_binaries = OPAL_INT_TO_BOOL(value);
+
+    mca_base_param_reg_int_name("orte", "max_vm_size",
+                                "Maximum size of virtual machine - used to subdivide allocation",
+                                false, false, -1, &orte_max_vm_size);
 #endif /* ORTE_DISABLE_FULL_SUPPORT */
     return ORTE_SUCCESS;

View file

@@ -256,6 +256,11 @@ static opal_cmd_line_init_t cmd_line_init[] = {
       &orterun_globals.num_procs, OPAL_CMD_LINE_TYPE_INT,
       "Number of processes to run" },
+
+    /* maximum size of VM - typically used to subdivide an allocation */
+    { "orte", "max", "vm_size", '\0', "max-vm-size", "max-vm-size", 1,
+      NULL, OPAL_CMD_LINE_TYPE_INT,
+      "Maximum size of the virtual machine - typically used to subdivide an allocation" },
     /* Set a hostfile */
     { NULL, NULL, NULL, '\0', "hostfile", "hostfile", 1,
       NULL, OPAL_CMD_LINE_TYPE_STRING,
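
Assuming the usual mpirun option handling, the limit can then presumably be requested either with the new command-line flag (e.g. `mpirun --max-vm-size 2 ...`) or through the MCA parameter registered above (e.g. `mpirun --mca orte_max_vm_size 2 ...`); the default of -1 leaves the virtual machine spanning the full allocation.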

View file

@@ -529,7 +529,7 @@ int orte_util_decode_nodemap(opal_byte_object_t *bo)
 int orte_util_decode_daemon_nodemap(opal_byte_object_t *bo)
 {
     int n;
-    int32_t num_nodes, i, num_daemons;
+    int32_t num_nodes, i;
     orte_vpid_t *vpids;
     orte_node_t *node;
     opal_buffer_t buf;
@@ -604,9 +604,12 @@ int orte_util_decode_daemon_nodemap(opal_byte_object_t *bo)
     /* transfer the data to the nodes, counting the number of
      * daemons in the system
      */
-    num_daemons = 0;
     daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
     for (i=0; i < num_nodes; i++) {
+        if (ORTE_VPID_INVALID == vpids[i]) {
+            /* no daemon on this node */
+            continue;
+        }
         if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
             /* this is an error */
             ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
@@ -618,6 +621,7 @@ int orte_util_decode_daemon_nodemap(opal_byte_object_t *bo)
             dptr->name.jobid = ORTE_PROC_MY_NAME->jobid;
             dptr->name.vpid = vpids[i];
             opal_pointer_array_set_item(daemons->procs, vpids[i], dptr);
+            daemons->num_procs++;
         }
         if (NULL != node->daemon) {
             OBJ_RELEASE(node->daemon);
@@ -634,21 +638,18 @@ int orte_util_decode_daemon_nodemap(opal_byte_object_t *bo)
         } else {
             node->oversubscribed = true;
         }
-        if (ORTE_VPID_INVALID != vpids[i]) {
-            ++num_daemons;
-        }
     }
     free(vpids);
     free(oversub);
-    orte_process_info.num_procs = num_daemons;
+    orte_process_info.num_procs = daemons->num_procs;
     if (orte_process_info.max_procs < orte_process_info.num_procs) {
         orte_process_info.max_procs = orte_process_info.num_procs;
     }
     /* update num_daemons */
-    orte_process_info.num_daemons = num_daemons;
+    orte_process_info.num_daemons = daemons->num_procs;
     if (0 < opal_output_get_verbosity(orte_debug_output)) {
         for (i=0; i < num_nodes; i++) {
@@ -658,7 +659,7 @@ int orte_util_decode_daemon_nodemap(opal_byte_object_t *bo)
             opal_output(5, "%s node[%d].name %s daemon %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), i,
                         (NULL == node->name) ? "NULL" : node->name,
-                        ORTE_VPID_PRINT(node->daemon->name.vpid));
+                        (NULL == node->daemon) ? "NONE" : ORTE_VPID_PRINT(node->daemon->name.vpid));
         }
     }
} }