Reduce the memory usage of the GPR:
- Make all the GPR pointer arrays allocate 16 elements initially instead of 512. This saves (on a 64-bit machine) approximately 4*(# procs + # nodes) KB.
- Fix up the segment prealloc function so that preallocating an existing segment is not an error, and make the areas where we do large inserts use it.
- Fix the orte_pointer_array to implement setting its size efficiently. Previously we realloc'ed the array one block at a time until the desired size was reached; now we resize it with a single realloc.

This commit was SVN r14264.
Этот коммит содержится в:
родитель
13a4bba13f
Коммит
2ffc02870d
@ -29,7 +29,7 @@
|
||||
|
||||
static void orte_pointer_array_construct(orte_pointer_array_t *);
|
||||
static void orte_pointer_array_destruct(orte_pointer_array_t *);
|
||||
static bool grow_table(orte_pointer_array_t *table);
|
||||
static bool grow_table(orte_pointer_array_t *table, orte_std_cntr_t num_needed);
|
||||
|
||||
OBJ_CLASS_INSTANCE(
|
||||
orte_pointer_array_t,
|
||||
@ -138,7 +138,7 @@ int orte_pointer_array_add(orte_std_cntr_t *location, orte_pointer_array_t *tabl
|
||||
|
||||
/* need to grow table */
|
||||
|
||||
if (!grow_table(table)) {
|
||||
if (!grow_table(table, 1)) {
|
||||
OPAL_THREAD_UNLOCK(&(table->lock));
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
@ -211,7 +211,7 @@ int orte_pointer_array_set_item(orte_pointer_array_t *table, orte_std_cntr_t ele
|
||||
|
||||
OPAL_THREAD_LOCK(&(table->lock));
|
||||
if (table->size <= element_index) {
|
||||
if (!grow_table(table)) {
|
||||
if (!grow_table(table, 1)) {
|
||||
OPAL_THREAD_UNLOCK(&(table->lock));
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
@ -321,7 +321,7 @@ bool orte_pointer_array_test_and_set_item (orte_pointer_array_t *table,
|
||||
/* Do we need to grow the table? */
|
||||
|
||||
if (table->size <= element_index) {
|
||||
if (!grow_table(table)) {
|
||||
if (!grow_table(table, element_index + 1 - table->size)) {
|
||||
OPAL_THREAD_UNLOCK(&(table->lock));
|
||||
return false;
|
||||
}
|
||||
@ -361,8 +361,8 @@ bool orte_pointer_array_test_and_set_item (orte_pointer_array_t *table,
|
||||
int orte_pointer_array_set_size(orte_pointer_array_t *array, orte_std_cntr_t new_size)
|
||||
{
|
||||
OPAL_THREAD_LOCK(&(array->lock));
|
||||
while (new_size > orte_pointer_array_get_size(array)) {
|
||||
if (!grow_table(array)) {
|
||||
if(new_size > array->size) {
|
||||
if (!grow_table(array, new_size - array->size)) {
|
||||
OPAL_THREAD_UNLOCK(&(array->lock));
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
@ -372,7 +372,7 @@ int orte_pointer_array_set_size(orte_pointer_array_t *array, orte_std_cntr_t new
|
||||
}
|
||||
|
||||
|
||||
static bool grow_table(orte_pointer_array_t *table)
|
||||
static bool grow_table(orte_pointer_array_t *table, orte_std_cntr_t num_needed)
|
||||
{
|
||||
orte_std_cntr_t new_size, i;
|
||||
void *p;
|
||||
@ -381,14 +381,15 @@ static bool grow_table(orte_pointer_array_t *table)
|
||||
* specified maximum
|
||||
*/
|
||||
|
||||
if (table->size >= table->max_size) {
|
||||
if (table->size + num_needed > table->max_size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (table->block_size > (table->max_size - table->size)) { /* not enough space for a full block */
|
||||
new_size = ((table->size + num_needed + table->block_size - 1) /
|
||||
table->block_size) * table->block_size;
|
||||
|
||||
if (new_size > table->max_size) {
|
||||
new_size = table->max_size;
|
||||
} else {
|
||||
new_size = table->size + table->block_size;
|
||||
}
|
||||
|
||||
p = (void **) realloc(table->addr, new_size * sizeof(void *));
|
||||
|
@ -42,7 +42,7 @@
|
||||
value is used to set an MCA parameter, which can [currently] only
|
||||
take an int. */
|
||||
#define ORTE_GPR_ARRAY_MAX_SIZE INT_MAX
|
||||
#define ORTE_GPR_ARRAY_BLOCK_SIZE 512
|
||||
#define ORTE_GPR_ARRAY_BLOCK_SIZE 16
|
||||
|
||||
|
||||
|
||||
|
@ -163,12 +163,16 @@ typedef int (*orte_gpr_base_module_cleanup_job_fn_t)(orte_jobid_t jobid);
|
||||
typedef int (*orte_gpr_base_module_cleanup_proc_fn_t)(orte_process_name_t *proc);
|
||||
|
||||
/*
|
||||
* Define and initialize a job segment
|
||||
* The registry contains a segment for each job that stores data on each
|
||||
* process within that job. Although the registry can create this segment
|
||||
* "on-the-fly", it is more efficient to initialize the segment via a separate
|
||||
* command - thus allowing the registry to allocate the base storage for all
|
||||
* the processes in a single malloc.
|
||||
* Define and initialize a segment
|
||||
* The registry contains segments which store containers of data.
|
||||
* Although the registry can create segments "on-the-fly", it is often
|
||||
* more efficient to initialize the segment via a separate command - thus
|
||||
* allowing the registry to allocate the base storage for all the
|
||||
* containers in a single malloc.
|
||||
*
|
||||
* Note that if the given segment already exists, this function simply
|
||||
* makes sure it has enough space to store at least the passed number
|
||||
* of containers.
|
||||
*
|
||||
* @param name A character string indicating the name of the segment.
|
||||
* @param num_slots The number of containers expected in this segment. This
|
||||
|
@ -49,15 +49,9 @@ int orte_gpr_replica_preallocate_segment(char *name, orte_std_cntr_t num_slots)
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (0 < (seg->containers)->size) { /* segment already exists! */
|
||||
return ORTE_ERR_BAD_PARAM;
|
||||
}
|
||||
rc = orte_pointer_array_set_size(seg->containers, num_slots);
|
||||
|
||||
rc = orte_pointer_array_init(&(seg->containers), num_slots,
|
||||
(orte_std_cntr_t)orte_gpr_array_max_size,
|
||||
(orte_std_cntr_t)orte_gpr_array_block_size);
|
||||
OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex);
|
||||
|
||||
OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex);
|
||||
|
||||
return rc;
|
||||
return rc;
|
||||
}
|
||||
|
@ -589,6 +589,13 @@ int orte_ras_base_node_insert(opal_list_t* nodes)
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/** preallocate the appropriate number of containers on the segment */
|
||||
rc = orte_gpr.preallocate_segment(ORTE_NODE_SEGMENT, num_values);
|
||||
if(ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
for (i=0; i < num_values; i++) {
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[i]),
|
||||
ORTE_GPR_OVERWRITE | ORTE_GPR_TOKENS_AND,
|
||||
|
@ -61,6 +61,13 @@ int orte_rds_base_store_resource(opal_list_t *resources)
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/** preallocate the appropriate number of containers on the segment */
|
||||
rc = orte_gpr.preallocate_segment(ORTE_RESOURCE_SEGMENT, num_vals);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
for (i=0; i < num_vals && NULL != (cell = (orte_rds_cell_desc_t*)opal_list_remove_first(resources)); i++) {
|
||||
num_attr = (orte_std_cntr_t)opal_list_get_size(&cell->attributes);
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[i]), ORTE_GPR_TOKENS_XAND | ORTE_GPR_KEYS_OR,
|
||||
|
@ -398,6 +398,14 @@ int orte_rmaps_base_put_job_map(orte_job_map_t *map)
|
||||
return rc;
|
||||
}
|
||||
|
||||
/** preallocate the appropriate number of containers on the segment */
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.preallocate_segment(segment, num_procs + 1))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
free(values);
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
/** setup the last value in the array to store the vpid start/range and update the INIT counter */
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[num_procs]),
|
||||
ORTE_GPR_OVERWRITE|ORTE_GPR_TOKENS_AND,
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user