1
1
This commit was SVN r17826.
Этот коммит содержится в:
Rich Graham 2008-03-15 13:31:21 +00:00
родитель 6c77c995c2
Коммит e3e336b5ab
3 изменённых файлов: 109 добавлений и 21 удалений

Просмотреть файл

@ -44,8 +44,8 @@ BEGIN_C_DECLS
/*
* Memory Management
* - All memory allocation will be done on a per-communicator basis
* - The two banks of memory will be used
* - Each bank of memory will have M buffers
* - At least two banks of memory will be used
* - Each bank of memory will have M buffers (or segments)
* - These buffers will be used in a circular buffer order
* - Each buffer will be contiguous in virtual memory, and will have page-aligned
* regions belonging to each process in the communicator
@ -210,6 +210,34 @@ BEGIN_C_DECLS
/* forward declaration */
struct mca_coll_sm2_module_t;
/*
 * Shared memory region descriptor: records where one region's control
 * structure and its data area live.
 */
typedef struct sm_memory_region_desc_t {
    /* pointer to control structures */
    volatile mca_coll_sm2_nb_request_process_shared_mem_t *control_region;

    /* pointer to data segment, and lower half of data segment */
    volatile char *data_segment;
} sm_memory_region_desc_t;
/*
 * Shared memory buffer management structure
 */
typedef struct sm_work_buffer_t {
    /* pointer to segment base */
    volatile char *base_segment_address;

    /* per-process description of how the memory segment is mapped */
    sm_memory_region_desc_t *proc_memory;
} sm_work_buffer_t;
/* process private barrier request object */
struct mca_coll_sm2_nb_request_process_private_mem_t {
struct ompi_request_t super;
@ -251,6 +279,12 @@ BEGIN_C_DECLS
/* Pointer to the collective buffers */
char *collective_buffer_region;
/* description of allocated temp buffers - one struct per
* buffer. Each buffer has space "owned" by each process
* in the group.
*/
sm_work_buffer_t *sm_buffer_descriptor;
/* size of memory region, per process, for memory bank management */
size_t sm2_size_management_region_per_proc;
@ -373,7 +407,7 @@ BEGIN_C_DECLS
struct mca_coll_base_module_1_1_0_t *module);
/* allocate working buffer */
char *alloc_sm2_shared_buffer(mca_coll_sm2_module_t *module);
sm_work_buffer_t *alloc_sm2_shared_buffer(mca_coll_sm2_module_t *module);
/* free working buffer - it is assumed that buffers are released in
* the order they are allocated. We can assume this because each

Просмотреть файл

@ -52,6 +52,7 @@ int mca_coll_sm2_allreduce_intra_fanin_fanout(void *sbuf, void *rbuf, int count,
volatile mca_coll_sm2_nb_request_process_shared_mem_t * parent_ctl_pointer;
mca_coll_sm2_module_t *sm_module;
tree_node_t *my_reduction_node, *my_fanout_read_tree;
sm_work_buffer_t *sm_buffer_desc;
sm_module=(mca_coll_sm2_module_t *) module;
@ -98,7 +99,9 @@ int mca_coll_sm2_allreduce_intra_fanin_fanout(void *sbuf, void *rbuf, int count,
/* get a pointer to the shared-memory working buffer */
/* NOTE: starting with a rather synchronous approach */
for( stripe_number=0 ; stripe_number < n_data_segments ; stripe_number++ ) {
sm_buffer=alloc_sm2_shared_buffer(sm_module);
sm_buffer_desc=alloc_sm2_shared_buffer(sm_module);
sm_buffer=sm_buffer_desc->base_segment_address;
if( NULL == sm_buffer) {
rc=OMPI_ERR_OUT_OF_RESOURCE;
goto Error;
@ -350,6 +353,7 @@ int mca_coll_sm2_allreduce_intra_recursive_doubling(void *sbuf, void *rbuf,
int my_rank,count_processed,count_this_stripe;
size_t message_extent,dt_extent,ctl_size,len_data_buffer;
long long tag, base_tag;
sm_work_buffer_t *sm_buffer_desc;
volatile char * sm_buffer;
volatile char * my_tmp_data_buffer[2];
volatile char * my_write_pointer;
@ -412,7 +416,8 @@ int mca_coll_sm2_allreduce_intra_recursive_doubling(void *sbuf, void *rbuf,
/* debug */
t0=opal_sys_timer_get_cycles();
/* end debug */
sm_buffer=alloc_sm2_shared_buffer(sm_module);
sm_buffer_desc=alloc_sm2_shared_buffer(sm_module);
sm_buffer=sm_buffer_desc->base_segment_address;
if( NULL == sm_buffer) {
rc=OMPI_ERR_OUT_OF_RESOURCE;
goto Error;
@ -558,9 +563,9 @@ int mca_coll_sm2_allreduce_intra_recursive_doubling(void *sbuf, void *rbuf,
{
int ii,n_ints;
int *my_read=(int *)my_read_pointer;
int *my_write=(int *)my_write_pointer;
int *exchange_read=(int *)partner_read_pointer;
int * restrict my_read=(int *)my_read_pointer;
int * restrict my_write=(int *)my_write_pointer;
int * restrict exchange_read=(int *)partner_read_pointer;
n_ints=count_this_stripe;
for(ii=0 ; ii < n_ints ; ii++ ) {
my_write[ii]=my_read[ii]+exchange_read[ii];

Просмотреть файл

@ -459,7 +459,7 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
{
/* local variables */
mca_coll_sm2_module_t *sm_module;
int group_size,ret;
int i,j,group_size,ret;
size_t alignment,size;
ssize_t size_tot_per_proc_per_seg;
size_t tot_size_per_bank,size_tot_per_segment;
@ -542,7 +542,25 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
mca_coll_sm2_component.sm2_num_mem_banks;
sm_module->sm2_module_num_regions_per_bank=
mca_coll_sm2_component.sm2_num_regions_per_bank;
sm_module->sm2_module_num_buffers=
mca_coll_sm2_component.sm2_num_regions_per_bank *
mca_coll_sm2_component.sm2_num_mem_banks;
/* allocate the array of memory descriptors used to describe the
 * shared memory buffers. This structure resides in process
 * private memory, but describes the shared memory.
 *
 * calloc() is used so that every proc_memory pointer starts out NULL:
 * the CLEANUP path inspects and frees each entry's proc_memory, and it
 * can be reached before all entries have been filled in.
 */
sm_module->sm_buffer_descriptor=(sm_work_buffer_t *)calloc(
        sm_module->sm2_module_num_buffers, sizeof(sm_work_buffer_t));
if( NULL == sm_module->sm_buffer_descriptor ) {
    goto CLEANUP;
}
/*
* Now figure out how much memory to allocate for use as
* working memory for the shared memory collectives.
*/
/*
* get control region size
*/
@ -611,11 +629,6 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
/* total memory management required */
mem_management_total=mem_management_per_proc * group_size;
/* set the total number of working buffers */
sm_module->sm2_module_num_buffers=
mca_coll_sm2_component.sm2_num_regions_per_bank *
mca_coll_sm2_component.sm2_num_mem_banks;
/* total size of backing file - this assumes the mmap allocation
* occurs on page boundaries, and that all segments are paged
* aligned
@ -694,7 +707,36 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
sm_module->sm2_first_buffer_index_next_bank=0;
}
/* set pointers */
/* setup shared memory descriptors - one descriptor per working buffer */
for( i=0 ; i < sm_module->sm2_module_num_buffers ; i++ ) {
    char *base_buffer;
    volatile mca_coll_sm2_nb_request_process_shared_mem_t *ctl_ptr;

    /* set the base address for this working buffer */
    base_buffer= sm_module->collective_buffer_region+
        i*sm_module->segment_size;
    sm_module->sm_buffer_descriptor[i].base_segment_address=base_buffer;

    /* allocate array to keep data on each segment in the buffer.
     * One segment per process in the group.
     */
    sm_module->sm_buffer_descriptor[i].proc_memory=
        (sm_memory_region_desc_t *)malloc(sizeof(sm_memory_region_desc_t)*
                group_size);
    if( NULL == sm_module->sm_buffer_descriptor[i].proc_memory ) {
        goto CLEANUP;
    }

    for(j=0 ; j < group_size ; j++ ) {
        /* Compute the offset in bytes on the char pointer first and
         * only then cast.  A cast binds tighter than '+', so casting
         * base_buffer before adding would scale the offset by the
         * size of the control structure instead of by one byte.
         */
        ctl_ptr=(volatile mca_coll_sm2_nb_request_process_shared_mem_t *)
            (base_buffer+j* sm_module->segement_size_per_process);
        sm_module->sm_buffer_descriptor[i].proc_memory[j].control_region=
            ctl_ptr;
        /* the data area follows the control area of the same process */
        sm_module->sm_buffer_descriptor[i].proc_memory[j].data_segment=
            (char *)ctl_ptr+sm_module->ctl_memory_per_proc_per_segment;
    }
}
/* touch pages to apply memory affinity - Note: do we really need this or will
* the algorithms do this */
@ -716,6 +758,17 @@ CLEANUP:
sm_module->coll_sm2_file_name=NULL;
}
/* release the per-buffer descriptors and the descriptor array itself */
if( NULL != sm_module->sm_buffer_descriptor ) {
    /* The descriptor array holds one entry per working buffer
     * (sm2_module_num_buffers), not one per process, so that is the
     * bound to iterate over.
     * NOTE(review): assumes each proc_memory is either NULL or a live
     * allocation - confirm the array is zero-initialized when created.
     */
    for(i=0 ; i < sm_module->sm2_module_num_buffers ; i++ ) {
        if(NULL != sm_module->sm_buffer_descriptor[i].proc_memory) {
            free(sm_module->sm_buffer_descriptor[i].proc_memory);
            sm_module->sm_buffer_descriptor[i].proc_memory=NULL;
        }
    }
    free(sm_module->sm_buffer_descriptor);
    sm_module->sm_buffer_descriptor=NULL;
}
OBJ_RELEASE(sm_module);
return NULL;
@ -742,7 +795,7 @@ sm2_module_enable(struct mca_coll_base_module_1_1_0_t *module,
}
/* allocate working buffer */
char *alloc_sm2_shared_buffer(mca_coll_sm2_module_t *module)
sm_work_buffer_t *alloc_sm2_shared_buffer(mca_coll_sm2_module_t *module)
{
/* local variables */
int rc,buffer_index, memory_bank_index;
@ -836,11 +889,7 @@ char *alloc_sm2_shared_buffer(mca_coll_sm2_module_t *module)
buffer_index=module->sm2_allocated_buffer_index;
/* get base address of return buffer */
return_buffer=module->collective_buffer_region+
buffer_index*module->segment_size;
return return_buffer;
return &(module->sm_buffer_descriptor[buffer_index]);
}