check point
This commit was SVN r17826.
Этот коммит содержится в:
родитель
6c77c995c2
Коммит
e3e336b5ab
@ -44,8 +44,8 @@ BEGIN_C_DECLS
|
||||
/*
|
||||
* Memory Management
|
||||
* - All memory allocation will be done on a per-communicator basis
|
||||
* - The two banks of memory will be used
|
||||
* - Each bank of memory will have M buffers
|
||||
* - At least two banks of memory will be used
|
||||
* - Each bank of memory will have M buffers (or segments)
|
||||
* - These buffers will be used in a circular buffer order
|
||||
* - Each buffer will be contiguous in virtual memory, and will have page-aligned
|
||||
* regions belonging to each process in the communicator
|
||||
@ -210,6 +210,34 @@ BEGIN_C_DECLS
|
||||
/* forward declaration */
|
||||
struct mca_coll_sm2_module_t;
|
||||
|
||||
/*
|
||||
* shared memory region descriptor
|
||||
*/
|
||||
/*
 * Describes the portion of one shared-memory working buffer that
 * belongs to a single process: the address of that process's control
 * structure and the address of the data area that follows it.
 */
struct sm_memory_region_desc_t {
|
||||
|
||||
/* pointer to the control structure of this process's region */
|
||||
volatile mca_coll_sm2_nb_request_process_shared_mem_t *control_region;
|
||||
|
||||
/* pointer to data segment, and lower half of data segment.
 * The setup code in mca_coll_sm2_comm_query() places it
 * ctl_memory_per_proc_per_segment bytes past control_region.
 * NOTE(review): "lower half" is carried over from the original
 * comment; how the segment is split is not shown here - confirm. */
|
||||
volatile char *data_segment;
|
||||
|
||||
};
|
||||
typedef struct sm_memory_region_desc_t sm_memory_region_desc_t;
|
||||
|
||||
/*
|
||||
* Shared memory buffer management structure
|
||||
*/
|
||||
/*
 * Descriptor for one shared-memory working buffer.  An array of these
 * is kept per module and a pointer to one element is what
 * alloc_sm2_shared_buffer() hands back to the collective algorithms.
 */
struct sm_work_buffer_t {
|
||||
/* pointer to segment base: start address of this working buffer
 * (collective_buffer_region + buffer index * segment_size) */
|
||||
volatile char * base_segment_address;
|
||||
|
||||
/* description of how the memory segment is mapped on
 * a per process basis: heap-allocated array with one
 * sm_memory_region_desc_t per process in the group
 */
|
||||
sm_memory_region_desc_t *proc_memory;
|
||||
};
|
||||
typedef struct sm_work_buffer_t sm_work_buffer_t;
|
||||
|
||||
/* process private barrier request object */
|
||||
struct mca_coll_sm2_nb_request_process_private_mem_t {
|
||||
struct ompi_request_t super;
|
||||
@ -251,6 +279,12 @@ BEGIN_C_DECLS
|
||||
/* Pointer to the collective buffers */
|
||||
char *collective_buffer_region;
|
||||
|
||||
/* description of allocated temp buffers - one struct per
|
||||
* buffer. Each buffer has space "owned" by each process
|
||||
* in the group.
|
||||
*/
|
||||
sm_work_buffer_t *sm_buffer_descriptor;
|
||||
|
||||
/* size of memory region, per process, for memory bank management */
|
||||
size_t sm2_size_management_region_per_proc;
|
||||
|
||||
@ -373,7 +407,7 @@ BEGIN_C_DECLS
|
||||
struct mca_coll_base_module_1_1_0_t *module);
|
||||
|
||||
/* allocate working buffer */
|
||||
char *alloc_sm2_shared_buffer(mca_coll_sm2_module_t *module);
|
||||
sm_work_buffer_t *alloc_sm2_shared_buffer(mca_coll_sm2_module_t *module);
|
||||
|
||||
/* free working buffer - it is assumed that buffers are released in
|
||||
* the order they are allocated. We can assume this because each
|
||||
|
@ -52,6 +52,7 @@ int mca_coll_sm2_allreduce_intra_fanin_fanout(void *sbuf, void *rbuf, int count,
|
||||
volatile mca_coll_sm2_nb_request_process_shared_mem_t * parent_ctl_pointer;
|
||||
mca_coll_sm2_module_t *sm_module;
|
||||
tree_node_t *my_reduction_node, *my_fanout_read_tree;
|
||||
sm_work_buffer_t *sm_buffer_desc;
|
||||
|
||||
|
||||
sm_module=(mca_coll_sm2_module_t *) module;
|
||||
@ -98,7 +99,9 @@ int mca_coll_sm2_allreduce_intra_fanin_fanout(void *sbuf, void *rbuf, int count,
|
||||
/* get a pointer to the shared-memory working buffer */
|
||||
/* NOTE: starting with a rather synchronous approach */
|
||||
for( stripe_number=0 ; stripe_number < n_data_segments ; stripe_number++ ) {
|
||||
sm_buffer=alloc_sm2_shared_buffer(sm_module);
|
||||
|
||||
sm_buffer_desc=alloc_sm2_shared_buffer(sm_module);
|
||||
sm_buffer=sm_buffer_desc->base_segment_address;
|
||||
if( NULL == sm_buffer) {
|
||||
rc=OMPI_ERR_OUT_OF_RESOURCE;
|
||||
goto Error;
|
||||
@ -350,6 +353,7 @@ int mca_coll_sm2_allreduce_intra_recursive_doubling(void *sbuf, void *rbuf,
|
||||
int my_rank,count_processed,count_this_stripe;
|
||||
size_t message_extent,dt_extent,ctl_size,len_data_buffer;
|
||||
long long tag, base_tag;
|
||||
sm_work_buffer_t *sm_buffer_desc;
|
||||
volatile char * sm_buffer;
|
||||
volatile char * my_tmp_data_buffer[2];
|
||||
volatile char * my_write_pointer;
|
||||
@ -412,7 +416,8 @@ int mca_coll_sm2_allreduce_intra_recursive_doubling(void *sbuf, void *rbuf,
|
||||
/* debug */
|
||||
t0=opal_sys_timer_get_cycles();
|
||||
/* end debug */
|
||||
sm_buffer=alloc_sm2_shared_buffer(sm_module);
|
||||
sm_buffer_desc=alloc_sm2_shared_buffer(sm_module);
|
||||
sm_buffer=sm_buffer_desc->base_segment_address;
|
||||
if( NULL == sm_buffer) {
|
||||
rc=OMPI_ERR_OUT_OF_RESOURCE;
|
||||
goto Error;
|
||||
@ -558,9 +563,9 @@ int mca_coll_sm2_allreduce_intra_recursive_doubling(void *sbuf, void *rbuf,
|
||||
|
||||
{
|
||||
int ii,n_ints;
|
||||
int *my_read=(int *)my_read_pointer;
|
||||
int *my_write=(int *)my_write_pointer;
|
||||
int *exchange_read=(int *)partner_read_pointer;
|
||||
int * restrict my_read=(int *)my_read_pointer;
|
||||
int * restrict my_write=(int *)my_write_pointer;
|
||||
int * restrict exchange_read=(int *)partner_read_pointer;
|
||||
n_ints=count_this_stripe;
|
||||
for(ii=0 ; ii < n_ints ; ii++ ) {
|
||||
my_write[ii]=my_read[ii]+exchange_read[ii];
|
||||
|
@ -459,7 +459,7 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
|
||||
{
|
||||
/* local variables */
|
||||
mca_coll_sm2_module_t *sm_module;
|
||||
int group_size,ret;
|
||||
int i,j,group_size,ret;
|
||||
size_t alignment,size;
|
||||
ssize_t size_tot_per_proc_per_seg;
|
||||
size_t tot_size_per_bank,size_tot_per_segment;
|
||||
@ -542,7 +542,25 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
|
||||
mca_coll_sm2_component.sm2_num_mem_banks;
|
||||
sm_module->sm2_module_num_regions_per_bank=
|
||||
mca_coll_sm2_component.sm2_num_regions_per_bank;
|
||||
sm_module->sm2_module_num_buffers=
|
||||
mca_coll_sm2_component.sm2_num_regions_per_bank *
|
||||
mca_coll_sm2_component.sm2_num_mem_banks;
|
||||
|
||||
|
||||
/* allocate the array of memory descriptors used to describe the
|
||||
* shared memory buffers. This structure resides in process
|
||||
* private memory, but describes the shared memory.
|
||||
*/
|
||||
sm_module->sm_buffer_descriptor=(sm_work_buffer_t *)malloc(
|
||||
sizeof(sm_work_buffer_t)*sm_module->sm2_module_num_buffers);
|
||||
if( NULL == sm_module->sm_buffer_descriptor ) {
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
/*
|
||||
* Now figure out how much memory to allocate for use as
|
||||
* working memory for the shared memory collectives.
|
||||
*/
|
||||
/*
|
||||
* get control region size
|
||||
*/
|
||||
@ -611,11 +629,6 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
|
||||
/* total memory management required */
|
||||
mem_management_total=mem_management_per_proc * group_size;
|
||||
|
||||
/* set the total number of working buffers */
|
||||
sm_module->sm2_module_num_buffers=
|
||||
mca_coll_sm2_component.sm2_num_regions_per_bank *
|
||||
mca_coll_sm2_component.sm2_num_mem_banks;
|
||||
|
||||
/* total size of backing file - this assumes the mmap allocation
|
||||
* occurs on page boundaries, and that all segments are paged
|
||||
* aligned
|
||||
@ -694,7 +707,36 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
|
||||
sm_module->sm2_first_buffer_index_next_bank=0;
|
||||
}
|
||||
|
||||
/* set pointers */
|
||||
/* setup shared memory memory descriptors */
|
||||
/* Fill in one descriptor per working buffer: its base address, plus
 * for every process in the group the address of that process's control
 * structure and of the data area that follows it.
 */
for( i=0 ; i < sm_module->sm2_module_num_buffers ; i++ ) {

    char *base_buffer;
    volatile mca_coll_sm2_nb_request_process_shared_mem_t *ctl_ptr;

    /* set the base address for this working buffer */
    base_buffer= sm_module->collective_buffer_region+
        i*sm_module->segment_size;
    sm_module->sm_buffer_descriptor[i].base_segment_address=base_buffer;

    /* allocate array to keep data on each segment in the buffer.
     * One segment per process in the group.
     */
    sm_module->sm_buffer_descriptor[i].proc_memory=
        (sm_memory_region_desc_t *)malloc(sizeof(sm_memory_region_desc_t)*
                group_size);
    if( NULL == sm_module->sm_buffer_descriptor[i].proc_memory ) {
        goto CLEANUP;
    }

    for(j=0 ; j < group_size ; j++ ) {
        /* The offset must be added to the char pointer BEFORE the
         * cast.  A cast binds tighter than '+', so without the
         * parentheses the offset would be scaled by the size of the
         * control structure instead of being applied in bytes.
         * NOTE(review): segement_size_per_process is taken to be a
         * byte count, like the other sizes applied to char pointers
         * in this loop - confirm. */
        ctl_ptr=(volatile mca_coll_sm2_nb_request_process_shared_mem_t *)
            (base_buffer+j* sm_module->segement_size_per_process);
        sm_module->sm_buffer_descriptor[i].proc_memory[j].control_region=
            ctl_ptr;

        /* data area starts right after the per-process control memory */
        sm_module->sm_buffer_descriptor[i].proc_memory[j].data_segment=
            (volatile char *)ctl_ptr+
            sm_module->ctl_memory_per_proc_per_segment;
    }

}
|
||||
|
||||
/* touch pages to apply memory affinity - Note: do we really need this or will
|
||||
* the algorithms do this */
|
||||
@ -716,6 +758,17 @@ CLEANUP:
|
||||
sm_module->coll_sm2_file_name=NULL;
|
||||
}
|
||||
|
||||
/* Release the working-buffer descriptors: first each per-process
 * array, then the descriptor array itself.
 */
if( NULL != sm_module->sm_buffer_descriptor ) {
    /* The descriptor array holds sm2_module_num_buffers entries (one
     * per working buffer), not group_size entries; group_size is only
     * the length of each proc_memory array.  Bounding the loop by
     * group_size would read past the end of the array or leak entries.
     * NOTE(review): entries that were never set up must hold NULL
     * for this to be safe - verify the allocation site. */
    for(i=0 ; i < sm_module->sm2_module_num_buffers ; i++ ) {
        /* free(NULL) is a no-op, so no per-entry guard is needed */
        free(sm_module->sm_buffer_descriptor[i].proc_memory);
        sm_module->sm_buffer_descriptor[i].proc_memory=NULL;
    }
    free(sm_module->sm_buffer_descriptor);
    sm_module->sm_buffer_descriptor=NULL;
}
|
||||
|
||||
OBJ_RELEASE(sm_module);
|
||||
|
||||
return NULL;
|
||||
@ -742,7 +795,7 @@ sm2_module_enable(struct mca_coll_base_module_1_1_0_t *module,
|
||||
}
|
||||
|
||||
/* allocate working buffer */
|
||||
char *alloc_sm2_shared_buffer(mca_coll_sm2_module_t *module)
|
||||
sm_work_buffer_t *alloc_sm2_shared_buffer(mca_coll_sm2_module_t *module)
|
||||
{
|
||||
/* local variables */
|
||||
int rc,buffer_index, memory_bank_index;
|
||||
@ -836,11 +889,7 @@ char *alloc_sm2_shared_buffer(mca_coll_sm2_module_t *module)
|
||||
|
||||
buffer_index=module->sm2_allocated_buffer_index;
|
||||
|
||||
/* get base address of return buffer */
|
||||
return_buffer=module->collective_buffer_region+
|
||||
buffer_index*module->segment_size;
|
||||
|
||||
return return_buffer;
|
||||
return &(module->sm_buffer_descriptor[buffer_index]);
|
||||
|
||||
}
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user