check point
This commit was SVN r17826.
Этот коммит содержится в:
родитель
6c77c995c2
Коммит
e3e336b5ab
@ -44,8 +44,8 @@ BEGIN_C_DECLS
|
|||||||
/*
|
/*
|
||||||
* Memory Management
|
* Memory Management
|
||||||
* - All memory allocation will be done on a per-communicator basis
|
* - All memory allocation will be done on a per-communicator basis
|
||||||
* - The two banks of memory will be used
|
* - At least two banks of memory will be used
|
||||||
* - Each bank of memory will have M buffers
|
* - Each bank of memory will have M buffers (or segments)
|
||||||
* - These buffers will be used in a circular buffer order
|
* - These buffers will be used in a circular buffer order
|
||||||
* - Each buffer will be contiguous in virtual memory, and will have page-aligned
|
* - Each buffer will be contiguous in virtual memory, and will have page-aligned
|
||||||
* regions belonging to each process in the communicator
|
* regions belonging to each process in the communicator
|
||||||
@ -210,6 +210,34 @@ BEGIN_C_DECLS
|
|||||||
/* forward declaration */
|
/* forward declaration */
|
||||||
struct mca_coll_sm2_module_t;
|
struct mca_coll_sm2_module_t;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* shared memory region descriptor
|
||||||
|
*/
|
||||||
|
struct sm_memory_region_desc_t {
|
||||||
|
|
||||||
|
/* pointer to control structures */
|
||||||
|
volatile mca_coll_sm2_nb_request_process_shared_mem_t *control_region;
|
||||||
|
|
||||||
|
/* pointer to data segment, and lower half of data segment */
|
||||||
|
volatile char *data_segment;
|
||||||
|
|
||||||
|
};
|
||||||
|
typedef struct sm_memory_region_desc_t sm_memory_region_desc_t;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Shared memory buffer management structure
|
||||||
|
*/
|
||||||
|
struct sm_work_buffer_t {
|
||||||
|
/* pointer to segment base */
|
||||||
|
volatile char * base_segment_address;
|
||||||
|
|
||||||
|
/* description of how the memory segment is mapped on
|
||||||
|
* a per process basis
|
||||||
|
*/
|
||||||
|
sm_memory_region_desc_t *proc_memory;
|
||||||
|
};
|
||||||
|
typedef struct sm_work_buffer_t sm_work_buffer_t;
|
||||||
|
|
||||||
/* process private barrier request object */
|
/* process private barrier request object */
|
||||||
struct mca_coll_sm2_nb_request_process_private_mem_t {
|
struct mca_coll_sm2_nb_request_process_private_mem_t {
|
||||||
struct ompi_request_t super;
|
struct ompi_request_t super;
|
||||||
@ -251,6 +279,12 @@ BEGIN_C_DECLS
|
|||||||
/* Pointer to the collective buffers */
|
/* Pointer to the collective buffers */
|
||||||
char *collective_buffer_region;
|
char *collective_buffer_region;
|
||||||
|
|
||||||
|
/* description of allocated temp buffers - one struct per
|
||||||
|
* buffer. Each buffer has space "owned" by each process
|
||||||
|
* in the group.
|
||||||
|
*/
|
||||||
|
sm_work_buffer_t *sm_buffer_descriptor;
|
||||||
|
|
||||||
/* size of memory region, per process, for memory bank management */
|
/* size of memory region, per process, for memory bank management */
|
||||||
size_t sm2_size_management_region_per_proc;
|
size_t sm2_size_management_region_per_proc;
|
||||||
|
|
||||||
@ -373,7 +407,7 @@ BEGIN_C_DECLS
|
|||||||
struct mca_coll_base_module_1_1_0_t *module);
|
struct mca_coll_base_module_1_1_0_t *module);
|
||||||
|
|
||||||
/* allocate working buffer */
|
/* allocate working buffer */
|
||||||
char *alloc_sm2_shared_buffer(mca_coll_sm2_module_t *module);
|
sm_work_buffer_t *alloc_sm2_shared_buffer(mca_coll_sm2_module_t *module);
|
||||||
|
|
||||||
/* free working buffer - it is assumed that buffers are released in
|
/* free working buffer - it is assumed that buffers are released in
|
||||||
* the order they are allocated. We can assume this because each
|
* the order they are allocated. We can assume this because each
|
||||||
|
@ -52,6 +52,7 @@ int mca_coll_sm2_allreduce_intra_fanin_fanout(void *sbuf, void *rbuf, int count,
|
|||||||
volatile mca_coll_sm2_nb_request_process_shared_mem_t * parent_ctl_pointer;
|
volatile mca_coll_sm2_nb_request_process_shared_mem_t * parent_ctl_pointer;
|
||||||
mca_coll_sm2_module_t *sm_module;
|
mca_coll_sm2_module_t *sm_module;
|
||||||
tree_node_t *my_reduction_node, *my_fanout_read_tree;
|
tree_node_t *my_reduction_node, *my_fanout_read_tree;
|
||||||
|
sm_work_buffer_t *sm_buffer_desc;
|
||||||
|
|
||||||
|
|
||||||
sm_module=(mca_coll_sm2_module_t *) module;
|
sm_module=(mca_coll_sm2_module_t *) module;
|
||||||
@ -98,7 +99,9 @@ int mca_coll_sm2_allreduce_intra_fanin_fanout(void *sbuf, void *rbuf, int count,
|
|||||||
/* get a pointer to the shared-memory working buffer */
|
/* get a pointer to the shared-memory working buffer */
|
||||||
/* NOTE: starting with a rather synchronous approach */
|
/* NOTE: starting with a rather synchronous approach */
|
||||||
for( stripe_number=0 ; stripe_number < n_data_segments ; stripe_number++ ) {
|
for( stripe_number=0 ; stripe_number < n_data_segments ; stripe_number++ ) {
|
||||||
sm_buffer=alloc_sm2_shared_buffer(sm_module);
|
|
||||||
|
sm_buffer_desc=alloc_sm2_shared_buffer(sm_module);
|
||||||
|
sm_buffer=sm_buffer_desc->base_segment_address;
|
||||||
if( NULL == sm_buffer) {
|
if( NULL == sm_buffer) {
|
||||||
rc=OMPI_ERR_OUT_OF_RESOURCE;
|
rc=OMPI_ERR_OUT_OF_RESOURCE;
|
||||||
goto Error;
|
goto Error;
|
||||||
@ -350,6 +353,7 @@ int mca_coll_sm2_allreduce_intra_recursive_doubling(void *sbuf, void *rbuf,
|
|||||||
int my_rank,count_processed,count_this_stripe;
|
int my_rank,count_processed,count_this_stripe;
|
||||||
size_t message_extent,dt_extent,ctl_size,len_data_buffer;
|
size_t message_extent,dt_extent,ctl_size,len_data_buffer;
|
||||||
long long tag, base_tag;
|
long long tag, base_tag;
|
||||||
|
sm_work_buffer_t *sm_buffer_desc;
|
||||||
volatile char * sm_buffer;
|
volatile char * sm_buffer;
|
||||||
volatile char * my_tmp_data_buffer[2];
|
volatile char * my_tmp_data_buffer[2];
|
||||||
volatile char * my_write_pointer;
|
volatile char * my_write_pointer;
|
||||||
@ -412,7 +416,8 @@ int mca_coll_sm2_allreduce_intra_recursive_doubling(void *sbuf, void *rbuf,
|
|||||||
/* debug */
|
/* debug */
|
||||||
t0=opal_sys_timer_get_cycles();
|
t0=opal_sys_timer_get_cycles();
|
||||||
/* end debug */
|
/* end debug */
|
||||||
sm_buffer=alloc_sm2_shared_buffer(sm_module);
|
sm_buffer_desc=alloc_sm2_shared_buffer(sm_module);
|
||||||
|
sm_buffer=sm_buffer_desc->base_segment_address;
|
||||||
if( NULL == sm_buffer) {
|
if( NULL == sm_buffer) {
|
||||||
rc=OMPI_ERR_OUT_OF_RESOURCE;
|
rc=OMPI_ERR_OUT_OF_RESOURCE;
|
||||||
goto Error;
|
goto Error;
|
||||||
@ -558,9 +563,9 @@ int mca_coll_sm2_allreduce_intra_recursive_doubling(void *sbuf, void *rbuf,
|
|||||||
|
|
||||||
{
|
{
|
||||||
int ii,n_ints;
|
int ii,n_ints;
|
||||||
int *my_read=(int *)my_read_pointer;
|
int * restrict my_read=(int *)my_read_pointer;
|
||||||
int *my_write=(int *)my_write_pointer;
|
int * restrict my_write=(int *)my_write_pointer;
|
||||||
int *exchange_read=(int *)partner_read_pointer;
|
int * restrict exchange_read=(int *)partner_read_pointer;
|
||||||
n_ints=count_this_stripe;
|
n_ints=count_this_stripe;
|
||||||
for(ii=0 ; ii < n_ints ; ii++ ) {
|
for(ii=0 ; ii < n_ints ; ii++ ) {
|
||||||
my_write[ii]=my_read[ii]+exchange_read[ii];
|
my_write[ii]=my_read[ii]+exchange_read[ii];
|
||||||
|
@ -459,7 +459,7 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
|
|||||||
{
|
{
|
||||||
/* local variables */
|
/* local variables */
|
||||||
mca_coll_sm2_module_t *sm_module;
|
mca_coll_sm2_module_t *sm_module;
|
||||||
int group_size,ret;
|
int i,j,group_size,ret;
|
||||||
size_t alignment,size;
|
size_t alignment,size;
|
||||||
ssize_t size_tot_per_proc_per_seg;
|
ssize_t size_tot_per_proc_per_seg;
|
||||||
size_t tot_size_per_bank,size_tot_per_segment;
|
size_t tot_size_per_bank,size_tot_per_segment;
|
||||||
@ -542,7 +542,25 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
|
|||||||
mca_coll_sm2_component.sm2_num_mem_banks;
|
mca_coll_sm2_component.sm2_num_mem_banks;
|
||||||
sm_module->sm2_module_num_regions_per_bank=
|
sm_module->sm2_module_num_regions_per_bank=
|
||||||
mca_coll_sm2_component.sm2_num_regions_per_bank;
|
mca_coll_sm2_component.sm2_num_regions_per_bank;
|
||||||
|
sm_module->sm2_module_num_buffers=
|
||||||
|
mca_coll_sm2_component.sm2_num_regions_per_bank *
|
||||||
|
mca_coll_sm2_component.sm2_num_mem_banks;
|
||||||
|
|
||||||
|
|
||||||
|
/* allocate the array of memory descriptors used to describe the
|
||||||
|
* shared memory buffers. This structure resides in process
|
||||||
|
* private memory, but describes the shared memory.
|
||||||
|
*/
|
||||||
|
sm_module->sm_buffer_descriptor=(sm_work_buffer_t *)malloc(
|
||||||
|
sizeof(sm_work_buffer_t)*sm_module->sm2_module_num_buffers);
|
||||||
|
if( NULL == sm_module->sm_buffer_descriptor ) {
|
||||||
|
goto CLEANUP;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Now figure out how much memory to allocate for use as
|
||||||
|
* working memory for the shared memory collectives.
|
||||||
|
*/
|
||||||
/*
|
/*
|
||||||
* get control region size
|
* get control region size
|
||||||
*/
|
*/
|
||||||
@ -611,11 +629,6 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
|
|||||||
/* total memory management required */
|
/* total memory management required */
|
||||||
mem_management_total=mem_management_per_proc * group_size;
|
mem_management_total=mem_management_per_proc * group_size;
|
||||||
|
|
||||||
/* set the total number of working buffers */
|
|
||||||
sm_module->sm2_module_num_buffers=
|
|
||||||
mca_coll_sm2_component.sm2_num_regions_per_bank *
|
|
||||||
mca_coll_sm2_component.sm2_num_mem_banks;
|
|
||||||
|
|
||||||
/* total size of backing file - this assumes the mmap allocation
|
/* total size of backing file - this assumes the mmap allocation
|
||||||
* occurs on page boundaries, and that all segments are paged
|
* occurs on page boundaries, and that all segments are paged
|
||||||
* aligned
|
* aligned
|
||||||
@ -694,7 +707,36 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
|
|||||||
sm_module->sm2_first_buffer_index_next_bank=0;
|
sm_module->sm2_first_buffer_index_next_bank=0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* set pointers */
|
/* set up shared memory descriptors */
|
||||||
|
for( i=0 ; i < sm_module->sm2_module_num_buffers ; i++ ) {
|
||||||
|
|
||||||
|
char *base_buffer;
|
||||||
|
volatile mca_coll_sm2_nb_request_process_shared_mem_t *ctl_ptr;
|
||||||
|
|
||||||
|
/* set the base address for this working buffer */
|
||||||
|
base_buffer= sm_module->collective_buffer_region+
|
||||||
|
i*sm_module->segment_size;
|
||||||
|
sm_module->sm_buffer_descriptor[i].base_segment_address=base_buffer;
|
||||||
|
|
||||||
|
/* allocate array to keep data on each segment in the buffer.
|
||||||
|
* One segment per process in the group.
|
||||||
|
*/
|
||||||
|
sm_module->sm_buffer_descriptor[i].proc_memory=
|
||||||
|
(sm_memory_region_desc_t *)malloc(sizeof(sm_memory_region_desc_t)*
|
||||||
|
group_size);
|
||||||
|
if( NULL == sm_module->sm_buffer_descriptor[i].proc_memory ) {
|
||||||
|
goto CLEANUP;
|
||||||
|
}
|
||||||
|
for(j=0 ; j < group_size ; j++ ) {
|
||||||
|
ctl_ptr=(volatile mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||||
|
base_buffer+j* sm_module->segement_size_per_process;
|
||||||
|
sm_module->sm_buffer_descriptor[i].proc_memory[j].control_region=
|
||||||
|
ctl_ptr;
|
||||||
|
sm_module->sm_buffer_descriptor[i].proc_memory[j].data_segment=
|
||||||
|
(char *)ctl_ptr+sm_module->ctl_memory_per_proc_per_segment;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
/* touch pages to apply memory affinity - Note: do we really need this or will
|
/* touch pages to apply memory affinity - Note: do we really need this or will
|
||||||
* the algorithms do this */
|
* the algorithms do this */
|
||||||
@ -716,6 +758,17 @@ CLEANUP:
|
|||||||
sm_module->coll_sm2_file_name=NULL;
|
sm_module->coll_sm2_file_name=NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if( NULL != sm_module->sm_buffer_descriptor ) {
|
||||||
|
for(i=0 ; i < group_size ; i++ ) {
|
||||||
|
if(NULL != sm_module->sm_buffer_descriptor[i].proc_memory) {
|
||||||
|
free(sm_module->sm_buffer_descriptor[i].proc_memory);
|
||||||
|
sm_module->sm_buffer_descriptor[i].proc_memory=NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free(sm_module->sm_buffer_descriptor);
|
||||||
|
sm_module->sm_buffer_descriptor=NULL;
|
||||||
|
}
|
||||||
|
|
||||||
OBJ_RELEASE(sm_module);
|
OBJ_RELEASE(sm_module);
|
||||||
|
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -742,7 +795,7 @@ sm2_module_enable(struct mca_coll_base_module_1_1_0_t *module,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* allocate working buffer */
|
/* allocate working buffer */
|
||||||
char *alloc_sm2_shared_buffer(mca_coll_sm2_module_t *module)
|
sm_work_buffer_t *alloc_sm2_shared_buffer(mca_coll_sm2_module_t *module)
|
||||||
{
|
{
|
||||||
/* local variables */
|
/* local variables */
|
||||||
int rc,buffer_index, memory_bank_index;
|
int rc,buffer_index, memory_bank_index;
|
||||||
@ -836,11 +889,7 @@ char *alloc_sm2_shared_buffer(mca_coll_sm2_module_t *module)
|
|||||||
|
|
||||||
buffer_index=module->sm2_allocated_buffer_index;
|
buffer_index=module->sm2_allocated_buffer_index;
|
||||||
|
|
||||||
/* get base address of return buffer */
|
return &(module->sm_buffer_descriptor[buffer_index]);
|
||||||
return_buffer=module->collective_buffer_region+
|
|
||||||
buffer_index*module->segment_size;
|
|
||||||
|
|
||||||
return return_buffer;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user