1
1

Remove debug code that prevented people from running the component.

Add infrastructure for a blocking barrier.

This commit was SVN r18214.
Этот коммит содержится в:
Rich Graham 2008-04-19 01:34:04 +00:00
родитель 56a61bfacf
Коммит bee8b42f29
4 изменённых файлов: 127 добавлений и 15 удалений

Просмотреть файл

@ -304,9 +304,30 @@ BEGIN_C_DECLS
/* Memory pointer to shared file */ /* Memory pointer to shared file */
char *shared_memory_region; char *shared_memory_region;
/* size of memory banks control regions */
size_t size_mem_banks_ctl_region;
/* Pointer to the collective buffers */ /* Pointer to the collective buffers */
char *collective_buffer_region; char *collective_buffer_region;
/* size of collective buffer region */
size_t size_of_collective_buffer_region;
/* pointer to memory for blocking collectives */
char *sm_blocking_barrier_region;
/* size of memory for blocking collectives */
size_t size_of_blocking_barrier_region;
/* per proc size of memory for blocking collectives */
size_t per_proc_size_of_blocking_barrier_region;
/* index of blocking barrier memory region to use */
int index_blocking_barrier_memory_bank;
/* pointers to blocking memory control regions */
mca_coll_sm2_nb_request_process_shared_mem_t ***ctl_blocking_barrier;
/* description of allocated temp buffers - one struct per /* description of allocated temp buffers - one struct per
* buffer. Each buffer has space "owned" by each process * buffer. Each buffer has space "owned" by each process
* in the group. * in the group.

Просмотреть файл

@ -24,9 +24,9 @@
#include "ompi/mca/coll/coll.h" #include "ompi/mca/coll/coll.h"
#include "opal/sys/atomic.h" #include "opal/sys/atomic.h"
#include "coll_sm2.h" #include "coll_sm2.h"
/* debug */ /* debug
extern int debug_print; extern int debug_print;
/* end debug */ end debug */
/** /**
* Shared memory barrier. * Shared memory barrier.

Просмотреть файл

@ -44,6 +44,7 @@
static int sm2_module_enable(struct mca_coll_base_module_1_1_0_t *module, static int sm2_module_enable(struct mca_coll_base_module_1_1_0_t *module,
struct ompi_communicator_t *comm); struct ompi_communicator_t *comm);
#if 0
/* debug */ /* debug */
extern int debug_print; extern int debug_print;
extern int my_debug_rank; extern int my_debug_rank;
@ -116,6 +117,7 @@ void debug_module(void) {
} }
/* end debug */ /* end debug */
#endif
/* /*
* Local functions * Local functions
@ -236,7 +238,7 @@ static int allocate_shared_file(size_t size, char **file_name,
/* process initializing the file */ /* process initializing the file */
fd = open(*file_name, O_CREAT|O_RDWR, 0600); fd = open(*file_name, O_CREAT|O_RDWR, 0600);
if (fd < 0) { if (fd < 0) {
opal_output(0,"mca_common_sm_mmap_init: open %s len %d failed with errno=%d\n", opal_output(0,"mca_common_sm_mmap_init: open %s len %ld failed with errno=%d\n",
*file_name, len, errno); *file_name, len, errno);
goto file_opened; goto file_opened;
} }
@ -321,7 +323,7 @@ static int allocate_shared_file(size_t size, char **file_name,
/* open backing file */ /* open backing file */
fd = open(*file_name, O_RDWR, 0600); fd = open(*file_name, O_RDWR, 0600);
if (fd < 0) { if (fd < 0) {
opal_output(0,"mca_common_sm_mmap_init: open %s len %d failed with errno=%d\n", opal_output(0,"mca_common_sm_mmap_init: open %s len %ld failed with errno=%d\n",
*file_name, len, errno); *file_name, len, errno);
goto return_error; goto return_error;
} }
@ -873,6 +875,7 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
size_buff_ctl_per_proc*group_size+ size_buff_ctl_per_proc*group_size+
/* size of data buffers */ /* size of data buffers */
size*sm_module->sm2_module_num_buffers*group_size; size*sm_module->sm2_module_num_buffers*group_size;
sm_module->size_of_collective_buffer_region=tot_size_mem_banks;
sm_module->data_memory_per_proc_per_segment=size; sm_module->data_memory_per_proc_per_segment=size;
@ -896,12 +899,29 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
/* total memory management required */ /* total memory management required */
mem_management_total=mem_management_per_proc * group_size; mem_management_total=mem_management_per_proc * group_size;
sm_module->size_mem_banks_ctl_region=mem_management_total;
/*
* Memory for blocking collectives - need two sets of memory
* regions for this.
*/
/* size per proc */
size=2*sizeof(mca_coll_sm2_nb_request_process_shared_mem_t);
/* page align */
size=(size +
getpagesize() -1 ) / getpagesize();
size*=getpagesize();
sm_module->per_proc_size_of_blocking_barrier_region=size;
sm_module->size_of_blocking_barrier_region=size*group_size;
/* total size of backing file - this assumes the mmap allocation /* total size of backing file - this assumes the mmap allocation
* occurs on page boundaries, and that all segments are paged * occurs on page boundaries, and that all segments are paged
* aligned * aligned
*/ */
size_sm2_backing_file=mem_management_total+tot_size_mem_banks; size_sm2_backing_file=sm_module->size_mem_banks_ctl_region+
sm_module->size_of_collective_buffer_region+
sm_module->size_of_blocking_barrier_region;
sm_module->size_sm2_backing_file=size_sm2_backing_file; sm_module->size_sm2_backing_file=size_sm2_backing_file;
/* set file name */ /* set file name */
@ -1028,6 +1048,62 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
goto CLEANUP; goto CLEANUP;
} }
/*
* setup blocking barrier data structures
*/
sm_module->sm_blocking_barrier_region=
sm_module->shared_memory_region+
sm_module->size_mem_banks_ctl_region+
sm_module->size_of_collective_buffer_region;
sm_module->index_blocking_barrier_memory_bank=0;
sm_module->ctl_blocking_barrier=
(mca_coll_sm2_nb_request_process_shared_mem_t ***)
malloc(2*sizeof(mca_coll_sm2_nb_request_process_shared_mem_t **));
if( NULL == sm_module->ctl_blocking_barrier ) {
goto CLEANUP;
}
sm_module->ctl_blocking_barrier[0]=
(mca_coll_sm2_nb_request_process_shared_mem_t **)
malloc(group_size*sizeof(mca_coll_sm2_nb_request_process_shared_mem_t *));
if( NULL == sm_module->ctl_blocking_barrier[0]) {
goto CLEANUP;
}
sm_module->ctl_blocking_barrier[1]=
(mca_coll_sm2_nb_request_process_shared_mem_t **)
malloc(group_size*sizeof(mca_coll_sm2_nb_request_process_shared_mem_t *));
if( NULL == sm_module->ctl_blocking_barrier[1]) {
goto CLEANUP;
}
/* debug */
fprintf(stderr," sizeof(mca_coll_sm2_nb_request_process_shared_mem_t) %lx \n",
sizeof(mca_coll_sm2_nb_request_process_shared_mem_t));
fflush(stderr);
/* end debug */
for( j= 0 ; j < 2 ; j++ ) {
for( i=0 ; i < group_size ; i++ ) {
sm_module->ctl_blocking_barrier[j][i]=
(mca_coll_sm2_nb_request_process_shared_mem_t * )
(
sm_module->sm_blocking_barrier_region+
j*sizeof(mca_coll_sm2_nb_request_process_shared_mem_t)+
i*sm_module->per_proc_size_of_blocking_barrier_region )
;
/* debug */
fprintf(stderr," i %d j %d %p base %p pp %lx\n",i,j,
sm_module->ctl_blocking_barrier[j][i],
sm_module->sm_blocking_barrier_region,
sm_module->per_proc_size_of_blocking_barrier_region);
fflush(stderr);
/* end debug */
sm_module->ctl_blocking_barrier[j][i]->flag=0;
}
}
/* touch pages to apply memory affinity - Note: do we really need this or will /* touch pages to apply memory affinity - Note: do we really need this or will
* the algorithms do this */ * the algorithms do this */
@ -1071,6 +1147,21 @@ CLEANUP:
sm_module->scratch_space=NULL; sm_module->scratch_space=NULL;
} }
for( i= 0 ; i < group_size ; i++ ) {
if( NULL != sm_module->ctl_blocking_barrier[0][i] ) {
free( sm_module->ctl_blocking_barrier[0][i]);
sm_module->ctl_blocking_barrier[0][i]=NULL;
}
if( NULL != sm_module->ctl_blocking_barrier[1][i] ) {
free( sm_module->ctl_blocking_barrier[1][i]);
sm_module->ctl_blocking_barrier[1][i]=NULL;
}
}
if( NULL != sm_module->ctl_blocking_barrier ) {
free(sm_module->ctl_blocking_barrier);
sm_module->ctl_blocking_barrier=NULL;
}
OBJ_RELEASE(sm_module); OBJ_RELEASE(sm_module);
return NULL; return NULL;
@ -1110,7 +1201,7 @@ int progress_nb_barrier(mca_coll_sm2_module_t *module)
* to subtract 1 for the index, as the number completed is the index * to subtract 1 for the index, as the number completed is the index
* of the next one to complete. * of the next one to complete.
*/ */
int barrier_index=(module->num_nb_barriers_completed% barrier_index=(module->num_nb_barriers_completed%
module->sm2_module_num_memory_banks); module->sm2_module_num_memory_banks);
rc=mca_coll_sm2_nbbarrier_intra_progress(module->module_comm, rc=mca_coll_sm2_nbbarrier_intra_progress(module->module_comm,
@ -1176,7 +1267,7 @@ sm_work_buffer_t *alloc_sm2_shared_buffer(mca_coll_sm2_module_t *module)
while( num_incomlete_barriers == module->sm2_module_num_memory_banks ) { while( num_incomlete_barriers == module->sm2_module_num_memory_banks ) {
rc=progress_nb_barrier(module); rc=progress_nb_barrier(module);
if( OMPI_SUCCESS != rc ) { if( OMPI_SUCCESS != rc ) {
return rc; return NULL;
} }
num_incomlete_barriers=module->num_nb_barriers_started - num_incomlete_barriers=module->num_nb_barriers_started -
module->num_nb_barriers_completed; module->num_nb_barriers_completed;

Просмотреть файл

@ -20,7 +20,7 @@
extern uint64_t timers[7]; extern uint64_t timers[7];
end debug */ end debug */
/* debug */ /* debug
#include <assert.h> #include <assert.h>
extern void debug_module(void); extern void debug_module(void);
extern int last_root; extern int last_root;
@ -28,7 +28,7 @@ extern int node_type;
extern long long free_buff_free_index; extern long long free_buff_free_index;
int last_root; int last_root;
int node_type; int node_type;
/* end debug */ end debug */
@ -58,9 +58,9 @@ int mca_coll_sm2_reduce_intra_fanin(void *sbuf, void *rbuf, int count,
tree_node_t *my_reduction_node; tree_node_t *my_reduction_node;
sm_work_buffer_t *sm_buffer_desc; sm_work_buffer_t *sm_buffer_desc;
/* debug */ /* debug
last_root=root; last_root=root;
/* end debug */ end debug */
sm_module=(mca_coll_sm2_module_t *) module; sm_module=(mca_coll_sm2_module_t *) module;
/* compute process shift */ /* compute process shift */
@ -99,9 +99,9 @@ int mca_coll_sm2_reduce_intra_fanin(void *sbuf, void *rbuf, int count,
/* get my node for the reduction tree */ /* get my node for the reduction tree */
my_reduction_node=&(sm_module->reduction_tree[my_node_index]); my_reduction_node=&(sm_module->reduction_tree[my_node_index]);
n_children=my_reduction_node->n_children; n_children=my_reduction_node->n_children;
/* debug */ /* debug
node_type=my_reduction_node->my_node_type; node_type=my_reduction_node->my_node_type;
/* end debug */ end debug */
if( 1 == n_data_segments ) { if( 1 == n_data_segments ) {
/* single data segment */ /* single data segment */
@ -117,9 +117,9 @@ int mca_coll_sm2_reduce_intra_fanin(void *sbuf, void *rbuf, int count,
/* get a pointer to the shared-memory working buffer */ /* get a pointer to the shared-memory working buffer */
sm_buffer_desc=alloc_sm2_shared_buffer(sm_module); sm_buffer_desc=alloc_sm2_shared_buffer(sm_module);
/* debug */ /* debug
free_buff_free_index=tag; free_buff_free_index=tag;
/* end debug */ end debug */
/* get number of elements to process in this stripe */ /* get number of elements to process in this stripe */
count_this_stripe=count; count_this_stripe=count;