1
1
This commit was SVN r17571.
Этот коммит содержится в:
Rich Graham 2008-02-24 20:54:16 +00:00
родитель 771584bff5
Коммит 2d8c2420e8
3 изменённых файлов: 89 добавлений и 29 удалений

Просмотреть файл

@ -219,6 +219,12 @@ BEGIN_C_DECLS
/* size, per process, of each memory segment */
size_t segement_size_per_process;
/* size, per process and segment , of control region */
size_t ctl_memory_per_proc_per_segment;
/* size, per process and segment , of data region */
size_t data_memory_per_proc_per_segment;
/* number of memory banks */
int sm2_module_num_memory_banks;
@ -268,6 +274,9 @@ BEGIN_C_DECLS
/* multinomial reduction tree */
tree_node_t *reduction_tree;
/* collective tag */
long long collective_tag;
};
typedef struct mca_coll_sm2_module_t mca_coll_sm2_module_t;

Просмотреть файл

@ -26,33 +26,94 @@ int mca_coll_sm2_allreduce_intra_fanin_fanout(void *sbuf, void *rbuf, int count,
struct mca_coll_base_module_1_1_0_t *module)
{
/* local variables */
int rc=OMPI_SUCCESS;
size_t message_extent;
char *sm_buffer;
int rc=OMPI_SUCCESS,n_dts_per_buffer,n_data_segments,stripe_number;
size_t message_extent,dt_extent,ctl_size,len_data_buffer;
long long tag;
volatile char *sm_buffer;
mca_coll_sm2_module_t *sm_module;
sm_module=(mca_coll_sm2_module_t *) module;
/* get unique tag for this collective - assume only one collective
* per communicator at a given time, so no locking needed
* for atomic update of the tag */
sm_module->collective_tag++;
tag=sm_module->collective_tag;
/* get size of data needed - same layout as user data, so that
 * we can apply the reduction routines directly on these buffers
 */
rc=ompi_ddt_type_size(dtype, &dt_extent);
if( OMPI_SUCCESS != rc ) {
goto Error;
}
message_extent=dt_extent*count;
/* length of control and data regions */
ctl_size=sm_module->ctl_memory_per_proc_per_segment;
len_data_buffer=sm_module->data_memory_per_proc_per_segment;
/* number of data types copies that the scratch buffer can hold */
n_dts_per_buffer=((int) len_data_buffer)/dt_extent;
if ( 0 == n_dts_per_buffer ) {
rc=OMPI_ERROR;
goto Error;
}
/* compute number of stripes needed to process this collective */
n_data_segments=(count+n_dts_per_buffer -1 ) / n_dts_per_buffer ;
/* get a pointer to the shared-memory working buffer */
/* NOTE: starting with a rather synchronous approach */
for( stripe_number=0 ; stripe_number < n_data_segments ; stripe_number++ ) {
sm_buffer=alloc_sm2_shared_buffer(sm_module);
if( NULL == sm_buffer) {
rc=OMPI_ERR_OUT_OF_RESOURCE;
goto Error;
}
/* get size of data needed - same layout as user data, so that
 * we can apply the reduction routines directly on these buffers
/*
* Fan into root phase
*/
/* copy segment into shared buffer - later on will optimize to
* eliminate extra copies.
*/
/*
* Wait on children, and apply op to their data
*/
/*
* Signal parent that data is ready
*/
/*
* Fan out from root phase
*/
/*
* wait on Parent to signal that data is ready
*/
/*
* Copy data to shared buffer
*/
/*
* Signal children that Data is ready for reading
*/
/*
* Copy data out to destination
*/
rc=ompi_ddt_type_size(dtype, &message_extent);
if( OMPI_SUCCESS != rc ) {
goto Error;
}
/* "free" the shared-memory working buffer */
rc=free_sm2_shared_buffer(sm_module);
if( OMPI_SUCCESS != rc ) {
goto Error;
}
}
/* return */
return rc;

Просмотреть файл

@ -425,6 +425,9 @@ static int init_sm2_barrier(struct ompi_communicator_t *comm,
/* set the pointer to the request that needs to be completed first */
module->current_request_index=0;
/* set collective tag */
module->collective_tag=0;
/* return - successful */
return OMPI_SUCCESS;
@ -546,6 +549,7 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
(alignment + ctl_memory_per_proc_per_segment -1) / alignment;
ctl_memory_per_proc_per_segment*=alignment;
mca_coll_sm2_component.sm2_ctl_size_allocated=ctl_memory_per_proc_per_segment;
sm_module->ctl_memory_per_proc_per_segment=ctl_memory_per_proc_per_segment;
/* get data region size - allocation happens on a page granularity, with
* a minimum of a page allocated per proc, so adjust to this
@ -566,6 +570,8 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
sm_module->segement_size_per_process=size_tot_per_proc_per_seg;
sm_module->segment_size=size_tot_per_segment;
sm_module->data_memory_per_proc_per_segment=size_tot_per_proc_per_seg-
ctl_memory_per_proc_per_segment;
/* compute memory per bank */
tot_size_per_bank=size_tot_per_segment*mca_coll_sm2_component.sm2_num_regions_per_bank;
@ -640,22 +646,6 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
if( MPI_SUCCESS != ret ) {
goto CLEANUP;
}
/* debug */
if( 0 == ompi_comm_rank(comm) ) {
fprintf(stderr," my rank %d \n",ompi_comm_rank(comm));
for( i=0 ; i < ompi_comm_size(comm) ; i++ ) {
fprintf(stderr," DDDD i %d parent %d children :: ",
i,sm_module->reduction_tree[i].parent_rank);
for (j=0 ; j < sm_module->reduction_tree[i].n_children ; j++ ) {
fprintf(stderr," %d ",
sm_module->reduction_tree[i].children_ranks[j]);
}
fprintf(stderr," \n");
fflush(stderr);
}
}
/* initialize local counters */
sm_module->sm2_allocated_buffer_index=-1;