1
1
This commit was SVN r17656.
Этот коммит содержится в:
Rich Graham 2008-02-28 22:01:19 +00:00
родитель 84b2099fe8
Коммит 940d6732c9
6 изменённых файлов: 34 добавлений и 28 удалений

Просмотреть файл

@ -89,13 +89,13 @@ BEGIN_C_DECLS
size_t sm2_data_size_allocated;
/** MCA parameter: data region alignment */
size_t sm2_data_alignment;
int sm2_data_alignment;
/** MCA parameter: number of memory banks */
size_t sm2_num_mem_banks;
int sm2_num_mem_banks;
/** MCA parameter: number of regions per memory bank */
size_t sm2_num_regions_per_bank;
int sm2_num_regions_per_bank;
/** MCA parameter: order of buffer management barrier tree */
int order_barrier_tree;
@ -347,6 +347,7 @@ BEGIN_C_DECLS
struct ompi_communicator_t *comm,
struct mca_coll_base_module_1_1_0_t *module);
/**
* Macro to setup flag usage
*/

Просмотреть файл

@ -20,6 +20,7 @@
/**
* Shared memory blocking allreduce.
*/
static
int mca_coll_sm2_allreduce_intra_fanin_fanout(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
@ -28,7 +29,7 @@ int mca_coll_sm2_allreduce_intra_fanin_fanout(void *sbuf, void *rbuf, int count,
{
/* local variables */
int rc=OMPI_SUCCESS,n_dts_per_buffer,n_data_segments,stripe_number;
int my_rank, child_rank, parent_rank, child, n_parents, n_children;
int my_rank, child_rank, child, n_parents, n_children;
int my_fanin_parent,count_processed,count_this_stripe;
int my_fanout_parent;
size_t message_extent,dt_extent,ctl_size,len_data_buffer;
@ -39,7 +40,7 @@ int mca_coll_sm2_allreduce_intra_fanin_fanout(void *sbuf, void *rbuf, int count,
volatile char * parent_data_pointer;
char *my_base_temp_pointer;
volatile char * child_base_temp_pointer;
char * volatile parent_base_temp_pointer, * volatile root_base_temp_pointer;
volatile char * parent_base_temp_pointer;
mca_coll_sm2_nb_request_process_shared_mem_t *my_ctl_pointer;
volatile mca_coll_sm2_nb_request_process_shared_mem_t * child_ctl_pointer;
volatile mca_coll_sm2_nb_request_process_shared_mem_t * parent_ctl_pointer;
@ -117,7 +118,8 @@ int mca_coll_sm2_allreduce_intra_fanin_fanout(void *sbuf, void *rbuf, int count,
* eliminate extra copies.
*/
rc=ompi_ddt_copy_content_same_ddt(dtype, count_this_stripe,
my_data_pointer, (char *)sbuf+dt_extent*count_processed);
(char *)my_data_pointer,
(char *)((char *)sbuf+dt_extent*count_processed));
if( 0 != rc ) {
return OMPI_ERROR;
}
@ -139,15 +141,15 @@ int mca_coll_sm2_allreduce_intra_fanin_fanout(void *sbuf, void *rbuf, int count,
/* wait until child flag is set */
while(!
(child_ctl_pointer->flag == tag &
child_ctl_pointer->index== stripe_number) ) {
( (child_ctl_pointer->flag == tag) &
(child_ctl_pointer->index== stripe_number) ) ) {
/* Note: Actually need to make progress here */
;
}
/* apply collective operation */
ompi_op_reduce(op,child_data_pointer,my_data_pointer,
count_this_stripe,dtype);
ompi_op_reduce(op,(void *)child_data_pointer,
(void *)my_data_pointer, count_this_stripe,dtype);
} /* end child loop */
/* set memory barrier to make sure data is in main memory before
@ -174,7 +176,8 @@ int mca_coll_sm2_allreduce_intra_fanin_fanout(void *sbuf, void *rbuf, int count,
/* copy data to user supplied buffer */
rc=ompi_ddt_copy_content_same_ddt(dtype, count_this_stripe,
(char *)rbuf+dt_extent*count_processed,my_data_pointer);
(char *)((char *)rbuf+dt_extent*count_processed),
(char *)my_data_pointer);
if( 0 != rc ) {
return OMPI_ERROR;
}
@ -184,8 +187,11 @@ int mca_coll_sm2_allreduce_intra_fanin_fanout(void *sbuf, void *rbuf, int count,
((char *)sm_buffer+my_fanout_parent*
sm_module->segement_size_per_process);
parent_data_pointer=parent_base_temp_pointer+ctl_size;
parent_ctl_pointer=parent_base_temp_pointer;
parent_data_pointer=(volatile char *)
((char *)parent_base_temp_pointer+ctl_size);
parent_ctl_pointer=(volatile
mca_coll_sm2_nb_request_process_shared_mem_t *)
parent_base_temp_pointer;
child_ctl_pointer=
(volatile mca_coll_sm2_nb_request_process_shared_mem_t *)
@ -195,15 +201,15 @@ int mca_coll_sm2_allreduce_intra_fanin_fanout(void *sbuf, void *rbuf, int count,
* wait on Parent to signal that data is ready
*/
while(!
(parent_ctl_pointer->flag == -tag &
parent_ctl_pointer->index== stripe_number) ) {
( (parent_ctl_pointer->flag == -tag) &
(parent_ctl_pointer->index== stripe_number) ) ) {
/* Note: Actually need to make progress here */
;
}
/* copy the data to my shared buffer, for access by children */
rc=ompi_ddt_copy_content_same_ddt(dtype, count_this_stripe,
my_data_pointer,parent_data_pointer);
(char *)my_data_pointer,(char *)parent_data_pointer);
if( 0 != rc ) {
return OMPI_ERROR;
}
@ -218,7 +224,8 @@ int mca_coll_sm2_allreduce_intra_fanin_fanout(void *sbuf, void *rbuf, int count,
/* copy data to user supplied buffer */
rc=ompi_ddt_copy_content_same_ddt(dtype, count_this_stripe,
(char *)rbuf+dt_extent*count_processed,my_data_pointer);
(char *)rbuf+dt_extent*count_processed,
(char *)my_data_pointer);
if( 0 != rc ) {
return OMPI_ERROR;
}

Просмотреть файл

@ -49,12 +49,15 @@
* parent, and the leaves that have no children. But that's the
* general idea.
*/
/* this barrier is only a stub; once it is implemented, remove the enclosing comment so the function is compiled and visible */
/*
int mca_coll_sm2_barrier_intra(struct ompi_communicator_t *comm,
struct mca_coll_base_module_1_1_0_t *module)
{
return OMPI_SUCCESS;
}
*/
/* non-blocking barrier - init function */
int mca_coll_sm2_nbbarrier_intra(struct ompi_communicator_t *comm,

Просмотреть файл

@ -45,12 +45,6 @@ const char *mca_coll_sm2_component_version_string =
"Open MPI sm-V2 collective MCA component version " OMPI_VERSION;
/*
* Local functions
*/
static int sm2_module_enable(struct mca_coll_base_module_1_1_0_t *module,
struct ompi_communicator_t *comm);
/*
* Local functions
*/

Просмотреть файл

@ -122,7 +122,7 @@ static int allocate_shared_file(size_t size, char *file_name,
int group_size,my_rank;
bool i_create_shared_file=false;
size_t p;
ssize_t p;
int rc=0, sm_file_inited=0;
struct iovec iov[2];
int sm_file_created;
@ -460,7 +460,8 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
/* local variables */
mca_coll_sm2_module_t *sm_module;
int group_size,ret;
size_t alignment,size,size_tot,size_tot_per_proc_per_seg;
size_t alignment,size;
ssize_t size_tot_per_proc_per_seg;
size_t tot_size_per_bank,size_tot_per_segment;
size_t tot_size_mem_banks;
size_t ctl_memory_per_proc_per_segment;
@ -842,7 +843,7 @@ char *alloc_sm2_shared_buffer(mca_coll_sm2_module_t *module)
int free_sm2_shared_buffer(mca_coll_sm2_module_t *module)
{
/* local variables */
int rc,buffer_index,memory_bank_index;
int rc,memory_bank_index;
mca_coll_sm2_nb_request_process_private_mem_t *request;
/* check to see if need to progress the current nb-barrier, which

Просмотреть файл

@ -194,8 +194,8 @@ int setup_multinomial_tree(int tree_order, int num_nodes,
Error:
/* free allocated memory */
for( i=0 ; i < num_nodes ; i++ ) {
if( NULL != tree_nodes[node_index].children_ranks ) {
free(tree_nodes[node_index].children_ranks);
if( NULL != tree_nodes[i].children_ranks ) {
free(tree_nodes[i].children_ranks);
}
}