add selection logic for barrier and reduce.
This commit was SVN r18215.
Этот коммит содержится в:
родитель
bee8b42f29
Коммит
df35223603
@ -106,11 +106,32 @@ BEGIN_C_DECLS
|
|||||||
/** MCA parameter: order of fan-out read tree */
|
/** MCA parameter: order of fan-out read tree */
|
||||||
int order_fanout_read_tree;
|
int order_fanout_read_tree;
|
||||||
|
|
||||||
/*I MCA paramenter: number of polling loops to run while waiting
|
/** MCA paramenter: number of polling loops to run while waiting
|
||||||
* for children or parent to complete their work
|
* for children or parent to complete their work
|
||||||
*/
|
*/
|
||||||
int n_poll_loops;
|
int n_poll_loops;
|
||||||
|
|
||||||
|
/** MCA parameter: message size cutoff for switching between
|
||||||
|
* short and long protocol
|
||||||
|
*/
|
||||||
|
size_t short_message_size;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Parameters to control methods used
|
||||||
|
*/
|
||||||
|
/** MCA parameter: method to force a given barrier method to be used.
|
||||||
|
* 0 - FANIN_FAN_OUT_BARRIER_FN
|
||||||
|
* 1 - RECURSIVE_DOUBLING_BARRIER_FN
|
||||||
|
*/
|
||||||
|
int force_barrier;
|
||||||
|
|
||||||
|
/** MCA parameter: method to force a given reduce method to be used.
|
||||||
|
* 0 - FANIN_FAN_OUT_REDUCE_FN,
|
||||||
|
* 1 - REDUCE_SCATTER_GATHER_FN,
|
||||||
|
*/
|
||||||
|
int force_reduce;
|
||||||
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -118,6 +139,30 @@ BEGIN_C_DECLS
|
|||||||
*/
|
*/
|
||||||
typedef struct mca_coll_sm2_component_t mca_coll_sm2_component_t;
|
typedef struct mca_coll_sm2_component_t mca_coll_sm2_component_t;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Implemented function index list
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* barrier */
|
||||||
|
enum{
|
||||||
|
FANIN_FAN_OUT_BARRIER_FN,
|
||||||
|
RECURSIVE_DOUBLING_BARRIER_FN,
|
||||||
|
N_BARRIER_FNS
|
||||||
|
};
|
||||||
|
|
||||||
|
/* reduce */
|
||||||
|
enum{
|
||||||
|
FANIN_REDUCE_FN,
|
||||||
|
REDUCE_SCATTER_GATHER_FN,
|
||||||
|
N_REDUCE_FNS
|
||||||
|
};
|
||||||
|
enum{
|
||||||
|
SHORT_DATA_FN,
|
||||||
|
LONG_DATA_FN,
|
||||||
|
N_REDUCE_FNS_USED
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
/* enum for node type */
|
/* enum for node type */
|
||||||
enum{
|
enum{
|
||||||
ROOT_NODE,
|
ROOT_NODE,
|
||||||
@ -326,7 +371,7 @@ BEGIN_C_DECLS
|
|||||||
int index_blocking_barrier_memory_bank;
|
int index_blocking_barrier_memory_bank;
|
||||||
|
|
||||||
/* pointers to blocking memory control regions */
|
/* pointers to blocking memory control regions */
|
||||||
mca_coll_sm2_nb_request_process_shared_mem_t ***ctl_blocking_barrier;
|
volatile mca_coll_sm2_nb_request_process_shared_mem_t ***ctl_blocking_barrier;
|
||||||
|
|
||||||
/* description of allocated temp buffers - one struct per
|
/* description of allocated temp buffers - one struct per
|
||||||
* buffer. Each buffer has space "owned" by each process
|
* buffer. Each buffer has space "owned" by each process
|
||||||
@ -403,16 +448,22 @@ BEGIN_C_DECLS
|
|||||||
/* collective tag */
|
/* collective tag */
|
||||||
long long collective_tag;
|
long long collective_tag;
|
||||||
|
|
||||||
/* debug flag RLG */
|
|
||||||
int blocked_on_barrier;
|
|
||||||
|
|
||||||
long long barrier_bank_list[BARRIER_BANK_LIST_SIZE];
|
|
||||||
long long barrier_bank_cntr;
|
|
||||||
/* end debug */
|
|
||||||
|
|
||||||
/* scratch space - one int per process */
|
/* scratch space - one int per process */
|
||||||
int *scratch_space;
|
int *scratch_space;
|
||||||
|
|
||||||
|
/* message size cutoff for switching between short and long
|
||||||
|
* protocol
|
||||||
|
*/
|
||||||
|
size_t short_message_size;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* function table for variants of a given collective
|
||||||
|
* function.
|
||||||
|
*/
|
||||||
|
mca_coll_base_module_barrier_fn_t barrier_functions[N_BARRIER_FNS];
|
||||||
|
mca_coll_base_module_reduce_fn_t list_reduce_functions[N_REDUCE_FNS];
|
||||||
|
mca_coll_base_module_reduce_fn_t reduce_functions[N_REDUCE_FNS_USED];
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef struct mca_coll_sm2_module_t mca_coll_sm2_module_t;
|
typedef struct mca_coll_sm2_module_t mca_coll_sm2_module_t;
|
||||||
@ -541,6 +592,16 @@ BEGIN_C_DECLS
|
|||||||
int root, struct ompi_communicator_t *comm,
|
int root, struct ompi_communicator_t *comm,
|
||||||
struct mca_coll_base_module_1_1_0_t *module);
|
struct mca_coll_base_module_1_1_0_t *module);
|
||||||
|
|
||||||
|
int mca_coll_sm2_reduce_intra_reducescatter_gather(void *sbuf, void *rbuf,
|
||||||
|
int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op,
|
||||||
|
int root, struct ompi_communicator_t *comm,
|
||||||
|
struct mca_coll_base_module_1_1_0_t *module);
|
||||||
|
|
||||||
|
int mca_coll_sm2_reduce_intra_fanin(void *sbuf, void *rbuf, int count,
|
||||||
|
struct ompi_datatype_t *dtype, struct ompi_op_t *op,
|
||||||
|
int root, struct ompi_communicator_t *comm,
|
||||||
|
struct mca_coll_base_module_1_1_0_t *module);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Shared memory blocking broadcast.
|
* Shared memory blocking broadcast.
|
||||||
*/
|
*/
|
||||||
@ -555,6 +616,14 @@ BEGIN_C_DECLS
|
|||||||
int mca_coll_sm2_barrier_intra( struct ompi_communicator_t *comm,
|
int mca_coll_sm2_barrier_intra( struct ompi_communicator_t *comm,
|
||||||
struct mca_coll_base_module_1_1_0_t *module);
|
struct mca_coll_base_module_1_1_0_t *module);
|
||||||
|
|
||||||
|
int mca_coll_sm2_barrier_intra_fanin_fanout(
|
||||||
|
struct ompi_communicator_t *comm,
|
||||||
|
struct mca_coll_base_module_1_1_0_t *module);
|
||||||
|
|
||||||
|
int mca_coll_sm2_barrier_intra_recursive_doubling(
|
||||||
|
struct ompi_communicator_t *comm,
|
||||||
|
struct mca_coll_base_module_1_1_0_t *module);
|
||||||
|
|
||||||
END_C_DECLS
|
END_C_DECLS
|
||||||
|
|
||||||
#endif /* MCA_COLL_SM2_EXPORT_H */
|
#endif /* MCA_COLL_SM2_EXPORT_H */
|
||||||
|
@ -1118,7 +1118,7 @@ int mca_coll_sm2_allreduce_intra_reducescatter_allgather(void *sbuf, void *rbuf,
|
|||||||
/* local varibles */
|
/* local varibles */
|
||||||
int i,rc=OMPI_SUCCESS,n_dts_per_buffer,n_data_segments,stripe_number;
|
int i,rc=OMPI_SUCCESS,n_dts_per_buffer,n_data_segments,stripe_number;
|
||||||
int pair_rank,exchange,extra_rank,n_proc_data,tmp;
|
int pair_rank,exchange,extra_rank,n_proc_data,tmp;
|
||||||
int starting_proc,n_procs_to_read,base_block_proc,base_read_proc;
|
int starting_proc;
|
||||||
int n_elements_per_proc, n_residual_elements;
|
int n_elements_per_proc, n_residual_elements;
|
||||||
int cnt_offset,n_copy;
|
int cnt_offset,n_copy;
|
||||||
pair_exchange_node_t *my_exchange_node;
|
pair_exchange_node_t *my_exchange_node;
|
||||||
@ -1129,7 +1129,6 @@ int mca_coll_sm2_allreduce_intra_reducescatter_allgather(void *sbuf, void *rbuf,
|
|||||||
ptrdiff_t dt_extent;
|
ptrdiff_t dt_extent;
|
||||||
long long tag, base_tag;
|
long long tag, base_tag;
|
||||||
sm_work_buffer_t *sm_buffer_desc;
|
sm_work_buffer_t *sm_buffer_desc;
|
||||||
volatile char * my_write_pointer;
|
|
||||||
volatile char * extra_rank_write_data_pointer;
|
volatile char * extra_rank_write_data_pointer;
|
||||||
volatile char * extra_rank_read_data_pointer;
|
volatile char * extra_rank_read_data_pointer;
|
||||||
volatile char * partner_base_pointer;
|
volatile char * partner_base_pointer;
|
||||||
|
@ -314,13 +314,12 @@ DONE:
|
|||||||
/**
|
/**
|
||||||
* Shared memory blocking allreduce.
|
* Shared memory blocking allreduce.
|
||||||
*/
|
*/
|
||||||
static
|
|
||||||
int mca_coll_sm2_barrier_intra_fanin_fanout(
|
int mca_coll_sm2_barrier_intra_fanin_fanout(
|
||||||
struct ompi_communicator_t *comm,
|
struct ompi_communicator_t *comm,
|
||||||
struct mca_coll_base_module_1_1_0_t *module)
|
struct mca_coll_base_module_1_1_0_t *module)
|
||||||
{
|
{
|
||||||
/* local variables */
|
/* local variables */
|
||||||
int rc=OMPI_SUCCESS;
|
int rc=OMPI_SUCCESS,bar_buff_index;
|
||||||
int my_rank, child_rank, child, n_parents, n_children;
|
int my_rank, child_rank, child, n_parents, n_children;
|
||||||
int my_fanin_parent;
|
int my_fanin_parent;
|
||||||
int my_fanout_parent;
|
int my_fanout_parent;
|
||||||
@ -334,8 +333,6 @@ int mca_coll_sm2_barrier_intra_fanin_fanout(
|
|||||||
|
|
||||||
sm_module=(mca_coll_sm2_module_t *) module;
|
sm_module=(mca_coll_sm2_module_t *) module;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* get my node for the reduction tree */
|
/* get my node for the reduction tree */
|
||||||
my_rank=ompi_comm_rank(comm);
|
my_rank=ompi_comm_rank(comm);
|
||||||
my_reduction_node=&(sm_module->reduction_tree[my_rank]);
|
my_reduction_node=&(sm_module->reduction_tree[my_rank]);
|
||||||
@ -351,10 +348,17 @@ int mca_coll_sm2_barrier_intra_fanin_fanout(
|
|||||||
tag=sm_module->collective_tag;
|
tag=sm_module->collective_tag;
|
||||||
sm_module->collective_tag++;
|
sm_module->collective_tag++;
|
||||||
|
|
||||||
|
/*
|
||||||
sm_buffer_desc=alloc_sm2_shared_buffer(sm_module);
|
sm_buffer_desc=alloc_sm2_shared_buffer(sm_module);
|
||||||
|
*/
|
||||||
|
sm_module->index_blocking_barrier_memory_bank^=1;
|
||||||
|
bar_buff_index=sm_module->index_blocking_barrier_memory_bank;
|
||||||
|
|
||||||
/* offset to data segment */
|
my_ctl_pointer=
|
||||||
my_ctl_pointer=sm_buffer_desc->proc_memory[my_rank].control_region;
|
sm_module->ctl_blocking_barrier[bar_buff_index][my_rank];
|
||||||
|
/*
|
||||||
|
sm_buffer_desc->proc_memory[my_rank].control_region;
|
||||||
|
*/
|
||||||
|
|
||||||
/***************************
|
/***************************
|
||||||
* Fan into root phase
|
* Fan into root phase
|
||||||
@ -369,11 +373,13 @@ int mca_coll_sm2_barrier_intra_fanin_fanout(
|
|||||||
|
|
||||||
child_rank=my_reduction_node->children_ranks[child];
|
child_rank=my_reduction_node->children_ranks[child];
|
||||||
child_ctl_pointer=
|
child_ctl_pointer=
|
||||||
|
sm_module->ctl_blocking_barrier[bar_buff_index][child_rank];
|
||||||
|
/*
|
||||||
sm_buffer_desc->proc_memory[child_rank].control_region;
|
sm_buffer_desc->proc_memory[child_rank].control_region;
|
||||||
|
*/
|
||||||
|
|
||||||
/* wait until child flag is set */
|
/* wait until child flag is set */
|
||||||
while( child_ctl_pointer->flag != tag ) {
|
while( child_ctl_pointer->flag != tag ) {
|
||||||
/* Note: Actually need to make progress here */
|
|
||||||
opal_progress();
|
opal_progress();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -384,7 +390,9 @@ int mca_coll_sm2_barrier_intra_fanin_fanout(
|
|||||||
/* set memory barriet to make sure data is in main memory before
|
/* set memory barriet to make sure data is in main memory before
|
||||||
* the completion flgas are set.
|
* the completion flgas are set.
|
||||||
*/
|
*/
|
||||||
|
/*
|
||||||
MB();
|
MB();
|
||||||
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Signal parent that data is ready
|
* Signal parent that data is ready
|
||||||
@ -396,7 +404,9 @@ int mca_coll_sm2_barrier_intra_fanin_fanout(
|
|||||||
/* set memory barriet to make sure data is in main memory before
|
/* set memory barriet to make sure data is in main memory before
|
||||||
* the completion flgas are set.
|
* the completion flgas are set.
|
||||||
*/
|
*/
|
||||||
|
/*
|
||||||
MB();
|
MB();
|
||||||
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Signal parent that data is ready
|
* Signal parent that data is ready
|
||||||
@ -415,14 +425,19 @@ int mca_coll_sm2_barrier_intra_fanin_fanout(
|
|||||||
/* I am the root - so copy signal children, and then
|
/* I am the root - so copy signal children, and then
|
||||||
* start reading
|
* start reading
|
||||||
*/
|
*/
|
||||||
|
/*
|
||||||
MB();
|
MB();
|
||||||
|
*/
|
||||||
my_ctl_pointer->flag=-tag;
|
my_ctl_pointer->flag=-tag;
|
||||||
|
|
||||||
|
|
||||||
} else if( LEAF_NODE == my_fanout_read_tree->my_node_type ) {
|
} else if( LEAF_NODE == my_fanout_read_tree->my_node_type ) {
|
||||||
|
|
||||||
parent_ctl_pointer=
|
parent_ctl_pointer=
|
||||||
|
sm_module->ctl_blocking_barrier[bar_buff_index][my_fanout_parent];
|
||||||
|
/*
|
||||||
sm_buffer_desc->proc_memory[my_fanout_parent].control_region;
|
sm_buffer_desc->proc_memory[my_fanout_parent].control_region;
|
||||||
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* wait on Parent to signal that data is ready
|
* wait on Parent to signal that data is ready
|
||||||
@ -435,7 +450,10 @@ int mca_coll_sm2_barrier_intra_fanin_fanout(
|
|||||||
/* interior nodes */
|
/* interior nodes */
|
||||||
|
|
||||||
parent_ctl_pointer=
|
parent_ctl_pointer=
|
||||||
|
sm_module->ctl_blocking_barrier[bar_buff_index][my_fanout_parent];
|
||||||
|
/*
|
||||||
sm_buffer_desc->proc_memory[my_fanout_parent].control_region;
|
sm_buffer_desc->proc_memory[my_fanout_parent].control_region;
|
||||||
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* wait on Parent to signal that data is ready
|
* wait on Parent to signal that data is ready
|
||||||
@ -447,7 +465,9 @@ int mca_coll_sm2_barrier_intra_fanin_fanout(
|
|||||||
/* set memory barriet to make sure data is in main memory before
|
/* set memory barriet to make sure data is in main memory before
|
||||||
* the completion flgas are set.
|
* the completion flgas are set.
|
||||||
*/
|
*/
|
||||||
|
/*
|
||||||
MB();
|
MB();
|
||||||
|
*/
|
||||||
|
|
||||||
/* signal children that they may read the result data */
|
/* signal children that they may read the result data */
|
||||||
my_ctl_pointer->flag=-tag;
|
my_ctl_pointer->flag=-tag;
|
||||||
@ -455,10 +475,12 @@ int mca_coll_sm2_barrier_intra_fanin_fanout(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* "free" the shared-memory working buffer */
|
/* "free" the shared-memory working buffer */
|
||||||
|
/*
|
||||||
rc=free_sm2_shared_buffer(sm_module);
|
rc=free_sm2_shared_buffer(sm_module);
|
||||||
if( OMPI_SUCCESS != rc ) {
|
if( OMPI_SUCCESS != rc ) {
|
||||||
goto Error;
|
goto Error;
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
/* return */
|
/* return */
|
||||||
return rc;
|
return rc;
|
||||||
@ -467,6 +489,204 @@ Error:
|
|||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Shared memory blocking barrier.
|
||||||
|
*/
|
||||||
|
int mca_coll_sm2_barrier_intra_recursive_doubling(
|
||||||
|
struct ompi_communicator_t *comm,
|
||||||
|
struct mca_coll_base_module_1_1_0_t *module)
|
||||||
|
{
|
||||||
|
/* local variables */
|
||||||
|
int rc=OMPI_SUCCESS;
|
||||||
|
int pair_rank,exchange,extra_rank;
|
||||||
|
pair_exchange_node_t *my_exchange_node;
|
||||||
|
int my_rank,bar_buff_index;
|
||||||
|
long long tag, base_tag;
|
||||||
|
mca_coll_sm2_nb_request_process_shared_mem_t *my_ctl_pointer;
|
||||||
|
volatile mca_coll_sm2_nb_request_process_shared_mem_t *
|
||||||
|
partner_ctl_pointer;
|
||||||
|
volatile mca_coll_sm2_nb_request_process_shared_mem_t *
|
||||||
|
extra_ctl_pointer;
|
||||||
|
mca_coll_sm2_module_t *sm_module;
|
||||||
|
/* debug
|
||||||
|
opal_timer_t t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10;
|
||||||
|
end debug */
|
||||||
|
|
||||||
|
sm_module=(mca_coll_sm2_module_t *) module;
|
||||||
|
|
||||||
|
/* get my node for the reduction tree */
|
||||||
|
my_exchange_node=&(sm_module->recursive_doubling_tree);
|
||||||
|
my_rank=ompi_comm_rank(comm);
|
||||||
|
|
||||||
|
/* get pointer to barrier strcuture */
|
||||||
|
sm_module->index_blocking_barrier_memory_bank^=1;
|
||||||
|
bar_buff_index=sm_module->index_blocking_barrier_memory_bank;
|
||||||
|
|
||||||
|
|
||||||
|
/* get unique set of tags for this stripe.
|
||||||
|
* Assume only one collective
|
||||||
|
* per communicator at a given time, so no locking needed
|
||||||
|
* for atomic update of the tag */
|
||||||
|
base_tag=sm_module->collective_tag;
|
||||||
|
sm_module->collective_tag+=my_exchange_node->n_tags;
|
||||||
|
|
||||||
|
/* get pointers to my work buffers */
|
||||||
|
my_ctl_pointer=
|
||||||
|
sm_module->ctl_blocking_barrier[bar_buff_index][my_rank];
|
||||||
|
/*
|
||||||
|
sm_buffer_desc->proc_memory[my_rank].control_region;
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* copy data in from the "extra" source, if need be */
|
||||||
|
tag=base_tag;
|
||||||
|
if(0 < my_exchange_node->n_extra_sources) {
|
||||||
|
|
||||||
|
if ( EXCHANGE_NODE == my_exchange_node->node_type ) {
|
||||||
|
|
||||||
|
extra_rank=my_exchange_node->rank_extra_source;
|
||||||
|
extra_ctl_pointer=
|
||||||
|
sm_module->ctl_blocking_barrier[bar_buff_index][extra_rank];
|
||||||
|
/*
|
||||||
|
sm_buffer_desc->proc_memory[extra_rank].control_region;
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* wait until remote data is read */
|
||||||
|
while( extra_ctl_pointer->flag < tag ) {
|
||||||
|
opal_progress();
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
/*
|
||||||
|
MB();
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Signal parent that data is ready
|
||||||
|
*/
|
||||||
|
my_ctl_pointer->flag=tag;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
MB();
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
* Signal parent that data is ready
|
||||||
|
*/
|
||||||
|
tag=base_tag+1;
|
||||||
|
my_ctl_pointer->flag=tag;
|
||||||
|
|
||||||
|
/* loop over data exchanges */
|
||||||
|
for(exchange=0 ; exchange < my_exchange_node->n_exchanges ; exchange++) {
|
||||||
|
|
||||||
|
/* debug
|
||||||
|
t4=opal_sys_timer_get_cycles();
|
||||||
|
end debug */
|
||||||
|
|
||||||
|
/* is the remote data read */
|
||||||
|
pair_rank=my_exchange_node->rank_exchanges[exchange];
|
||||||
|
partner_ctl_pointer=
|
||||||
|
sm_module->ctl_blocking_barrier[bar_buff_index][pair_rank];
|
||||||
|
/*
|
||||||
|
sm_buffer_desc->proc_memory[pair_rank].control_region;
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
MB();
|
||||||
|
*/
|
||||||
|
my_ctl_pointer->flag=tag;
|
||||||
|
|
||||||
|
/* wait until remote data is read */
|
||||||
|
while( partner_ctl_pointer->flag < tag ) {
|
||||||
|
opal_progress();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* end test */
|
||||||
|
|
||||||
|
/* signal that I am done reading my peer's data */
|
||||||
|
tag++;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/* copy data in from the "extra" source, if need be */
|
||||||
|
if(0 < my_exchange_node->n_extra_sources) {
|
||||||
|
tag=base_tag+my_exchange_node->n_tags-1;
|
||||||
|
|
||||||
|
if ( EXTRA_NODE == my_exchange_node->node_type ) {
|
||||||
|
|
||||||
|
extra_rank=my_exchange_node->rank_extra_source;
|
||||||
|
extra_ctl_pointer=
|
||||||
|
sm_module->ctl_blocking_barrier[bar_buff_index][extra_rank];
|
||||||
|
/*
|
||||||
|
sm_buffer_desc->proc_memory[extra_rank].control_region;
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* wait until remote data is read */
|
||||||
|
while(! ( extra_ctl_pointer->flag == tag ) ) {
|
||||||
|
opal_progress();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* signal that I am done */
|
||||||
|
my_ctl_pointer->flag=tag;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
tag=base_tag+my_exchange_node->n_tags-1;
|
||||||
|
/* set memory barriet to make sure data is in main memory before
|
||||||
|
* the completion flgas are set.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
MB();
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Signal parent that data is ready
|
||||||
|
*/
|
||||||
|
my_ctl_pointer->flag=tag;
|
||||||
|
|
||||||
|
/* wait until child is done to move on - this buffer will
|
||||||
|
* be reused for the next stripe, so don't want to move
|
||||||
|
* on too quick.
|
||||||
|
*/
|
||||||
|
extra_rank=my_exchange_node->rank_extra_source;
|
||||||
|
extra_ctl_pointer=
|
||||||
|
sm_module->ctl_blocking_barrier[bar_buff_index][extra_rank];
|
||||||
|
/*
|
||||||
|
sm_buffer_desc->proc_memory[extra_rank].control_region;
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* wait until remote data is read */
|
||||||
|
while( extra_ctl_pointer->flag < tag ) {
|
||||||
|
opal_progress();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* debug
|
||||||
|
|
||||||
|
t9=opal_sys_timer_get_cycles();
|
||||||
|
timers[5]+=(t9-t8);
|
||||||
|
end debug */
|
||||||
|
|
||||||
|
|
||||||
|
/* "free" the shared-memory working buffer */
|
||||||
|
rc=free_sm2_shared_buffer(sm_module);
|
||||||
|
if( OMPI_SUCCESS != rc ) {
|
||||||
|
goto Error;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* return */
|
||||||
|
return rc;
|
||||||
|
|
||||||
|
Error:
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* Shared memory blocking barrier
|
* Shared memory blocking barrier
|
||||||
*/
|
*/
|
||||||
@ -475,8 +695,11 @@ int mca_coll_sm2_barrier_intra( struct ompi_communicator_t *comm,
|
|||||||
{
|
{
|
||||||
/* local variables */
|
/* local variables */
|
||||||
int rc;
|
int rc;
|
||||||
|
mca_coll_sm2_module_t *sm_module;
|
||||||
|
|
||||||
rc= mca_coll_sm2_barrier_intra_fanin_fanout(comm, module);
|
sm_module=(mca_coll_sm2_module_t *) module;
|
||||||
|
|
||||||
|
rc= sm_module->barrier_functions[0](comm, module);
|
||||||
if( OMPI_SUCCESS != rc ) {
|
if( OMPI_SUCCESS != rc ) {
|
||||||
goto Error;
|
goto Error;
|
||||||
}
|
}
|
||||||
|
@ -140,7 +140,7 @@ static int sm2_open(void)
|
|||||||
|
|
||||||
/* set component priority */
|
/* set component priority */
|
||||||
cs->sm2_priority=
|
cs->sm2_priority=
|
||||||
mca_coll_sm2_param_register_int("sm2_priority",0);
|
mca_coll_sm2_param_register_int("sm2_priority",90);
|
||||||
|
|
||||||
/* set control region size (bytes), per proc */
|
/* set control region size (bytes), per proc */
|
||||||
cs->sm2_ctl_size_per_proc=
|
cs->sm2_ctl_size_per_proc=
|
||||||
@ -155,7 +155,7 @@ static int sm2_open(void)
|
|||||||
|
|
||||||
/* Min data Segment size (bytes) - per proc */
|
/* Min data Segment size (bytes) - per proc */
|
||||||
cs->sm2_data_seg_size=
|
cs->sm2_data_seg_size=
|
||||||
mca_coll_sm2_param_register_int("sm2_data_seg_size",0);
|
mca_coll_sm2_param_register_int("sm2_data_seg_size",32768);
|
||||||
|
|
||||||
/* Max data Segment size (bytes) - per proc */
|
/* Max data Segment size (bytes) - per proc */
|
||||||
cs->sm2_max_data_seg_size=
|
cs->sm2_max_data_seg_size=
|
||||||
@ -194,6 +194,20 @@ static int sm2_open(void)
|
|||||||
cs->n_poll_loops=
|
cs->n_poll_loops=
|
||||||
mca_coll_sm2_param_register_int("n_poll_loops",4);
|
mca_coll_sm2_param_register_int("n_poll_loops",4);
|
||||||
|
|
||||||
|
/* Size of message for switching between short and long protocol.
|
||||||
|
* This should probably be the segment size for several algorithms,
|
||||||
|
* though not all.
|
||||||
|
*/
|
||||||
|
cs->short_message_size=
|
||||||
|
mca_coll_sm2_param_register_int("short_message_size",32768);
|
||||||
|
|
||||||
|
/* collective ops to use */
|
||||||
|
cs->force_barrier=
|
||||||
|
mca_coll_sm2_param_register_int("force_barrier",(-1));
|
||||||
|
cs->force_reduce=
|
||||||
|
mca_coll_sm2_param_register_int("force_reduce",(-1));
|
||||||
|
|
||||||
|
|
||||||
/* debug */
|
/* debug */
|
||||||
/*
|
/*
|
||||||
new_sigact.sa_handler=dbg_handler;
|
new_sigact.sa_handler=dbg_handler;
|
||||||
|
@ -726,6 +726,42 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
|
|||||||
sm_module->super.coll_scatter = NULL;
|
sm_module->super.coll_scatter = NULL;
|
||||||
sm_module->super.coll_scatterv = NULL;
|
sm_module->super.coll_scatterv = NULL;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* set up specific function to be used
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* barrier */
|
||||||
|
sm_module->barrier_functions[FANIN_FAN_OUT_BARRIER_FN]=
|
||||||
|
mca_coll_sm2_barrier_intra_fanin_fanout;
|
||||||
|
sm_module->barrier_functions[RECURSIVE_DOUBLING_BARRIER_FN]=
|
||||||
|
mca_coll_sm2_barrier_intra_fanin_fanout;
|
||||||
|
if( ( 0 <= mca_coll_sm2_component.force_barrier ) &&
|
||||||
|
( N_BARRIER_FNS > mca_coll_sm2_component.force_barrier ) ) {
|
||||||
|
/* set user specifed function */
|
||||||
|
mca_coll_base_module_barrier_fn_t tmp_fn=
|
||||||
|
sm_module->barrier_functions[mca_coll_sm2_component.force_barrier];
|
||||||
|
sm_module->barrier_functions[FANIN_FAN_OUT_BARRIER_FN]=tmp_fn;
|
||||||
|
sm_module->barrier_functions[RECURSIVE_DOUBLING_BARRIER_FN]=tmp_fn;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* reduce */
|
||||||
|
sm_module->list_reduce_functions[FANIN_REDUCE_FN]=
|
||||||
|
mca_coll_sm2_reduce_intra_fanin;
|
||||||
|
sm_module->list_reduce_functions[REDUCE_SCATTER_GATHER_FN]=
|
||||||
|
mca_coll_sm2_reduce_intra_reducescatter_gather;
|
||||||
|
sm_module->reduce_functions[SHORT_DATA_FN]=
|
||||||
|
sm_module->list_reduce_functions[FANIN_REDUCE_FN];
|
||||||
|
sm_module->reduce_functions[LONG_DATA_FN]=
|
||||||
|
sm_module->list_reduce_functions[REDUCE_SCATTER_GATHER_FN];
|
||||||
|
if( ( 0 <= mca_coll_sm2_component.force_reduce ) &&
|
||||||
|
( N_REDUCE_FNS > mca_coll_sm2_component.force_reduce ) ) {
|
||||||
|
/* set user specifed function */
|
||||||
|
mca_coll_base_module_barrier_fn_t tmp_fn=
|
||||||
|
sm_module->reduce_functions[mca_coll_sm2_component.force_reduce];
|
||||||
|
sm_module->reduce_functions[SHORT_DATA_FN]=tmp_fn;
|
||||||
|
sm_module->reduce_functions[LONG_DATA_FN]=tmp_fn;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Some initialization
|
* Some initialization
|
||||||
*/
|
*/
|
||||||
@ -1059,7 +1095,7 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
|
|||||||
sm_module->index_blocking_barrier_memory_bank=0;
|
sm_module->index_blocking_barrier_memory_bank=0;
|
||||||
|
|
||||||
sm_module->ctl_blocking_barrier=
|
sm_module->ctl_blocking_barrier=
|
||||||
(mca_coll_sm2_nb_request_process_shared_mem_t ***)
|
(volatile mca_coll_sm2_nb_request_process_shared_mem_t ***)
|
||||||
malloc(2*sizeof(mca_coll_sm2_nb_request_process_shared_mem_t **));
|
malloc(2*sizeof(mca_coll_sm2_nb_request_process_shared_mem_t **));
|
||||||
if( NULL == sm_module->ctl_blocking_barrier ) {
|
if( NULL == sm_module->ctl_blocking_barrier ) {
|
||||||
goto CLEANUP;
|
goto CLEANUP;
|
||||||
@ -1077,11 +1113,6 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
|
|||||||
goto CLEANUP;
|
goto CLEANUP;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* debug */
|
|
||||||
fprintf(stderr," sizeof(mca_coll_sm2_nb_request_process_shared_mem_t) %lx \n",
|
|
||||||
sizeof(mca_coll_sm2_nb_request_process_shared_mem_t));
|
|
||||||
fflush(stderr);
|
|
||||||
/* end debug */
|
|
||||||
for( j= 0 ; j < 2 ; j++ ) {
|
for( j= 0 ; j < 2 ; j++ ) {
|
||||||
for( i=0 ; i < group_size ; i++ ) {
|
for( i=0 ; i < group_size ; i++ ) {
|
||||||
sm_module->ctl_blocking_barrier[j][i]=
|
sm_module->ctl_blocking_barrier[j][i]=
|
||||||
@ -1091,18 +1122,12 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
|
|||||||
j*sizeof(mca_coll_sm2_nb_request_process_shared_mem_t)+
|
j*sizeof(mca_coll_sm2_nb_request_process_shared_mem_t)+
|
||||||
i*sm_module->per_proc_size_of_blocking_barrier_region )
|
i*sm_module->per_proc_size_of_blocking_barrier_region )
|
||||||
;
|
;
|
||||||
/* debug */
|
|
||||||
fprintf(stderr," i %d j %d %p base %p pp %lx\n",i,j,
|
|
||||||
sm_module->ctl_blocking_barrier[j][i],
|
|
||||||
sm_module->sm_blocking_barrier_region,
|
|
||||||
sm_module->per_proc_size_of_blocking_barrier_region);
|
|
||||||
fflush(stderr);
|
|
||||||
/* end debug */
|
|
||||||
sm_module->ctl_blocking_barrier[j][i]->flag=0;
|
sm_module->ctl_blocking_barrier[j][i]->flag=0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* set the switch-over parameter */
|
||||||
|
sm_module->short_message_size=mca_coll_sm2_component.short_message_size;
|
||||||
|
|
||||||
/* touch pages to apply memory affinity - Note: do we really need this or will
|
/* touch pages to apply memory affinity - Note: do we really need this or will
|
||||||
* the algorithms do this */
|
* the algorithms do this */
|
||||||
|
@ -20,22 +20,10 @@
|
|||||||
|
|
||||||
extern uint64_t timers[7];
|
extern uint64_t timers[7];
|
||||||
end debug */
|
end debug */
|
||||||
/* debug
|
|
||||||
#include <assert.h>
|
|
||||||
extern void debug_module(void);
|
|
||||||
extern int last_root;
|
|
||||||
extern int node_type;
|
|
||||||
extern long long free_buff_free_index;
|
|
||||||
int last_root;
|
|
||||||
int node_type;
|
|
||||||
end debug */
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Shared memory blocking allreduce.
|
* Shared memory blocking allreduce.
|
||||||
*/
|
*/
|
||||||
static
|
|
||||||
int mca_coll_sm2_reduce_intra_fanin(void *sbuf, void *rbuf, int count,
|
int mca_coll_sm2_reduce_intra_fanin(void *sbuf, void *rbuf, int count,
|
||||||
struct ompi_datatype_t *dtype,
|
struct ompi_datatype_t *dtype,
|
||||||
struct ompi_op_t *op,
|
struct ompi_op_t *op,
|
||||||
@ -400,8 +388,7 @@ Error:
|
|||||||
/**
|
/**
|
||||||
* Shared memory blocking reduce.
|
* Shared memory blocking reduce.
|
||||||
*/
|
*/
|
||||||
static
|
int mca_coll_sm2_reduce_intra_reducescatter_gather(void *sbuf, void *rbuf,
|
||||||
int mca_coll_sm2_reduce_intra_reducescatter_allgather(void *sbuf, void *rbuf,
|
|
||||||
int count, struct ompi_datatype_t *dtype,
|
int count, struct ompi_datatype_t *dtype,
|
||||||
struct ompi_op_t *op,
|
struct ompi_op_t *op,
|
||||||
int root,
|
int root,
|
||||||
@ -492,14 +479,6 @@ int mca_coll_sm2_reduce_intra_reducescatter_allgather(void *sbuf, void *rbuf,
|
|||||||
sm_module->scratch_space[i]++;
|
sm_module->scratch_space[i]++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* debug
|
|
||||||
fprintf(stderr," my_rank %d element list count_this_stripe %d : ",my_rank,count_this_stripe);
|
|
||||||
for(i=0 ; i < comm_size ; i++ ) {
|
|
||||||
fprintf(stderr," %d ",sm_module->scratch_space[i]);
|
|
||||||
}
|
|
||||||
fprintf(stderr," \n");
|
|
||||||
fflush(stderr);
|
|
||||||
end debug */
|
|
||||||
|
|
||||||
/* get unique set of tags for this stripe.
|
/* get unique set of tags for this stripe.
|
||||||
* Assume only one collective
|
* Assume only one collective
|
||||||
@ -745,6 +724,34 @@ int mca_coll_sm2_reduce_intra(void *sbuf, void *rbuf, int count,
|
|||||||
{
|
{
|
||||||
/* local variables */
|
/* local variables */
|
||||||
int rc;
|
int rc;
|
||||||
|
mca_coll_sm2_module_t *sm_module;
|
||||||
|
ptrdiff_t dt_extent;
|
||||||
|
size_t len_data_buffer;
|
||||||
|
|
||||||
|
sm_module=(mca_coll_sm2_module_t *) module;
|
||||||
|
|
||||||
|
/* get size of data needed - same layout as user data, so that
|
||||||
|
* we can apply the reudction routines directly on these buffers
|
||||||
|
*/
|
||||||
|
rc=ompi_ddt_type_extent(dtype, &dt_extent);
|
||||||
|
if( OMPI_SUCCESS != rc ) {
|
||||||
|
goto Error;
|
||||||
|
}
|
||||||
|
|
||||||
|
len_data_buffer=count*dt_extent;
|
||||||
|
|
||||||
|
if( len_data_buffer <= sm_module->short_message_size) {
|
||||||
|
rc=sm_module->reduce_functions[SHORT_DATA_FN]
|
||||||
|
(sbuf, rbuf, count, dtype, op, root, comm, module);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
rc=sm_module->reduce_functions[LONG_DATA_FN]
|
||||||
|
(sbuf, rbuf, count, dtype, op, root, comm, module);
|
||||||
|
}
|
||||||
|
|
||||||
|
if( OMPI_SUCCESS != rc ) {
|
||||||
|
goto Error;
|
||||||
|
}
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
rc= mca_coll_sm2_reduce_intra_fanin(sbuf, rbuf, count,
|
rc= mca_coll_sm2_reduce_intra_fanin(sbuf, rbuf, count,
|
||||||
@ -752,13 +759,13 @@ int mca_coll_sm2_reduce_intra(void *sbuf, void *rbuf, int count,
|
|||||||
if( OMPI_SUCCESS != rc ) {
|
if( OMPI_SUCCESS != rc ) {
|
||||||
goto Error;
|
goto Error;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
rc= mca_coll_sm2_reduce_intra_reducescatter_allgather(sbuf, rbuf, count,
|
rc= mca_coll_sm2_reduce_intra_reducescatter_gather(sbuf, rbuf, count,
|
||||||
dtype, op, root, comm, module);
|
dtype, op, root, comm, module);
|
||||||
if( OMPI_SUCCESS != rc ) {
|
if( OMPI_SUCCESS != rc ) {
|
||||||
goto Error;
|
goto Error;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
|
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user