debug the pipeline algorithm.
This commit was SVN r18008.
Этот коммит содержится в:
родитель
77653ac787
Коммит
90e53ca9ee
@ -319,7 +319,7 @@ int progress_fanin_fanout( void *sbuf, void *rbuf,
|
|||||||
int count_processed,count_this_stripe;
|
int count_processed,count_this_stripe;
|
||||||
ptrdiff_t dt_extent;
|
ptrdiff_t dt_extent;
|
||||||
long long tag;
|
long long tag;
|
||||||
mca_coll_sm2_nb_request_process_shared_mem_t *my_ctl_pointer;
|
volatile mca_coll_sm2_nb_request_process_shared_mem_t *my_ctl_pointer;
|
||||||
volatile mca_coll_sm2_nb_request_process_shared_mem_t * parent_ctl_pointer;
|
volatile mca_coll_sm2_nb_request_process_shared_mem_t * parent_ctl_pointer;
|
||||||
volatile mca_coll_sm2_nb_request_process_shared_mem_t * child_ctl_pointer;
|
volatile mca_coll_sm2_nb_request_process_shared_mem_t * child_ctl_pointer;
|
||||||
volatile char * my_data_pointer;
|
volatile char * my_data_pointer;
|
||||||
@ -329,7 +329,6 @@ int progress_fanin_fanout( void *sbuf, void *rbuf,
|
|||||||
tree_node_t *my_reduction_node;
|
tree_node_t *my_reduction_node;
|
||||||
tree_node_t *my_fanout_read_tree;
|
tree_node_t *my_fanout_read_tree;
|
||||||
|
|
||||||
my_fanout_parent=my_fanout_read_tree->parent_rank;
|
|
||||||
tag=reduction_desc->tag;
|
tag=reduction_desc->tag;
|
||||||
sm_buffer_desc=reduction_desc->shared_buffer;
|
sm_buffer_desc=reduction_desc->shared_buffer;
|
||||||
my_rank=reduction_desc->my_rank;
|
my_rank=reduction_desc->my_rank;
|
||||||
@ -385,8 +384,10 @@ int progress_fanin_fanout( void *sbuf, void *rbuf,
|
|||||||
* provids only 2 buffers, so can't add from two
|
* provids only 2 buffers, so can't add from two
|
||||||
* into a third buffer.
|
* into a third buffer.
|
||||||
*/
|
*/
|
||||||
count_processed=reduction_desc->count_processed;
|
|
||||||
count_this_stripe=reduction_desc->count_this_stripe;
|
count_this_stripe=reduction_desc->count_this_stripe;
|
||||||
|
if( STARTED == reduction_desc->status) {
|
||||||
|
/* copy-in only the first time through */
|
||||||
|
count_processed=reduction_desc->count_processed;
|
||||||
/* error conditions already checed */
|
/* error conditions already checed */
|
||||||
ompi_ddt_type_extent(dtype, &dt_extent);
|
ompi_ddt_type_extent(dtype, &dt_extent);
|
||||||
rc=ompi_ddt_copy_content_same_ddt(dtype, count_this_stripe,
|
rc=ompi_ddt_copy_content_same_ddt(dtype, count_this_stripe,
|
||||||
@ -395,10 +396,12 @@ int progress_fanin_fanout( void *sbuf, void *rbuf,
|
|||||||
if( 0 != rc ) {
|
if( 0 != rc ) {
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Wait on children, and apply op to their data
|
* Wait on children, and apply op to their data
|
||||||
*/
|
*/
|
||||||
|
n_children=my_reduction_node->n_children;
|
||||||
for( child=reduction_desc->n_child_loops_completed ;
|
for( child=reduction_desc->n_child_loops_completed ;
|
||||||
child < n_children ; child++ ) {
|
child < n_children ; child++ ) {
|
||||||
child_rank=my_reduction_node->children_ranks[child];
|
child_rank=my_reduction_node->children_ranks[child];
|
||||||
@ -441,6 +444,7 @@ int progress_fanin_fanout( void *sbuf, void *rbuf,
|
|||||||
}
|
}
|
||||||
|
|
||||||
REDUCTION_FANOUT:
|
REDUCTION_FANOUT:
|
||||||
|
my_fanout_parent=my_fanout_read_tree->parent_rank;
|
||||||
switch (my_reduction_node->my_node_type) {
|
switch (my_reduction_node->my_node_type) {
|
||||||
case LEAF_NODE:
|
case LEAF_NODE:
|
||||||
|
|
||||||
@ -621,8 +625,11 @@ int mca_coll_sm2_allreduce_intra_fanin_fanout_pipeline
|
|||||||
/* NOTE: need to check at communicator creation that we have enough
|
/* NOTE: need to check at communicator creation that we have enough
|
||||||
* temporary buffes for this
|
* temporary buffes for this
|
||||||
*/
|
*/
|
||||||
|
|
||||||
for(i=0 ; i < depth_pipeline ; i++ ) {
|
for(i=0 ; i < depth_pipeline ; i++ ) {
|
||||||
|
/*
|
||||||
working_buffers[i].shared_buffer=alloc_sm2_shared_buffer(sm_module);
|
working_buffers[i].shared_buffer=alloc_sm2_shared_buffer(sm_module);
|
||||||
|
*/
|
||||||
working_buffers[i].status=BUFFER_AVAILABLE;
|
working_buffers[i].status=BUFFER_AVAILABLE;
|
||||||
working_buffers[i].my_rank=my_rank;
|
working_buffers[i].my_rank=my_rank;
|
||||||
working_buffers[i].my_reduction_node=my_reduction_node;
|
working_buffers[i].my_reduction_node=my_reduction_node;
|
||||||
@ -654,13 +661,21 @@ int mca_coll_sm2_allreduce_intra_fanin_fanout_pipeline
|
|||||||
for( i=0 ; i < depth_pipeline ; i++ ) {
|
for( i=0 ; i < depth_pipeline ; i++ ) {
|
||||||
if( working_buffers[i].status != BUFFER_AVAILABLE ){
|
if( working_buffers[i].status != BUFFER_AVAILABLE ){
|
||||||
rc=progress_fanin_fanout( sbuf, rbuf, dtype, op,
|
rc=progress_fanin_fanout( sbuf, rbuf, dtype, op,
|
||||||
&(working_buffers[buffer_index]),
|
&(working_buffers[i]),
|
||||||
sm_module->n_poll_loops, &completed);
|
sm_module->n_poll_loops, &completed);
|
||||||
if( OMPI_SUCCESS != rc ) {
|
if( OMPI_SUCCESS != rc ) {
|
||||||
goto Error;
|
goto Error;
|
||||||
}
|
}
|
||||||
/* update the number of completed segments */
|
/* update the number of completed segments */
|
||||||
|
if( completed ) {
|
||||||
n_completed+=completed;
|
n_completed+=completed;
|
||||||
|
/* release of resources may be our of order, but allocation
|
||||||
|
* is ordered, and only after the pipleline tracker
|
||||||
|
* (working_buffers[]) indicates that it is complete, so
|
||||||
|
* resources will not be re-used too early
|
||||||
|
*/
|
||||||
|
rc=free_sm2_shared_buffer(sm_module);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -669,7 +684,9 @@ int mca_coll_sm2_allreduce_intra_fanin_fanout_pipeline
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* initialize working buffer for this stripe */
|
/* initialize working buffer for this stripe */
|
||||||
working_buffers[buffer_index].status=FANIN;
|
working_buffers[buffer_index].shared_buffer=
|
||||||
|
alloc_sm2_shared_buffer(sm_module);
|
||||||
|
working_buffers[buffer_index].status=STARTED;
|
||||||
working_buffers[buffer_index].n_child_loops_completed=0;
|
working_buffers[buffer_index].n_child_loops_completed=0;
|
||||||
count_processed=stripe_number*n_dts_per_buffer;
|
count_processed=stripe_number*n_dts_per_buffer;
|
||||||
count_this_stripe=n_dts_per_buffer;
|
count_this_stripe=n_dts_per_buffer;
|
||||||
@ -689,7 +706,15 @@ int mca_coll_sm2_allreduce_intra_fanin_fanout_pipeline
|
|||||||
goto Error;
|
goto Error;
|
||||||
}
|
}
|
||||||
/* update the number of completed segments */
|
/* update the number of completed segments */
|
||||||
|
if( completed ) {
|
||||||
n_completed+=completed;
|
n_completed+=completed;
|
||||||
|
/* release of resources may be our of order, but allocation
|
||||||
|
* is ordered, and only after the pipleline tracker
|
||||||
|
* (working_buffers[]) indicates that it is complete, so
|
||||||
|
* resources will not be re-used too early
|
||||||
|
*/
|
||||||
|
rc=free_sm2_shared_buffer(sm_module);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -698,30 +723,42 @@ int mca_coll_sm2_allreduce_intra_fanin_fanout_pipeline
|
|||||||
for( i=0 ; i < depth_pipeline ; i++ ) {
|
for( i=0 ; i < depth_pipeline ; i++ ) {
|
||||||
if( working_buffers[i].status != BUFFER_AVAILABLE ){
|
if( working_buffers[i].status != BUFFER_AVAILABLE ){
|
||||||
rc=progress_fanin_fanout( sbuf, rbuf, dtype, op,
|
rc=progress_fanin_fanout( sbuf, rbuf, dtype, op,
|
||||||
&(working_buffers[buffer_index]),
|
&(working_buffers[i]),
|
||||||
sm_module->n_poll_loops, &completed);
|
sm_module->n_poll_loops, &completed);
|
||||||
if( OMPI_SUCCESS != rc ) {
|
if( OMPI_SUCCESS != rc ) {
|
||||||
goto Error;
|
goto Error;
|
||||||
}
|
}
|
||||||
/* update the number of completed segments */
|
/* update the number of completed segments */
|
||||||
|
if( completed ) {
|
||||||
n_completed+=completed;
|
n_completed+=completed;
|
||||||
|
/* release of resources may be our of order, but allocation
|
||||||
|
* is ordered, and only after the pipleline tracker
|
||||||
|
* (working_buffers[]) indicates that it is complete, so
|
||||||
|
* resources will not be re-used too early
|
||||||
|
*/
|
||||||
|
rc=free_sm2_shared_buffer(sm_module);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* free work buffers */
|
/* free work buffers */
|
||||||
|
/*
|
||||||
for(i=0 ; i < depth_pipeline ; i++ ) {
|
for(i=0 ; i < depth_pipeline ; i++ ) {
|
||||||
rc=free_sm2_shared_buffer(sm_module);
|
rc=free_sm2_shared_buffer(sm_module);
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
/* return */
|
/* return */
|
||||||
return rc;
|
return rc;
|
||||||
|
|
||||||
Error:
|
Error:
|
||||||
/* free work buffers */
|
/* free work buffers */
|
||||||
|
/*
|
||||||
for(i=0 ; i < depth_pipeline ; i++ ) {
|
for(i=0 ; i < depth_pipeline ; i++ ) {
|
||||||
rc=free_sm2_shared_buffer(sm_module);
|
rc=free_sm2_shared_buffer(sm_module);
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
#undef depth_pipeline
|
#undef depth_pipeline
|
||||||
@ -1433,12 +1470,20 @@ int mca_coll_sm2_allreduce_intra(void *sbuf, void *rbuf, int count,
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
#endif /* testing */
|
#endif /* testing */
|
||||||
|
/* Non-Commutative Operation */
|
||||||
|
rc= mca_coll_sm2_allreduce_intra_fanin_fanout_pipeline(
|
||||||
|
sbuf, rbuf, count,dtype, op, comm, module);
|
||||||
|
if( OMPI_SUCCESS != rc ) {
|
||||||
|
goto Error;
|
||||||
|
}
|
||||||
|
#if 0
|
||||||
/* Non-Commutative Operation */
|
/* Non-Commutative Operation */
|
||||||
rc= mca_coll_sm2_allreduce_intra_fanin_fanout(sbuf, rbuf, count,
|
rc= mca_coll_sm2_allreduce_intra_fanin_fanout(sbuf, rbuf, count,
|
||||||
dtype, op, comm, module);
|
dtype, op, comm, module);
|
||||||
if( OMPI_SUCCESS != rc ) {
|
if( OMPI_SUCCESS != rc ) {
|
||||||
goto Error;
|
goto Error;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
#if 0 /* just for some testing */
|
#if 0 /* just for some testing */
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -153,8 +153,8 @@ static int allocate_shared_file(size_t size, char **file_name,
|
|||||||
*/
|
*/
|
||||||
unique_comm_id=(int)getpid();
|
unique_comm_id=(int)getpid();
|
||||||
len=asprintf(&f_name,
|
len=asprintf(&f_name,
|
||||||
"%s"OPAL_PATH_SEP"sm_coll_v2%s_%0d_%0d",orte_process_info.job_session_dir,
|
"%s"OPAL_PATH_SEP"sm_coll_v2_%0d_%0d",orte_process_info.job_session_dir,
|
||||||
orte_process_info.proc_session_dir,ompi_comm_get_cid(comm),unique_comm_id);
|
ompi_comm_get_cid(comm),unique_comm_id);
|
||||||
if( 0 > len ) {
|
if( 0 > len ) {
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
@ -163,8 +163,8 @@ static int allocate_shared_file(size_t size, char **file_name,
|
|||||||
/* process initializing the file */
|
/* process initializing the file */
|
||||||
fd = open(*file_name, O_CREAT|O_RDWR, 0600);
|
fd = open(*file_name, O_CREAT|O_RDWR, 0600);
|
||||||
if (fd < 0) {
|
if (fd < 0) {
|
||||||
opal_output(0,"mca_common_sm_mmap_init: open %s failed with errno=%d\n",
|
opal_output(0,"mca_common_sm_mmap_init: open %s len %d failed with errno=%d\n",
|
||||||
*file_name, errno);
|
*file_name, len, errno);
|
||||||
goto file_opened;
|
goto file_opened;
|
||||||
}
|
}
|
||||||
/* map the file and initialize segment state */
|
/* map the file and initialize segment state */
|
||||||
@ -238,8 +238,8 @@ static int allocate_shared_file(size_t size, char **file_name,
|
|||||||
* communicators, that could have the same communicator id
|
* communicators, that could have the same communicator id
|
||||||
*/
|
*/
|
||||||
len=asprintf(&f_name,
|
len=asprintf(&f_name,
|
||||||
"%s"OPAL_PATH_SEP"sm_coll_v2%s_%0d_%0d",orte_process_info.job_session_dir,
|
"%s"OPAL_PATH_SEP"sm_coll_v2_%0d_%0d",orte_process_info.job_session_dir,
|
||||||
orte_process_info.proc_session_dir,ompi_comm_get_cid(comm),unique_comm_id);
|
ompi_comm_get_cid(comm),unique_comm_id);
|
||||||
if( 0 > len ) {
|
if( 0 > len ) {
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
@ -248,8 +248,8 @@ static int allocate_shared_file(size_t size, char **file_name,
|
|||||||
/* open backing file */
|
/* open backing file */
|
||||||
fd = open(*file_name, O_RDWR, 0600);
|
fd = open(*file_name, O_RDWR, 0600);
|
||||||
if (fd < 0) {
|
if (fd < 0) {
|
||||||
opal_output(0,"mca_common_sm_mmap_init: open %s failed with errno=%d\n",
|
opal_output(0,"mca_common_sm_mmap_init: open %s len %d failed with errno=%d\n",
|
||||||
*file_name, errno);
|
*file_name, len, errno);
|
||||||
goto return_error;
|
goto return_error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user