add more debugging.
This commit was SVN r18100.
Этот коммит содержится в:
родитель
aa1b7dd406
Коммит
08becf33b5
@ -367,6 +367,9 @@ BEGIN_C_DECLS
|
||||
/* collective tag */
|
||||
long long collective_tag;
|
||||
|
||||
/* debug flag RLG */
|
||||
int blocked_on_barrier;
|
||||
|
||||
};
|
||||
|
||||
typedef struct mca_coll_sm2_module_t mca_coll_sm2_module_t;
|
||||
@ -413,7 +416,7 @@ BEGIN_C_DECLS
|
||||
/*
|
||||
* tag
|
||||
*/
|
||||
int tag;
|
||||
long long tag;
|
||||
};
|
||||
typedef struct mca_coll_sm2_module_allreduce_pipeline_t
|
||||
mca_coll_sm2_module_allreduce_pipeline_t;
|
||||
|
@ -46,6 +46,7 @@ extern void debug_module(void);
|
||||
/*
 * Debug signal handler: turns on the debug_print flag and dumps the
 * module state through debug_module().
 *
 * my_signal is the signal number passed in by the signal machinery; it
 * is not used here.  sm2_open() installs this handler for SIGUSR2.
 *
 * NOTE(review): debug_module() writes with fprintf()/fflush() on stderr,
 * and stdio calls are not on the POSIX async-signal-safe list, so this
 * is only suitable as a temporary debugging aid -- confirm before it is
 * kept in production code.
 */
void dbg_handler(int my_signal) {
|
||||
debug_print=1; /* enable debug output */
|
||||
debug_module(); /* dump the current module state */
|
||||
return;
|
||||
}
|
||||
/* end debug */
|
||||
|
||||
@ -194,10 +195,13 @@ static int sm2_open(void)
|
||||
mca_coll_sm2_param_register_int("n_poll_loops",4);
|
||||
|
||||
/* debug */
|
||||
/*
|
||||
new_sigact.sa_handler=dbg_handler;
|
||||
sigemptyset(&(new_sigact.sa_mask));
|
||||
|
||||
retVal=sigaction(SIGUSR2,&new_sigact,NULL);
|
||||
*/
|
||||
signal(SIGUSR2,dbg_handler);
|
||||
/* end debug */
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
|
@ -71,10 +71,11 @@ void debug_module(void) {
|
||||
}
|
||||
}
|
||||
/* data regions */
|
||||
fprintf(stderr," my_debug_rank %d current index %d freed index %d coll_tag %lld \n",
|
||||
fprintf(stderr," my_debug_rank %d current index %d freed index %d coll_tag %lld debug stat %d \n",
|
||||
my_debug_rank,
|
||||
module_dbg->sm2_allocated_buffer_index,module_dbg->sm2_freed_buffer_index,
|
||||
module_dbg->collective_tag);
|
||||
module_dbg->collective_tag,
|
||||
module_dbg->blocked_on_barrier);
|
||||
if( 0 == my_debug_rank ) {
|
||||
for( i=0 ; i < module_dbg->sm2_module_num_buffers ; i++ ) {
|
||||
for( j=0 ; j < my_debug_comm_size ; j++ ) {
|
||||
@ -86,6 +87,7 @@ void debug_module(void) {
|
||||
}
|
||||
|
||||
fflush(stderr);
|
||||
return;
|
||||
|
||||
}
|
||||
/* end debug */
|
||||
@ -933,6 +935,7 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
|
||||
* the algorithms do this */
|
||||
|
||||
/* debug */
|
||||
sm_module->blocked_on_barrier=0;
|
||||
module_dbg=&(sm_module->super);
|
||||
/* end debug */
|
||||
|
||||
@ -1046,6 +1049,10 @@ sm_work_buffer_t *alloc_sm2_shared_buffer(mca_coll_sm2_module_t *module)
|
||||
if( NB_BARRIER_INACTIVE !=
|
||||
module->barrier_request[bank_index].sm2_barrier_phase ) {
|
||||
|
||||
/* debug */
|
||||
module->blocked_on_barrier=1;
|
||||
/* end debug */
|
||||
|
||||
request_index=module->current_request_index;
|
||||
/* complete barrier requests in order */
|
||||
for(i_request=0 ; i_request< module->sm2_module_num_memory_banks ;
|
||||
@ -1089,6 +1096,9 @@ sm_work_buffer_t *alloc_sm2_shared_buffer(mca_coll_sm2_module_t *module)
|
||||
}
|
||||
|
||||
}
|
||||
/* debug */
|
||||
module->blocked_on_barrier=0;
|
||||
/* end debug */
|
||||
|
||||
buffer_index=module->sm2_allocated_buffer_index;
|
||||
|
||||
|
@ -100,9 +100,9 @@ int mca_coll_sm2_reduce_intra_fanin(void *sbuf, void *rbuf, int count,
|
||||
* for atomic update of the tag */
|
||||
tag=sm_module->collective_tag;
|
||||
sm_module->collective_tag++;
|
||||
/* debug */
|
||||
/* debug
|
||||
assert(tag);
|
||||
/* end debug */
|
||||
end debug */
|
||||
|
||||
/* get a pointer to the shared-memory working buffer */
|
||||
sm_buffer_desc=alloc_sm2_shared_buffer(sm_module);
|
||||
@ -145,14 +145,13 @@ int mca_coll_sm2_reduce_intra_fanin(void *sbuf, void *rbuf, int count,
|
||||
child_data_pointer=
|
||||
sm_buffer_desc->proc_memory[child_rank].data_segment;
|
||||
|
||||
/* debug */
|
||||
/* debug
|
||||
if( 0 == child_ctl_pointer->flag ) {
|
||||
fprintf(stderr,"TTT 2 count %d root %d child_rank %d \n",
|
||||
count,root,child_rank);
|
||||
debug_module();
|
||||
}
|
||||
/* assert(child_ctl_pointer->flag); */
|
||||
/* end debug */
|
||||
end debug */
|
||||
/* wait until child flag is set */
|
||||
while(child_ctl_pointer->flag != tag) {
|
||||
opal_progress();
|
||||
@ -194,14 +193,13 @@ int mca_coll_sm2_reduce_intra_fanin(void *sbuf, void *rbuf, int count,
|
||||
sm_buffer_desc->proc_memory[child_rank].data_segment;
|
||||
|
||||
/* wait until child flag is set */
|
||||
/* debug */
|
||||
/* debug
|
||||
if( 0 == child_ctl_pointer->flag ) {
|
||||
fprintf(stderr,"TTT 3 count %d root %d child_rank \n",
|
||||
count,root,child_rank);
|
||||
debug_module();
|
||||
}
|
||||
/* assert(child_ctl_pointer->flag); */
|
||||
/* end debug */
|
||||
end debug */
|
||||
while(child_ctl_pointer->flag != tag) {
|
||||
opal_progress();
|
||||
}
|
||||
@ -307,14 +305,13 @@ int mca_coll_sm2_reduce_intra_fanin(void *sbuf, void *rbuf, int count,
|
||||
sm_buffer_desc->proc_memory[child_rank].data_segment;
|
||||
|
||||
/* wait until child flag is set */
|
||||
/* debug */
|
||||
/* debug
|
||||
if( 0 == child_ctl_pointer->flag ) {
|
||||
fprintf(stderr,"TTT 1 count %d root %d child_rank %d \n",
|
||||
count,root,child_rank);
|
||||
debug_module();
|
||||
}
|
||||
/* assert(child_ctl_pointer->flag); */
|
||||
/* end debug */
|
||||
end debug */
|
||||
while(child_ctl_pointer->flag != tag) {
|
||||
opal_progress();
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user