From 08becf33b50987e7c11e034beaa276d8accdb317 Mon Sep 17 00:00:00 2001 From: Rich Graham Date: Tue, 8 Apr 2008 18:44:50 +0000 Subject: [PATCH] add more debugging. This commit was SVN r18100. --- ompi/mca/coll/sm2/coll_sm2.h | 5 ++++- ompi/mca/coll/sm2/coll_sm2_component.c | 4 ++++ ompi/mca/coll/sm2/coll_sm2_module.c | 14 ++++++++++++-- ompi/mca/coll/sm2/coll_sm2_reduce.c | 19 ++++++++----------- 4 files changed, 28 insertions(+), 14 deletions(-) diff --git a/ompi/mca/coll/sm2/coll_sm2.h b/ompi/mca/coll/sm2/coll_sm2.h index 010ee6591b..bcd9eafc98 100644 --- a/ompi/mca/coll/sm2/coll_sm2.h +++ b/ompi/mca/coll/sm2/coll_sm2.h @@ -367,6 +367,9 @@ BEGIN_C_DECLS /* collective tag */ long long collective_tag; + /* debug flag RLG */ + int blocked_on_barrier; + }; typedef struct mca_coll_sm2_module_t mca_coll_sm2_module_t; @@ -413,7 +416,7 @@ BEGIN_C_DECLS /* * tag */ - int tag; + long long tag; }; typedef struct mca_coll_sm2_module_allreduce_pipeline_t mca_coll_sm2_module_allreduce_pipeline_t; diff --git a/ompi/mca/coll/sm2/coll_sm2_component.c b/ompi/mca/coll/sm2/coll_sm2_component.c index 013a567785..48b3d1248a 100644 --- a/ompi/mca/coll/sm2/coll_sm2_component.c +++ b/ompi/mca/coll/sm2/coll_sm2_component.c @@ -46,6 +46,7 @@ extern void debug_module(void); void dbg_handler(int my_signal) { debug_print=1; debug_module(); + return; } /* end debug */ @@ -194,10 +195,13 @@ static int sm2_open(void) mca_coll_sm2_param_register_int("n_poll_loops",4); /* debug */ + /* new_sigact.sa_handler=dbg_handler; sigemptyset(&(new_sigact.sa_mask)); retVal=sigaction(SIGUSR2,&new_sigact,NULL); + */ + signal(SIGUSR2,dbg_handler); /* end debug */ return OMPI_SUCCESS; diff --git a/ompi/mca/coll/sm2/coll_sm2_module.c b/ompi/mca/coll/sm2/coll_sm2_module.c index 78a1dc249f..2291464069 100644 --- a/ompi/mca/coll/sm2/coll_sm2_module.c +++ b/ompi/mca/coll/sm2/coll_sm2_module.c @@ -71,10 +71,11 @@ void debug_module(void) { } } /* data regions */ - fprintf(stderr," my_debug_rank %d current index %d freed index %d coll_tag %lld \n", + fprintf(stderr," my_debug_rank %d current index %d freed index %d coll_tag %lld debug stat %d \n", my_debug_rank, module_dbg->sm2_allocated_buffer_index,module_dbg->sm2_freed_buffer_index, - module_dbg->collective_tag); + module_dbg->collective_tag, + module_dbg->blocked_on_barrier); if( 0 == my_debug_rank ) { for( i=0 ; i < module_dbg->sm2_module_num_buffers ; i++ ) { for( j=0 ; j < my_debug_comm_size ; j++ ) { @@ -86,6 +87,7 @@ void debug_module(void) { } fflush(stderr); + return; } /* end debug */ @@ -933,6 +935,7 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority) * the algorithms do this */ /* debug */ + sm_module->blocked_on_barrier=0; module_dbg=&(sm_module->super); /* end debug */ @@ -1046,6 +1049,10 @@ sm_work_buffer_t *alloc_sm2_shared_buffer(mca_coll_sm2_module_t *module) if( NB_BARRIER_INACTIVE != module->barrier_request[bank_index].sm2_barrier_phase ) { + /* debug */ + module->blocked_on_barrier=1; + /* end debug */ + request_index=module->current_request_index; /* complete barrier requests in order */ for(i_request=0 ; i_request< module->sm2_module_num_memory_banks ; @@ -1089,6 +1096,9 @@ sm_work_buffer_t *alloc_sm2_shared_buffer(mca_coll_sm2_module_t *module) } } + /* debug */ + module->blocked_on_barrier=0; + /* end debug */ buffer_index=module->sm2_allocated_buffer_index; diff --git a/ompi/mca/coll/sm2/coll_sm2_reduce.c b/ompi/mca/coll/sm2/coll_sm2_reduce.c index d120ced482..859375d942 100644 --- a/ompi/mca/coll/sm2/coll_sm2_reduce.c +++ b/ompi/mca/coll/sm2/coll_sm2_reduce.c @@ -100,9 +100,9 @@ int mca_coll_sm2_reduce_intra_fanin(void *sbuf, void *rbuf, int count, * for atomic update of the tag */ tag=sm_module->collective_tag; sm_module->collective_tag++; - /* debug */ + /* debug assert(tag); - /* end debug */ + end debug */ /* get a pointer to the shared-memory working buffer */ sm_buffer_desc=alloc_sm2_shared_buffer(sm_module); @@ -145,14 +145,13 @@ int mca_coll_sm2_reduce_intra_fanin(void *sbuf, void *rbuf, int count, child_data_pointer= sm_buffer_desc->proc_memory[child_rank].data_segment; - /* debug */ + /* debug if( 0 == child_ctl_pointer->flag ) { fprintf(stderr,"TTT 2 count %d root %d child_rank %d \n", count,root,child_rank); debug_module(); } -/* assert(child_ctl_pointer->flag); */ - /* end debug */ + end debug */ /* wait until child flag is set */ while(child_ctl_pointer->flag != tag) { opal_progress(); @@ -194,14 +193,13 @@ int mca_coll_sm2_reduce_intra_fanin(void *sbuf, void *rbuf, int count, sm_buffer_desc->proc_memory[child_rank].data_segment; /* wait until child flag is set */ - /* debug */ + /* debug if( 0 == child_ctl_pointer->flag ) { fprintf(stderr,"TTT 3 count %d root %d child_rank \n", count,root,child_rank); debug_module(); } -/* assert(child_ctl_pointer->flag); */ - /* end debug */ + end debug */ while(child_ctl_pointer->flag != tag) { opal_progress(); } @@ -307,14 +305,13 @@ int mca_coll_sm2_reduce_intra_fanin(void *sbuf, void *rbuf, int count, sm_buffer_desc->proc_memory[child_rank].data_segment; /* wait until child flag is set */ - /* debug */ + /* debug if( 0 == child_ctl_pointer->flag ) { fprintf(stderr,"TTT 1 count %d root %d child_rank %d \n", count,root,child_rank); debug_module(); } -/* assert(child_ctl_pointer->flag); */ - /* end debug */ + end debug */ while(child_ctl_pointer->flag != tag) { opal_progress(); }