From 8765a2bbddf036d26a7aa9c3469289bdead96a53 Mon Sep 17 00:00:00 2001 From: Rich Graham Date: Tue, 8 Apr 2008 20:38:20 +0000 Subject: [PATCH] more debug code. This commit was SVN r18101. --- ompi/mca/coll/sm2/coll_sm2.h | 7 +++++ ompi/mca/coll/sm2/coll_sm2_component.c | 2 +- ompi/mca/coll/sm2/coll_sm2_module.c | 39 ++++++++++++++++++++++++-- 3 files changed, 44 insertions(+), 4 deletions(-) diff --git a/ompi/mca/coll/sm2/coll_sm2.h b/ompi/mca/coll/sm2/coll_sm2.h index bcd9eafc98..a1fff20673 100644 --- a/ompi/mca/coll/sm2/coll_sm2.h +++ b/ompi/mca/coll/sm2/coll_sm2.h @@ -270,6 +270,9 @@ BEGIN_C_DECLS typedef struct mca_coll_sm2_nb_request_process_private_mem_t mca_coll_sm2_nb_request_process_private_mem_t; + /* debug */ +#define BARRIER_BANK_LIST_SIZE 32 + /* end debug */ struct mca_coll_sm2_module_t { /* base structure */ mca_coll_base_module_1_1_0_t super; @@ -370,6 +373,10 @@ BEGIN_C_DECLS /* debug flag RLG */ int blocked_on_barrier; + long long barrier_bank_list[BARRIER_BANK_LIST_SIZE]; + long long barrier_bank_cntr; + /* end debug */ + }; typedef struct mca_coll_sm2_module_t mca_coll_sm2_module_t; diff --git a/ompi/mca/coll/sm2/coll_sm2_component.c b/ompi/mca/coll/sm2/coll_sm2_component.c index 48b3d1248a..1acd4f6b82 100644 --- a/ompi/mca/coll/sm2/coll_sm2_component.c +++ b/ompi/mca/coll/sm2/coll_sm2_component.c @@ -44,7 +44,7 @@ extern int my_debug_rank; extern void debug_module(void); void dbg_handler(int my_signal) { - debug_print=1; +/* debug_print=1; */ debug_module(); return; } diff --git a/ompi/mca/coll/sm2/coll_sm2_module.c b/ompi/mca/coll/sm2/coll_sm2_module.c index 2291464069..4cd12374e7 100644 --- a/ompi/mca/coll/sm2/coll_sm2_module.c +++ b/ompi/mca/coll/sm2/coll_sm2_module.c @@ -50,6 +50,7 @@ extern int my_debug_rank; extern int my_debug_comm_size; extern void debug_module(void); static mca_coll_sm2_module_t *module_dbg; +static int blocking_cnt=0; void debug_module(void) { int i,j,k; char *ptr; @@ -71,11 +72,16 @@ void debug_module(void) { } } /* 
data regions */ - fprintf(stderr," my_debug_rank %d current index %d freed index %d coll_tag %lld debug stat %d \n", + fprintf(stderr," my_debug_rank %d current index %d freed index %d coll_tag %lld debug stat %d blocking_cnt %d \n", my_debug_rank, module_dbg->sm2_allocated_buffer_index,module_dbg->sm2_freed_buffer_index, module_dbg->collective_tag, - module_dbg->blocked_on_barrier); + module_dbg->blocked_on_barrier,blocking_cnt); + fprintf(stderr," my_debug_rank %d barrier_bank_cntr %lld ", + my_debug_rank,module_dbg->barrier_bank_cntr); + for( i=0 ; i < BARRIER_BANK_LIST_SIZE ; i++ ) + fprintf(stderr,"%2lld",module_dbg->barrier_bank_list[i]); + fprintf(stderr," \n"); if( 0 == my_debug_rank ) { for( i=0 ; i < module_dbg->sm2_module_num_buffers ; i++ ) { for( j=0 ; j < my_debug_comm_size ; j++ ) { @@ -520,7 +526,7 @@ static int init_sm2_barrier(struct ompi_communicator_t *comm, module->current_request_index=0; /* set starting collective tag */ - module->collective_tag=2; + module->collective_tag=1; /* return - successful */ return OMPI_SUCCESS; @@ -936,6 +942,7 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority) /* debug */ sm_module->blocked_on_barrier=0; + sm_module->barrier_bank_cntr=0; module_dbg=&(sm_module->super); /* end debug */ @@ -1018,6 +1025,12 @@ sm_work_buffer_t *alloc_sm2_shared_buffer(mca_coll_sm2_module_t *module) if ( NB_BARRIER_DONE == module->barrier_request[module->current_request_index]. sm2_barrier_phase ) { + /* debug */ + module->barrier_bank_list + [module->barrier_bank_cntr%BARRIER_BANK_LIST_SIZE]= + module->current_request_index; + module->barrier_bank_cntr++; + /* debug */ /* set request to inactive */ module->barrier_request[module->current_request_index]. 
sm2_barrier_phase=NB_BARRIER_INACTIVE; @@ -1029,6 +1042,7 @@ sm_work_buffer_t *alloc_sm2_shared_buffer(mca_coll_sm2_module_t *module) module->sm2_module_num_memory_banks ) { module->current_request_index=0; } + } } @@ -1060,6 +1074,9 @@ sm_work_buffer_t *alloc_sm2_shared_buffer(mca_coll_sm2_module_t *module) /* complete requests in order */ request=&(module->barrier_request[module->current_request_index]); + /* debug */ + blocking_cnt=0; + /* end debug */ while ( NB_BARRIER_DONE != request->sm2_barrier_phase ) { rc=mca_coll_sm2_nbbarrier_intra_progress(module->module_comm, request, @@ -1068,7 +1085,16 @@ sm_work_buffer_t *alloc_sm2_shared_buffer(mca_coll_sm2_module_t *module) return NULL; } opal_progress(); + /* debug */ + blocking_cnt++; + /* end debug */ } + /* debug */ + module->barrier_bank_list + [module->barrier_bank_cntr%BARRIER_BANK_LIST_SIZE]= + module->current_request_index; + module->barrier_bank_cntr++; + /* debug */ /* set the reqeust to inactive, and point current_request_index * to the request for the next memory bank @@ -1134,6 +1160,12 @@ int free_sm2_shared_buffer(mca_coll_sm2_module_t *module) if ( NB_BARRIER_DONE == module->barrier_request[module->current_request_index]. sm2_barrier_phase ) { + /* debug */ + module->barrier_bank_list + [module->barrier_bank_cntr%BARRIER_BANK_LIST_SIZE]= + module->current_request_index; + module->barrier_bank_cntr++; + /* debug */ /* set request to inactive */ module->barrier_request[module->current_request_index]. sm2_barrier_phase=NB_BARRIER_INACTIVE; @@ -1145,6 +1177,7 @@ int free_sm2_shared_buffer(mca_coll_sm2_module_t *module) module->sm2_module_num_memory_banks ) { module->current_request_index=0; } + } } /* done with progress */