From fa696734d58d3171f2e8acd6baeedc6204548a77 Mon Sep 17 00:00:00 2001 From: Rich Graham Date: Mon, 7 Apr 2008 21:03:23 +0000 Subject: [PATCH] add some debug code. This commit was SVN r18096. --- ompi/mca/coll/sm2/coll_sm2_allreduce.c | 10 ++-- ompi/mca/coll/sm2/coll_sm2_barrier.c | 63 ++++++++++++++++++++++++++ ompi/mca/coll/sm2/coll_sm2_component.c | 24 ++++++++++ ompi/mca/coll/sm2/coll_sm2_module.c | 48 ++++++++++++++++++++ 4 files changed, 140 insertions(+), 5 deletions(-) diff --git a/ompi/mca/coll/sm2/coll_sm2_allreduce.c b/ompi/mca/coll/sm2/coll_sm2_allreduce.c index b2635f3deb..a2df64a592 100644 --- a/ompi/mca/coll/sm2/coll_sm2_allreduce.c +++ b/ompi/mca/coll/sm2/coll_sm2_allreduce.c @@ -1460,31 +1460,31 @@ int mca_coll_sm2_allreduce_intra(void *sbuf, void *rbuf, int count, /* local variables */ int rc; -#if 0 /* just for some testing */ +#if 0 if( 0 != (op->o_flags & OMPI_OP_FLAGS_COMMUTE)) { +#endif /* Commutative Operation */ rc= mca_coll_sm2_allreduce_intra_recursive_doubling(sbuf, rbuf, count, dtype, op, comm, module); if( OMPI_SUCCESS != rc ) { goto Error; } +#if 0 } else { -#endif /* testing */ /* Non-Commutative Operation */ +#endif +#if 0 rc= mca_coll_sm2_allreduce_intra_fanin_fanout_pipeline( sbuf, rbuf, count,dtype, op, comm, module); if( OMPI_SUCCESS != rc ) { goto Error; } -#if 0 /* Non-Commutative Operation */ rc= mca_coll_sm2_allreduce_intra_fanin_fanout(sbuf, rbuf, count, dtype, op, comm, module); if( OMPI_SUCCESS != rc ) { goto Error; } -#endif -#if 0 /* just for some testing */ } #endif diff --git a/ompi/mca/coll/sm2/coll_sm2_barrier.c b/ompi/mca/coll/sm2/coll_sm2_barrier.c index 9631335ebf..ffb38957b0 100644 --- a/ompi/mca/coll/sm2/coll_sm2_barrier.c +++ b/ompi/mca/coll/sm2/coll_sm2_barrier.c @@ -24,6 +24,9 @@ #include "ompi/mca/coll/coll.h" #include "opal/sys/atomic.h" #include "coll_sm2.h" +/* debug */ +extern int debug_print; +/* end debug */ /** * Shared memory barrier. 
@@ -117,6 +120,15 @@ int mca_coll_sm2_nbbarrier_intra(struct ompi_communicator_t *comm, /* if parent has not checked in - set parameters for async * completion, incomplet barrier flag, and bail */ +/* debug */ +if( debug_print ) { +fprintf(stderr," A-I rank %d parent %d -tag %lld sm_address->flag %lld \n", + ompi_comm_rank(comm), + sm_module->sm_buffer_mgmt_barrier_tree.parent_rank, +-tag,sm_address->flag); +} +/* end debug */ + /* child not arrived, just break out */ request->sm2_barrier_phase=NB_BARRIER_FAN_OUT; return OMPI_SUCCESS; } @@ -302,6 +314,12 @@ int mca_coll_sm2_nbbarrier_intra_progress(struct ompi_communicator_t *comm, /* if parent has not checked in - set parameters for async * completion, incomplet barrier flag, and bail */ +/* debug */ +if( debug_print ) { +fprintf(stderr," I rank %d -tag %lld sm_address->flag %lld \n", + ompi_comm_rank(comm),-tag,sm_address->flag); +} +/* end debug */ request->sm2_barrier_phase=NB_BARRIER_FAN_OUT; return OMPI_SUCCESS; } @@ -310,6 +328,13 @@ int mca_coll_sm2_nbbarrier_intra_progress(struct ompi_communicator_t *comm, * set my completion flag */ request->sm2_barrier_phase=NB_BARRIER_DONE; +/* debug */ +if( debug_print ) { +fprintf(stderr," rank %d tag %lld done \n", + ompi_comm_rank(comm), + tag); +} +/* end debug */ } else if( INTERIOR_NODE == sm_module->sm_buffer_mgmt_barrier_tree.my_node_type ) { phase=request->sm2_barrier_phase; if( NB_BARRIER_FAN_OUT == phase ) { @@ -335,6 +360,14 @@ int mca_coll_sm2_nbbarrier_intra_progress(struct ompi_communicator_t *comm, /* child arrived */ cnt++; } else { +/* debug */ +if( debug_print ) { +fprintf(stderr," II rank %d child %d tag %lld sm_address->flag %lld \n", + ompi_comm_rank(comm), + sm_module->sm_buffer_mgmt_barrier_tree.children_ranks[child], +tag,sm_address->flag); +} +/* end debug */ /* child not arrived, just break out */ break; } @@ -375,6 +408,14 @@ int mca_coll_sm2_nbbarrier_intra_progress(struct ompi_communicator_t *comm, /* if parent has not checked in - set 
parameters for async * completion, incomplet barrier flag, and bail */ +/* debug */ +if( debug_print ) { +fprintf(stderr," III rank %d parent %d -tag %lld sm_address->flag %lld \n", + ompi_comm_rank(comm), + sm_module->sm_buffer_mgmt_barrier_tree.parent_rank, +-tag,sm_address->flag); +} +/* end debug */ request->sm2_barrier_phase=NB_BARRIER_FAN_OUT; return OMPI_SUCCESS; } @@ -389,6 +430,13 @@ int mca_coll_sm2_nbbarrier_intra_progress(struct ompi_communicator_t *comm, * set my completion flag */ request->sm2_barrier_phase=NB_BARRIER_DONE; +/* debug */ +if( debug_print ) { +fprintf(stderr," rank %d tag %lld done \n", + ompi_comm_rank(comm), + tag); +} +/* end debug */ } else { /* root node */ phase=request->sm2_barrier_phase; @@ -413,6 +461,14 @@ int mca_coll_sm2_nbbarrier_intra_progress(struct ompi_communicator_t *comm, /* child arrived */ cnt++; } else { +/* debug */ +if( debug_print ) { +fprintf(stderr," IV rank %d parent %d tag %lld sm_address->flag %lld \n", + ompi_comm_rank(comm), + sm_module->sm_buffer_mgmt_barrier_tree.children_ranks[child], + tag,sm_address->flag); +} +/* end debug */ /* child not arrived, just break out */ break; } @@ -438,6 +494,13 @@ int mca_coll_sm2_nbbarrier_intra_progress(struct ompi_communicator_t *comm, * set my completion flag */ request->sm2_barrier_phase=NB_BARRIER_DONE; +/* debug */ +if( debug_print ) { +fprintf(stderr," rank %d tag %lld done \n", + ompi_comm_rank(comm), + tag); +} +/* end debug */ } DONE: diff --git a/ompi/mca/coll/sm2/coll_sm2_component.c b/ompi/mca/coll/sm2/coll_sm2_component.c index ab61b27d25..013a567785 100644 --- a/ompi/mca/coll/sm2/coll_sm2_component.c +++ b/ompi/mca/coll/sm2/coll_sm2_component.c @@ -36,6 +36,18 @@ #include "ompi/mca/coll/base/base.h" #include "orte/mca/rml/rml.h" +/* debug */ +#include <signal.h> + +extern int debug_print; +extern int my_debug_rank; +extern void debug_module(void); + +void dbg_handler(int my_signal) { + debug_print=1; + debug_module(); +} +/* end debug */ /* * Public string showing
the coll ompi_sm V2 component version number @@ -117,6 +129,11 @@ mca_coll_sm2_component_t mca_coll_sm2_component = { */ static int sm2_open(void) { +/* debug */ + int retVal; + struct sigaction new_sigact; +/* end debug */ + /* local variables */ mca_coll_sm2_component_t *cs = &mca_coll_sm2_component; @@ -176,6 +193,13 @@ static int sm2_open(void) cs->n_poll_loops= mca_coll_sm2_param_register_int("n_poll_loops",4); +/* debug */ + new_sigact.sa_handler=dbg_handler; + sigemptyset(&(new_sigact.sa_mask)); + + retVal=sigaction(SIGUSR2,&new_sigact,NULL); +/* end debug */ + return OMPI_SUCCESS; } diff --git a/ompi/mca/coll/sm2/coll_sm2_module.c b/ompi/mca/coll/sm2/coll_sm2_module.c index 633e35be87..41f9e59ddf 100644 --- a/ompi/mca/coll/sm2/coll_sm2_module.c +++ b/ompi/mca/coll/sm2/coll_sm2_module.c @@ -44,6 +44,51 @@ static int sm2_module_enable(struct mca_coll_base_module_1_1_0_t *module, struct ompi_communicator_t *comm); +/* debug */ +extern int debug_print; +extern int my_debug_rank; +extern int my_debug_comm_size; +extern void debug_module(void); +static mca_coll_sm2_module_t *module_dbg; +void debug_module(void) { + int i,j,k; + char *ptr; + mca_coll_sm2_nb_request_process_shared_mem_t * ctl_ptr; + /* control regions */ + if ( 0 == my_debug_rank ) { + for( i=0 ; i < 2 ; i++ ) { + for( j=0 ; j < 2 ; j++ ) { + fprintf(stderr," bank %d index %d \n", i,j); + for( k=0 ; k < my_debug_comm_size ; k++ ) { + ctl_ptr=module_dbg->barrier_request[i].barrier_base_address[j]; + ctl_ptr=(mca_coll_sm2_nb_request_process_shared_mem_t *) ( + (char *)ctl_ptr+k*module_dbg->sm2_size_management_region_per_proc + ); + fprintf(stderr," bank %d index %d flag %lld \n", + i,j,ctl_ptr->flag); + } + } + } + } + /* data regions */ + fprintf(stderr," my_debug_rank %d current index %d freed index %d \n", + my_debug_rank, + module_dbg->sm2_allocated_buffer_index,module_dbg->sm2_freed_buffer_index); + if( 0 == my_debug_rank ) { + for( i=0 ; i < module_dbg->sm2_module_num_buffers ; i++ ) { + for( 
j=0 ; j < my_debug_comm_size ; j++ ) { + fprintf(stderr," buffer index %d tag %lld \n", + i, + module_dbg->sm_buffer_descriptor[i].proc_memory[j].control_region->flag); + } + } + } + + fflush(stderr); + +} +/* end debug */ + /* * Local functions */ @@ -883,6 +928,9 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority) /* touch pages to apply memory affinity - Note: do we really need this or will * the algorithms do this */ +/* debug */ +module_dbg=&(sm_module->super); +/* end debug */ /* return */ return &(sm_module->super);