Add some debug code.
This commit was SVN r18096.
Этот коммит содержится в:
родитель
28746bbcdb
Коммит
fa696734d5
@ -1460,31 +1460,31 @@ int mca_coll_sm2_allreduce_intra(void *sbuf, void *rbuf, int count,
|
|||||||
/* local variables */
|
/* local variables */
|
||||||
int rc;
|
int rc;
|
||||||
|
|
||||||
#if 0 /* just for some testing */
|
#if 0
|
||||||
if( 0 != (op->o_flags & OMPI_OP_FLAGS_COMMUTE)) {
|
if( 0 != (op->o_flags & OMPI_OP_FLAGS_COMMUTE)) {
|
||||||
|
#endif
|
||||||
/* Commutative Operation */
|
/* Commutative Operation */
|
||||||
rc= mca_coll_sm2_allreduce_intra_recursive_doubling(sbuf, rbuf, count,
|
rc= mca_coll_sm2_allreduce_intra_recursive_doubling(sbuf, rbuf, count,
|
||||||
dtype, op, comm, module);
|
dtype, op, comm, module);
|
||||||
if( OMPI_SUCCESS != rc ) {
|
if( OMPI_SUCCESS != rc ) {
|
||||||
goto Error;
|
goto Error;
|
||||||
}
|
}
|
||||||
|
#if 0
|
||||||
} else {
|
} else {
|
||||||
#endif /* testing */
|
|
||||||
/* Non-Commutative Operation */
|
/* Non-Commutative Operation */
|
||||||
|
#endif
|
||||||
|
#if 0
|
||||||
rc= mca_coll_sm2_allreduce_intra_fanin_fanout_pipeline(
|
rc= mca_coll_sm2_allreduce_intra_fanin_fanout_pipeline(
|
||||||
sbuf, rbuf, count,dtype, op, comm, module);
|
sbuf, rbuf, count,dtype, op, comm, module);
|
||||||
if( OMPI_SUCCESS != rc ) {
|
if( OMPI_SUCCESS != rc ) {
|
||||||
goto Error;
|
goto Error;
|
||||||
}
|
}
|
||||||
#if 0
|
|
||||||
/* Non-Commutative Operation */
|
/* Non-Commutative Operation */
|
||||||
rc= mca_coll_sm2_allreduce_intra_fanin_fanout(sbuf, rbuf, count,
|
rc= mca_coll_sm2_allreduce_intra_fanin_fanout(sbuf, rbuf, count,
|
||||||
dtype, op, comm, module);
|
dtype, op, comm, module);
|
||||||
if( OMPI_SUCCESS != rc ) {
|
if( OMPI_SUCCESS != rc ) {
|
||||||
goto Error;
|
goto Error;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
#if 0 /* just for some testing */
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -24,6 +24,9 @@
|
|||||||
#include "ompi/mca/coll/coll.h"
|
#include "ompi/mca/coll/coll.h"
|
||||||
#include "opal/sys/atomic.h"
|
#include "opal/sys/atomic.h"
|
||||||
#include "coll_sm2.h"
|
#include "coll_sm2.h"
|
||||||
|
/* debug */
|
||||||
|
extern int debug_print;
|
||||||
|
/* end debug */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Shared memory barrier.
|
* Shared memory barrier.
|
||||||
@ -117,6 +120,15 @@ int mca_coll_sm2_nbbarrier_intra(struct ompi_communicator_t *comm,
|
|||||||
/* if parent has not checked in - set parameters for async
|
/* if parent has not checked in - set parameters for async
|
||||||
* completion, incomplet barrier flag, and bail
|
* completion, incomplet barrier flag, and bail
|
||||||
*/
|
*/
|
||||||
|
/* debug */
|
||||||
|
if( debug_print ) {
|
||||||
|
fprintf(stderr," A-I rank %d parent %d -tag %lld sm_address->flag %lld \n",
|
||||||
|
ompi_comm_rank(comm),
|
||||||
|
sm_module->sm_buffer_mgmt_barrier_tree.parent_rank,
|
||||||
|
-tag,sm_address->flag);
|
||||||
|
}
|
||||||
|
/* end debug */
|
||||||
|
/* child not arrived, just break out */
|
||||||
request->sm2_barrier_phase=NB_BARRIER_FAN_OUT;
|
request->sm2_barrier_phase=NB_BARRIER_FAN_OUT;
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -302,6 +314,12 @@ int mca_coll_sm2_nbbarrier_intra_progress(struct ompi_communicator_t *comm,
|
|||||||
/* if parent has not checked in - set parameters for async
|
/* if parent has not checked in - set parameters for async
|
||||||
* completion, incomplet barrier flag, and bail
|
* completion, incomplet barrier flag, and bail
|
||||||
*/
|
*/
|
||||||
|
/* debug */
|
||||||
|
if( debug_print ) {
|
||||||
|
fprintf(stderr," I rank %d -tag %lld sm_address->flag %lld \n",
|
||||||
|
ompi_comm_rank(comm),-tag,sm_address->flag);
|
||||||
|
}
|
||||||
|
/* end debug */
|
||||||
request->sm2_barrier_phase=NB_BARRIER_FAN_OUT;
|
request->sm2_barrier_phase=NB_BARRIER_FAN_OUT;
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -310,6 +328,13 @@ int mca_coll_sm2_nbbarrier_intra_progress(struct ompi_communicator_t *comm,
|
|||||||
* set my completion flag
|
* set my completion flag
|
||||||
*/
|
*/
|
||||||
request->sm2_barrier_phase=NB_BARRIER_DONE;
|
request->sm2_barrier_phase=NB_BARRIER_DONE;
|
||||||
|
/* debug */
|
||||||
|
if( debug_print ) {
|
||||||
|
fprintf(stderr," rank %d tag %lld done \n",
|
||||||
|
ompi_comm_rank(comm),
|
||||||
|
tag);
|
||||||
|
}
|
||||||
|
/* end debug */
|
||||||
} else if( INTERIOR_NODE == sm_module->sm_buffer_mgmt_barrier_tree.my_node_type ) {
|
} else if( INTERIOR_NODE == sm_module->sm_buffer_mgmt_barrier_tree.my_node_type ) {
|
||||||
phase=request->sm2_barrier_phase;
|
phase=request->sm2_barrier_phase;
|
||||||
if( NB_BARRIER_FAN_OUT == phase ) {
|
if( NB_BARRIER_FAN_OUT == phase ) {
|
||||||
@ -335,6 +360,14 @@ int mca_coll_sm2_nbbarrier_intra_progress(struct ompi_communicator_t *comm,
|
|||||||
/* child arrived */
|
/* child arrived */
|
||||||
cnt++;
|
cnt++;
|
||||||
} else {
|
} else {
|
||||||
|
/* debug */
|
||||||
|
if( debug_print ) {
|
||||||
|
fprintf(stderr," II rank %d child %d tag %lld sm_address->flag %lld \n",
|
||||||
|
ompi_comm_rank(comm),
|
||||||
|
sm_module->sm_buffer_mgmt_barrier_tree.children_ranks[child],
|
||||||
|
tag,sm_address->flag);
|
||||||
|
}
|
||||||
|
/* end debug */
|
||||||
/* child not arrived, just break out */
|
/* child not arrived, just break out */
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -375,6 +408,14 @@ int mca_coll_sm2_nbbarrier_intra_progress(struct ompi_communicator_t *comm,
|
|||||||
/* if parent has not checked in - set parameters for async
|
/* if parent has not checked in - set parameters for async
|
||||||
* completion, incomplet barrier flag, and bail
|
* completion, incomplet barrier flag, and bail
|
||||||
*/
|
*/
|
||||||
|
/* debug */
|
||||||
|
if( debug_print ) {
|
||||||
|
fprintf(stderr," III rank %d parent %d -tag %lld sm_address->flag %lld \n",
|
||||||
|
ompi_comm_rank(comm),
|
||||||
|
sm_module->sm_buffer_mgmt_barrier_tree.parent_rank,
|
||||||
|
-tag,sm_address->flag);
|
||||||
|
}
|
||||||
|
/* end debug */
|
||||||
request->sm2_barrier_phase=NB_BARRIER_FAN_OUT;
|
request->sm2_barrier_phase=NB_BARRIER_FAN_OUT;
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -389,6 +430,13 @@ int mca_coll_sm2_nbbarrier_intra_progress(struct ompi_communicator_t *comm,
|
|||||||
* set my completion flag
|
* set my completion flag
|
||||||
*/
|
*/
|
||||||
request->sm2_barrier_phase=NB_BARRIER_DONE;
|
request->sm2_barrier_phase=NB_BARRIER_DONE;
|
||||||
|
/* debug */
|
||||||
|
if( debug_print ) {
|
||||||
|
fprintf(stderr," rank %d tag %lld done \n",
|
||||||
|
ompi_comm_rank(comm),
|
||||||
|
tag);
|
||||||
|
}
|
||||||
|
/* end debug */
|
||||||
} else {
|
} else {
|
||||||
/* root node */
|
/* root node */
|
||||||
phase=request->sm2_barrier_phase;
|
phase=request->sm2_barrier_phase;
|
||||||
@ -413,6 +461,14 @@ int mca_coll_sm2_nbbarrier_intra_progress(struct ompi_communicator_t *comm,
|
|||||||
/* child arrived */
|
/* child arrived */
|
||||||
cnt++;
|
cnt++;
|
||||||
} else {
|
} else {
|
||||||
|
/* debug */
|
||||||
|
if( debug_print ) {
|
||||||
|
fprintf(stderr," IV rank %d parent %d tag %lld sm_address->flag %lld \n",
|
||||||
|
ompi_comm_rank(comm),
|
||||||
|
sm_module->sm_buffer_mgmt_barrier_tree.children_ranks[child],
|
||||||
|
tag,sm_address->flag);
|
||||||
|
}
|
||||||
|
/* end debug */
|
||||||
/* child not arrived, just break out */
|
/* child not arrived, just break out */
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -438,6 +494,13 @@ int mca_coll_sm2_nbbarrier_intra_progress(struct ompi_communicator_t *comm,
|
|||||||
* set my completion flag
|
* set my completion flag
|
||||||
*/
|
*/
|
||||||
request->sm2_barrier_phase=NB_BARRIER_DONE;
|
request->sm2_barrier_phase=NB_BARRIER_DONE;
|
||||||
|
/* debug */
|
||||||
|
if( debug_print ) {
|
||||||
|
fprintf(stderr," rank %d tag %lld done \n",
|
||||||
|
ompi_comm_rank(comm),
|
||||||
|
tag);
|
||||||
|
}
|
||||||
|
/* end debug */
|
||||||
}
|
}
|
||||||
|
|
||||||
DONE:
|
DONE:
|
||||||
|
@ -36,6 +36,18 @@
|
|||||||
#include "ompi/mca/coll/base/base.h"
|
#include "ompi/mca/coll/base/base.h"
|
||||||
#include "orte/mca/rml/rml.h"
|
#include "orte/mca/rml/rml.h"
|
||||||
|
|
||||||
|
/* debug */
|
||||||
|
#include <signal.h>
|
||||||
|
|
||||||
|
extern int debug_print; /* debug flag; set to 1 by dbg_handler in this file. Defined elsewhere (definition not visible here). */
|
||||||
|
extern int my_debug_rank; /* NOTE(review): declared here but not referenced in the visible part of this file */
|
||||||
|
extern void debug_module(void); /* state dump routine invoked by dbg_handler */
|
||||||
|
|
||||||
|
/*
 * Debug signal handler: turns on debug printing and dumps module state.
 *
 * my_signal - signal number delivered to the handler; not used.
 *
 * NOTE(review): debug_module() calls fprintf()/fflush(), which are not
 * async-signal-safe, so running it from a signal handler can deadlock or
 * corrupt stdio state.  debug_print is declared as a plain int; a flag
 * written from a handler would normally be volatile sig_atomic_t.
 * Consider only setting the flag here and dumping from normal context.
 */
void dbg_handler(int my_signal) {
|
||||||
|
debug_print=1; /* enables the "if( debug_print )" trace output */
|
||||||
|
debug_module(); /* dump control/data region flags to stderr */
|
||||||
|
}
|
||||||
|
/* end debug */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Public string showing the coll ompi_sm V2 component version number
|
* Public string showing the coll ompi_sm V2 component version number
|
||||||
@ -117,6 +129,11 @@ mca_coll_sm2_component_t mca_coll_sm2_component = {
|
|||||||
*/
|
*/
|
||||||
static int sm2_open(void)
|
static int sm2_open(void)
|
||||||
{
|
{
|
||||||
|
/* debug */
|
||||||
|
int retVal;
|
||||||
|
struct sigaction new_sigact;
|
||||||
|
/* end debug */
|
||||||
|
|
||||||
/* local variables */
|
/* local variables */
|
||||||
mca_coll_sm2_component_t *cs = &mca_coll_sm2_component;
|
mca_coll_sm2_component_t *cs = &mca_coll_sm2_component;
|
||||||
|
|
||||||
@ -176,6 +193,13 @@ static int sm2_open(void)
|
|||||||
cs->n_poll_loops=
|
cs->n_poll_loops=
|
||||||
mca_coll_sm2_param_register_int("n_poll_loops",4);
|
mca_coll_sm2_param_register_int("n_poll_loops",4);
|
||||||
|
|
||||||
|
/* debug */
|
||||||
|
new_sigact.sa_handler=dbg_handler;
|
||||||
|
sigemptyset(&(new_sigact.sa_mask));
|
||||||
|
|
||||||
|
retVal=sigaction(SIGUSR2,&new_sigact,NULL);
|
||||||
|
/* end debug */
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -44,6 +44,51 @@
|
|||||||
static int sm2_module_enable(struct mca_coll_base_module_1_1_0_t *module,
|
static int sm2_module_enable(struct mca_coll_base_module_1_1_0_t *module,
|
||||||
struct ompi_communicator_t *comm);
|
struct ompi_communicator_t *comm);
|
||||||
|
|
||||||
|
/* debug */
|
||||||
|
extern int debug_print; /* debug flag; defined elsewhere (definition not visible here) */
|
||||||
|
extern int my_debug_rank; /* rank value read by debug_module(); defined elsewhere */
|
||||||
|
extern int my_debug_comm_size; /* upper bound of the per-process loops in debug_module(); defined elsewhere */
|
||||||
|
extern void debug_module(void); /* defined just below in this file */
|
||||||
|
static mca_coll_sm2_module_t *module_dbg; /* module inspected by debug_module(); zero-initialized (NULL) until assigned. NOTE(review): assigned from &(sm_module->super) in mca_coll_sm2_comm_query - presumably super is the first member so the pointer types alias; confirm. */
|
||||||
|
/*
 * Debug helper: dump the shared-memory flags of the module referenced by
 * module_dbg to stderr, then flush stderr.
 *
 * Output:
 *  - when my_debug_rank is 0: for each of 2 "banks" and 2 "indices", the
 *    flag stored in every process's barrier control region;
 *  - on every rank: my_debug_rank plus the allocated and freed buffer indices;
 *  - when my_debug_rank is 0: the control-region flag of every process for
 *    every buffer descriptor.
 *
 * Takes no arguments and returns nothing.
 *
 * NOTE(review): module_dbg is dereferenced without a NULL check, so calling
 * this before module_dbg has been assigned will crash.
 * NOTE(review): the %lld conversions assume the flag fields are long long -
 * TODO confirm against the struct definitions.
 */
void debug_module(void) {
|
||||||
|
int i,j,k; /* i: bank / buffer index, j: index within bank / process, k: process */
|
||||||
|
char *ptr; /* NOTE(review): never used in this function */
|
||||||
|
mca_coll_sm2_nb_request_process_shared_mem_t * ctl_ptr; /* control region currently being printed */
|
||||||
|
/* control regions: barrier flags, printed only when my_debug_rank is 0 */
|
||||||
|
if ( 0 == my_debug_rank ) {
|
||||||
|
for( i=0 ; i < 2 ; i++ ) { /* NOTE(review): hard-coded 2 - presumably the number of barrier_request entries; confirm */
|
||||||
|
for( j=0 ; j < 2 ; j++ ) { /* NOTE(review): hard-coded 2 - presumably the number of barrier_base_address entries; confirm */
|
||||||
|
fprintf(stderr," bank %d index %d \n", i,j);
|
||||||
|
for( k=0 ; k < my_debug_comm_size ; k++ ) {
|
||||||
|
ctl_ptr=module_dbg->barrier_request[i].barrier_base_address[j]; /* base of the region for this bank/index */
|
||||||
|
ctl_ptr=(mca_coll_sm2_nb_request_process_shared_mem_t *) ( /* step to process k's slot: byte offset k * per-process size */
|
||||||
|
(char *)ctl_ptr+k*module_dbg->sm2_size_management_region_per_proc
|
||||||
|
);
|
||||||
|
fprintf(stderr," bank %d index %d flag %lld \n",
|
||||||
|
i,j,ctl_ptr->flag);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* data regions: the buffer indices are printed on every rank, the per-buffer flags only when my_debug_rank is 0 */
|
||||||
|
fprintf(stderr," my_debug_rank %d current index %d freed index %d \n",
|
||||||
|
my_debug_rank,
|
||||||
|
module_dbg->sm2_allocated_buffer_index,module_dbg->sm2_freed_buffer_index);
|
||||||
|
if( 0 == my_debug_rank ) {
|
||||||
|
for( i=0 ; i < module_dbg->sm2_module_num_buffers ; i++ ) {
|
||||||
|
for( j=0 ; j < my_debug_comm_size ; j++ ) {
|
||||||
|
fprintf(stderr," buffer index %d tag %lld \n", /* the value labelled "tag" is the control region's flag field */
|
||||||
|
i,
|
||||||
|
module_dbg->sm_buffer_descriptor[i].proc_memory[j].control_region->flag);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fflush(stderr); /* push the dump out before returning */
|
||||||
|
|
||||||
|
}
|
||||||
|
/* end debug */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Local functions
|
* Local functions
|
||||||
*/
|
*/
|
||||||
@ -883,6 +928,9 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
|
|||||||
/* touch pages to apply memory affinity - Note: do we really need this or will
|
/* touch pages to apply memory affinity - Note: do we really need this or will
|
||||||
* the algorithms do this */
|
* the algorithms do this */
|
||||||
|
|
||||||
|
/* debug */
|
||||||
|
module_dbg=&(sm_module->super);
|
||||||
|
/* end debug */
|
||||||
|
|
||||||
/* return */
|
/* return */
|
||||||
return &(sm_module->super);
|
return &(sm_module->super);
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user