1
1
This commit was SVN r18096.
Этот коммит содержится в:
Rich Graham 2008-04-07 21:03:23 +00:00
родитель 28746bbcdb
Коммит fa696734d5
4 изменённых файла: 140 добавлений и 5 удалений

Просмотреть файл

@ -1460,31 +1460,31 @@ int mca_coll_sm2_allreduce_intra(void *sbuf, void *rbuf, int count,
/* local variables */ /* local variables */
int rc; int rc;
#if 0 /* just for some testing */ #if 0
if( 0 != (op->o_flags & OMPI_OP_FLAGS_COMMUTE)) { if( 0 != (op->o_flags & OMPI_OP_FLAGS_COMMUTE)) {
#endif
/* Commutative Operation */ /* Commutative Operation */
rc= mca_coll_sm2_allreduce_intra_recursive_doubling(sbuf, rbuf, count, rc= mca_coll_sm2_allreduce_intra_recursive_doubling(sbuf, rbuf, count,
dtype, op, comm, module); dtype, op, comm, module);
if( OMPI_SUCCESS != rc ) { if( OMPI_SUCCESS != rc ) {
goto Error; goto Error;
} }
#if 0
} else { } else {
#endif /* testing */
/* Non-Commutative Operation */ /* Non-Commutative Operation */
#endif
#if 0
rc= mca_coll_sm2_allreduce_intra_fanin_fanout_pipeline( rc= mca_coll_sm2_allreduce_intra_fanin_fanout_pipeline(
sbuf, rbuf, count,dtype, op, comm, module); sbuf, rbuf, count,dtype, op, comm, module);
if( OMPI_SUCCESS != rc ) { if( OMPI_SUCCESS != rc ) {
goto Error; goto Error;
} }
#if 0
/* Non-Commutative Operation */ /* Non-Commutative Operation */
rc= mca_coll_sm2_allreduce_intra_fanin_fanout(sbuf, rbuf, count, rc= mca_coll_sm2_allreduce_intra_fanin_fanout(sbuf, rbuf, count,
dtype, op, comm, module); dtype, op, comm, module);
if( OMPI_SUCCESS != rc ) { if( OMPI_SUCCESS != rc ) {
goto Error; goto Error;
} }
#endif
#if 0 /* just for some testing */
} }
#endif #endif

Просмотреть файл

@ -24,6 +24,9 @@
#include "ompi/mca/coll/coll.h" #include "ompi/mca/coll/coll.h"
#include "opal/sys/atomic.h" #include "opal/sys/atomic.h"
#include "coll_sm2.h" #include "coll_sm2.h"
/* debug */
extern int debug_print;
/* end debug */
/** /**
* Shared memory barrier. * Shared memory barrier.
@ -117,6 +120,15 @@ int mca_coll_sm2_nbbarrier_intra(struct ompi_communicator_t *comm,
/* if parent has not checked in - set parameters for async /* if parent has not checked in - set parameters for async
* completion, incomplete barrier flag, and bail * completion, incomplete barrier flag, and bail
*/ */
/* debug */
if( debug_print ) {
fprintf(stderr," A-I rank %d parent %d -tag %lld sm_address->flag %lld \n",
ompi_comm_rank(comm),
sm_module->sm_buffer_mgmt_barrier_tree.parent_rank,
-tag,sm_address->flag);
}
/* end debug */
/* parent has not checked in, just return */
request->sm2_barrier_phase=NB_BARRIER_FAN_OUT; request->sm2_barrier_phase=NB_BARRIER_FAN_OUT;
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
@ -302,6 +314,12 @@ int mca_coll_sm2_nbbarrier_intra_progress(struct ompi_communicator_t *comm,
/* if parent has not checked in - set parameters for async /* if parent has not checked in - set parameters for async
* completion, incomplete barrier flag, and bail * completion, incomplete barrier flag, and bail
*/ */
/* debug */
if( debug_print ) {
fprintf(stderr," I rank %d -tag %lld sm_address->flag %lld \n",
ompi_comm_rank(comm),-tag,sm_address->flag);
}
/* end debug */
request->sm2_barrier_phase=NB_BARRIER_FAN_OUT; request->sm2_barrier_phase=NB_BARRIER_FAN_OUT;
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
@ -310,6 +328,13 @@ int mca_coll_sm2_nbbarrier_intra_progress(struct ompi_communicator_t *comm,
* set my completion flag * set my completion flag
*/ */
request->sm2_barrier_phase=NB_BARRIER_DONE; request->sm2_barrier_phase=NB_BARRIER_DONE;
/* debug */
if( debug_print ) {
fprintf(stderr," rank %d tag %lld done \n",
ompi_comm_rank(comm),
tag);
}
/* end debug */
} else if( INTERIOR_NODE == sm_module->sm_buffer_mgmt_barrier_tree.my_node_type ) { } else if( INTERIOR_NODE == sm_module->sm_buffer_mgmt_barrier_tree.my_node_type ) {
phase=request->sm2_barrier_phase; phase=request->sm2_barrier_phase;
if( NB_BARRIER_FAN_OUT == phase ) { if( NB_BARRIER_FAN_OUT == phase ) {
@ -335,6 +360,14 @@ int mca_coll_sm2_nbbarrier_intra_progress(struct ompi_communicator_t *comm,
/* child arrived */ /* child arrived */
cnt++; cnt++;
} else { } else {
/* debug */
if( debug_print ) {
fprintf(stderr," II rank %d child %d tag %lld sm_address->flag %lld \n",
ompi_comm_rank(comm),
sm_module->sm_buffer_mgmt_barrier_tree.children_ranks[child],
tag,sm_address->flag);
}
/* end debug */
/* child not arrived, just break out */ /* child not arrived, just break out */
break; break;
} }
@ -375,6 +408,14 @@ int mca_coll_sm2_nbbarrier_intra_progress(struct ompi_communicator_t *comm,
/* if parent has not checked in - set parameters for async /* if parent has not checked in - set parameters for async
* completion, incomplete barrier flag, and bail * completion, incomplete barrier flag, and bail
*/ */
/* debug */
if( debug_print ) {
fprintf(stderr," III rank %d parent %d -tag %lld sm_address->flag %lld \n",
ompi_comm_rank(comm),
sm_module->sm_buffer_mgmt_barrier_tree.parent_rank,
-tag,sm_address->flag);
}
/* end debug */
request->sm2_barrier_phase=NB_BARRIER_FAN_OUT; request->sm2_barrier_phase=NB_BARRIER_FAN_OUT;
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
@ -389,6 +430,13 @@ int mca_coll_sm2_nbbarrier_intra_progress(struct ompi_communicator_t *comm,
* set my completion flag * set my completion flag
*/ */
request->sm2_barrier_phase=NB_BARRIER_DONE; request->sm2_barrier_phase=NB_BARRIER_DONE;
/* debug */
if( debug_print ) {
fprintf(stderr," rank %d tag %lld done \n",
ompi_comm_rank(comm),
tag);
}
/* end debug */
} else { } else {
/* root node */ /* root node */
phase=request->sm2_barrier_phase; phase=request->sm2_barrier_phase;
@ -413,6 +461,14 @@ int mca_coll_sm2_nbbarrier_intra_progress(struct ompi_communicator_t *comm,
/* child arrived */ /* child arrived */
cnt++; cnt++;
} else { } else {
/* debug */
if( debug_print ) {
fprintf(stderr," IV rank %d parent %d tag %lld sm_address->flag %lld \n",
ompi_comm_rank(comm),
sm_module->sm_buffer_mgmt_barrier_tree.children_ranks[child],
tag,sm_address->flag);
}
/* end debug */
/* child not arrived, just break out */ /* child not arrived, just break out */
break; break;
} }
@ -438,6 +494,13 @@ int mca_coll_sm2_nbbarrier_intra_progress(struct ompi_communicator_t *comm,
* set my completion flag * set my completion flag
*/ */
request->sm2_barrier_phase=NB_BARRIER_DONE; request->sm2_barrier_phase=NB_BARRIER_DONE;
/* debug */
if( debug_print ) {
fprintf(stderr," rank %d tag %lld done \n",
ompi_comm_rank(comm),
tag);
}
/* end debug */
} }
DONE: DONE:

Просмотреть файл

@ -36,6 +36,18 @@
#include "ompi/mca/coll/base/base.h" #include "ompi/mca/coll/base/base.h"
#include "orte/mca/rml/rml.h" #include "orte/mca/rml/rml.h"
/* debug */
#include <signal.h>
extern int debug_print;
extern int my_debug_rank;
extern void debug_module(void);
/*
 * Debug signal handler: switches on debug_print and then calls
 * debug_module().
 *
 * my_signal is the number of the delivered signal; it is not used.
 *
 * NOTE(review): debug_module() is defined in another file.  If it writes
 * with stdio (fprintf/fflush), it is not async-signal-safe, so this
 * handler is suitable for debugging sessions only - confirm before
 * leaving it enabled in production builds.
 */
void dbg_handler(int my_signal) {
    /* the handler behaves the same for every signal it is attached to */
    (void)my_signal;

    debug_print=1;
    debug_module();
}
/* end debug */
/* /*
* Public string showing the coll ompi_sm V2 component version number * Public string showing the coll ompi_sm V2 component version number
@ -117,6 +129,11 @@ mca_coll_sm2_component_t mca_coll_sm2_component = {
*/ */
static int sm2_open(void) static int sm2_open(void)
{ {
/* debug */
int retVal;
struct sigaction new_sigact;
/* end debug */
/* local variables */ /* local variables */
mca_coll_sm2_component_t *cs = &mca_coll_sm2_component; mca_coll_sm2_component_t *cs = &mca_coll_sm2_component;
@ -176,6 +193,13 @@ static int sm2_open(void)
cs->n_poll_loops= cs->n_poll_loops=
mca_coll_sm2_param_register_int("n_poll_loops",4); mca_coll_sm2_param_register_int("n_poll_loops",4);
/* debug */
/* Register dbg_handler as the handler for SIGUSR2.
 * NOTE(review): only sa_handler and sa_mask are assigned on new_sigact in
 * the visible code; sa_flags appears to be left uninitialized when
 * sigaction() is called - confirm and set it explicitly (e.g. to 0).
 * NOTE(review): retVal receives the sigaction() result but is not checked
 * in the visible code. */
new_sigact.sa_handler=dbg_handler;
sigemptyset(&(new_sigact.sa_mask));
retVal=sigaction(SIGUSR2,&new_sigact,NULL);
/* end debug */
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }

Просмотреть файл

@ -44,6 +44,51 @@
static int sm2_module_enable(struct mca_coll_base_module_1_1_0_t *module, static int sm2_module_enable(struct mca_coll_base_module_1_1_0_t *module,
struct ompi_communicator_t *comm); struct ompi_communicator_t *comm);
/* debug */
extern int debug_print;
extern int my_debug_rank;
extern int my_debug_comm_size;
extern void debug_module(void);
static mca_coll_sm2_module_t *module_dbg;
/*
 * Debug helper: dump the state of the module recorded in module_dbg to
 * stderr.
 *
 * - When my_debug_rank is 0, prints the flag of each process's control
 *   region (my_debug_comm_size entries, spaced
 *   sm2_size_management_region_per_proc bytes apart) for every
 *   barrier bank/index pair.
 * - Every caller prints its allocated and freed buffer indices.
 * - When my_debug_rank is 0, prints the control-region flag of each
 *   process for every data buffer.
 *
 * If no module has been recorded yet (module_dbg is NULL) a short notice
 * is printed and nothing else is done.
 *
 * NOTE(review): this routine uses stdio; it is reached from the SIGUSR2
 * debug handler, where stdio is not async-signal-safe.  Debug use only.
 */
void debug_module(void) {
    int i,j,k;
    mca_coll_sm2_nb_request_process_shared_mem_t * region_base;
    mca_coll_sm2_nb_request_process_shared_mem_t * ctl_ptr;

    /* the dump can be requested before any module has been recorded */
    if( NULL == module_dbg ) {
        fprintf(stderr," my_debug_rank %d no sm2 module recorded yet \n",
                my_debug_rank);
        fflush(stderr);
        return;
    }

    /* control regions */
    if ( 0 == my_debug_rank ) {
        for( i=0 ; i < 2 ; i++ ) {
            for( j=0 ; j < 2 ; j++ ) {
                fprintf(stderr," bank %d index %d \n", i,j);
                /* start of this bank/index region; does not depend on k */
                region_base=module_dbg->barrier_request[i].barrier_base_address[j];
                for( k=0 ; k < my_debug_comm_size ; k++ ) {
                    /* step to the k-th process's slot using byte offsets */
                    ctl_ptr=(mca_coll_sm2_nb_request_process_shared_mem_t *) (
                            (char *)region_base+
                            k*module_dbg->sm2_size_management_region_per_proc
                            );
                    /* cast so the variadic argument always matches %lld */
                    fprintf(stderr," bank %d index %d flag %lld \n",
                            i,j,(long long)ctl_ptr->flag);
                }
            }
        }
    }

    /* data regions */
    fprintf(stderr," my_debug_rank %d current index %d freed index %d \n",
            my_debug_rank,
            module_dbg->sm2_allocated_buffer_index,module_dbg->sm2_freed_buffer_index);
    if( 0 == my_debug_rank ) {
        for( i=0 ; i < module_dbg->sm2_module_num_buffers ; i++ ) {
            for( j=0 ; j < my_debug_comm_size ; j++ ) {
                fprintf(stderr," buffer index %d tag %lld \n",
                        i,
                        (long long)module_dbg->sm_buffer_descriptor[i].proc_memory[j].control_region->flag);
            }
        }
    }

    fflush(stderr);
}
/* end debug */
/* /*
* Local functions * Local functions
*/ */
@ -883,6 +928,9 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
/* touch pages to apply memory affinity - Note: do we really need this or will /* touch pages to apply memory affinity - Note: do we really need this or will
* the algorithms do this */ * the algorithms do this */
/* debug */
module_dbg=&(sm_module->super);
/* end debug */
/* return */ /* return */
return &(sm_module->super); return &(sm_module->super);