2005-08-31 01:43:48 +00:00
/*
2005-11-05 19:57:48 +00:00
* Copyright ( c ) 2004 - 2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation . All rights reserved .
* Copyright ( c ) 2004 - 2005 The University of Tennessee and The University
* of Tennessee Research Foundation . All rights
* reserved .
2005-08-31 01:43:48 +00:00
* Copyright ( c ) 2004 - 2005 High Performance Computing Center Stuttgart ,
* University of Stuttgart . All rights reserved .
* Copyright ( c ) 2004 - 2005 The Regents of the University of California .
* All rights reserved .
* $ COPYRIGHT $
*
* Additional copyrights may follow
*
* $ HEADER $
*/
# include "ompi_config.h"
# include "mpi.h"
- Check, whether the compiler supports __builtin_clz (count leading
zeroes);
if so, use it for bit-operations like opal_cube_dim and opal_hibit.
Implement two versions of power-of-two.
In case of opal_next_poweroftwo, this reduces the average execution
time from 83 cycles to 4 cycles (Intel Nehalem, icc, -O2, inlining,
measured rdtsc, with loop over 2^27 values).
Numbers for other functions are similar (but of course heavily depend
on the usage, e.g. opal_hibit() with a start of 4 does not save
much). The bsr instruction on AMD Opteron is also not as fast.
- Replace various places where the next power-of-two is computed.
Tested on Intel Nehalem Cluster with openib, compilers GNU-4.6.1 and
Intel-12.0.4 using mpi_testsuite -t "Collective" with 128 processes.
This commit was SVN r25270.
2011-10-11 22:49:01 +00:00
# include "opal/util/bit_ops.h"
2006-02-12 01:33:29 +00:00
# include "ompi/constants.h"
# include "ompi/communicator/communicator.h"
# include "ompi/mca/coll/base/coll_tags.h"
2005-10-11 18:51:03 +00:00
# include "coll_tuned.h"
2005-08-31 01:43:48 +00:00
# include "coll_tuned_topo.h"
/*
* Some static helpers .
*/
static int pown ( int fanout , int num )
{
int j , p = 1 ;
if ( num < 0 ) return 0 ;
2005-10-13 23:38:21 +00:00
if ( 1 = = num ) return fanout ;
if ( 2 = = fanout ) {
return p < < num ;
}
else {
for ( j = 0 ; j < num ; j + + ) { p * = fanout ; }
}
2005-08-31 01:43:48 +00:00
return p ;
}
static int calculate_level ( int fanout , int rank )
{
int level , num ;
if ( rank < 0 ) return - 1 ;
for ( level = 0 , num = 0 ; num < = rank ; level + + ) {
num + = pown ( fanout , level ) ;
}
return level - 1 ;
}
static int calculate_num_nodes_up_to_level ( int fanout , int level )
{
/* just use geometric progression formula for sum:
a ^ 0 + a ^ 1 + . . . a ^ ( n - 1 ) = ( a ^ n - 1 ) / ( a - 1 ) */
return ( ( pown ( fanout , level ) - 1 ) / ( fanout - 1 ) ) ;
}
/*
* And now the building functions .
2007-03-02 17:17:14 +00:00
*
* An example for fanout = 2 , comm_size = 7
*
* 0 < - - delta = 1 ( fanout ^ 0 )
* / \
* 1 2 < - - delta = 2 ( fanout ^ 1 )
* / \ / \
* 3 5 4 6 < - - delta = 4 ( fanout ^ 2 )
2005-08-31 01:43:48 +00:00
*/
ompi_coll_tree_t *
ompi_coll_tuned_topo_build_tree ( int fanout ,
2006-10-18 02:00:46 +00:00
struct ompi_communicator_t * comm ,
int root )
2005-08-31 01:43:48 +00:00
{
2012-04-06 15:48:07 +00:00
int rank , size , schild , sparent , shiftedrank , i ;
2005-08-31 01:43:48 +00:00
int level ; /* location of my rank in the tree structure of size */
int delta ; /* number of nodes on my level */
int slimit ; /* total number of nodes on levels above me */
ompi_coll_tree_t * tree ;
2008-06-09 14:53:58 +00:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream , " coll:tuned:topo_build_tree Building fo %d rt %d " , fanout , root ) ) ;
2005-08-31 01:43:48 +00:00
2005-09-03 01:41:13 +00:00
if ( fanout < 1 ) {
2008-06-09 14:53:58 +00:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream , " coll:tuned:topo_build_tree invalid fanout %d " , fanout ) ) ;
2005-09-03 01:41:13 +00:00
return NULL ;
}
if ( fanout > MAXTREEFANOUT ) {
2008-06-09 14:53:58 +00:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream , " coll:tuned:topo_build_tree invalid fanout %d bigger than max %d " , fanout , MAXTREEFANOUT ) ) ;
2005-09-03 01:41:13 +00:00
return NULL ;
}
2005-08-31 01:43:48 +00:00
/*
* Get size and rank of the process in this communicator
*/
size = ompi_comm_size ( comm ) ;
rank = ompi_comm_rank ( comm ) ;
tree = ( ompi_coll_tree_t * ) malloc ( sizeof ( ompi_coll_tree_t ) ) ;
2005-09-03 01:41:13 +00:00
if ( ! tree ) {
2008-06-09 14:53:58 +00:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream , " coll:tuned:topo_build_tree PANIC::out of memory " ) ) ;
2005-09-03 01:41:13 +00:00
return NULL ;
}
2005-08-31 01:43:48 +00:00
tree - > tree_root = MPI_UNDEFINED ;
tree - > tree_nextsize = MPI_UNDEFINED ;
/*
* Set root
*/
tree - > tree_root = root ;
/*
* Initialize tree
*/
tree - > tree_fanout = fanout ;
2005-09-03 01:41:13 +00:00
tree - > tree_bmtree = 0 ;
2005-08-31 01:43:48 +00:00
tree - > tree_root = root ;
tree - > tree_prev = - 1 ;
tree - > tree_nextsize = 0 ;
for ( i = 0 ; i < fanout ; i + + ) {
tree - > tree_next [ i ] = - 1 ;
}
/* return if we have less than 2 processes */
if ( size < 2 ) {
return tree ;
}
/*
* Shift all ranks by root , so that the algorithm can be
* designed as if root would be always 0
* shiftedrank should be used in calculating distances
* and position in tree
*/
shiftedrank = rank - root ;
if ( shiftedrank < 0 ) {
shiftedrank + = size ;
}
/* calculate my level */
level = calculate_level ( fanout , shiftedrank ) ;
delta = pown ( fanout , level ) ;
/* find my children */
for ( i = 0 ; i < fanout ; i + + ) {
schild = shiftedrank + delta * ( i + 1 ) ;
if ( schild < size ) {
tree - > tree_next [ i ] = ( schild + root ) % size ;
tree - > tree_nextsize = tree - > tree_nextsize + 1 ;
} else {
break ;
}
}
/* find my parent */
slimit = calculate_num_nodes_up_to_level ( fanout , level ) ;
sparent = shiftedrank ;
if ( sparent < fanout ) {
sparent = 0 ;
} else {
while ( sparent > = slimit ) {
sparent - = delta / fanout ;
}
}
tree - > tree_prev = ( sparent + root ) % size ;
return tree ;
}
2006-10-26 22:53:05 +00:00
/*
* Constructs in - order binary tree which can be used for non - commutative reduce
* operations .
* Root of this tree is always rank ( size - 1 ) and fanout is 2.
* Here are some of the examples of this tree :
2007-03-02 17:17:14 +00:00
* size = = 2 size = = 3 size = = 4 size = = 9
* 1 2 3 8
* / / \ / \ / \
* 0 1 0 2 1 7 3
* / / \ / \
* 0 6 5 2 1
* / /
* 4 0
2006-10-26 22:53:05 +00:00
*/
ompi_coll_tree_t *
ompi_coll_tuned_topo_build_in_order_bintree ( struct ompi_communicator_t * comm )
{
2012-04-06 15:48:07 +00:00
int rank , size , myrank , rightsize , delta , parent , lchild , rchild ;
2006-10-26 22:53:05 +00:00
ompi_coll_tree_t * tree ;
/*
* Get size and rank of the process in this communicator
*/
size = ompi_comm_size ( comm ) ;
rank = ompi_comm_rank ( comm ) ;
tree = ( ompi_coll_tree_t * ) malloc ( sizeof ( ompi_coll_tree_t ) ) ;
if ( ! tree ) {
2008-06-09 14:53:58 +00:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream ,
2006-10-26 22:53:05 +00:00
" coll:tuned:topo_build_tree PANIC::out of memory " ) ) ;
return NULL ;
}
tree - > tree_root = MPI_UNDEFINED ;
tree - > tree_nextsize = MPI_UNDEFINED ;
/*
* Initialize tree
*/
tree - > tree_fanout = 2 ;
tree - > tree_bmtree = 0 ;
tree - > tree_root = size - 1 ;
tree - > tree_prev = - 1 ;
tree - > tree_nextsize = 0 ;
tree - > tree_next [ 0 ] = - 1 ;
tree - > tree_next [ 1 ] = - 1 ;
2008-06-09 14:53:58 +00:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream ,
2006-10-26 22:53:05 +00:00
" coll:tuned:topo_build_in_order_tree Building fo %d rt %d " ,
tree - > tree_fanout , tree - > tree_root ) ) ;
/*
* Build the tree
*/
myrank = rank ;
parent = size - 1 ;
delta = 0 ;
while ( 1 ) {
/* Compute the size of the right subtree */
rightsize = size > > 1 ;
/* Determine the left and right child of this parent */
lchild = - 1 ;
rchild = - 1 ;
if ( size - 1 > 0 ) {
lchild = parent - 1 ;
if ( lchild > 0 ) {
rchild = rightsize - 1 ;
}
}
/* The following cases are possible: myrank can be
- a parent ,
- belong to the left subtree , or
- belong to the right subtee
Each of the cases need to be handled differently .
*/
if ( myrank = = parent ) {
/* I am the parent:
- compute real ranks of my children , and exit the loop . */
if ( lchild > = 0 ) tree - > tree_next [ 0 ] = lchild + delta ;
if ( rchild > = 0 ) tree - > tree_next [ 1 ] = rchild + delta ;
break ;
}
if ( myrank > rchild ) {
/* I belong to the left subtree:
- If I am the left child , compute real rank of my parent
- Iterate down through tree :
compute new size , shift ranks down , and update delta .
*/
if ( myrank = = lchild ) {
tree - > tree_prev = parent + delta ;
}
size = size - rightsize - 1 ;
delta = delta + rightsize ;
myrank = myrank - rightsize ;
parent = size - 1 ;
} else {
/* I belong to the right subtree:
- If I am the right child , compute real rank of my parent
- Iterate down through tree :
compute new size and parent ,
but the delta and rank do not need to change .
*/
if ( myrank = = rchild ) {
tree - > tree_prev = parent + delta ;
}
size = rightsize ;
parent = rchild ;
}
}
if ( tree - > tree_next [ 0 ] > = 0 ) { tree - > tree_nextsize = 1 ; }
if ( tree - > tree_next [ 1 ] > = 0 ) { tree - > tree_nextsize + = 1 ; }
return tree ;
}
2005-08-31 01:43:48 +00:00
int ompi_coll_tuned_topo_destroy_tree ( ompi_coll_tree_t * * tree )
{
2005-09-03 01:41:13 +00:00
ompi_coll_tree_t * ptr ;
if ( ( ! tree ) | | ( ! * tree ) ) {
return OMPI_SUCCESS ;
}
ptr = * tree ;
free ( ptr ) ;
* tree = NULL ; /* mark tree as gone */
2005-08-31 01:43:48 +00:00
return OMPI_SUCCESS ;
}
2007-02-28 01:11:01 +00:00
/*
*
* Here are some of the examples of this tree :
* size = = 2 size = 4 size = 8
* 0 0 0
* / | \ / | \
* 1 2 1 4 2 1
* | | | \
* 3 6 5 3
* |
* 7
*/
2005-09-03 01:41:13 +00:00
ompi_coll_tree_t *
2005-08-31 01:43:48 +00:00
ompi_coll_tuned_topo_build_bmtree ( struct ompi_communicator_t * comm ,
2006-10-18 02:00:46 +00:00
int root )
2005-08-31 01:43:48 +00:00
{
2012-04-06 15:48:07 +00:00
int childs = 0 , rank , size , mask = 1 , index , remote , i ;
2005-09-03 01:41:13 +00:00
ompi_coll_tree_t * bmtree ;
2005-08-31 01:43:48 +00:00
2008-06-09 14:53:58 +00:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream , " coll:tuned:topo:build_bmtree rt %d " , root ) ) ;
2005-08-31 01:43:48 +00:00
/*
* Get size and rank of the process in this communicator
*/
size = ompi_comm_size ( comm ) ;
rank = ompi_comm_rank ( comm ) ;
index = rank - root ;
2005-09-03 01:41:13 +00:00
bmtree = ( ompi_coll_tree_t * ) malloc ( sizeof ( ompi_coll_tree_t ) ) ;
if ( ! bmtree ) {
2008-06-09 14:53:58 +00:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream , " coll:tuned:topo:build_bmtree PANIC out of memory " ) ) ;
2005-09-03 01:41:13 +00:00
return NULL ;
}
bmtree - > tree_bmtree = 1 ;
2005-08-31 01:43:48 +00:00
2005-09-03 01:41:13 +00:00
bmtree - > tree_root = MPI_UNDEFINED ;
bmtree - > tree_nextsize = MPI_UNDEFINED ;
2012-04-06 15:48:07 +00:00
for ( i = 0 ; i < MAXTREEFANOUT ; i + + ) {
2005-09-03 01:41:13 +00:00
bmtree - > tree_next [ i ] = - 1 ;
2005-08-31 01:43:48 +00:00
}
if ( index < 0 ) index + = size ;
- Check, whether the compiler supports __builtin_clz (count leading
zeroes);
if so, use it for bit-operations like opal_cube_dim and opal_hibit.
Implement two versions of power-of-two.
In case of opal_next_poweroftwo, this reduces the average execution
time from 83 cycles to 4 cycles (Intel Nehalem, icc, -O2, inlining,
measured rdtsc, with loop over 2^27 values).
Numbers for other functions are similar (but of course heavily depend
on the usage, e.g. opal_hibit() with a start of 4 does not save
much). The bsr instruction on AMD Opteron is also not as fast.
- Replace various places where the next power-of-two is computed.
Tested on Intel Nehalem Cluster with openib, compilers GNU-4.6.1 and
Intel-12.0.4 using mpi_testsuite -t "Collective" with 128 processes.
This commit was SVN r25270.
2011-10-11 22:49:01 +00:00
mask = opal_next_poweroftwo ( index ) ;
2005-08-31 01:43:48 +00:00
/* Now I can compute my father rank */
if ( root = = rank ) {
2005-09-03 01:41:13 +00:00
bmtree - > tree_prev = root ;
2005-08-31 01:43:48 +00:00
} else {
remote = ( index ^ ( mask > > 1 ) ) + root ;
if ( remote > = size ) remote - = size ;
2005-09-03 01:41:13 +00:00
bmtree - > tree_prev = remote ;
2005-08-31 01:43:48 +00:00
}
/* And now let's fill my childs */
while ( mask < size ) {
remote = ( index ^ mask ) ;
if ( remote > = size ) break ;
remote + = root ;
if ( remote > = size ) remote - = size ;
2005-09-03 01:41:13 +00:00
if ( childs = = MAXTREEFANOUT ) {
2008-06-09 14:53:58 +00:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream , " coll:tuned:topo:build_bmtree max fanout incorrect %d needed %d " , MAXTREEFANOUT , childs ) ) ;
2005-09-03 01:41:13 +00:00
return NULL ;
}
bmtree - > tree_next [ childs ] = remote ;
2005-08-31 01:43:48 +00:00
mask < < = 1 ;
childs + + ;
}
2005-09-03 01:41:13 +00:00
bmtree - > tree_nextsize = childs ;
bmtree - > tree_root = root ;
2005-08-31 01:43:48 +00:00
return bmtree ;
}
2007-02-28 01:11:01 +00:00
/*
* Constructs in - order binomial tree which can be used for gather / scatter
* operations .
*
* Here are some of the examples of this tree :
* size = = 2 size = 4 size = 8
* 0 0 0
* / / | / | \
* 1 1 2 1 2 4
* | | | \
* 3 3 5 6
* |
* 7
*/
ompi_coll_tree_t *
ompi_coll_tuned_topo_build_in_order_bmtree ( struct ompi_communicator_t * comm ,
2012-04-06 15:48:07 +00:00
int root )
2007-02-28 01:11:01 +00:00
{
2012-04-06 15:48:07 +00:00
int childs = 0 , rank , vrank , size , mask = 1 , remote , i ;
2007-02-28 01:11:01 +00:00
ompi_coll_tree_t * bmtree ;
2008-06-09 14:53:58 +00:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream , " coll:tuned:topo:build_in_order_bmtree rt %d " , root ) ) ;
2007-02-28 01:11:01 +00:00
/*
* Get size and rank of the process in this communicator
*/
size = ompi_comm_size ( comm ) ;
rank = ompi_comm_rank ( comm ) ;
vrank = ( rank - root + size ) % size ;
bmtree = ( ompi_coll_tree_t * ) malloc ( sizeof ( ompi_coll_tree_t ) ) ;
if ( ! bmtree ) {
2008-06-09 14:53:58 +00:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream , " coll:tuned:topo:build_bmtree PANIC out of memory " ) ) ;
2007-02-28 01:11:01 +00:00
return NULL ;
}
bmtree - > tree_bmtree = 1 ;
bmtree - > tree_root = MPI_UNDEFINED ;
bmtree - > tree_nextsize = MPI_UNDEFINED ;
for ( i = 0 ; i < MAXTREEFANOUT ; i + + ) {
bmtree - > tree_next [ i ] = - 1 ;
}
if ( root = = rank ) {
2012-04-06 15:48:07 +00:00
bmtree - > tree_prev = root ;
2007-02-28 01:11:01 +00:00
}
while ( mask < size ) {
2012-04-06 15:48:07 +00:00
remote = vrank ^ mask ;
if ( remote < vrank ) {
bmtree - > tree_prev = ( remote + root ) % size ;
break ;
} else if ( remote < size ) {
bmtree - > tree_next [ childs ] = ( remote + root ) % size ;
childs + + ;
if ( childs = = MAXTREEFANOUT ) {
OPAL_OUTPUT ( ( ompi_coll_tuned_stream ,
" coll:tuned:topo:build_bmtree max fanout incorrect %d needed %d " ,
MAXTREEFANOUT , childs ) ) ;
return NULL ;
}
}
mask < < = 1 ;
2007-02-28 01:11:01 +00:00
}
bmtree - > tree_nextsize = childs ;
bmtree - > tree_root = root ;
return bmtree ;
}
2005-08-31 01:43:48 +00:00
2006-10-23 21:46:30 +00:00
ompi_coll_tree_t *
2005-08-31 01:43:48 +00:00
ompi_coll_tuned_topo_build_chain ( int fanout ,
2006-10-18 02:00:46 +00:00
struct ompi_communicator_t * comm ,
int root )
2005-08-31 01:43:48 +00:00
{
2012-04-06 15:48:07 +00:00
int i , maxchainlen , mark , head , len , rank , size , srank /* shifted rank */ ;
2006-10-23 21:46:30 +00:00
ompi_coll_tree_t * chain ;
2005-08-31 01:43:48 +00:00
2008-06-09 14:53:58 +00:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream , " coll:tuned:topo:build_chain fo %d rt %d " , fanout , root ) ) ;
2005-08-31 01:43:48 +00:00
/*
* Get size and rank of the process in this communicator
*/
size = ompi_comm_size ( comm ) ;
rank = ompi_comm_rank ( comm ) ;
2005-09-03 01:41:13 +00:00
if ( fanout < 1 ) {
2008-06-09 14:53:58 +00:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream , " coll:tuned:topo:build_chain WARNING invalid fanout of ZERO, forcing to 1 (pipeline)! " ) ) ;
2006-01-08 02:41:09 +00:00
fanout = 1 ;
2005-09-03 01:41:13 +00:00
}
if ( fanout > MAXTREEFANOUT ) {
2008-06-09 14:53:58 +00:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream , " coll:tuned:topo:build_chain WARNING invalid fanout %d bigger than max %d, forcing to max! " , fanout , MAXTREEFANOUT ) ) ;
2006-01-08 02:41:09 +00:00
fanout = MAXTREEFANOUT ;
2005-08-31 01:43:48 +00:00
}
/*
* Allocate space for topology arrays if needed
*/
2006-10-23 21:46:30 +00:00
chain = ( ompi_coll_tree_t * ) malloc ( sizeof ( ompi_coll_tree_t ) ) ;
2005-09-03 01:41:13 +00:00
if ( ! chain ) {
2008-06-09 14:53:58 +00:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream , " coll:tuned:topo:build_chain PANIC out of memory " ) ) ;
2005-09-03 01:41:13 +00:00
fflush ( stdout ) ;
return NULL ;
}
2006-10-23 21:46:30 +00:00
chain - > tree_root = MPI_UNDEFINED ;
chain - > tree_nextsize = - 1 ;
for ( i = 0 ; i < fanout ; i + + ) chain - > tree_next [ i ] = - 1 ;
2005-08-31 01:43:48 +00:00
/*
* Set root & numchain
*/
2006-10-23 21:46:30 +00:00
chain - > tree_root = root ;
2005-08-31 01:43:48 +00:00
if ( ( size - 1 ) < fanout ) {
2006-10-23 21:46:30 +00:00
chain - > tree_nextsize = size - 1 ;
2005-08-31 01:43:48 +00:00
fanout = size - 1 ;
} else {
2006-10-23 21:46:30 +00:00
chain - > tree_nextsize = fanout ;
2005-08-31 01:43:48 +00:00
}
/*
* Shift ranks
*/
srank = rank - root ;
if ( srank < 0 ) srank + = size ;
/*
* Special case - fanout = = 1
*/
if ( fanout = = 1 ) {
2006-10-23 21:46:30 +00:00
if ( srank = = 0 ) chain - > tree_prev = - 1 ;
else chain - > tree_prev = ( srank - 1 + root ) % size ;
2005-08-31 01:43:48 +00:00
if ( ( srank + 1 ) > = size ) {
2006-10-23 21:46:30 +00:00
chain - > tree_next [ 0 ] = - 1 ;
chain - > tree_nextsize = 0 ;
2005-08-31 01:43:48 +00:00
} else {
2006-10-23 21:46:30 +00:00
chain - > tree_next [ 0 ] = ( srank + 1 + root ) % size ;
chain - > tree_nextsize = 1 ;
2005-08-31 01:43:48 +00:00
}
return chain ;
}
/* Let's handle the case where there is just one node in the communicator */
if ( size = = 1 ) {
2006-10-23 21:46:30 +00:00
chain - > tree_next [ 0 ] = - 1 ;
chain - > tree_nextsize = 0 ;
chain - > tree_prev = - 1 ;
2005-08-31 01:43:48 +00:00
return chain ;
}
/*
* Calculate maximum chain length
*/
maxchainlen = ( size - 1 ) / fanout ;
if ( ( size - 1 ) % fanout ! = 0 ) {
maxchainlen + + ;
mark = ( size - 1 ) % fanout ;
} else {
mark = fanout + 1 ;
}
/*
* Find your own place in the list of shifted ranks
*/
if ( srank ! = 0 ) {
int column ;
if ( srank - 1 < ( mark * maxchainlen ) ) {
column = ( srank - 1 ) / maxchainlen ;
head = 1 + column * maxchainlen ;
len = maxchainlen ;
} else {
column = mark + ( srank - 1 - mark * maxchainlen ) / ( maxchainlen - 1 ) ;
head = mark * maxchainlen + 1 + ( column - mark ) * ( maxchainlen - 1 ) ;
len = maxchainlen - 1 ;
}
if ( srank = = head ) {
2006-10-23 21:46:30 +00:00
chain - > tree_prev = 0 ; /*root*/
2005-08-31 01:43:48 +00:00
} else {
2006-10-23 21:46:30 +00:00
chain - > tree_prev = srank - 1 ; /* rank -1 */
2005-08-31 01:43:48 +00:00
}
if ( srank = = ( head + len - 1 ) ) {
2006-10-23 21:46:30 +00:00
chain - > tree_next [ 0 ] = - 1 ;
chain - > tree_nextsize = 0 ;
2005-08-31 01:43:48 +00:00
} else {
if ( ( srank + 1 ) < size ) {
2006-10-23 21:46:30 +00:00
chain - > tree_next [ 0 ] = srank + 1 ;
chain - > tree_nextsize = 1 ;
2005-08-31 01:43:48 +00:00
} else {
2006-10-23 21:46:30 +00:00
chain - > tree_next [ 0 ] = - 1 ;
chain - > tree_nextsize = 0 ;
2005-08-31 01:43:48 +00:00
}
}
}
/*
* Unshift values
*/
if ( rank = = root ) {
2006-10-23 21:46:30 +00:00
chain - > tree_prev = - 1 ;
chain - > tree_next [ 0 ] = ( root + 1 ) % size ;
2005-08-31 01:43:48 +00:00
for ( i = 1 ; i < fanout ; i + + ) {
2006-10-23 21:46:30 +00:00
chain - > tree_next [ i ] = chain - > tree_next [ i - 1 ] + maxchainlen ;
2005-08-31 01:43:48 +00:00
if ( i > mark ) {
2006-10-23 21:46:30 +00:00
chain - > tree_next [ i ] - - ;
2005-08-31 01:43:48 +00:00
}
2006-10-23 21:46:30 +00:00
chain - > tree_next [ i ] % = size ;
2005-08-31 01:43:48 +00:00
}
2006-10-23 21:46:30 +00:00
chain - > tree_nextsize = fanout ;
2005-08-31 01:43:48 +00:00
} else {
2006-10-23 21:46:30 +00:00
chain - > tree_prev = ( chain - > tree_prev + root ) % size ;
if ( chain - > tree_next [ 0 ] ! = - 1 ) {
chain - > tree_next [ 0 ] = ( chain - > tree_next [ 0 ] + root ) % size ;
2005-08-31 01:43:48 +00:00
}
}
return chain ;
}
2005-09-09 23:05:17 +00:00
int ompi_coll_tuned_topo_dump_tree ( ompi_coll_tree_t * tree , int rank )
{
2006-10-18 02:00:46 +00:00
int i ;
2005-09-09 23:05:17 +00:00
2008-06-09 14:53:58 +00:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream , " coll:tuned:topo:topo_dump_tree %1d tree root %d "
2006-10-23 21:46:30 +00:00
" fanout %d BM %1d nextsize %d prev %d " ,
rank , tree - > tree_root , tree - > tree_bmtree , tree - > tree_fanout ,
tree - > tree_nextsize , tree - > tree_prev ) ) ;
if ( tree - > tree_nextsize ) {
for ( i = 0 ; i < tree - > tree_nextsize ; i + + )
2008-06-09 14:53:58 +00:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream , " [%1d] %d " , i , tree - > tree_next [ i ] ) ) ;
2006-10-18 02:00:46 +00:00
}
return ( 0 ) ;
2005-09-09 23:05:17 +00:00
}