2005-10-27 03:11:32 +04:00
/*
2005-11-05 22:57:48 +03:00
* Copyright ( c ) 2004 - 2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation . All rights reserved .
2006-04-20 03:42:06 +04:00
* Copyright ( c ) 2004 - 2006 The University of Tennessee and The University
2005-11-05 22:57:48 +03:00
* of Tennessee Research Foundation . All rights
* reserved .
2005-10-27 03:11:32 +04:00
* Copyright ( c ) 2004 - 2005 High Performance Computing Center Stuttgart ,
* University of Stuttgart . All rights reserved .
* Copyright ( c ) 2004 - 2005 The Regents of the University of California .
* All rights reserved .
* $ COPYRIGHT $
*
* Additional copyrights may follow
*
* $ HEADER $
*/
# include "ompi_config.h"
# include "mpi.h"
2006-02-12 04:33:29 +03:00
# include "ompi/constants.h"
# include "ompi/datatype/datatype.h"
# include "ompi/communicator/communicator.h"
# include "ompi/mca/coll/coll.h"
# include "ompi/mca/coll/base/coll_tags.h"
# include "ompi/mca/pml/pml.h"
# include "ompi/op/op.h"
2005-10-27 03:11:32 +04:00
# include "coll_tuned.h"
# include "coll_tuned_topo.h"
/*
2005-12-22 16:49:33 +03:00
* ompi_coll_tuned_allreduce_intra_nonoverlapping
2005-10-27 03:11:32 +04:00
*
* This function just calls a reduce followed by a broadcast
* both called functions are tuned but they complete sequentially ,
* i . e . no additional overlapping
* meaning if the number of segments used is greater than the topo depth
* then once the first segment of data is fully ' reduced ' it is not broadcast
* while the reduce continues ( cost = cost - reduce + cost - bcast + decision x 3 )
*
*/
int
2005-12-22 16:49:33 +03:00
ompi_coll_tuned_allreduce_intra_nonoverlapping ( void * sbuf , void * rbuf , int count ,
2005-10-27 03:11:32 +04:00
struct ompi_datatype_t * dtype ,
struct ompi_op_t * op ,
struct ompi_communicator_t * comm )
{
int err ;
int rank ;
rank = ompi_comm_rank ( comm ) ;
2005-12-22 16:49:33 +03:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream , " coll:tuned:allreduce_intra_nonoverlapping rank %d " , rank ) ) ;
2005-10-27 03:11:32 +04:00
/* Reduce to 0 and broadcast. */
if ( MPI_IN_PLACE = = sbuf ) {
if ( 0 = = ompi_comm_rank ( comm ) ) {
err = comm - > c_coll . coll_reduce ( MPI_IN_PLACE , rbuf , count , dtype , op , 0 , comm ) ;
} else {
err = comm - > c_coll . coll_reduce ( rbuf , NULL , count , dtype , op , 0 , comm ) ;
}
} else {
err = comm - > c_coll . coll_reduce ( sbuf , rbuf , count , dtype , op , 0 , comm ) ;
}
if ( MPI_SUCCESS ! = err ) {
return err ;
}
return comm - > c_coll . coll_bcast ( rbuf , count , dtype , 0 , comm ) ;
}
/*
 * Linear functions are copied from the BASIC coll module.
 * They do not segment the message and are simple implementations,
 * but for some small number of nodes and/or small data sizes they
 * are just as fast as tuned/tree based segmenting operations,
 * and as such may be selected by the decision functions.
 * They are copied into this module because of the way we select modules
 * in V1; in V2 we will handle this differently and so will not
 * have to duplicate code.
 * GEF Oct05 after asking Jeff.
 */
/* copied function (with appropriate renaming) starts here */
/*
* allreduce_intra
*
* Function : - allreduce using other MPI collectives
* Accepts : - same as MPI_Allreduce ( )
* Returns : - MPI_SUCCESS or error code
*/
int
2005-12-22 16:49:33 +03:00
ompi_coll_tuned_allreduce_intra_basic_linear ( void * sbuf , void * rbuf , int count ,
2005-10-27 03:11:32 +04:00
struct ompi_datatype_t * dtype ,
struct ompi_op_t * op ,
struct ompi_communicator_t * comm )
{
int err ;
int rank ;
rank = ompi_comm_rank ( comm ) ;
2005-12-22 16:49:33 +03:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream , " coll:tuned:allreduce_intra_basic_linear rank %d " , rank ) ) ;
2005-10-27 03:11:32 +04:00
/* Reduce to 0 and broadcast. */
if ( MPI_IN_PLACE = = sbuf ) {
if ( 0 = = ompi_comm_rank ( comm ) ) {
2005-12-22 16:49:33 +03:00
err = ompi_coll_tuned_reduce_intra_basic_linear ( MPI_IN_PLACE , rbuf , count , dtype , op , 0 , comm ) ;
2005-10-27 03:11:32 +04:00
} else {
2005-12-22 16:49:33 +03:00
err = ompi_coll_tuned_reduce_intra_basic_linear ( rbuf , NULL , count , dtype , op , 0 , comm ) ;
2005-10-27 03:11:32 +04:00
}
} else {
2005-12-22 16:49:33 +03:00
err = ompi_coll_tuned_reduce_intra_basic_linear ( sbuf , rbuf , count , dtype , op , 0 , comm ) ;
2005-10-27 03:11:32 +04:00
}
if ( MPI_SUCCESS ! = err ) {
return err ;
}
2005-12-22 16:49:33 +03:00
return ompi_coll_tuned_bcast_intra_basic_linear ( rbuf , count , dtype , 0 , comm ) ;
2005-10-27 03:11:32 +04:00
}
/* copied function (with appropriate renaming) ends here */
/* The following are used by dynamic and forced rules. */
/* They publish details of each algorithm and whether it is forced/fixed/locked in. */
/* As you add methods/algorithms you must update this and the query/map routines. */
2006-04-20 03:42:06 +04:00
/*
 * ompi_coll_tuned_allreduce_intra_check_forced_init
 *
 * This routine is called by the component only.  It makes sure that the
 * MCA parameters for allreduce are set to their initial values and perms.
 * Modules do not call this; they call the forced_getvalues routine instead.
 *
 * What it does:
 *   - records the number of selectable allreduce algorithms (2) in
 *     ompi_coll_tuned_forced_max_algorithms[ALLREDUCE];
 *   - registers the "algorithm count" parameter (its index is not kept);
 *   - registers the algorithm, segment size, tree fanout and chain fanout
 *     parameters and stores each returned index in *mca_param_indices.
 *
 * Accepts:  mca_param_indices - structure receiving the parameter indices
 * Returns:  MPI_SUCCESS, unconditionally; the values returned by the
 *           registration calls are stored (or discarded) without being
 *           checked for failure.
 *
 * NOTE(review): the string literals below carry leading/trailing blanks
 * exactly as found in this source; confirm they are intended.
 */
int ompi_coll_tuned_allreduce_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
    int max_alg = 2;

    ompi_coll_tuned_forced_max_algorithms[ALLREDUCE] = max_alg;

    /*
     * Nothing consumes the index of this parameter, so the result is
     * discarded explicitly rather than parked in an unused local.
     * NOTE(review): the (false, true) flags differ from the (false, false)
     * used for the tunables below -- presumably marks it read-only; confirm
     * against mca_base_param_reg_int's prototype.
     */
    (void) mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
                                  " allreduce_algorithm_count ",
                                  " Number of allreduce algorithms available ",
                                  false, true, max_alg, NULL);

    /* Default 0: per the help text, 0 means "ignore" (no algorithm forced). */
    mca_param_indices->algorithm_param_index =
        mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
                               " allreduce_algorithm ",
                               " Which allreduce algorithm is used. Can be locked down to any of: 0 ignore, 1 basic linear, 2 nonoverlapping (tuned reduce + tuned bcast) ",
                               false, false, 0, NULL);

    /* Default 0: per the help text, 0 bytes means no segmentation. */
    mca_param_indices->segsize_param_index =
        mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
                               " allreduce_algorithm_segmentsize ",
                               " Segment size in bytes used by default for allreduce algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation. ",
                               false, false, 0, NULL);

    mca_param_indices->tree_fanout_param_index =
        mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
                               " allreduce_algorithm_tree_fanout ",
                               " Fanout for n-tree used for allreduce algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation. ",
                               false, false,
                               ompi_coll_tuned_init_tree_fanout, /* get system wide default */
                               NULL);

    mca_param_indices->chain_fanout_param_index =
        mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
                               " allreduce_algorithm_chain_fanout ",
                               " Fanout for chains used for allreduce algorithms. Only has meaning if algorithm is forced and supports chain topo based operation. ",
                               false, false,
                               ompi_coll_tuned_init_chain_fanout, /* get system wide default */
                               NULL);

    return (MPI_SUCCESS);
}
2005-12-22 16:49:33 +03:00
int ompi_coll_tuned_allreduce_intra_do_forced ( void * sbuf , void * rbuf , int count ,
2005-10-27 03:11:32 +04:00
struct ompi_datatype_t * dtype ,
struct ompi_op_t * op ,
struct ompi_communicator_t * comm )
{
2005-12-22 16:49:33 +03:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream , " coll:tuned:allreduce_intra_do_forced selected algorithm %d " ,
2006-04-20 03:42:06 +04:00
comm - > c_coll_selected_data - > user_forced [ ALLREDUCE ] . algorithm ) ) ;
2005-10-31 23:45:50 +03:00
2006-04-20 03:42:06 +04:00
switch ( comm - > c_coll_selected_data - > user_forced [ ALLREDUCE ] . algorithm ) {
2005-12-22 16:49:33 +03:00
case ( 0 ) : return ompi_coll_tuned_allreduce_intra_dec_fixed ( sbuf , rbuf , count , dtype , op , comm ) ;
case ( 1 ) : return ompi_coll_tuned_allreduce_intra_basic_linear ( sbuf , rbuf , count , dtype , op , comm ) ;
case ( 2 ) : return ompi_coll_tuned_allreduce_intra_nonoverlapping ( sbuf , rbuf , count , dtype , op , comm ) ;
2005-10-27 03:11:32 +04:00
default :
2005-12-22 16:49:33 +03:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream , " coll:tuned:allreduce_intra_do_forced attempt to select algorithm %d when only 0-%d is valid? " ,
2006-04-20 03:42:06 +04:00
comm - > c_coll_selected_data - > user_forced [ ALLREDUCE ] . algorithm ,
ompi_coll_tuned_forced_max_algorithms [ ALLREDUCE ] ) ) ;
2005-10-27 03:11:32 +04:00
return ( MPI_ERR_ARG ) ;
} /* switch */
}
2005-12-22 16:49:33 +03:00
int ompi_coll_tuned_allreduce_intra_do_this ( void * sbuf , void * rbuf , int count ,
2005-11-11 07:49:29 +03:00
struct ompi_datatype_t * dtype ,
struct ompi_op_t * op ,
struct ompi_communicator_t * comm ,
2006-04-20 03:42:06 +04:00
int algorithm , int faninout , int segsize )
2005-11-11 07:49:29 +03:00
{
2005-12-22 16:49:33 +03:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream , " coll:tuned:allreduce_intra_do_this algorithm %d topo fan in/out %d segsize %d " ,
2006-04-20 03:42:06 +04:00
algorithm , faninout , segsize ) ) ;
2005-11-11 07:49:29 +03:00
2006-04-20 03:42:06 +04:00
switch ( algorithm ) {
2005-12-22 16:49:33 +03:00
case ( 0 ) : return ompi_coll_tuned_allreduce_intra_dec_fixed ( sbuf , rbuf , count , dtype , op , comm ) ;
case ( 1 ) : return ompi_coll_tuned_allreduce_intra_basic_linear ( sbuf , rbuf , count , dtype , op , comm ) ;
case ( 2 ) : return ompi_coll_tuned_allreduce_intra_nonoverlapping ( sbuf , rbuf , count , dtype , op , comm ) ;
2005-11-11 07:49:29 +03:00
default :
2005-12-22 16:49:33 +03:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream , " coll:tuned:allreduce_intra_do_this attempt to select algorithm %d when only 0-%d is valid? " ,
2006-04-20 03:42:06 +04:00
algorithm , ompi_coll_tuned_forced_max_algorithms [ ALLREDUCE ] ) ) ;
2005-11-11 07:49:29 +03:00
return ( MPI_ERR_ARG ) ;
} /* switch */
}
2005-10-27 03:11:32 +04:00