diff --git a/ompi/mca/coll/tuned/Makefile.am b/ompi/mca/coll/tuned/Makefile.am index bfbd1d9f4f..7e607257dd 100644 --- a/ompi/mca/coll/tuned/Makefile.am +++ b/ompi/mca/coll/tuned/Makefile.am @@ -2,7 +2,7 @@ # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University +# Copyright (c) 2004-2009 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -22,14 +22,12 @@ sources = \ coll_tuned_util.h \ coll_tuned_dynamic_file.h \ coll_tuned_dynamic_rules.h \ - coll_tuned_forced.h \ coll_tuned_topo.c \ coll_tuned_util.c \ coll_tuned_decision_fixed.c \ coll_tuned_decision_dynamic.c \ coll_tuned_dynamic_file.c \ coll_tuned_dynamic_rules.c \ - coll_tuned_forced.c \ coll_tuned_allreduce.c \ coll_tuned_alltoall.c \ coll_tuned_alltoallv.c \ diff --git a/ompi/mca/coll/tuned/coll_tuned.h b/ompi/mca/coll/tuned/coll_tuned.h index 19c858de38..144236df6a 100644 --- a/ompi/mca/coll/tuned/coll_tuned.h +++ b/ompi/mca/coll/tuned/coll_tuned.h @@ -2,7 +2,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2009 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -34,12 +34,26 @@ /* also need the dynamic rule structures */ #include "coll_tuned_dynamic_rules.h" -/* need the forced user choice structures */ -#include "coll_tuned_forced.h" - /* some fixed value index vars to simplify certain operations */ -typedef enum COLLTYPE {ALLGATHER, ALLGATHERV, ALLREDUCE, ALLTOALL, ALLTOALLV, ALLTOALLW, BARRIER, BCAST, -EXSCAN, GATHER, GATHERV, REDUCE, REDUCESCATTER, SCAN, SCATTER, SCATTERV, COLLCOUNT} COLLTYPE_T; +typedef enum COLLTYPE { + ALLGATHER = 0, /* 0 */ + ALLGATHERV, /* 1 */ + ALLREDUCE, /* 2 */ + ALLTOALL, /* 3 */ + ALLTOALLV, /* 4 */ + ALLTOALLW, /* 5 */ + BARRIER, /* 6 */ + BCAST, /* 7 */ + EXSCAN, /* 8 */ + GATHER, /* 9 */ + GATHERV, /* 10 */ + REDUCE, /* 11 */ + REDUCESCATTER, /* 12 */ + SCAN, /* 13 */ + SCATTER, /* 14 */ + SCATTERV, /* 15 */ + COLLCOUNT /* 16 end counter keep it as last element */ +} COLLTYPE_T; /* defined arg lists to simply auto inclusion of user overriding decision functions */ #define ALLGATHER_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module @@ -60,343 +74,357 @@ EXSCAN, GATHER, GATHERV, REDUCE, REDUCESCATTER, SCAN, SCATTER, SCATTERV, COLLCOU #define SCATTERV_ARGS void *sbuf, int *scounts, int *disps, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module /* end defined arg lists to simply auto inclusion of user overriding decision functions */ -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +BEGIN_C_DECLS - /* these are the same across all modules and are loaded at component query time */ - extern int ompi_coll_tuned_stream; - extern int ompi_coll_tuned_priority; - extern int ompi_coll_tuned_preallocate_memory_comm_size_limit; - extern 
int ompi_coll_tuned_use_dynamic_rules; - extern char* ompi_coll_tuned_dynamic_rules_filename; - extern int ompi_coll_tuned_init_tree_fanout; - extern int ompi_coll_tuned_init_chain_fanout; - extern int ompi_coll_tuned_init_max_requests; +/* these are the same across all modules and are loaded at component query time */ +extern int ompi_coll_tuned_stream; +extern int ompi_coll_tuned_priority; +extern int ompi_coll_tuned_preallocate_memory_comm_size_limit; +extern int ompi_coll_tuned_use_dynamic_rules; +extern char* ompi_coll_tuned_dynamic_rules_filename; +extern int ompi_coll_tuned_init_tree_fanout; +extern int ompi_coll_tuned_init_chain_fanout; +extern int ompi_coll_tuned_init_max_requests; - /* forced algorithm choices */ - /* the indices to the MCA params so that modules can look them up at open / comm create time */ - extern coll_tuned_force_algorithm_mca_param_indices_t ompi_coll_tuned_forced_params[COLLCOUNT]; - /* the actual max algorithm values (readonly), loaded at component open */ - extern int ompi_coll_tuned_forced_max_algorithms[COLLCOUNT]; - - /* - * coll API functions - */ - - /* API functions */ - - int ompi_coll_tuned_init_query(bool enable_progress_threads, - bool enable_mpi_threads); - - mca_coll_base_module_t * - ompi_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority); - - /* API functions of decision functions and any implementations */ - - /* - * Note this gets long as we have to have a prototype for each - * MPI collective 4 times.. 2 for the comm type and 2 for each decision - * type. - * we might cut down the decision prototypes by conditional compiling - */ - - /* All Gather */ - int ompi_coll_tuned_allgather_intra_dec_fixed(ALLGATHER_ARGS); - int ompi_coll_tuned_allgather_intra_dec_dynamic(ALLGATHER_ARGS); - int ompi_coll_tuned_allgather_intra_do_forced(ALLGATHER_ARGS); - int ompi_coll_tuned_allgather_intra_do_this(ALLGATHER_ARGS, int algorithm, int faninout, int segsize); - int ompi_coll_tuned_allgather_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); - int ompi_coll_tuned_allgather_intra_bruck(ALLGATHER_ARGS); - int ompi_coll_tuned_allgather_intra_recursivedoubling(ALLGATHER_ARGS); - int ompi_coll_tuned_allgather_intra_ring(ALLGATHER_ARGS); - int ompi_coll_tuned_allgather_intra_neighborexchange(ALLGATHER_ARGS); - int ompi_coll_tuned_allgather_intra_basic_linear(ALLGATHER_ARGS); - int ompi_coll_tuned_allgather_intra_two_procs(ALLGATHER_ARGS); - int ompi_coll_tuned_allgather_inter_dec_fixed(ALLGATHER_ARGS); - int ompi_coll_tuned_allgather_inter_dec_dynamic(ALLGATHER_ARGS); - - /* All GatherV */ - int ompi_coll_tuned_allgatherv_intra_dec_fixed(ALLGATHERV_ARGS); - int ompi_coll_tuned_allgatherv_intra_dec_dynamic(ALLGATHERV_ARGS); - int ompi_coll_tuned_allgatherv_intra_do_forced(ALLGATHERV_ARGS); - int ompi_coll_tuned_allgatherv_intra_do_this(ALLGATHERV_ARGS, int algorithm, int faninout, int segsize); - int ompi_coll_tuned_allgatherv_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); - int ompi_coll_tuned_allgatherv_intra_bruck(ALLGATHERV_ARGS); - int ompi_coll_tuned_allgatherv_intra_ring(ALLGATHERV_ARGS); - int ompi_coll_tuned_allgatherv_intra_neighborexchange(ALLGATHERV_ARGS); - int ompi_coll_tuned_allgatherv_intra_basic_default(ALLGATHERV_ARGS); - int ompi_coll_tuned_allgatherv_intra_two_procs(ALLGATHERV_ARGS); - int ompi_coll_tuned_allgatherv_inter_dec_fixed(ALLGATHERV_ARGS); - int ompi_coll_tuned_allgatherv_inter_dec_dynamic(ALLGATHERV_ARGS); - - /* All 
Reduce */ - int ompi_coll_tuned_allreduce_intra_dec_fixed(ALLREDUCE_ARGS); - int ompi_coll_tuned_allreduce_intra_dec_dynamic(ALLREDUCE_ARGS); - int ompi_coll_tuned_allreduce_intra_do_forced(ALLREDUCE_ARGS); - int ompi_coll_tuned_allreduce_intra_do_this(ALLREDUCE_ARGS, int algorithm, int faninout, int segsize); - int ompi_coll_tuned_allreduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); - int ompi_coll_tuned_allreduce_intra_nonoverlapping(ALLREDUCE_ARGS); - int ompi_coll_tuned_allreduce_intra_recursivedoubling(ALLREDUCE_ARGS); - int ompi_coll_tuned_allreduce_intra_ring(ALLREDUCE_ARGS); - int ompi_coll_tuned_allreduce_intra_ring_segmented(ALLREDUCE_ARGS, uint32_t segsize); - int ompi_coll_tuned_allreduce_intra_basic_linear(ALLREDUCE_ARGS); - int ompi_coll_tuned_allreduce_inter_dec_fixed(ALLREDUCE_ARGS); - int ompi_coll_tuned_allreduce_inter_dec_dynamic(ALLREDUCE_ARGS); - - /* AlltoAll */ - int ompi_coll_tuned_alltoall_intra_dec_fixed(ALLTOALL_ARGS); - int ompi_coll_tuned_alltoall_intra_dec_dynamic(ALLTOALL_ARGS); - int ompi_coll_tuned_alltoall_intra_do_forced(ALLTOALL_ARGS); - int ompi_coll_tuned_alltoall_intra_do_this(ALLTOALL_ARGS, int algorithm, int faninout, int segsize, int max_requests); - int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); - int ompi_coll_tuned_alltoall_intra_pairwise(ALLTOALL_ARGS); - int ompi_coll_tuned_alltoall_intra_bruck(ALLTOALL_ARGS); - int ompi_coll_tuned_alltoall_intra_basic_linear(ALLTOALL_ARGS); - int ompi_coll_tuned_alltoall_intra_linear_sync(ALLTOALL_ARGS, int max_requests); - int ompi_coll_tuned_alltoall_intra_two_procs(ALLTOALL_ARGS); - int ompi_coll_tuned_alltoall_inter_dec_fixed(ALLTOALL_ARGS); - int ompi_coll_tuned_alltoall_inter_dec_dynamic(ALLTOALL_ARGS); - - /* AlltoAllV */ - int ompi_coll_tuned_alltoallv_intra_dec_fixed(ALLTOALLV_ARGS); - int ompi_coll_tuned_alltoallv_intra_dec_dynamic(ALLTOALLV_ARGS); - int ompi_coll_tuned_alltoallv_intra_do_forced(ALLTOALLV_ARGS); - int ompi_coll_tuned_alltoallv_intra_do_this(ALLTOALLV_ARGS, int algorithm); - int ompi_coll_tuned_alltoallv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); - int ompi_coll_tuned_alltoallv_intra_pairwise(ALLTOALLV_ARGS); - int ompi_coll_tuned_alltoallv_intra_basic_linear(ALLTOALLV_ARGS); - int ompi_coll_tuned_alltoallv_inter_dec_fixed(ALLTOALLV_ARGS); - int ompi_coll_tuned_alltoallv_inter_dec_dynamic(ALLTOALLV_ARGS); - - /* AlltoAllW */ - int ompi_coll_tuned_alltoallw_intra_dec_fixed(ALLTOALLW_ARGS); - int ompi_coll_tuned_alltoallw_intra_dec_dynamic(ALLTOALLW_ARGS); - int ompi_coll_tuned_alltoallw_inter_dec_fixed(ALLTOALLW_ARGS); - int ompi_coll_tuned_alltoallw_inter_dec_dynamic(ALLTOALLW_ARGS); - - /* Barrier */ - int ompi_coll_tuned_barrier_intra_dec_fixed(BARRIER_ARGS); - int ompi_coll_tuned_barrier_intra_dec_dynamic(BARRIER_ARGS); - int ompi_coll_tuned_barrier_intra_do_forced(BARRIER_ARGS); - int ompi_coll_tuned_barrier_intra_do_this(BARRIER_ARGS, int algorithm, int faninout, int segsize); - int ompi_coll_tuned_barrier_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); - int ompi_coll_tuned_barrier_inter_dec_fixed(BARRIER_ARGS); - int ompi_coll_tuned_barrier_inter_dec_dynamic(BARRIER_ARGS); - int ompi_coll_tuned_barrier_intra_doublering(BARRIER_ARGS); - int ompi_coll_tuned_barrier_intra_recursivedoubling(BARRIER_ARGS); - int ompi_coll_tuned_barrier_intra_bruck(BARRIER_ARGS); - 
int ompi_coll_tuned_barrier_intra_two_procs(BARRIER_ARGS); - int ompi_coll_tuned_barrier_intra_linear(BARRIER_ARGS); - int ompi_coll_tuned_barrier_intra_tree(BARRIER_ARGS); - - /* Bcast */ - int ompi_coll_tuned_bcast_intra_generic( BCAST_ARGS, uint32_t count_by_segment, ompi_coll_tree_t* tree ); - int ompi_coll_tuned_bcast_intra_dec_fixed(BCAST_ARGS); - int ompi_coll_tuned_bcast_intra_dec_dynamic(BCAST_ARGS); - int ompi_coll_tuned_bcast_intra_do_forced(BCAST_ARGS); - int ompi_coll_tuned_bcast_intra_do_this(BCAST_ARGS, int algorithm, int faninout, int segsize); - int ompi_coll_tuned_bcast_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); - int ompi_coll_tuned_bcast_intra_basic_linear(BCAST_ARGS); - int ompi_coll_tuned_bcast_intra_chain(BCAST_ARGS, uint32_t segsize, int32_t chains); - int ompi_coll_tuned_bcast_intra_pipeline(BCAST_ARGS, uint32_t segsize); - int ompi_coll_tuned_bcast_intra_binomial(BCAST_ARGS, uint32_t segsize); - int ompi_coll_tuned_bcast_intra_bintree(BCAST_ARGS, uint32_t segsize); - int ompi_coll_tuned_bcast_intra_split_bintree(BCAST_ARGS, uint32_t segsize); - int ompi_coll_tuned_bcast_inter_dec_fixed(BCAST_ARGS); - int ompi_coll_tuned_bcast_inter_dec_dynamic(BCAST_ARGS); - - /* Exscan */ - int ompi_coll_tuned_exscan_intra_dec_fixed(EXSCAN_ARGS); - int ompi_coll_tuned_exscan_intra_dec_dynamic(EXSCAN_ARGS); - int ompi_coll_tuned_exscan_inter_dec_fixed(EXSCAN_ARGS); - int ompi_coll_tuned_exscan_inter_dec_dynamic(EXSCAN_ARGS); - - /* Gather */ - int ompi_coll_tuned_gather_intra_dec_fixed(GATHER_ARGS); - int ompi_coll_tuned_gather_intra_dec_dynamic(GATHER_ARGS); - int ompi_coll_tuned_gather_intra_do_forced(GATHER_ARGS); - int ompi_coll_tuned_gather_intra_do_this(GATHER_ARGS, int algorithm, int faninout, int segsize); - int ompi_coll_tuned_gather_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); - int ompi_coll_tuned_gather_intra_basic_linear(GATHER_ARGS); - int ompi_coll_tuned_gather_intra_binomial(GATHER_ARGS); - int ompi_coll_tuned_gather_intra_linear_sync(GATHER_ARGS, int first_segment_size); - int ompi_coll_tuned_gather_inter_dec_fixed(GATHER_ARGS); - int ompi_coll_tuned_gather_inter_dec_dynamic(GATHER_ARGS); - - /* GatherV */ - int ompi_coll_tuned_gatherv_intra_dec_fixed(GATHERV_ARGS); - int ompi_coll_tuned_gatherv_intra_dec_dynamic(GATHER_ARGS); - int ompi_coll_tuned_gatherv_inter_dec_fixed(GATHER_ARGS); - int ompi_coll_tuned_gatherv_inter_dec_dynamic(GATHER_ARGS); - - /* Reduce */ - int ompi_coll_tuned_reduce_generic( REDUCE_ARGS, ompi_coll_tree_t* tree, int count_by_segment, int max_outstanding_reqs ); - int ompi_coll_tuned_reduce_intra_dec_fixed(REDUCE_ARGS); - int ompi_coll_tuned_reduce_intra_dec_dynamic(REDUCE_ARGS); - int ompi_coll_tuned_reduce_intra_do_forced(REDUCE_ARGS); - int ompi_coll_tuned_reduce_intra_do_this(REDUCE_ARGS, int algorithm, int faninout, int segsize, int max_oustanding_reqs); - int ompi_coll_tuned_reduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); - int ompi_coll_tuned_reduce_intra_basic_linear(REDUCE_ARGS); - int ompi_coll_tuned_reduce_intra_chain(REDUCE_ARGS, uint32_t segsize, int fanout, int max_outstanding_reqs ); - int ompi_coll_tuned_reduce_intra_pipeline(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs ); - int ompi_coll_tuned_reduce_intra_binary(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs ); - int ompi_coll_tuned_reduce_intra_binomial(REDUCE_ARGS, uint32_t segsize, int 
max_outstanding_reqs ); - int ompi_coll_tuned_reduce_intra_in_order_binary(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs ); - int ompi_coll_tuned_reduce_inter_dec_fixed(REDUCE_ARGS); - int ompi_coll_tuned_reduce_inter_dec_dynamic(REDUCE_ARGS); - - /* Reduce_scatter */ - int ompi_coll_tuned_reduce_scatter_intra_dec_fixed(REDUCESCATTER_ARGS); - int ompi_coll_tuned_reduce_scatter_intra_dec_dynamic(REDUCESCATTER_ARGS); - int ompi_coll_tuned_reduce_scatter_intra_do_forced(REDUCESCATTER_ARGS); - int ompi_coll_tuned_reduce_scatter_intra_do_this(REDUCESCATTER_ARGS, int algorithm, int faninout, int segsize); - int ompi_coll_tuned_reduce_scatter_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); - int ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(REDUCESCATTER_ARGS); - int ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(REDUCESCATTER_ARGS); - int ompi_coll_tuned_reduce_scatter_intra_ring(REDUCESCATTER_ARGS); - - int ompi_coll_tuned_reduce_scatter_inter_dec_fixed(REDUCESCATTER_ARGS); - int ompi_coll_tuned_reduce_scatter_inter_dec_dynamic(REDUCESCATTER_ARGS); - - /* Scan */ - int ompi_coll_tuned_scan_intra_dec_fixed(SCAN_ARGS); - int ompi_coll_tuned_scan_intra_dec_dynamic(SCAN_ARGS); - int ompi_coll_tuned_scan_inter_dec_fixed(SCAN_ARGS); - int ompi_coll_tuned_scan_inter_dec_dynamic(SCAN_ARGS); - - /* Scatter */ - int ompi_coll_tuned_scatter_intra_dec_fixed(SCATTER_ARGS); - int ompi_coll_tuned_scatter_intra_dec_dynamic(SCATTER_ARGS); - int ompi_coll_tuned_scatter_intra_do_forced(SCATTER_ARGS); - int ompi_coll_tuned_scatter_intra_do_this(SCATTER_ARGS, int algorithm, int faninout, int segsize); - int ompi_coll_tuned_scatter_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); - int ompi_coll_tuned_scatter_intra_basic_linear(SCATTER_ARGS); - int ompi_coll_tuned_scatter_intra_binomial(SCATTER_ARGS); - int ompi_coll_tuned_scatter_inter_dec_fixed(SCATTER_ARGS); - int ompi_coll_tuned_scatter_inter_dec_dynamic(SCATTER_ARGS); - - /* ScatterV */ - int ompi_coll_tuned_scatterv_intra_dec_fixed(SCATTERV_ARGS); - int ompi_coll_tuned_scatterv_intra_dec_dynamic(SCATTERV_ARGS); - int ompi_coll_tuned_scatterv_inter_dec_fixed(SCATTERV_ARGS); - int ompi_coll_tuned_scatterv_inter_dec_dynamic(SCATTERV_ARGS); - - int mca_coll_tuned_ft_event(int state); +/* forced algorithm choices */ +/* this structure is for storing the indexes to the forced algorithm mca params... */ +/* we get these at component query (so that registered values appear in ompi_infoi) */ +struct coll_tuned_force_algorithm_mca_param_indices_t { + int algorithm_param_index; /* which algorithm you want to force */ + int segsize_param_index; /* segsize to use (if supported), 0 = no segmentation */ + int tree_fanout_param_index; /* tree fanout/in to use */ + int chain_fanout_param_index; /* K-chain fanout/in to use */ + int max_requests_param_index; /* Maximum number of outstanding send or recv requests */ +}; +typedef struct coll_tuned_force_algorithm_mca_param_indices_t coll_tuned_force_algorithm_mca_param_indices_t; - /* Utility functions */ +/* the following type is for storing actual value obtained from the MCA on each tuned module */ +/* via their mca param indices lookup in the component */ +/* this structure is stored once per collective type per communicator... 
*/ +struct coll_tuned_force_algorithm_params_t { + int algorithm; /* which algorithm you want to force */ + int segsize; /* segsize to use (if supported), 0 = no segmentation */ + int tree_fanout; /* tree fanout/in to use */ + int chain_fanout; /* K-chain fanout/in to use */ + int max_requests; /* Maximum number of outstanding send or recv requests */ +}; +typedef struct coll_tuned_force_algorithm_params_t coll_tuned_force_algorithm_params_t; - static inline void ompi_coll_tuned_free_reqs(ompi_request_t **reqs, int count) - { +/* the indices to the MCA params so that modules can look them up at open / comm create time */ +extern coll_tuned_force_algorithm_mca_param_indices_t ompi_coll_tuned_forced_params[COLLCOUNT]; +/* the actual max algorithm values (readonly), loaded at component open */ +extern int ompi_coll_tuned_forced_max_algorithms[COLLCOUNT]; + +/* + * coll API functions + */ + +/* API functions */ + +int ompi_coll_tuned_init_query(bool enable_progress_threads, + bool enable_mpi_threads); + +mca_coll_base_module_t * +ompi_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority); + +/* API functions of decision functions and any implementations */ + +/* + * Note this gets long as we have to have a prototype for each + * MPI collective 4 times.. 2 for the comm type and 2 for each decision + * type. + * we might cut down the decision prototypes by conditional compiling + */ + +/* All Gather */ +int ompi_coll_tuned_allgather_intra_dec_fixed(ALLGATHER_ARGS); +int ompi_coll_tuned_allgather_intra_dec_dynamic(ALLGATHER_ARGS); +int ompi_coll_tuned_allgather_intra_do_forced(ALLGATHER_ARGS); +int ompi_coll_tuned_allgather_intra_do_this(ALLGATHER_ARGS, int algorithm, int faninout, int segsize); +int ompi_coll_tuned_allgather_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); +int ompi_coll_tuned_allgather_intra_bruck(ALLGATHER_ARGS); +int ompi_coll_tuned_allgather_intra_recursivedoubling(ALLGATHER_ARGS); +int ompi_coll_tuned_allgather_intra_ring(ALLGATHER_ARGS); +int ompi_coll_tuned_allgather_intra_neighborexchange(ALLGATHER_ARGS); +int ompi_coll_tuned_allgather_intra_basic_linear(ALLGATHER_ARGS); +int ompi_coll_tuned_allgather_intra_two_procs(ALLGATHER_ARGS); +int ompi_coll_tuned_allgather_inter_dec_fixed(ALLGATHER_ARGS); +int ompi_coll_tuned_allgather_inter_dec_dynamic(ALLGATHER_ARGS); + +/* All GatherV */ +int ompi_coll_tuned_allgatherv_intra_dec_fixed(ALLGATHERV_ARGS); +int ompi_coll_tuned_allgatherv_intra_dec_dynamic(ALLGATHERV_ARGS); +int ompi_coll_tuned_allgatherv_intra_do_forced(ALLGATHERV_ARGS); +int ompi_coll_tuned_allgatherv_intra_do_this(ALLGATHERV_ARGS, int algorithm, int faninout, int segsize); +int ompi_coll_tuned_allgatherv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); +int ompi_coll_tuned_allgatherv_intra_bruck(ALLGATHERV_ARGS); +int ompi_coll_tuned_allgatherv_intra_ring(ALLGATHERV_ARGS); +int ompi_coll_tuned_allgatherv_intra_neighborexchange(ALLGATHERV_ARGS); +int ompi_coll_tuned_allgatherv_intra_basic_default(ALLGATHERV_ARGS); +int ompi_coll_tuned_allgatherv_intra_two_procs(ALLGATHERV_ARGS); +int ompi_coll_tuned_allgatherv_inter_dec_fixed(ALLGATHERV_ARGS); +int ompi_coll_tuned_allgatherv_inter_dec_dynamic(ALLGATHERV_ARGS); + +/* All Reduce */ +int ompi_coll_tuned_allreduce_intra_dec_fixed(ALLREDUCE_ARGS); +int ompi_coll_tuned_allreduce_intra_dec_dynamic(ALLREDUCE_ARGS); +int ompi_coll_tuned_allreduce_intra_do_forced(ALLREDUCE_ARGS); +int 
ompi_coll_tuned_allreduce_intra_do_this(ALLREDUCE_ARGS, int algorithm, int faninout, int segsize); +int ompi_coll_tuned_allreduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); +int ompi_coll_tuned_allreduce_intra_nonoverlapping(ALLREDUCE_ARGS); +int ompi_coll_tuned_allreduce_intra_recursivedoubling(ALLREDUCE_ARGS); +int ompi_coll_tuned_allreduce_intra_ring(ALLREDUCE_ARGS); +int ompi_coll_tuned_allreduce_intra_ring_segmented(ALLREDUCE_ARGS, uint32_t segsize); +int ompi_coll_tuned_allreduce_intra_basic_linear(ALLREDUCE_ARGS); +int ompi_coll_tuned_allreduce_inter_dec_fixed(ALLREDUCE_ARGS); +int ompi_coll_tuned_allreduce_inter_dec_dynamic(ALLREDUCE_ARGS); + +/* AlltoAll */ +int ompi_coll_tuned_alltoall_intra_dec_fixed(ALLTOALL_ARGS); +int ompi_coll_tuned_alltoall_intra_dec_dynamic(ALLTOALL_ARGS); +int ompi_coll_tuned_alltoall_intra_do_forced(ALLTOALL_ARGS); +int ompi_coll_tuned_alltoall_intra_do_this(ALLTOALL_ARGS, int algorithm, int faninout, int segsize, int max_requests); +int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); +int ompi_coll_tuned_alltoall_intra_pairwise(ALLTOALL_ARGS); +int ompi_coll_tuned_alltoall_intra_bruck(ALLTOALL_ARGS); +int ompi_coll_tuned_alltoall_intra_basic_linear(ALLTOALL_ARGS); +int ompi_coll_tuned_alltoall_intra_linear_sync(ALLTOALL_ARGS, int max_requests); +int ompi_coll_tuned_alltoall_intra_two_procs(ALLTOALL_ARGS); +int ompi_coll_tuned_alltoall_inter_dec_fixed(ALLTOALL_ARGS); +int ompi_coll_tuned_alltoall_inter_dec_dynamic(ALLTOALL_ARGS); + +/* AlltoAllV */ +int ompi_coll_tuned_alltoallv_intra_dec_fixed(ALLTOALLV_ARGS); +int ompi_coll_tuned_alltoallv_intra_dec_dynamic(ALLTOALLV_ARGS); +int ompi_coll_tuned_alltoallv_intra_do_forced(ALLTOALLV_ARGS); +int ompi_coll_tuned_alltoallv_intra_do_this(ALLTOALLV_ARGS, int algorithm); +int ompi_coll_tuned_alltoallv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); +int ompi_coll_tuned_alltoallv_intra_pairwise(ALLTOALLV_ARGS); +int ompi_coll_tuned_alltoallv_intra_basic_linear(ALLTOALLV_ARGS); +int ompi_coll_tuned_alltoallv_inter_dec_fixed(ALLTOALLV_ARGS); +int ompi_coll_tuned_alltoallv_inter_dec_dynamic(ALLTOALLV_ARGS); + +/* AlltoAllW */ +int ompi_coll_tuned_alltoallw_intra_dec_fixed(ALLTOALLW_ARGS); +int ompi_coll_tuned_alltoallw_intra_dec_dynamic(ALLTOALLW_ARGS); +int ompi_coll_tuned_alltoallw_inter_dec_fixed(ALLTOALLW_ARGS); +int ompi_coll_tuned_alltoallw_inter_dec_dynamic(ALLTOALLW_ARGS); + +/* Barrier */ +int ompi_coll_tuned_barrier_intra_dec_fixed(BARRIER_ARGS); +int ompi_coll_tuned_barrier_intra_dec_dynamic(BARRIER_ARGS); +int ompi_coll_tuned_barrier_intra_do_forced(BARRIER_ARGS); +int ompi_coll_tuned_barrier_intra_do_this(BARRIER_ARGS, int algorithm, int faninout, int segsize); +int ompi_coll_tuned_barrier_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); +int ompi_coll_tuned_barrier_inter_dec_fixed(BARRIER_ARGS); +int ompi_coll_tuned_barrier_inter_dec_dynamic(BARRIER_ARGS); +int ompi_coll_tuned_barrier_intra_doublering(BARRIER_ARGS); +int ompi_coll_tuned_barrier_intra_recursivedoubling(BARRIER_ARGS); +int ompi_coll_tuned_barrier_intra_bruck(BARRIER_ARGS); +int ompi_coll_tuned_barrier_intra_two_procs(BARRIER_ARGS); +int ompi_coll_tuned_barrier_intra_linear(BARRIER_ARGS); +int ompi_coll_tuned_barrier_intra_tree(BARRIER_ARGS); + +/* Bcast */ +int ompi_coll_tuned_bcast_intra_generic( BCAST_ARGS, uint32_t 
count_by_segment, ompi_coll_tree_t* tree ); +int ompi_coll_tuned_bcast_intra_dec_fixed(BCAST_ARGS); +int ompi_coll_tuned_bcast_intra_dec_dynamic(BCAST_ARGS); +int ompi_coll_tuned_bcast_intra_do_forced(BCAST_ARGS); +int ompi_coll_tuned_bcast_intra_do_this(BCAST_ARGS, int algorithm, int faninout, int segsize); +int ompi_coll_tuned_bcast_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); +int ompi_coll_tuned_bcast_intra_basic_linear(BCAST_ARGS); +int ompi_coll_tuned_bcast_intra_chain(BCAST_ARGS, uint32_t segsize, int32_t chains); +int ompi_coll_tuned_bcast_intra_pipeline(BCAST_ARGS, uint32_t segsize); +int ompi_coll_tuned_bcast_intra_binomial(BCAST_ARGS, uint32_t segsize); +int ompi_coll_tuned_bcast_intra_bintree(BCAST_ARGS, uint32_t segsize); +int ompi_coll_tuned_bcast_intra_split_bintree(BCAST_ARGS, uint32_t segsize); +int ompi_coll_tuned_bcast_inter_dec_fixed(BCAST_ARGS); +int ompi_coll_tuned_bcast_inter_dec_dynamic(BCAST_ARGS); + +/* Exscan */ +int ompi_coll_tuned_exscan_intra_dec_fixed(EXSCAN_ARGS); +int ompi_coll_tuned_exscan_intra_dec_dynamic(EXSCAN_ARGS); +int ompi_coll_tuned_exscan_inter_dec_fixed(EXSCAN_ARGS); +int ompi_coll_tuned_exscan_inter_dec_dynamic(EXSCAN_ARGS); + +/* Gather */ +int ompi_coll_tuned_gather_intra_dec_fixed(GATHER_ARGS); +int ompi_coll_tuned_gather_intra_dec_dynamic(GATHER_ARGS); +int ompi_coll_tuned_gather_intra_do_forced(GATHER_ARGS); +int ompi_coll_tuned_gather_intra_do_this(GATHER_ARGS, int algorithm, int faninout, int segsize); +int ompi_coll_tuned_gather_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); +int ompi_coll_tuned_gather_intra_basic_linear(GATHER_ARGS); +int ompi_coll_tuned_gather_intra_binomial(GATHER_ARGS); +int ompi_coll_tuned_gather_intra_linear_sync(GATHER_ARGS, int first_segment_size); +int ompi_coll_tuned_gather_inter_dec_fixed(GATHER_ARGS); +int ompi_coll_tuned_gather_inter_dec_dynamic(GATHER_ARGS); + +/* GatherV */ +int ompi_coll_tuned_gatherv_intra_dec_fixed(GATHERV_ARGS); +int ompi_coll_tuned_gatherv_intra_dec_dynamic(GATHER_ARGS); +int ompi_coll_tuned_gatherv_inter_dec_fixed(GATHER_ARGS); +int ompi_coll_tuned_gatherv_inter_dec_dynamic(GATHER_ARGS); + +/* Reduce */ +int ompi_coll_tuned_reduce_generic( REDUCE_ARGS, ompi_coll_tree_t* tree, int count_by_segment, int max_outstanding_reqs ); +int ompi_coll_tuned_reduce_intra_dec_fixed(REDUCE_ARGS); +int ompi_coll_tuned_reduce_intra_dec_dynamic(REDUCE_ARGS); +int ompi_coll_tuned_reduce_intra_do_forced(REDUCE_ARGS); +int ompi_coll_tuned_reduce_intra_do_this(REDUCE_ARGS, int algorithm, int faninout, int segsize, int max_oustanding_reqs); +int ompi_coll_tuned_reduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); +int ompi_coll_tuned_reduce_intra_basic_linear(REDUCE_ARGS); +int ompi_coll_tuned_reduce_intra_chain(REDUCE_ARGS, uint32_t segsize, int fanout, int max_outstanding_reqs ); +int ompi_coll_tuned_reduce_intra_pipeline(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs ); +int ompi_coll_tuned_reduce_intra_binary(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs ); +int ompi_coll_tuned_reduce_intra_binomial(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs ); +int ompi_coll_tuned_reduce_intra_in_order_binary(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs ); +int ompi_coll_tuned_reduce_inter_dec_fixed(REDUCE_ARGS); +int ompi_coll_tuned_reduce_inter_dec_dynamic(REDUCE_ARGS); + +/* Reduce_scatter */ +int 
ompi_coll_tuned_reduce_scatter_intra_dec_fixed(REDUCESCATTER_ARGS); +int ompi_coll_tuned_reduce_scatter_intra_dec_dynamic(REDUCESCATTER_ARGS); +int ompi_coll_tuned_reduce_scatter_intra_do_forced(REDUCESCATTER_ARGS); +int ompi_coll_tuned_reduce_scatter_intra_do_this(REDUCESCATTER_ARGS, int algorithm, int faninout, int segsize); +int ompi_coll_tuned_reduce_scatter_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); +int ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(REDUCESCATTER_ARGS); +int ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(REDUCESCATTER_ARGS); +int ompi_coll_tuned_reduce_scatter_intra_ring(REDUCESCATTER_ARGS); + +int ompi_coll_tuned_reduce_scatter_inter_dec_fixed(REDUCESCATTER_ARGS); +int ompi_coll_tuned_reduce_scatter_inter_dec_dynamic(REDUCESCATTER_ARGS); + +/* Scan */ +int ompi_coll_tuned_scan_intra_dec_fixed(SCAN_ARGS); +int ompi_coll_tuned_scan_intra_dec_dynamic(SCAN_ARGS); +int ompi_coll_tuned_scan_inter_dec_fixed(SCAN_ARGS); +int ompi_coll_tuned_scan_inter_dec_dynamic(SCAN_ARGS); + +/* Scatter */ +int ompi_coll_tuned_scatter_intra_dec_fixed(SCATTER_ARGS); +int ompi_coll_tuned_scatter_intra_dec_dynamic(SCATTER_ARGS); +int ompi_coll_tuned_scatter_intra_do_forced(SCATTER_ARGS); +int ompi_coll_tuned_scatter_intra_do_this(SCATTER_ARGS, int algorithm, int faninout, int segsize); +int ompi_coll_tuned_scatter_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); +int ompi_coll_tuned_scatter_intra_basic_linear(SCATTER_ARGS); +int ompi_coll_tuned_scatter_intra_binomial(SCATTER_ARGS); +int ompi_coll_tuned_scatter_inter_dec_fixed(SCATTER_ARGS); +int ompi_coll_tuned_scatter_inter_dec_dynamic(SCATTER_ARGS); + +/* ScatterV */ +int ompi_coll_tuned_scatterv_intra_dec_fixed(SCATTERV_ARGS); +int ompi_coll_tuned_scatterv_intra_dec_dynamic(SCATTERV_ARGS); +int ompi_coll_tuned_scatterv_inter_dec_fixed(SCATTERV_ARGS); +int ompi_coll_tuned_scatterv_inter_dec_dynamic(SCATTERV_ARGS); + +int mca_coll_tuned_ft_event(int state); + + +/* Utility functions */ + +static inline void ompi_coll_tuned_free_reqs(ompi_request_t **reqs, int count) +{ int i; for (i = 0; i < count; ++i) ompi_request_free(&reqs[i]); - } +} - struct mca_coll_tuned_component_t { +struct mca_coll_tuned_component_t { /** Base coll component */ mca_coll_base_component_2_0_0_t super; - + /** MCA parameter: Priority of this component */ int tuned_priority; - + /** global stuff that I need the component to store */ - + /* MCA parameters first */ - + /* cached decision table stuff (moved from MCW module) */ - ompi_coll_alg_rule_t *all_base_rules; - }; - /** - * Convenience typedef - */ - typedef struct mca_coll_tuned_component_t mca_coll_tuned_component_t; + ompi_coll_alg_rule_t *all_base_rules; +}; +/** + * Convenience typedef + */ +typedef struct mca_coll_tuned_component_t mca_coll_tuned_component_t; - /** - * Global component instance - */ - OMPI_MODULE_DECLSPEC extern mca_coll_tuned_component_t mca_coll_tuned_component; +/** + * Global component instance + */ +OMPI_MODULE_DECLSPEC extern mca_coll_tuned_component_t mca_coll_tuned_component; - /* - * Data structure for hanging data off the communicator - * i.e. per module instance - */ - struct mca_coll_tuned_comm_t { +/* + * Data structure for hanging data off the communicator + * i.e. 
per module instance + */ +struct mca_coll_tuned_comm_t { /* standard data for requests and PML usage */ - + /* Precreate space for requests * Note this does not effect basic, * but if in wrong context can confuse a debugger * this is controlled by an MCA param */ - + ompi_request_t **mcct_reqs; int mcct_num_reqs; - + /* * tuned topo information caching per communicator * * for each communicator we cache the topo information so we can * reuse without regenerating if we change the root, [or fanout] * then regenerate and recache this information - * */ - + /* general tree with n fan out */ ompi_coll_tree_t *cached_ntree; int cached_ntree_root; int cached_ntree_fanout; - + /* binary tree */ ompi_coll_tree_t *cached_bintree; int cached_bintree_root; - + /* binomial tree */ ompi_coll_tree_t *cached_bmtree; int cached_bmtree_root; - + /* binomial tree */ ompi_coll_tree_t *cached_in_order_bmtree; int cached_in_order_bmtree_root; - + /* chained tree (fanout followed by pipelines) */ ompi_coll_tree_t *cached_chain; int cached_chain_root; int cached_chain_fanout; - + /* pipeline */ ompi_coll_tree_t *cached_pipeline; int cached_pipeline_root; - + /* in-order binary tree (root of the in-order binary tree is rank 0) */ ompi_coll_tree_t *cached_in_order_bintree; - /* extra data required by the decision functions */ - ompi_coll_alg_rule_t *all_base_rules; /* stored only on MCW, all other coms ref it */ /* moving to the component */ ompi_coll_com_rule_t *com_rules[COLLCOUNT]; /* the communicator rules for each MPI collective for ONLY my comsize */ /* for forced algorithms we store the information on the module */ /* previously we only had one shared copy, ops, it really is per comm/module */ coll_tuned_force_algorithm_params_t user_forced[COLLCOUNT]; - }; - /** - * Convenience typedef - */ - typedef struct mca_coll_tuned_comm_t mca_coll_tuned_comm_t; +}; +typedef struct mca_coll_tuned_comm_t mca_coll_tuned_comm_t; - struct mca_coll_tuned_module_t { +struct mca_coll_tuned_module_t { mca_coll_base_module_t super; - + mca_coll_tuned_comm_t *tuned_data; - }; - typedef struct mca_coll_tuned_module_t mca_coll_tuned_module_t; - OBJ_CLASS_DECLARATION(mca_coll_tuned_module_t); +}; +typedef struct mca_coll_tuned_module_t mca_coll_tuned_module_t; +OBJ_CLASS_DECLARATION(mca_coll_tuned_module_t); -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS #define COLL_TUNED_UPDATE_BINTREE( OMPI_COMM, TUNED_MODULE, ROOT ) \ do { \ diff --git a/ompi/mca/coll/tuned/coll_tuned_component.c b/ompi/mca/coll/tuned/coll_tuned_component.c index 521a0f67a3..e38c194bd0 100644 --- a/ompi/mca/coll/tuned/coll_tuned_component.c +++ b/ompi/mca/coll/tuned/coll_tuned_component.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2009 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
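In the reworked coll_tuned.h above, the forced-algorithm structures previously kept in coll_tuned_forced.h now sit next to the COLLTYPE enum: the component stores MCA parameter indices per collective (ompi_coll_tuned_forced_params[COLLCOUNT]) and each communicator's module stores the resolved values in user_forced[COLLCOUNT]. A minimal standalone sketch of that indexing follows, using stub values in place of the real MCA parameter lookups (the forced algorithm number and segment size are made up for illustration):

#include <stdio.h>

/* Subset of the COLLTYPE enum from coll_tuned.h, keeping the same indices. */
typedef enum COLLTYPE { ALLGATHER = 0, ALLREDUCE = 2, BCAST = 7, COLLCOUNT = 16 } COLLTYPE_T;

/* Same layout as coll_tuned_force_algorithm_params_t. */
struct forced_params {
    int algorithm;     /* 0 means "nothing forced" */
    int segsize;
    int tree_fanout;
    int chain_fanout;
    int max_requests;
};

int main(void)
{
    /* One entry per collective, as in mca_coll_tuned_comm_t::user_forced[]. */
    struct forced_params user_forced[COLLCOUNT] = {{0}};

    /* Stand-in for what the module does at communicator creation: resolve the
     * component-level MCA parameter indices into concrete values (hard-coded here). */
    user_forced[BCAST].algorithm = 3;      /* illustrative value only */
    user_forced[BCAST].segsize   = 1024;

    /* The check every *_intra_dec_dynamic() performs before falling back. */
    if (user_forced[BCAST].algorithm) {
        printf("bcast: forced algorithm %d, segsize %d\n",
               user_forced[BCAST].algorithm, user_forced[BCAST].segsize);
    } else {
        printf("bcast: use the fixed decision\n");
    }
    return 0;
}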
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -31,8 +31,7 @@ #include "mpi.h" #include "ompi/mca/coll/coll.h" #include "coll_tuned.h" - - +#include "coll_tuned_dynamic_file.h" /* * Public string showing the coll ompi_tuned component version number @@ -58,7 +57,6 @@ coll_tuned_force_algorithm_mca_param_indices_t ompi_coll_tuned_forced_params[COL /* max algorithm values */ int ompi_coll_tuned_forced_max_algorithms[COLLCOUNT]; - /* * Local function */ @@ -71,14 +69,10 @@ static int tuned_close(void); */ mca_coll_tuned_component_t mca_coll_tuned_component = { - /* First, fill in the super */ - { - /* First, the mca_component_t struct containing meta information about the component itself */ - { MCA_COLL_BASE_VERSION_2_0_0, @@ -107,13 +101,14 @@ mca_coll_tuned_component_t mca_coll_tuned_component = { 0, /* Tuned component specific information */ - /* Note some of this WAS in the module */ NULL /* ompi_coll_alg_rule_t ptr */ }; static int tuned_open(void) { + int rc; + #if OPAL_ENABLE_DEBUG { int param; @@ -177,6 +172,18 @@ static int tuned_open(void) "Filename of configuration file that contains the dynamic (@runtime) decision function rules", false, false, ompi_coll_tuned_dynamic_rules_filename, &ompi_coll_tuned_dynamic_rules_filename); + if( ompi_coll_tuned_dynamic_rules_filename ) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:component_open Reading collective rules file [%s]", + ompi_coll_tuned_dynamic_rules_filename)); + rc = ompi_coll_tuned_read_rules_config_file( ompi_coll_tuned_dynamic_rules_filename, + &(mca_coll_tuned_component.all_base_rules), COLLCOUNT); + if( rc >= 0 ) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_open Read %d valid rules\n", rc)); + } else { + OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_open Reading collective rules file failed\n")); + mca_coll_tuned_component.all_base_rules = NULL; + } + } ompi_coll_tuned_allreduce_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLREDUCE]); ompi_coll_tuned_alltoall_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLTOALL]); ompi_coll_tuned_allgather_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLGATHER]); @@ -206,6 +213,11 @@ static int tuned_close(void) OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:component_close: done!")); + if( NULL != mca_coll_tuned_component.all_base_rules ) { + ompi_coll_tuned_free_all_rules(mca_coll_tuned_component.all_base_rules, COLLCOUNT); + mca_coll_tuned_component.all_base_rules = NULL; + } + return OMPI_SUCCESS; } @@ -227,45 +239,36 @@ mca_coll_tuned_module_destruct(mca_coll_tuned_module_t *module) data = module->tuned_data; if (NULL != data) { #if OPAL_ENABLE_DEBUG - /* Reset the reqs to NULL/0 -- they'll be freed as part of freeing - the generel c_coll_selected_data */ - data->mcct_reqs = NULL; - data->mcct_num_reqs = 0; + /* Reset the reqs to NULL/0 -- they'll be freed as part of freeing + the generel c_coll_selected_data */ + data->mcct_reqs = NULL; + data->mcct_num_reqs = 0; #endif - /* free any cached information that has been allocated */ - if (data->cached_ntree) { /* destroy general tree if defined */ - ompi_coll_tuned_topo_destroy_tree (&data->cached_ntree); - } - if (data->cached_bintree) { /* destroy bintree if defined */ - ompi_coll_tuned_topo_destroy_tree (&data->cached_bintree); - } - if (data->cached_bmtree) { /* destroy bmtree if defined */ - ompi_coll_tuned_topo_destroy_tree (&data->cached_bmtree); - } - if (data->cached_in_order_bmtree) { /* destroy bmtree if defined */ - 
ompi_coll_tuned_topo_destroy_tree (&data->cached_in_order_bmtree); - } - if (data->cached_chain) { /* destroy general chain if defined */ - ompi_coll_tuned_topo_destroy_tree (&data->cached_chain); - } - if (data->cached_pipeline) { /* destroy pipeline if defined */ - ompi_coll_tuned_topo_destroy_tree (&data->cached_pipeline); - } - if (data->cached_in_order_bintree) { /* destroy in order bintree if defined */ - ompi_coll_tuned_topo_destroy_tree (&data->cached_in_order_bintree); - } + /* free any cached information that has been allocated */ + if (data->cached_ntree) { /* destroy general tree if defined */ + ompi_coll_tuned_topo_destroy_tree (&data->cached_ntree); + } + if (data->cached_bintree) { /* destroy bintree if defined */ + ompi_coll_tuned_topo_destroy_tree (&data->cached_bintree); + } + if (data->cached_bmtree) { /* destroy bmtree if defined */ + ompi_coll_tuned_topo_destroy_tree (&data->cached_bmtree); + } + if (data->cached_in_order_bmtree) { /* destroy bmtree if defined */ + ompi_coll_tuned_topo_destroy_tree (&data->cached_in_order_bmtree); + } + if (data->cached_chain) { /* destroy general chain if defined */ + ompi_coll_tuned_topo_destroy_tree (&data->cached_chain); + } + if (data->cached_pipeline) { /* destroy pipeline if defined */ + ompi_coll_tuned_topo_destroy_tree (&data->cached_pipeline); + } + if (data->cached_in_order_bintree) { /* destroy in order bintree if defined */ + ompi_coll_tuned_topo_destroy_tree (&data->cached_in_order_bintree); + } -#if 0 /* FIXME: */ - /* if any algorithm rules are cached on the communicator, only free them if its MCW */ - /* as this is the only place they are allocated by reading the decision configure file */ - if ((ompi_coll_tuned_use_dynamic_rules)&&(&ompi_mpi_comm_world==comm)) { - if (comm->data->all_base_rules) { - ompi_coll_tuned_free_all_rules (comm->data->all_base_rules, COLLCOUNT); - } - } -#endif - free(data); + free(data); } } diff --git a/ompi/mca/coll/tuned/coll_tuned_decision_dynamic.c b/ompi/mca/coll/tuned/coll_tuned_decision_dynamic.c index 29e457251b..129f7530aa 100644 --- a/ompi/mca/coll/tuned/coll_tuned_decision_dynamic.c +++ b/ompi/mca/coll/tuned/coll_tuned_decision_dynamic.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2009 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
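Every *_intra_dec_dynamic() in coll_tuned_decision_dynamic.c below follows the same three-step cascade: consult the per-communicator file-based rules (populated when the component read a rules file at open time), then any user-forced algorithm, and only then fall back to the corresponding *_dec_fixed() routine. A standalone mock of that control flow, with stand-in types and a made-up rule lookup in place of ompi_coll_tuned_get_target_method_params():

#include <stddef.h>
#include <stdio.h>

/* Stand-ins for ompi_coll_com_rule_t and the per-communicator tuned data. */
struct com_rule  { int unused; };
struct comm_data {
    struct com_rule *rules;        /* data->com_rules[COLL], NULL if no file rules   */
    int forced_algorithm;          /* data->user_forced[COLL].algorithm, 0 if unset  */
};

/* Made-up rule lookup: returns an algorithm id, or 0 when no rule matches. */
static int lookup_rule(const struct com_rule *rules, size_t msg_size)
{
    (void)rules;
    return (msg_size < 4096) ? 2 : 0;
}

static const char *decide(const struct comm_data *data, size_t msg_size)
{
    if (data->rules) {                      /* 1. file-based rules               */
        int alg = lookup_rule(data->rules, msg_size);
        if (alg)
            return "do_this(<algorithm from rules file>)";
    }
    if (data->forced_algorithm)             /* 2. user-forced MCA parameter      */
        return "do_forced()";
    return "dec_fixed()";                   /* 3. built-in fixed decision        */
}

int main(void)
{
    struct com_rule  rule       = { 0 };
    struct comm_data with_rules = { &rule, 0 };
    struct comm_data forced     = { NULL, 5 };
    struct comm_data neither    = { NULL, 0 };

    printf("%s\n%s\n%s\n",
           decide(&with_rules, 1024), decide(&forced, 1024), decide(&neither, 1024));
    return 0;
}

For alltoallv the same cascade applies, except that the message size passed to the rule lookup is 0 (no global knowledge of the data volume), so the first rule registered for the communicator size wins.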
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -56,7 +56,7 @@ ompi_coll_tuned_allreduce_intra_dec_dynamic (void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; mca_coll_tuned_comm_t *data = tuned_module->tuned_data; @@ -76,19 +76,19 @@ ompi_coll_tuned_allreduce_intra_dec_dynamic (void *sbuf, void *rbuf, int count, dsize, &faninout, &segsize, &ignoreme); if (alg) { - /* we have found a valid choice from the file based rules for this message size */ + /* we have found a valid choice from the file based rules for this message size */ return ompi_coll_tuned_allreduce_intra_do_this (sbuf, rbuf, count, dtype, op, - comm, module, + comm, module, alg, faninout, segsize); } /* found a method */ } /*end if any com rules to check */ if (data->user_forced[ALLREDUCE].algorithm) { return ompi_coll_tuned_allreduce_intra_do_forced (sbuf, rbuf, count, dtype, op, - comm, module); + comm, module); } return ompi_coll_tuned_allreduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, - comm, module); + comm, module); } /* @@ -104,7 +104,7 @@ int ompi_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; mca_coll_tuned_comm_t *data = tuned_module->tuned_data; @@ -126,23 +126,22 @@ int ompi_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount, dsize, &faninout, &segsize, &max_requests); if (alg) { - /* we have found a valid choice from the file based rules for this message size */ + /* we have found a valid choice from the file based rules for this message size */ return ompi_coll_tuned_alltoall_intra_do_this (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module, + rbuf, rcount, rdtype, + comm, module, alg, faninout, segsize, max_requests); } /* found a method */ } /*end if any com rules to check */ - if (data->user_forced[ALLTOALL].algorithm) { return ompi_coll_tuned_alltoall_intra_do_forced (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); + rbuf, rcount, rdtype, + comm, module); } return ompi_coll_tuned_alltoall_intra_dec_fixed (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); + rbuf, rcount, rdtype, + comm, module); } /* @@ -163,16 +162,27 @@ int ompi_coll_tuned_alltoallv_intra_dec_dynamic(void *sbuf, int *scounts, int *s OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_alltoallv_intra_dec_dynamic")); - /* - * BEGIN - File Based Rules - * - * Here is where we would check to see if we have some file based - * rules. Currently, we do not, so move on to seeing if the user - * specified a specific algorithm. If not, then use the fixed - * decision code to decide. - * - * END - File Based Rules + /** + * check to see if we have some filebased rules. As we don't have global + * knowledge about the total amount of data, use the first available rule. + * This allow the users to specify the alltoallv algorithm to be used only + * based on the communicator size. 
*/ + if (data->com_rules[ALLTOALLV]) { + int alg, faninout, segsize, max_requests; + + alg = ompi_coll_tuned_get_target_method_params (data->com_rules[ALLTOALLV], + 0, &faninout, &segsize, &max_requests); + + if (alg) { + /* we have found a valid choice from the file based rules for this message size */ + return ompi_coll_tuned_alltoallv_intra_do_this (sbuf, scounts, sdisps, sdtype, + rbuf, rcounts, rdisps, rdtype, + comm, module, + alg); + } /* found a method */ + } /*end if any com rules to check */ + if (data->user_forced[ALLTOALLV].algorithm) { return ompi_coll_tuned_alltoallv_intra_do_forced(sbuf, scounts, sdisps, sdtype, rbuf, rcounts, rdisps, rdtype, @@ -191,7 +201,7 @@ int ompi_coll_tuned_alltoallv_intra_dec_dynamic(void *sbuf, int *scounts, int *s * Returns: - MPI_SUCCESS or error code (passed from the barrier implementation) */ int ompi_coll_tuned_barrier_intra_dec_dynamic(struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; mca_coll_tuned_comm_t *data = tuned_module->tuned_data; @@ -207,7 +217,7 @@ int ompi_coll_tuned_barrier_intra_dec_dynamic(struct ompi_communicator_t *comm, 0, &faninout, &segsize, &ignoreme); if (alg) { - /* we have found a valid choice from the file based rules for this message size */ + /* we have found a valid choice from the file based rules for this message size */ return ompi_coll_tuned_barrier_intra_do_this (comm, module, alg, faninout, segsize); } /* found a method */ @@ -229,7 +239,7 @@ int ompi_coll_tuned_barrier_intra_dec_dynamic(struct ompi_communicator_t *comm, int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; mca_coll_tuned_comm_t *data = tuned_module->tuned_data; @@ -249,9 +259,9 @@ int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count, dsize, &faninout, &segsize, &ignoreme); if (alg) { - /* we have found a valid choice from the file based rules for this message size */ + /* we have found a valid choice from the file based rules for this message size */ return ompi_coll_tuned_bcast_intra_do_this (buff, count, datatype, root, - comm, module, + comm, module, alg, faninout, segsize); } /* found a method */ } /*end if any com rules to check */ @@ -259,10 +269,10 @@ int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count, if (data->user_forced[BCAST].algorithm) { return ompi_coll_tuned_bcast_intra_do_forced (buff, count, datatype, root, - comm, module); + comm, module); } return ompi_coll_tuned_bcast_intra_dec_fixed (buff, count, datatype, root, - comm, module); + comm, module); } /* @@ -277,7 +287,7 @@ int ompi_coll_tuned_reduce_intra_dec_dynamic( void *sendbuf, void *recvbuf, int count, struct ompi_datatype_t* datatype, struct ompi_op_t* op, int root, struct ompi_communicator_t* comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; mca_coll_tuned_comm_t *data = tuned_module->tuned_data; @@ -298,10 +308,10 @@ int ompi_coll_tuned_reduce_intra_dec_dynamic( void *sendbuf, void *recvbuf, dsize, &faninout, &segsize, &max_requests); if (alg) { - /* we have found a valid choice from the file based rules for this message size */ + /* we have found a valid choice from the file 
based rules for this message size */ return ompi_coll_tuned_reduce_intra_do_this (sendbuf, recvbuf, count, datatype, op, root, - comm, module, + comm, module, alg, faninout, segsize, max_requests); @@ -310,12 +320,12 @@ int ompi_coll_tuned_reduce_intra_dec_dynamic( void *sendbuf, void *recvbuf, if (data->user_forced[REDUCE].algorithm) { return ompi_coll_tuned_reduce_intra_do_forced (sendbuf, recvbuf, count, datatype, - op, root, - comm, module); + op, root, + comm, module); } return ompi_coll_tuned_reduce_intra_dec_fixed (sendbuf, recvbuf, count, datatype, - op, root, - comm, module); + op, root, + comm, module); } /* @@ -332,7 +342,7 @@ int ompi_coll_tuned_reduce_scatter_intra_dec_dynamic(void *sbuf, void *rbuf, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; mca_coll_tuned_comm_t *data = tuned_module->tuned_data; @@ -354,10 +364,10 @@ int ompi_coll_tuned_reduce_scatter_intra_dec_dynamic(void *sbuf, void *rbuf, dsize, &faninout, &segsize, &ignoreme); if (alg) { - /* we have found a valid choice from the file based rules for this message size */ + /* we have found a valid choice from the file based rules for this message size */ return ompi_coll_tuned_reduce_scatter_intra_do_this (sbuf, rbuf, rcounts, - dtype, op, - comm, module, + dtype, op, + comm, module, alg, faninout, segsize); } /* found a method */ @@ -369,8 +379,8 @@ int ompi_coll_tuned_reduce_scatter_intra_dec_dynamic(void *sbuf, void *rbuf, comm, module); } return ompi_coll_tuned_reduce_scatter_intra_dec_fixed (sbuf, rbuf, rcounts, - dtype, op, - comm, module); + dtype, op, + comm, module); } /* @@ -387,49 +397,49 @@ int ompi_coll_tuned_allgather_intra_dec_dynamic(void *sbuf, int scount, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; mca_coll_tuned_comm_t *data = tuned_module->tuned_data; OPAL_OUTPUT((ompi_coll_tuned_stream, - "ompi_coll_tuned_allgather_intra_dec_dynamic")); + "ompi_coll_tuned_allgather_intra_dec_dynamic")); if (data->com_rules[ALLGATHER]) { - /* We have file based rules: - - calculate message size and other necessary information */ - int comsize; - int alg, faninout, segsize, ignoreme; - size_t dsize; + /* We have file based rules: + - calculate message size and other necessary information */ + int comsize; + int alg, faninout, segsize, ignoreme; + size_t dsize; - ompi_datatype_type_size (sdtype, &dsize); - comsize = ompi_comm_size(comm); - dsize *= comsize * scount; + ompi_datatype_type_size (sdtype, &dsize); + comsize = ompi_comm_size(comm); + dsize *= comsize * scount; - alg = ompi_coll_tuned_get_target_method_params (data->com_rules[ALLGATHER], - dsize, &faninout, &segsize, &ignoreme); - if (alg) { - /* we have found a valid choice from the file based rules for - this message size */ - return ompi_coll_tuned_allgather_intra_do_this (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module, - alg, faninout, segsize); - } + alg = ompi_coll_tuned_get_target_method_params (data->com_rules[ALLGATHER], + dsize, &faninout, &segsize, &ignoreme); + if (alg) { + /* we have found a valid choice from the file based rules for + this message size */ + return ompi_coll_tuned_allgather_intra_do_this (sbuf, scount, sdtype, + rbuf, rcount, rdtype, + comm, 
module, + alg, faninout, segsize); + } } /* We do not have file based rules */ if (data->user_forced[ALLGATHER].algorithm) { - /* User-forced algorithm */ - return ompi_coll_tuned_allgather_intra_do_forced (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); + /* User-forced algorithm */ + return ompi_coll_tuned_allgather_intra_do_forced (sbuf, scount, sdtype, + rbuf, rcount, rdtype, + comm, module); } /* Use default decision */ return ompi_coll_tuned_allgather_intra_dec_fixed (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); + rbuf, rcount, rdtype, + comm, module); } /* @@ -447,100 +457,146 @@ int ompi_coll_tuned_allgatherv_intra_dec_dynamic(void *sbuf, int scount, int *rdispls, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; mca_coll_tuned_comm_t *data = tuned_module->tuned_data; OPAL_OUTPUT((ompi_coll_tuned_stream, - "ompi_coll_tuned_allgatherv_intra_dec_dynamic")); + "ompi_coll_tuned_allgatherv_intra_dec_dynamic")); if (data->com_rules[ALLGATHERV]) { - /* We have file based rules: - - calculate message size and other necessary information */ - int comsize, i; - int alg, faninout, segsize, ignoreme; - size_t dsize, total_size; + /* We have file based rules: + - calculate message size and other necessary information */ + int comsize, i; + int alg, faninout, segsize, ignoreme; + size_t dsize, total_size; - comsize = ompi_comm_size(comm); - ompi_datatype_type_size (sdtype, &dsize); - total_size = 0; - for (i = 0; i < comsize; i++) { total_size += dsize * rcounts[i]; } + comsize = ompi_comm_size(comm); + ompi_datatype_type_size (sdtype, &dsize); + total_size = 0; + for (i = 0; i < comsize; i++) { total_size += dsize * rcounts[i]; } - alg = ompi_coll_tuned_get_target_method_params (data->com_rules[ALLGATHERV], - total_size, &faninout, &segsize, &ignoreme); - if (alg) { - /* we have found a valid choice from the file based rules for - this message size */ - return ompi_coll_tuned_allgatherv_intra_do_this (sbuf, scount, sdtype, - rbuf, rcounts, - rdispls, rdtype, - comm, module, - alg, faninout, segsize); - } + alg = ompi_coll_tuned_get_target_method_params (data->com_rules[ALLGATHERV], + total_size, &faninout, &segsize, &ignoreme); + if (alg) { + /* we have found a valid choice from the file based rules for + this message size */ + return ompi_coll_tuned_allgatherv_intra_do_this (sbuf, scount, sdtype, + rbuf, rcounts, + rdispls, rdtype, + comm, module, + alg, faninout, segsize); + } } /* We do not have file based rules */ if (data->user_forced[ALLGATHERV].algorithm) { - /* User-forced algorithm */ - return ompi_coll_tuned_allgatherv_intra_do_forced (sbuf, scount, sdtype, - rbuf, rcounts, - rdispls, rdtype, - comm, module); + /* User-forced algorithm */ + return ompi_coll_tuned_allgatherv_intra_do_forced (sbuf, scount, sdtype, + rbuf, rcounts, + rdispls, rdtype, + comm, module); } /* Use default decision */ return ompi_coll_tuned_allgatherv_intra_dec_fixed (sbuf, scount, sdtype, - rbuf, rcounts, - rdispls, rdtype, - comm, module); + rbuf, rcounts, + rdispls, rdtype, + comm, module); } int ompi_coll_tuned_gather_intra_dec_dynamic(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, + struct ompi_datatype_t 
*rdtype, + int root, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) { mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; mca_coll_tuned_comm_t *data = tuned_module->tuned_data; OPAL_OUTPUT((ompi_coll_tuned_stream, - "ompi_coll_tuned_gather_intra_dec_dynamic")); + "ompi_coll_tuned_gather_intra_dec_dynamic")); + + /** + * check to see if we have some filebased rules. + */ + if (data->com_rules[GATHER]) { + int comsize, alg, faninout, segsize, max_requests; + size_t dsize; + + comsize = ompi_comm_size(comm); + ompi_datatype_type_size (sdtype, &dsize); + dsize *= comsize; + + alg = ompi_coll_tuned_get_target_method_params (data->com_rules[GATHER], + dsize, &faninout, &segsize, &max_requests); + + if (alg) { + /* we have found a valid choice from the file based rules for this message size */ + return ompi_coll_tuned_gather_intra_do_this (sbuf, scount, sdtype, + rbuf, rcount, rdtype, + root, comm, module, + alg, faninout, segsize); + } /* found a method */ + } /*end if any com rules to check */ if (data->user_forced[GATHER].algorithm) { - return ompi_coll_tuned_gather_intra_do_forced (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - root, comm, module); + return ompi_coll_tuned_gather_intra_do_forced (sbuf, scount, sdtype, + rbuf, rcount, rdtype, + root, comm, module); } return ompi_coll_tuned_gather_intra_dec_fixed (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - root, comm, module); + rbuf, rcount, rdtype, + root, comm, module); } int ompi_coll_tuned_scatter_intra_dec_dynamic(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, + struct ompi_datatype_t *rdtype, + int root, struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) { mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; mca_coll_tuned_comm_t *data = tuned_module->tuned_data; OPAL_OUTPUT((ompi_coll_tuned_stream, - "ompi_coll_tuned_scatter_intra_dec_dynamic")); + "ompi_coll_tuned_scatter_intra_dec_dynamic")); + + /** + * check to see if we have some filebased rules. 
+ */ + if (data->com_rules[SCATTER]) { + int comsize, alg, faninout, segsize, max_requests; + size_t dsize; + + comsize = ompi_comm_size(comm); + ompi_datatype_type_size (sdtype, &dsize); + dsize *= comsize; + + alg = ompi_coll_tuned_get_target_method_params (data->com_rules[SCATTER], + dsize, &faninout, &segsize, &max_requests); + + if (alg) { + /* we have found a valid choice from the file based rules for this message size */ + return ompi_coll_tuned_scatter_intra_do_this (sbuf, scount, sdtype, + rbuf, rcount, rdtype, + root, comm, module, + alg, faninout, segsize); + } /* found a method */ + } /*end if any com rules to check */ if (data->user_forced[SCATTER].algorithm) { - return ompi_coll_tuned_scatter_intra_do_forced (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - root, comm, module); + return ompi_coll_tuned_scatter_intra_do_forced (sbuf, scount, sdtype, + rbuf, rcount, rdtype, + root, comm, module); } return ompi_coll_tuned_scatter_intra_dec_fixed (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - root, comm, module); + rbuf, rcount, rdtype, + root, comm, module); } diff --git a/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c b/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c index 933d7bb628..380239d903 100644 --- a/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c +++ b/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2009 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -40,7 +40,7 @@ ompi_coll_tuned_allreduce_intra_dec_fixed (void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { size_t dsize, block_dsize; int comm_size = ompi_comm_size(comm); @@ -58,22 +58,22 @@ ompi_coll_tuned_allreduce_intra_dec_fixed (void *sbuf, void *rbuf, int count, block_dsize = dsize * count; if (block_dsize < intermediate_message) { - return (ompi_coll_tuned_allreduce_intra_recursivedoubling (sbuf, rbuf, - count, dtype, - op, comm, module)); + return (ompi_coll_tuned_allreduce_intra_recursivedoubling (sbuf, rbuf, + count, dtype, + op, comm, module)); } if( ompi_op_is_commute(op) && (count > comm_size) ) { - const size_t segment_size = 1 << 20; /* 1 MB */ - if ((comm_size * segment_size >= block_dsize)) { - return (ompi_coll_tuned_allreduce_intra_ring (sbuf, rbuf, count, dtype, - op, comm, module)); - } else { - return (ompi_coll_tuned_allreduce_intra_ring_segmented (sbuf, rbuf, - count, dtype, - op, comm, module, - segment_size)); - } + const size_t segment_size = 1 << 20; /* 1 MB */ + if ((comm_size * segment_size >= block_dsize)) { + return (ompi_coll_tuned_allreduce_intra_ring (sbuf, rbuf, count, dtype, + op, comm, module)); + } else { + return (ompi_coll_tuned_allreduce_intra_ring_segmented (sbuf, rbuf, + count, dtype, + op, comm, module, + segment_size)); + } } return (ompi_coll_tuned_allreduce_intra_nonoverlapping (sbuf, rbuf, count, @@ -93,7 +93,7 @@ int ompi_coll_tuned_alltoall_intra_dec_fixed(void *sbuf, int scount, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { int 
communicator_size; size_t dsize, block_dsize; @@ -106,8 +106,8 @@ int ompi_coll_tuned_alltoall_intra_dec_fixed(void *sbuf, int scount, /* special case */ if (communicator_size==2) { return ompi_coll_tuned_alltoall_intra_two_procs(sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); + rbuf, rcount, rdtype, + comm, module); } /* Decision function based on measurement on Grig cluster at @@ -118,19 +118,19 @@ int ompi_coll_tuned_alltoall_intra_dec_fixed(void *sbuf, int scount, block_dsize = dsize * scount; if ((block_dsize < 200) && (communicator_size > 12)) { - return ompi_coll_tuned_alltoall_intra_bruck(sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); + return ompi_coll_tuned_alltoall_intra_bruck(sbuf, scount, sdtype, + rbuf, rcount, rdtype, + comm, module); } else if (block_dsize < 3000) { - return ompi_coll_tuned_alltoall_intra_basic_linear(sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); + return ompi_coll_tuned_alltoall_intra_basic_linear(sbuf, scount, sdtype, + rbuf, rcount, rdtype, + comm, module); } return ompi_coll_tuned_alltoall_intra_pairwise (sbuf, scount, sdtype, rbuf, rcount, rdtype, - comm, module); + comm, module); #if 0 /* previous decision */ @@ -179,7 +179,7 @@ int ompi_coll_tuned_alltoallv_intra_dec_fixed(void *sbuf, int *scounts, int *sdi * Returns: - MPI_SUCCESS or error code (passed from the barrier implementation) */ int ompi_coll_tuned_barrier_intra_dec_fixed(struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { int communicator_size = ompi_comm_size(comm); @@ -219,10 +219,10 @@ int ompi_coll_tuned_barrier_intra_dec_fixed(struct ompi_communicator_t *comm, int ompi_coll_tuned_bcast_intra_dec_fixed(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { /* Decision function based on MX results for - messages up to 36MB and communicator sizes up to 64 nodes */ + messages up to 36MB and communicator sizes up to 64 nodes */ const size_t small_message_size = 2048; const size_t intermediate_message_size = 370728; const double a_p16 = 3.2118e-6; /* [1 / byte] */ @@ -249,56 +249,56 @@ int ompi_coll_tuned_bcast_intra_dec_fixed(void *buff, int count, /* Handle messages of small and intermediate size, and single-element broadcasts */ if ((message_size < small_message_size) || (count <= 1)) { - /* Binomial without segmentation */ - segsize = 0; - return ompi_coll_tuned_bcast_intra_binomial (buff, count, datatype, - root, comm, module, - segsize); + /* Binomial without segmentation */ + segsize = 0; + return ompi_coll_tuned_bcast_intra_binomial (buff, count, datatype, + root, comm, module, + segsize); } else if (message_size < intermediate_message_size) { - /* SplittedBinary with 1KB segments */ - segsize = 1024; - return ompi_coll_tuned_bcast_intra_split_bintree(buff, count, datatype, - root, comm, module, - segsize); + /* SplittedBinary with 1KB segments */ + segsize = 1024; + return ompi_coll_tuned_bcast_intra_split_bintree(buff, count, datatype, + root, comm, module, + segsize); } /* Handle large message sizes */ else if (communicator_size < (a_p128 * message_size + b_p128)) { - /* Pipeline with 128KB segments */ - segsize = 1024 << 7; - return ompi_coll_tuned_bcast_intra_pipeline (buff, count, datatype, - root, comm, module, - segsize); + /* Pipeline with 128KB segments */ + segsize = 1024 << 7; + return ompi_coll_tuned_bcast_intra_pipeline (buff, count, datatype, + root, comm, 
module, + segsize); } else if (communicator_size < 13) { - /* Split Binary with 8KB segments */ - segsize = 1024 << 3; - return ompi_coll_tuned_bcast_intra_split_bintree(buff, count, datatype, - root, comm, module, - segsize); + /* Split Binary with 8KB segments */ + segsize = 1024 << 3; + return ompi_coll_tuned_bcast_intra_split_bintree(buff, count, datatype, + root, comm, module, + segsize); } else if (communicator_size < (a_p64 * message_size + b_p64)) { - /* Pipeline with 64KB segments */ - segsize = 1024 << 6; - return ompi_coll_tuned_bcast_intra_pipeline (buff, count, datatype, - root, comm, module, - segsize); + /* Pipeline with 64KB segments */ + segsize = 1024 << 6; + return ompi_coll_tuned_bcast_intra_pipeline (buff, count, datatype, + root, comm, module, + segsize); } else if (communicator_size < (a_p16 * message_size + b_p16)) { - /* Pipeline with 16KB segments */ - segsize = 1024 << 4; - return ompi_coll_tuned_bcast_intra_pipeline (buff, count, datatype, - root, comm, module, - segsize); + /* Pipeline with 16KB segments */ + segsize = 1024 << 4; + return ompi_coll_tuned_bcast_intra_pipeline (buff, count, datatype, + root, comm, module, + segsize); } /* Pipeline with 8KB segments */ segsize = 1024 << 3; return ompi_coll_tuned_bcast_intra_pipeline (buff, count, datatype, - root, comm, module, - segsize); + root, comm, module, + segsize); #if 0 /* this is based on gige measurements */ @@ -340,7 +340,7 @@ int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf, int count, struct ompi_datatype_t* datatype, struct ompi_op_t* op, int root, struct ompi_communicator_t* comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { int communicator_size, segsize = 0; size_t message_size, dsize; @@ -370,10 +370,10 @@ int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf, return ompi_coll_tuned_reduce_intra_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm, module); } return ompi_coll_tuned_reduce_intra_in_order_binary (sendbuf, recvbuf, count, datatype, op, root, comm, module, - 0, max_requests); + 0, max_requests); } - OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_reduce_intra_dec_fixed" + OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_reduce_intra_dec_fixed " "root %d rank %d com_size %d msg_length %lu", root, ompi_comm_rank(comm), communicator_size, (unsigned long)message_size)); @@ -385,17 +385,17 @@ int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf, /* Binomial_0K */ segsize = 0; return ompi_coll_tuned_reduce_intra_binomial(sendbuf, recvbuf, count, datatype, op, root, comm, module, - segsize, max_requests); + segsize, max_requests); } else if (communicator_size > (a1 * message_size + b1)) { /* Binomial_1K */ segsize = 1024; return ompi_coll_tuned_reduce_intra_binomial(sendbuf, recvbuf, count, datatype, op, root, comm, module, - segsize, max_requests); + segsize, max_requests); } else if (communicator_size > (a2 * message_size + b2)) { /* Pipeline_1K */ segsize = 1024; return ompi_coll_tuned_reduce_intra_pipeline (sendbuf, recvbuf, count, datatype, op, root, comm, module, - segsize, max_requests); + segsize, max_requests); } else if (communicator_size > (a3 * message_size + b3)) { /* Binary_32K */ segsize = 32*1024; @@ -410,7 +410,7 @@ int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf, segsize = 64*1024; } return ompi_coll_tuned_reduce_intra_pipeline (sendbuf, recvbuf, count, datatype, op, root, comm, module, - segsize, max_requests); + segsize, max_requests); 
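/*
 * Editorial aside (illustration only, not part of the patch): the fixed
 * decision functions re-indented above (bcast, reduce) all reduce to a
 * piecewise rule -- derive the message size from the datatype size and
 * count, then compare it (and the communicator size, often through a linear
 * fit of the form comm_size > a * message_size + b) against measured
 * cut-offs to pick an algorithm and a segment size.  A self-contained toy
 * version, with partly made-up thresholds, is:
 */
#include <stddef.h>

static size_t demo_pick_segsize(size_t message_size, int comm_size)
{
    if (message_size < 2048 || comm_size < 4) return 0;        /* small: no segmentation    */
    if (message_size < 370728)                return 1024;      /* intermediate: 1KB chunks  */
    return (size_t)1024 << 3;                                    /* large: 8KB chunks         */
}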
#if 0 /* for small messages use linear algorithm */ @@ -433,11 +433,11 @@ int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf, /* later swap this for a binary tree */ /* fanout = 2; */ return ompi_coll_tuned_reduce_intra_chain (sendbuf, recvbuf, count, datatype, op, root, comm, module, - segsize, fanout, max_requests); + segsize, fanout, max_requests); } segsize = 1024; return ompi_coll_tuned_reduce_intra_pipeline (sendbuf, recvbuf, count, datatype, op, root, comm, module, - segsize, max_requests); + segsize, max_requests); #endif /* 0 */ } @@ -457,51 +457,51 @@ int ompi_coll_tuned_reduce_scatter_intra_dec_fixed( void *sbuf, void *rbuf, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { - int comm_size, i, pow2; - size_t total_message_size, dsize; - const double a = 0.0012; - const double b = 8.0; - const size_t small_message_size = 12 * 1024; - const size_t large_message_size = 256 * 1024; - bool zerocounts = false; + int comm_size, i, pow2; + size_t total_message_size, dsize; + const double a = 0.0012; + const double b = 8.0; + const size_t small_message_size = 12 * 1024; + const size_t large_message_size = 256 * 1024; + bool zerocounts = false; - OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_reduce_scatter_intra_dec_fixed")); + OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_reduce_scatter_intra_dec_fixed")); - comm_size = ompi_comm_size(comm); - /* We need data size for decision function */ - ompi_datatype_type_size(dtype, &dsize); - total_message_size = 0; - for (i = 0; i < comm_size; i++) { - total_message_size += rcounts[i]; - if (0 == rcounts[i]) { - zerocounts = true; - } - } + comm_size = ompi_comm_size(comm); + /* We need data size for decision function */ + ompi_datatype_type_size(dtype, &dsize); + total_message_size = 0; + for (i = 0; i < comm_size; i++) { + total_message_size += rcounts[i]; + if (0 == rcounts[i]) { + zerocounts = true; + } + } - if( !ompi_op_is_commute(op) || (zerocounts)) { - return ompi_coll_tuned_reduce_scatter_intra_nonoverlapping (sbuf, rbuf, rcounts, - dtype, op, - comm, module); - } + if( !ompi_op_is_commute(op) || (zerocounts)) { + return ompi_coll_tuned_reduce_scatter_intra_nonoverlapping (sbuf, rbuf, rcounts, + dtype, op, + comm, module); + } - total_message_size *= dsize; + total_message_size *= dsize; - /* compute the nearest power of 2 */ - for (pow2 = 1; pow2 < comm_size; pow2 <<= 1); + /* compute the nearest power of 2 */ + for (pow2 = 1; pow2 < comm_size; pow2 <<= 1); - if ((total_message_size <= small_message_size) || - ((total_message_size <= large_message_size) && (pow2 == comm_size)) || - (comm_size >= a * total_message_size + b)) { - return - ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(sbuf, rbuf, rcounts, - dtype, op, - comm, module); - } - return ompi_coll_tuned_reduce_scatter_intra_ring(sbuf, rbuf, rcounts, - dtype, op, - comm, module); + if ((total_message_size <= small_message_size) || + ((total_message_size <= large_message_size) && (pow2 == comm_size)) || + (comm_size >= a * total_message_size + b)) { + return + ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(sbuf, rbuf, rcounts, + dtype, op, + comm, module); + } + return ompi_coll_tuned_reduce_scatter_intra_ring(sbuf, rbuf, rcounts, + dtype, op, + comm, module); } /* @@ -520,80 +520,80 @@ int ompi_coll_tuned_allgather_intra_dec_fixed(void *sbuf, int scount, struct ompi_communicator_t *comm, mca_coll_base_module_t 
*module) { - int communicator_size, pow2_size; - size_t dsize, total_dsize; + int communicator_size, pow2_size; + size_t dsize, total_dsize; - communicator_size = ompi_comm_size(comm); + communicator_size = ompi_comm_size(comm); - /* Special case for 2 processes */ - if (communicator_size == 2) { - return ompi_coll_tuned_allgather_intra_two_procs (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); + /* Special case for 2 processes */ + if (communicator_size == 2) { + return ompi_coll_tuned_allgather_intra_two_procs (sbuf, scount, sdtype, + rbuf, rcount, rdtype, + comm, module); } - /* Determine complete data size */ - ompi_datatype_type_size(sdtype, &dsize); - total_dsize = dsize * scount * communicator_size; + /* Determine complete data size */ + ompi_datatype_type_size(sdtype, &dsize); + total_dsize = dsize * scount * communicator_size; - OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_allgather_intra_dec_fixed" - " rank %d com_size %d msg_length %lu", - ompi_comm_rank(comm), communicator_size, (unsigned long)total_dsize)); + OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_allgather_intra_dec_fixed" + " rank %d com_size %d msg_length %lu", + ompi_comm_rank(comm), communicator_size, (unsigned long)total_dsize)); - for (pow2_size = 1; pow2_size < communicator_size; pow2_size <<=1); + for (pow2_size = 1; pow2_size < communicator_size; pow2_size <<=1); - /* Decision based on MX 2Gb results from Grig cluster at - The University of Tennesse, Knoxville - - if total message size is less than 50KB use either bruck or - recursive doubling for non-power of two and power of two nodes, - respectively. - - else use ring and neighbor exchange algorithms for odd and even - number of nodes, respectively. - */ - if (total_dsize < 50000) { - if (pow2_size == communicator_size) { - return ompi_coll_tuned_allgather_intra_recursivedoubling(sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); - } else { - return ompi_coll_tuned_allgather_intra_bruck(sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); - } - } else { - if (communicator_size % 2) { - return ompi_coll_tuned_allgather_intra_ring(sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); - } else { - return ompi_coll_tuned_allgather_intra_neighborexchange(sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); - } - } + /* Decision based on MX 2Gb results from Grig cluster at + The University of Tennesse, Knoxville + - if total message size is less than 50KB use either bruck or + recursive doubling for non-power of two and power of two nodes, + respectively. + - else use ring and neighbor exchange algorithms for odd and even + number of nodes, respectively. + */ + if (total_dsize < 50000) { + if (pow2_size == communicator_size) { + return ompi_coll_tuned_allgather_intra_recursivedoubling(sbuf, scount, sdtype, + rbuf, rcount, rdtype, + comm, module); + } else { + return ompi_coll_tuned_allgather_intra_bruck(sbuf, scount, sdtype, + rbuf, rcount, rdtype, + comm, module); + } + } else { + if (communicator_size % 2) { + return ompi_coll_tuned_allgather_intra_ring(sbuf, scount, sdtype, + rbuf, rcount, rdtype, + comm, module); + } else { + return ompi_coll_tuned_allgather_intra_neighborexchange(sbuf, scount, sdtype, + rbuf, rcount, rdtype, + comm, module); + } + } #if defined(USE_MPICH2_DECISION) - /* Decision as in MPICH-2 - presented in Thakur et.al. "Optimization of Collective Communication - Operations in MPICH", International Journal of High Performance Computing - Applications, Vol. 19, No. 
1, 49-66 (2005) - - for power-of-two processes and small and medium size messages - (up to 512KB) use recursive doubling - - for non-power-of-two processes and small messages (80KB) use bruck, - - for everything else use ring. + /* Decision as in MPICH-2 + presented in Thakur et.al. "Optimization of Collective Communication + Operations in MPICH", International Journal of High Performance Computing + Applications, Vol. 19, No. 1, 49-66 (2005) + - for power-of-two processes and small and medium size messages + (up to 512KB) use recursive doubling + - for non-power-of-two processes and small messages (80KB) use bruck, + - for everything else use ring. */ - if ((pow2_size == communicator_size) && (total_dsize < 524288)) { - return ompi_coll_tuned_allgather_intra_recursivedoubling(sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); - } else if (total_dsize <= 81920) { - return ompi_coll_tuned_allgather_intra_bruck(sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); - } - return ompi_coll_tuned_allgather_intra_ring(sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); + if ((pow2_size == communicator_size) && (total_dsize < 524288)) { + return ompi_coll_tuned_allgather_intra_recursivedoubling(sbuf, scount, sdtype, + rbuf, rcount, rdtype, + comm, module); + } else if (total_dsize <= 81920) { + return ompi_coll_tuned_allgather_intra_bruck(sbuf, scount, sdtype, + rbuf, rcount, rdtype, + comm, module); + } + return ompi_coll_tuned_allgather_intra_ring(sbuf, scount, sdtype, + rbuf, rcount, rdtype, + comm, module); #endif /* defined(USE_MPICH2_DECISION) */ } @@ -612,7 +612,7 @@ int ompi_coll_tuned_allgatherv_intra_dec_fixed(void *sbuf, int scount, int *rdispls, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { int i; int communicator_size; @@ -639,22 +639,22 @@ int ompi_coll_tuned_allgatherv_intra_dec_fixed(void *sbuf, int scount, " rank %d com_size %d msg_length %lu", ompi_comm_rank(comm), communicator_size, (unsigned long)total_dsize)); - /* Decision based on allgather decision. */ - if (total_dsize < 50000) { - return ompi_coll_tuned_allgatherv_intra_bruck(sbuf, scount, sdtype, - rbuf, rcounts, rdispls, rdtype, - comm, module); - } else { - if (communicator_size % 2) { - return ompi_coll_tuned_allgatherv_intra_ring(sbuf, scount, sdtype, - rbuf, rcounts, rdispls, rdtype, - comm, module); - } else { - return ompi_coll_tuned_allgatherv_intra_neighborexchange(sbuf, scount, sdtype, - rbuf, rcounts, rdispls, rdtype, - comm, module); - } - } + /* Decision based on allgather decision. 
*/ + if (total_dsize < 50000) { + return ompi_coll_tuned_allgatherv_intra_bruck(sbuf, scount, sdtype, + rbuf, rcounts, rdispls, rdtype, + comm, module); + } else { + if (communicator_size % 2) { + return ompi_coll_tuned_allgatherv_intra_ring(sbuf, scount, sdtype, + rbuf, rcounts, rdispls, rdtype, + comm, module); + } else { + return ompi_coll_tuned_allgatherv_intra_neighborexchange(sbuf, scount, sdtype, + rbuf, rcounts, rdispls, rdtype, + comm, module); + } + } } /* @@ -667,12 +667,12 @@ int ompi_coll_tuned_allgatherv_intra_dec_fixed(void *sbuf, int scount, */ int ompi_coll_tuned_gather_intra_dec_fixed(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - int root, + struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, + struct ompi_datatype_t *rdtype, + int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { const int large_segment_size = 32768; const int small_segment_size = 1024; @@ -688,7 +688,7 @@ int ompi_coll_tuned_gather_intra_dec_fixed(void *sbuf, int scount, size_t dsize, block_size; OPAL_OUTPUT((ompi_coll_tuned_stream, - "ompi_coll_tuned_gather_intra_dec_fixed")); + "ompi_coll_tuned_gather_intra_dec_fixed")); communicator_size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); @@ -724,8 +724,8 @@ int ompi_coll_tuned_gather_intra_dec_fixed(void *sbuf, int scount, } /* Otherwise, use basic linear */ return ompi_coll_tuned_gather_intra_basic_linear (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - root, comm, module); + rbuf, rcount, rdtype, + root, comm, module); } /* @@ -738,11 +738,11 @@ int ompi_coll_tuned_gather_intra_dec_fixed(void *sbuf, int scount, */ int ompi_coll_tuned_scatter_intra_dec_fixed(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, + struct ompi_datatype_t *rdtype, + int root, struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) { const size_t small_block_size = 300; const int small_comm_size = 10; @@ -750,7 +750,7 @@ int ompi_coll_tuned_scatter_intra_dec_fixed(void *sbuf, int scount, size_t dsize, block_size; OPAL_OUTPUT((ompi_coll_tuned_stream, - "ompi_coll_tuned_scatter_intra_dec_fixed")); + "ompi_coll_tuned_scatter_intra_dec_fixed")); communicator_size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); @@ -770,6 +770,6 @@ int ompi_coll_tuned_scatter_intra_dec_fixed(void *sbuf, int scount, root, comm, module); } return ompi_coll_tuned_scatter_intra_basic_linear (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - root, comm, module); + rbuf, rcount, rdtype, + root, comm, module); } diff --git a/ompi/mca/coll/tuned/coll_tuned_dynamic_rules.c b/ompi/mca/coll/tuned/coll_tuned_dynamic_rules.c index 91d71d9ff6..ae50910ff0 100644 --- a/ompi/mca/coll/tuned/coll_tuned_dynamic_rules.c +++ b/ompi/mca/coll/tuned/coll_tuned_dynamic_rules.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2009 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -314,18 +314,12 @@ ompi_coll_com_rule_t* ompi_coll_tuned_get_com_rule_ptr (ompi_coll_alg_rule_t* ru best_com_p = com_p = alg_p->com_rules; i = best = 0; - while (in_com_sizes) { - /* OPAL_OUTPUT((ompi_coll_tuned_stream,"checking comsize %d against alg_id %d com_id %d index %d com_size %d", */ - /* mpi_comsize, com_p->alg_rule_id, com_p->com_rule_id, i, com_p->mpi_comsize)); */ - if (com_p->mpi_comsize <= mpi_comsize) { - best = i; - best_com_p = com_p; - /* OPAL_OUTPUT((ompi_coll_tuned_stream(":ok\n")); */ - } - else { - /* OPAL_OUTPUT((ompi_coll_tuned_stream(":nop\n")); */ + while( i < alg_p->n_com_sizes ) { + if (com_p->mpi_comsize > mpi_comsize) { break; } + best = i; + best_com_p = com_p; /* go to the next entry */ com_p++; i++; @@ -359,26 +353,11 @@ int ompi_coll_tuned_get_target_method_params (ompi_coll_com_rule_t* base_com_rul ompi_coll_msg_rule_t* best_msg_p = (ompi_coll_msg_rule_t*) NULL; int i, best; - if (!base_com_rule) { + /* No rule or zero rules */ + if( (NULL == base_com_rule) || (0 == base_com_rule->n_msg_sizes)) { return (0); } - if (!result_topo_faninout) { - return (0); - } - - if (!result_segsize) { - return (0); - } - - if (!max_requests) { - return (0); - } - - if (!base_com_rule->n_msg_sizes) { /* check for count of message sizes */ - return (0); /* no msg sizes so no rule */ - } - /* ok have some msg sizes, now to find the one closest to my mpi_msgsize */ /* make a copy of the first msg rule */ diff --git a/ompi/mca/coll/tuned/coll_tuned_forced.c b/ompi/mca/coll/tuned/coll_tuned_forced.c deleted file mode 100644 index b30947e6e5..0000000000 --- a/ompi/mca/coll/tuned/coll_tuned_forced.c +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "mpi.h" -#include "opal/mca/mca.h" -#include "ompi/constants.h" -#include "coll_tuned.h" - -/* need to include our own topo prototypes so we can malloc data on the comm correctly */ -#include "coll_tuned_topo.h" - -/* also need the dynamic rule structures */ -#include "coll_tuned_forced.h" - -#include "coll_tuned_util.h" - -#include -#include - -/* We put all routines that handle the MCA user forced algorithm and parameter choices here */ -/* recheck the setting of forced, called on module create (i.e. 
for each new comm) */ - -int ompi_coll_tuned_forced_getvalues (coll_tuned_force_algorithm_mca_param_indices_t mca_params, - coll_tuned_force_algorithm_params_t *forced_values) -{ - mca_base_param_lookup_int (mca_params.algorithm_param_index, &(forced_values->algorithm)); - mca_base_param_lookup_int (mca_params.segsize_param_index, &(forced_values->segsize)); - mca_base_param_lookup_int (mca_params.tree_fanout_param_index, &(forced_values->tree_fanout)); - mca_base_param_lookup_int (mca_params.chain_fanout_param_index, &(forced_values->chain_fanout)); - mca_base_param_lookup_int (mca_params.max_requests_param_index, &(forced_values->max_requests)); - - return (MPI_SUCCESS); -} - - -/* special version of above just for barrier which only has one option available (at the moment...) */ -int ompi_coll_tuned_forced_getvalues_barrier (coll_tuned_force_algorithm_mca_param_indices_t mca_params, - coll_tuned_force_algorithm_params_t *forced_values) -{ - mca_base_param_lookup_int (mca_params.algorithm_param_index, &(forced_values->algorithm)); - - return (MPI_SUCCESS); -} - - diff --git a/ompi/mca/coll/tuned/coll_tuned_forced.h b/ompi/mca/coll/tuned/coll_tuned_forced.h deleted file mode 100644 index b02897af77..0000000000 --- a/ompi/mca/coll/tuned/coll_tuned_forced.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_COLL_TUNED_FORCED_H_HAS_BEEN_INCLUDED -#define MCA_COLL_TUNED_FORCED_H_HAS_BEEN_INCLUDED - -#include "ompi_config.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - - -/* this structure is for storing the indexes to the forced algorithm mca params... */ -/* we get these at component query (so that registered values appear in ompi_infoi) */ - -struct coll_tuned_force_algorithm_mca_param_indices_t { - int algorithm_param_index; /* which algorithm you want to force */ - int segsize_param_index; /* segsize to use (if supported), 0 = no segmentation */ - int tree_fanout_param_index; /* tree fanout/in to use */ - int chain_fanout_param_index; /* K-chain fanout/in to use */ - int max_requests_param_index; /* Maximum number of outstanding send or recv requests */ -}; - -typedef struct coll_tuned_force_algorithm_mca_param_indices_t coll_tuned_force_algorithm_mca_param_indices_t; - - -/* the following type is for storing actual value obtained from the MCA on each tuned module */ -/* via their mca param indices lookup in the component */ -/* this structure is stored once per collective type per communicator... 
*/ -struct coll_tuned_force_algorithm_params_t { - int algorithm; /* which algorithm you want to force */ - int segsize; /* segsize to use (if supported), 0 = no segmentation */ - int tree_fanout; /* tree fanout/in to use */ - int chain_fanout; /* K-chain fanout/in to use */ - int max_requests; /* Maximum number of outstanding send or recv requests */ -}; - -typedef struct coll_tuned_force_algorithm_params_t coll_tuned_force_algorithm_params_t; - - -/* prototypes */ - -int ompi_coll_tuned_forced_getvalues (coll_tuned_force_algorithm_mca_param_indices_t mca_params, - coll_tuned_force_algorithm_params_t *forced_values); - -/* barrier has less options than any other collective so it gets its own special function */ -int ompi_coll_tuned_forced_getvalues_barrier (coll_tuned_force_algorithm_mca_param_indices_t mca_params, - coll_tuned_force_algorithm_params_t *forced_values); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif /* MCA_COLL_TUNED_FORCED_H_HAS_BEEN_INCLUDED */ - - diff --git a/ompi/mca/coll/tuned/coll_tuned_gather.c b/ompi/mca/coll/tuned/coll_tuned_gather.c index d21bb63e0e..160ce59a7c 100644 --- a/ompi/mca/coll/tuned/coll_tuned_gather.c +++ b/ompi/mca/coll/tuned/coll_tuned_gather.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2009 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -440,49 +440,49 @@ ompi_coll_tuned_gather_intra_check_forced_init(coll_tuned_force_algorithm_mca_pa ompi_coll_tuned_forced_max_algorithms[GATHER] = max_alg; rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version, - "gather_algorithm_count", - "Number of gather algorithms available", - false, true, max_alg, NULL); + "gather_algorithm_count", + "Number of gather algorithms available", + false, true, max_alg, NULL); mca_param_indices->algorithm_param_index - = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, - "gather_algorithm", - "Which gather algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 binomial, 3 linear with synchronization.", - false, false, 0, NULL); + = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, + "gather_algorithm", + "Which gather algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 binomial, 3 linear with synchronization.", + false, false, 0, NULL); if (mca_param_indices->algorithm_param_index < 0) { return mca_param_indices->algorithm_param_index; } mca_base_param_lookup_int(mca_param_indices->algorithm_param_index, &(requested_alg)); if( 0 > requested_alg || requested_alg > max_alg ) { - if( 0 == ompi_comm_rank( MPI_COMM_WORLD ) ) { - opal_output( 0, "Gather algorithm #%d is not available (range [0..%d]). Switching back to ignore(0)\n", - requested_alg, max_alg ); - } - mca_base_param_set_int( mca_param_indices->algorithm_param_index, 0); + if( 0 == ompi_comm_rank( MPI_COMM_WORLD ) ) { + opal_output( 0, "Gather algorithm #%d is not available (range [0..%d]). 
Switching back to default(0)\n", + requested_alg, max_alg ); + } + mca_base_param_set_int( mca_param_indices->algorithm_param_index, 0); } mca_param_indices->segsize_param_index - = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, - "gather_algorithm_segmentsize", - "Segment size in bytes used by default for gather algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation. Currently, available algorithms do not support segmentation.", - false, false, 0, NULL); + = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, + "gather_algorithm_segmentsize", + "Segment size in bytes used by default for gather algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation. Currently, available algorithms do not support segmentation.", + false, false, 0, NULL); mca_param_indices->tree_fanout_param_index - = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, - "gather_algorithm_tree_fanout", - "Fanout for n-tree used for gather algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation. Currently, available algorithms do not support n-tree topologies.", - false, false, - ompi_coll_tuned_init_tree_fanout, /* get system wide default */ - NULL); + = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, + "gather_algorithm_tree_fanout", + "Fanout for n-tree used for gather algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation. Currently, available algorithms do not support n-tree topologies.", + false, false, + ompi_coll_tuned_init_tree_fanout, /* get system wide default */ + NULL); mca_param_indices->chain_fanout_param_index - = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, - "gather_algorithm_chain_fanout", - "Fanout for chains used for gather algorithms. Only has meaning if algorithm is forced and supports chain topo based operation. Currently, available algorithms do not support chain topologies.", - false, false, - ompi_coll_tuned_init_chain_fanout, /* get system wide default */ - NULL); + = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, + "gather_algorithm_chain_fanout", + "Fanout for chains used for gather algorithms. Only has meaning if algorithm is forced and supports chain topo based operation. Currently, available algorithms do not support chain topologies.", + false, false, + ompi_coll_tuned_init_chain_fanout, /* get system wide default */ + NULL); return (MPI_SUCCESS); } diff --git a/ompi/mca/coll/tuned/coll_tuned_module.c b/ompi/mca/coll/tuned/coll_tuned_module.c index 29256bbe1d..2f37d7452c 100644 --- a/ompi/mca/coll/tuned/coll_tuned_module.c +++ b/ompi/mca/coll/tuned/coll_tuned_module.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2009 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -31,7 +31,6 @@ #include "coll_tuned_topo.h" #include "coll_tuned_dynamic_rules.h" #include "coll_tuned_dynamic_file.h" -#include "coll_tuned_forced.h" static int tuned_module_enable(mca_coll_base_module_t *module, struct ompi_communicator_t *comm); @@ -61,27 +60,21 @@ ompi_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority) OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:module_tuned query called")); + /** + * No support for inter-communicator yet. + */ + if (OMPI_COMM_IS_INTER(comm)) { + *priority = 0; + return NULL; + } + /** * If it is inter-communicator and size is less than 2 we have specialized modules * to handle the intra collective communications. */ if (OMPI_COMM_IS_INTRA(comm) && ompi_comm_size(comm) < 2) { - *priority = 0; - return NULL; - } - - if (OMPI_COMM_IS_INTER(comm)) { -#if 0 - if (ompi_coll_tuned_use_dynamic_rules) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using inter_dynamic")); - } else { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using inter_fixed")); - - } -#endif - /* tuned does not support inter-communicator yet */ - *priority = 0; - return NULL; + *priority = 0; + return NULL; } tuned_module = OBJ_NEW(mca_coll_tuned_module_t); @@ -99,72 +92,86 @@ ompi_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority) tuned_module->super.ft_event = mca_coll_tuned_ft_event; if (ompi_coll_tuned_use_dynamic_rules) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using intra_dynamic")); - - tuned_module->super.coll_allgather = ompi_coll_tuned_allgather_intra_dec_dynamic; - tuned_module->super.coll_allgatherv = ompi_coll_tuned_allgatherv_intra_dec_dynamic; - tuned_module->super.coll_allreduce = ompi_coll_tuned_allreduce_intra_dec_dynamic; - tuned_module->super.coll_alltoall = ompi_coll_tuned_alltoall_intra_dec_dynamic; - tuned_module->super.coll_alltoallv = ompi_coll_tuned_alltoallv_intra_dec_dynamic; - tuned_module->super.coll_alltoallw = NULL; - tuned_module->super.coll_barrier = ompi_coll_tuned_barrier_intra_dec_dynamic; - tuned_module->super.coll_bcast = ompi_coll_tuned_bcast_intra_dec_dynamic; - tuned_module->super.coll_exscan = NULL; - tuned_module->super.coll_gather = ompi_coll_tuned_gather_intra_dec_dynamic; - tuned_module->super.coll_gatherv = NULL; - tuned_module->super.coll_reduce = ompi_coll_tuned_reduce_intra_dec_dynamic; - tuned_module->super.coll_reduce_scatter = ompi_coll_tuned_reduce_scatter_intra_dec_dynamic; - tuned_module->super.coll_scan = NULL; - tuned_module->super.coll_scatter = ompi_coll_tuned_scatter_intra_dec_dynamic; - tuned_module->super.coll_scatterv = NULL; + OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using intra_dynamic")); + tuned_module->super.coll_allgather = ompi_coll_tuned_allgather_intra_dec_dynamic; + tuned_module->super.coll_allgatherv = ompi_coll_tuned_allgatherv_intra_dec_dynamic; + tuned_module->super.coll_allreduce = ompi_coll_tuned_allreduce_intra_dec_dynamic; + tuned_module->super.coll_alltoall = ompi_coll_tuned_alltoall_intra_dec_dynamic; + tuned_module->super.coll_alltoallv = ompi_coll_tuned_alltoallv_intra_dec_dynamic; + tuned_module->super.coll_alltoallw = NULL; + tuned_module->super.coll_barrier = ompi_coll_tuned_barrier_intra_dec_dynamic; + tuned_module->super.coll_bcast = ompi_coll_tuned_bcast_intra_dec_dynamic; + tuned_module->super.coll_exscan = NULL; + tuned_module->super.coll_gather = ompi_coll_tuned_gather_intra_dec_dynamic; + 
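/*
 * Editorial aside (sketch only, not the OMPI API): the two branches of
 * ompi_coll_tuned_comm_query differ only in which decision variant gets
 * wired into the base module's function-pointer table -- dynamic
 * (rule/file driven) or fixed (compiled-in heuristics) -- chosen once per
 * communicator at query time.  Stripped of the OMPI types, the dispatch
 * idea is just:
 */
typedef int (*demo_coll_fn_t)(void *buf, int count, int root);

struct demo_coll_table {
    demo_coll_fn_t bcast;      /* one slot per collective; a NULL slot falls back elsewhere */
    demo_coll_fn_t gather;
};

static void demo_wire_table(struct demo_coll_table *t, int use_dynamic_rules,
                            demo_coll_fn_t dyn_bcast, demo_coll_fn_t fixed_bcast,
                            demo_coll_fn_t dyn_gather, demo_coll_fn_t fixed_gather)
{
    t->bcast  = use_dynamic_rules ? dyn_bcast  : fixed_bcast;   /* pick the variant once */
    t->gather = use_dynamic_rules ? dyn_gather : fixed_gather;
}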
tuned_module->super.coll_gatherv = NULL; + tuned_module->super.coll_reduce = ompi_coll_tuned_reduce_intra_dec_dynamic; + tuned_module->super.coll_reduce_scatter = ompi_coll_tuned_reduce_scatter_intra_dec_dynamic; + tuned_module->super.coll_scan = NULL; + tuned_module->super.coll_scatter = ompi_coll_tuned_scatter_intra_dec_dynamic; + tuned_module->super.coll_scatterv = NULL; } else { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using intra_fixed")); - - tuned_module->super.coll_allgather = ompi_coll_tuned_allgather_intra_dec_fixed; - tuned_module->super.coll_allgatherv = ompi_coll_tuned_allgatherv_intra_dec_fixed; - tuned_module->super.coll_allreduce = ompi_coll_tuned_allreduce_intra_dec_fixed; - tuned_module->super.coll_alltoall = ompi_coll_tuned_alltoall_intra_dec_fixed; - tuned_module->super.coll_alltoallv = ompi_coll_tuned_alltoallv_intra_dec_fixed; - tuned_module->super.coll_alltoallw = NULL; - tuned_module->super.coll_barrier = ompi_coll_tuned_barrier_intra_dec_fixed; - tuned_module->super.coll_bcast = ompi_coll_tuned_bcast_intra_dec_fixed; - tuned_module->super.coll_exscan = NULL; - tuned_module->super.coll_gather = ompi_coll_tuned_gather_intra_dec_fixed; - tuned_module->super.coll_gatherv = NULL; - tuned_module->super.coll_reduce = ompi_coll_tuned_reduce_intra_dec_fixed; - tuned_module->super.coll_reduce_scatter = ompi_coll_tuned_reduce_scatter_intra_dec_fixed; - tuned_module->super.coll_scan = NULL; - tuned_module->super.coll_scatter = ompi_coll_tuned_scatter_intra_dec_fixed; - tuned_module->super.coll_scatterv = NULL; + OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using intra_fixed")); + tuned_module->super.coll_allgather = ompi_coll_tuned_allgather_intra_dec_fixed; + tuned_module->super.coll_allgatherv = ompi_coll_tuned_allgatherv_intra_dec_fixed; + tuned_module->super.coll_allreduce = ompi_coll_tuned_allreduce_intra_dec_fixed; + tuned_module->super.coll_alltoall = ompi_coll_tuned_alltoall_intra_dec_fixed; + tuned_module->super.coll_alltoallv = ompi_coll_tuned_alltoallv_intra_dec_fixed; + tuned_module->super.coll_alltoallw = NULL; + tuned_module->super.coll_barrier = ompi_coll_tuned_barrier_intra_dec_fixed; + tuned_module->super.coll_bcast = ompi_coll_tuned_bcast_intra_dec_fixed; + tuned_module->super.coll_exscan = NULL; + tuned_module->super.coll_gather = ompi_coll_tuned_gather_intra_dec_fixed; + tuned_module->super.coll_gatherv = NULL; + tuned_module->super.coll_reduce = ompi_coll_tuned_reduce_intra_dec_fixed; + tuned_module->super.coll_reduce_scatter = ompi_coll_tuned_reduce_scatter_intra_dec_fixed; + tuned_module->super.coll_scan = NULL; + tuned_module->super.coll_scatter = ompi_coll_tuned_scatter_intra_dec_fixed; + tuned_module->super.coll_scatterv = NULL; } return &(tuned_module->super); } +/* We put all routines that handle the MCA user forced algorithm and parameter choices here */ +/* recheck the setting of forced, called on module create (i.e. 
for each new comm) */ + +static int +ompi_coll_tuned_forced_getvalues( enum COLLTYPE type, + coll_tuned_force_algorithm_params_t *forced_values ) +{ + coll_tuned_force_algorithm_mca_param_indices_t* mca_params; + + mca_params = &(ompi_coll_tuned_forced_params[type]); + + mca_base_param_lookup_int (mca_params->algorithm_param_index, &(forced_values->algorithm)); + if( BARRIER != type ) { + mca_base_param_lookup_int (mca_params->segsize_param_index, &(forced_values->segsize)); + mca_base_param_lookup_int (mca_params->tree_fanout_param_index, &(forced_values->tree_fanout)); + mca_base_param_lookup_int (mca_params->chain_fanout_param_index, &(forced_values->chain_fanout)); + mca_base_param_lookup_int (mca_params->max_requests_param_index, &(forced_values->max_requests)); + } + return (MPI_SUCCESS); +} /* * Init module on the communicator */ static int -tuned_module_enable(mca_coll_base_module_t *module, - struct ompi_communicator_t *comm) +tuned_module_enable( mca_coll_base_module_t *module, + struct ompi_communicator_t *comm ) { - int size; + int size, i; mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t *) module; mca_coll_tuned_comm_t *data = NULL; - /* fanout parameters */ - int rc=0; - int i; - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init called.")); - /* This routine will become more complex and might have to be */ - /* broken into more sections/function calls */ - - /* Order of operations: + /* This routine will become more complex and might have to be + * broken into more sections/function calls + * + * Order of operations: * alloc memory for nb reqs (in case we fall through) * add decision rules if using dynamic rules * compact rules using communicator size info etc @@ -174,15 +181,13 @@ tuned_module_enable(mca_coll_base_module_t *module, */ /* Allocate the data that hangs off the communicator */ - if (OMPI_COMM_IS_INTER(comm)) { size = ompi_comm_remote_size(comm); } else { size = ompi_comm_size(comm); } - - /* + /** * we still malloc data as it is used by the TUNED modules * if we don't allocate it and fall back to a BASIC module routine then confuses debuggers * we place any special info after the default data @@ -195,12 +200,9 @@ tuned_module_enable(mca_coll_base_module_t *module, */ /* if we within the memory/size limit, allow preallocated data */ - - - if (size<=ompi_coll_tuned_preallocate_memory_comm_size_limit) { + if( size <= ompi_coll_tuned_preallocate_memory_comm_size_limit ) { data = (mca_coll_tuned_comm_t*)malloc(sizeof(struct mca_coll_tuned_comm_t) + - (sizeof(ompi_request_t *) * size * 2)); - + (sizeof(ompi_request_t *) * size * 2)); if (NULL == data) { return OMPI_ERROR; } @@ -208,7 +210,6 @@ tuned_module_enable(mca_coll_base_module_t *module, data->mcct_num_reqs = size * 2; } else { data = (mca_coll_tuned_comm_t*)malloc(sizeof(struct mca_coll_tuned_comm_t)); - if (NULL == data) { return OMPI_ERROR; } @@ -216,138 +217,62 @@ tuned_module_enable(mca_coll_base_module_t *module, data->mcct_num_reqs = 0; } - - /* + /** * If using dynamic and you are MPI_COMM_WORLD and you want to use a parameter file.. * then this effects how much storage space you need * (This is a basic version of what will go into V2) - * */ - - size = ompi_comm_size(comm); /* find size so we can (A) decide if to access the file directly */ - /* (B) so we can get our very own customised ompi_coll_com_rule_t ptr */ - /* which only has rules in it for our com size */ - - /* actually if they are below a threadhold, they all open it */ - /* have to build a collective in here.. 
but just for MCW.. */ - /* but we have to make sure we have the same rules everywhere :( */ - /* if using dynamic rules make sure all overrides are NULL before we start override anything accidently */ if (ompi_coll_tuned_use_dynamic_rules) { - /* base rules */ - data->all_base_rules = (ompi_coll_alg_rule_t*) NULL; + OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init MCW & Dynamic")); - /* each collective rule for my com size */ - for (i=0;icom_rules[i] = (ompi_coll_com_rule_t*) NULL; + /** + * next dynamic state, recheck all forced rules as well + * warning, we should check to make sure this is really an INTRA comm here... + */ + ompi_coll_tuned_forced_getvalues( ALLGATHER, &(data->user_forced[ALLGATHER])); + ompi_coll_tuned_forced_getvalues( ALLGATHERV, &(data->user_forced[ALLGATHERV])); + ompi_coll_tuned_forced_getvalues( ALLREDUCE, &(data->user_forced[ALLREDUCE])); + ompi_coll_tuned_forced_getvalues( ALLTOALL, &(data->user_forced[ALLTOALL])); + ompi_coll_tuned_forced_getvalues( ALLTOALLV, &(data->user_forced[ALLTOALLV])); + ompi_coll_tuned_forced_getvalues( ALLTOALLW, &(data->user_forced[ALLTOALLW])); + ompi_coll_tuned_forced_getvalues( BARRIER, &(data->user_forced[BARRIER])); + ompi_coll_tuned_forced_getvalues( BCAST, &(data->user_forced[BCAST])); + ompi_coll_tuned_forced_getvalues( EXSCAN, &(data->user_forced[EXSCAN])); + ompi_coll_tuned_forced_getvalues( GATHER, &(data->user_forced[GATHER])); + ompi_coll_tuned_forced_getvalues( GATHERV, &(data->user_forced[GATHERV])); + ompi_coll_tuned_forced_getvalues( REDUCE, &(data->user_forced[REDUCE])); + ompi_coll_tuned_forced_getvalues( REDUCESCATTER, &(data->user_forced[REDUCESCATTER])); + ompi_coll_tuned_forced_getvalues( SCAN, &(data->user_forced[SCAN])); + ompi_coll_tuned_forced_getvalues( SCATTER, &(data->user_forced[SCATTER])); + ompi_coll_tuned_forced_getvalues( SCATTERV, &(data->user_forced[SCATTERV])); + + if( NULL != mca_coll_tuned_component.all_base_rules ) { + /* extract our customized communicator sized rule set, for each collective */ + for( i = 0; i < COLLCOUNT; i++ ) { + data->com_rules[i] = ompi_coll_tuned_get_com_rule_ptr( mca_coll_tuned_component.all_base_rules, + i, size ); + } } } - /* next dynamic state, recheck all forced rules as well */ - /* warning, we should check to make sure this is really an INTRA comm here... 
*/ - if (ompi_coll_tuned_use_dynamic_rules) { - ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLREDUCE], &(data->user_forced[ALLREDUCE])); - ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLTOALL], &(data->user_forced[ALLTOALL])); - ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLGATHER], &(data->user_forced[ALLGATHER])); - ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLGATHERV], &(data->user_forced[ALLGATHERV])); - ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLTOALLV], &(data->user_forced[ALLTOALLV])); - ompi_coll_tuned_forced_getvalues_barrier (ompi_coll_tuned_forced_params[BARRIER], &(data->user_forced[BARRIER])); - ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[BCAST], &(data->user_forced[BCAST])); - ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[REDUCE], &(data->user_forced[REDUCE])); - ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[REDUCESCATTER], &(data->user_forced[REDUCESCATTER])); - ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[GATHER], &(data->user_forced[GATHER])); - ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[SCATTER], &(data->user_forced[SCATTER])); - } - - - if (&ompi_mpi_comm_world.comm==comm) { - if (ompi_coll_tuned_use_dynamic_rules) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init MCW & Dynamic")); - if (ompi_coll_tuned_dynamic_rules_filename) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init Opening [%s]", - ompi_coll_tuned_dynamic_rules_filename)); - rc = ompi_coll_tuned_read_rules_config_file (ompi_coll_tuned_dynamic_rules_filename, - &(data->all_base_rules), COLLCOUNT); - if (rc>=0) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init Read %d valid rules\n", rc)); - /* at this point we all have a base set of rules */ - /* now we can get our customized communicator sized rule set, for each collective */ - for (i=0;icom_rules[i] = ompi_coll_tuned_get_com_rule_ptr (data->all_base_rules, i, size); - } - } - else { /* failed to read config file, thus make sure its a NULL... */ - data->all_base_rules = (ompi_coll_alg_rule_t*) NULL; - } - } /* end if a config filename exists */ - } /* end if dynamic_rules */ - } /* end if MCW */ - - /* ok, if using dynamic rules, not MCW and we are just any rank and a base set of rules exist.. ref them */ - /* order of eval is important here, if we are MCW ompi_mpi_comm_world.c_coll_selected_data is NULL still.. */ - -#if 0 /* FIXME: don't know how to deal with this */ - if ((ompi_coll_tuned_use_dynamic_rules)&&(!(&ompi_mpi_comm_world==comm))&& - ((ompi_mpi_comm_world.c_coll_selected_data)->all_base_rules)) { - - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init NOT MCW & Dynamic")); - - /* this will, erm fail if MCW doesn't exist which it should! 
*/ - data->all_base_rules = (ompi_mpi_comm_world.c_coll_selected_data)->all_base_rules; - - /* at this point we all have a base set of rules if they exist atall */ - /* now we can get our customized communicator sized rule set, for each collective */ - for (i=0;icom_rules[i] = ompi_coll_tuned_get_com_rule_ptr (data->all_base_rules, i, size); - } - } -#endif - - /* - * now for the cached topo functions - * guess the initial topologies to use rank 0 as root - */ - /* general n fan out tree */ - data->cached_ntree = ompi_coll_tuned_topo_build_tree (ompi_coll_tuned_init_tree_fanout, - comm, 0); - data->cached_ntree_root = 0; - data->cached_ntree_fanout = ompi_coll_tuned_init_tree_fanout; - + data->cached_ntree = NULL; /* binary tree */ - data->cached_bintree = ompi_coll_tuned_topo_build_tree (2, comm, 0); - data->cached_bintree_root = 0; - + data->cached_bintree = NULL; /* binomial tree */ - data->cached_bmtree = ompi_coll_tuned_topo_build_bmtree (comm, 0); - data->cached_bmtree_root = 0; - + data->cached_bmtree = NULL; /* binomial tree */ - data->cached_in_order_bmtree = ompi_coll_tuned_topo_build_in_order_bmtree (comm, 0); - data->cached_in_order_bmtree_root = 0; - /* - * chains (fanout followed by pipelines) - * are more difficuilt as the fan out really really depends on message size [sometimes].. - * as size gets larger fan-out gets smaller [usually] - * - * will probably change how we cache this later, for now a midsize - * GEF - */ - data->cached_chain = ompi_coll_tuned_topo_build_chain (ompi_coll_tuned_init_chain_fanout, - comm, 0); - data->cached_chain_root = 0; - data->cached_chain_fanout = ompi_coll_tuned_init_chain_fanout; - + data->cached_in_order_bmtree = NULL; + /* chains (fanout followed by pipelines) */ + data->cached_chain = NULL; /* standard pipeline */ - data->cached_pipeline = ompi_coll_tuned_topo_build_chain (1, comm, 0); - data->cached_pipeline_root = 0; - + data->cached_pipeline = NULL; /* in-order binary tree */ - data->cached_in_order_bintree = ompi_coll_tuned_topo_build_in_order_bintree(comm); + data->cached_in_order_bintree = NULL; /* All done */ - tuned_module->tuned_data = data; OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init Tuned is in use")); diff --git a/ompi/mca/coll/tuned/coll_tuned_reduce.c b/ompi/mca/coll/tuned/coll_tuned_reduce.c index 9cdaec127c..56eca6ae4a 100644 --- a/ompi/mca/coll/tuned/coll_tuned_reduce.c +++ b/ompi/mca/coll/tuned/coll_tuned_reduce.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2009 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -43,7 +43,7 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_count, ompi_datatype_t* datatype, ompi_op_t* op, int root, ompi_communicator_t* comm, - mca_coll_base_module_t *module, + mca_coll_base_module_t *module, ompi_coll_tree_t* tree, int count_by_segment, int max_outstanding_reqs ) { @@ -79,7 +79,7 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c if( tree->tree_nextsize > 0 ) { ptrdiff_t true_lower_bound, true_extent, real_segment_size; ompi_datatype_get_true_extent( datatype, &true_lower_bound, - &true_extent ); + &true_extent ); /* handle non existant recv buffer (i.e. 
its NULL) and protect the recv buffer on non-root nodes */ @@ -88,18 +88,18 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c /* Allocate temporary accumulator buffer. */ accumbuf_free = (char*)malloc(true_extent + (original_count - 1) * extent); - if (accumbuf_free == NULL) { + if (accumbuf_free == NULL) { line = __LINE__; ret = -1; goto error_hndl; } accumbuf = accumbuf_free - lower_bound; - } + } /* If this is a non-commutative operation we must copy sendbuf to the accumbuf, in order to simplfy the loops */ if (!ompi_op_is_commute(op)) { - ompi_datatype_copy_content_same_ddt(datatype, original_count, - (char*)accumbuf, - (char*)sendtmpbuf); + ompi_datatype_copy_content_same_ddt(datatype, original_count, + (char*)accumbuf, + (char*)sendtmpbuf); } /* Allocate two buffers for incoming segments */ real_segment_size = true_extent + (count_by_segment - 1) * extent; @@ -232,11 +232,11 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c the number of segments we have two options: - send all segments using blocking send to the parent, or - avoid overflooding the parent nodes by limiting the number of - outstanding requests to max_oustanding_reqs. + outstanding requests to max_oustanding_reqs. TODO/POSSIBLE IMPROVEMENT: If there is a way to determine the eager size for the current communication, synchronization should be used only when the message/segment size is smaller than the eager size. - */ + */ else { /* If the number of segments is less than a maximum number of oustanding @@ -266,9 +266,9 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c /* Otherwise, introduce flow control: - post max_outstanding_reqs non-blocking synchronous send, - for remaining segments - - wait for a ssend to complete, and post the next one. + - wait for a ssend to complete, and post the next one. - wait for all outstanding sends to complete. 
- */ + */ else { int creq = 0; @@ -346,7 +346,7 @@ int ompi_coll_tuned_reduce_intra_chain( void *sendbuf, void *recvbuf, int count, ompi_datatype_t* datatype, ompi_op_t* op, int root, ompi_communicator_t* comm, - mca_coll_base_module_t *module, + mca_coll_base_module_t *module, uint32_t segsize, int fanout, int max_outstanding_reqs ) { @@ -376,7 +376,7 @@ int ompi_coll_tuned_reduce_intra_pipeline( void *sendbuf, void *recvbuf, int count, ompi_datatype_t* datatype, ompi_op_t* op, int root, ompi_communicator_t* comm, - mca_coll_base_module_t *module, + mca_coll_base_module_t *module, uint32_t segsize, int max_outstanding_reqs ) { @@ -407,7 +407,7 @@ int ompi_coll_tuned_reduce_intra_binary( void *sendbuf, void *recvbuf, int count, ompi_datatype_t* datatype, ompi_op_t* op, int root, ompi_communicator_t* comm, - mca_coll_base_module_t *module, + mca_coll_base_module_t *module, uint32_t segsize, int max_outstanding_reqs ) { @@ -438,7 +438,7 @@ int ompi_coll_tuned_reduce_intra_binomial( void *sendbuf, void *recvbuf, int count, ompi_datatype_t* datatype, ompi_op_t* op, int root, ompi_communicator_t* comm, - mca_coll_base_module_t *module, + mca_coll_base_module_t *module, uint32_t segsize, int max_outstanding_reqs ) { @@ -477,7 +477,7 @@ int ompi_coll_tuned_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf, ompi_datatype_t* datatype, ompi_op_t* op, int root, ompi_communicator_t* comm, - mca_coll_base_module_t *module, + mca_coll_base_module_t *module, uint32_t segsize, int max_outstanding_reqs ) { @@ -514,57 +514,57 @@ int ompi_coll_tuned_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf, use_this_sendbuf = sendbuf; use_this_recvbuf = recvbuf; if (io_root != root) { - ptrdiff_t tlb, text, lb, ext; - char *tmpbuf = NULL; + ptrdiff_t tlb, text, lb, ext; + char *tmpbuf = NULL; - ompi_datatype_get_extent(datatype, &lb, &ext); - ompi_datatype_get_true_extent(datatype, &tlb, &text); + ompi_datatype_get_extent(datatype, &lb, &ext); + ompi_datatype_get_true_extent(datatype, &tlb, &text); - if ((root == rank) && (MPI_IN_PLACE == sendbuf)) { - tmpbuf = (char *) malloc(text + (count - 1) * ext); - if (NULL == tmpbuf) { - return MPI_ERR_INTERN; - } - ompi_datatype_copy_content_same_ddt(datatype, count, - (char*)tmpbuf, - (char*)recvbuf); - use_this_sendbuf = tmpbuf; - } else if (io_root == rank) { - tmpbuf = (char *) malloc(text + (count - 1) * ext); - if (NULL == tmpbuf) { - return MPI_ERR_INTERN; - } - use_this_recvbuf = tmpbuf; - } + if ((root == rank) && (MPI_IN_PLACE == sendbuf)) { + tmpbuf = (char *) malloc(text + (count - 1) * ext); + if (NULL == tmpbuf) { + return MPI_ERR_INTERN; + } + ompi_datatype_copy_content_same_ddt(datatype, count, + (char*)tmpbuf, + (char*)recvbuf); + use_this_sendbuf = tmpbuf; + } else if (io_root == rank) { + tmpbuf = (char *) malloc(text + (count - 1) * ext); + if (NULL == tmpbuf) { + return MPI_ERR_INTERN; + } + use_this_recvbuf = tmpbuf; + } } /* Use generic reduce with in-order binary tree topology and io_root */ ret = ompi_coll_tuned_reduce_generic( use_this_sendbuf, use_this_recvbuf, count, datatype, - op, io_root, comm, module, + op, io_root, comm, module, data->cached_in_order_bintree, segcount, max_outstanding_reqs ); if (MPI_SUCCESS != ret) { return ret; } /* Clean up */ if (io_root != root) { - if (root == rank) { - /* Receive result from rank io_root to recvbuf */ - ret = MCA_PML_CALL(recv(recvbuf, count, datatype, io_root, - MCA_COLL_BASE_TAG_REDUCE, comm, - MPI_STATUS_IGNORE)); - if (MPI_SUCCESS != ret) { return ret; } - if (MPI_IN_PLACE == 
sendbuf) { - free(use_this_sendbuf); - } + if (root == rank) { + /* Receive result from rank io_root to recvbuf */ + ret = MCA_PML_CALL(recv(recvbuf, count, datatype, io_root, + MCA_COLL_BASE_TAG_REDUCE, comm, + MPI_STATUS_IGNORE)); + if (MPI_SUCCESS != ret) { return ret; } + if (MPI_IN_PLACE == sendbuf) { + free(use_this_sendbuf); + } - } else if (io_root == rank) { - /* Send result from use_this_recvbuf to root */ - ret = MCA_PML_CALL(send(use_this_recvbuf, count, datatype, root, - MCA_COLL_BASE_TAG_REDUCE, - MCA_PML_BASE_SEND_STANDARD, comm)); - if (MPI_SUCCESS != ret) { return ret; } - free(use_this_recvbuf); - } + } else if (io_root == rank) { + /* Send result from use_this_recvbuf to root */ + ret = MCA_PML_CALL(send(use_this_recvbuf, count, datatype, root, + MCA_COLL_BASE_TAG_REDUCE, + MCA_PML_BASE_SEND_STANDARD, comm)); + if (MPI_SUCCESS != ret) { return ret; } + free(use_this_recvbuf); + } } return MPI_SUCCESS; @@ -596,8 +596,8 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) { int i, rank, err, size; ptrdiff_t true_lb, true_extent, lb, extent; @@ -650,7 +650,7 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count, if (rank == (size - 1)) { err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)rbuf, - (char*)sbuf); + (char*)sbuf); } else { err = MCA_PML_CALL(recv(rbuf, count, dtype, size - 1, MCA_COLL_BASE_TAG_REDUCE, comm, @@ -688,7 +688,7 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count, if (NULL != inplace_temp) { err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)sbuf, - inplace_temp); + inplace_temp); free(inplace_temp); } if (NULL != free_buffer) { @@ -789,7 +789,7 @@ int ompi_coll_tuned_reduce_intra_do_forced(void *sbuf, void* rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; mca_coll_tuned_comm_t *data = tuned_module->tuned_data; @@ -808,19 +808,19 @@ int ompi_coll_tuned_reduce_intra_do_forced(void *sbuf, void* rbuf, int count, case (1): return ompi_coll_tuned_reduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, root, comm, module); case (2): return ompi_coll_tuned_reduce_intra_chain (sbuf, rbuf, count, dtype, - op, root, comm, module, - segsize, chain_fanout, max_requests); + op, root, comm, module, + segsize, chain_fanout, max_requests); case (3): return ompi_coll_tuned_reduce_intra_pipeline (sbuf, rbuf, count, dtype, - op, root, comm, module, - segsize, max_requests); + op, root, comm, module, + segsize, max_requests); case (4): return ompi_coll_tuned_reduce_intra_binary (sbuf, rbuf, count, dtype, - op, root, comm, module, - segsize, max_requests); + op, root, comm, module, + segsize, max_requests); case (5): return ompi_coll_tuned_reduce_intra_binomial (sbuf, rbuf, count, dtype, - op, root, comm, module, - segsize, max_requests); + op, root, comm, module, + segsize, max_requests); case (6): return ompi_coll_tuned_reduce_intra_in_order_binary(sbuf, rbuf, count, dtype, - op, root, comm, module, + op, root, comm, module, segsize, max_requests); default: OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_forced attempt to select algorithm %d when 
only 0-%d is valid?", @@ -834,7 +834,7 @@ int ompi_coll_tuned_reduce_intra_do_this(void *sbuf, void* rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module, + mca_coll_base_module_t *module, int algorithm, int faninout, int segsize, int max_requests ) { @@ -843,23 +843,23 @@ int ompi_coll_tuned_reduce_intra_do_this(void *sbuf, void* rbuf, int count, switch (algorithm) { case (0): return ompi_coll_tuned_reduce_intra_dec_fixed (sbuf, rbuf, count, dtype, - op, root, comm, module); + op, root, comm, module); case (1): return ompi_coll_tuned_reduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, root, comm, module); case (2): return ompi_coll_tuned_reduce_intra_chain (sbuf, rbuf, count, dtype, - op, root, comm, module, + op, root, comm, module, segsize, faninout, max_requests); case (3): return ompi_coll_tuned_reduce_intra_pipeline (sbuf, rbuf, count, dtype, - op, root, comm, module, - segsize, max_requests); + op, root, comm, module, + segsize, max_requests); case (4): return ompi_coll_tuned_reduce_intra_binary (sbuf, rbuf, count, dtype, - op, root, comm, module, - segsize, max_requests); + op, root, comm, module, + segsize, max_requests); case (5): return ompi_coll_tuned_reduce_intra_binomial (sbuf, rbuf, count, dtype, - op, root, comm, module, - segsize, max_requests); + op, root, comm, module, + segsize, max_requests); case (6): return ompi_coll_tuned_reduce_intra_in_order_binary(sbuf, rbuf, count, dtype, - op, root, comm, module, + op, root, comm, module, segsize, max_requests); default: OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",