From 23e8ce91bae3779ba32f236ec5330ca743c1f55c Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Fri, 14 Aug 2009 21:06:23 +0000 Subject: [PATCH] Rework the selection logic for the tuned collectives. All supported collectives now are able to use the dynamic rules. Moreover, these rules are loaded only once, and stored at the component level. All communicators are able to use these rules (not only MPI_COMM_WORLD as until now). A lot of minor corrections, memory management issues and reduction in the amount of memory used by the tuned collectives. This commit was SVN r21825. --- ompi/mca/coll/tuned/Makefile.am | 4 +- ompi/mca/coll/tuned/coll_tuned.h | 582 +++++++++--------- ompi/mca/coll/tuned/coll_tuned_component.c | 93 +-- .../coll/tuned/coll_tuned_decision_dynamic.c | 302 +++++---- .../coll/tuned/coll_tuned_decision_fixed.c | 408 ++++++------ .../mca/coll/tuned/coll_tuned_dynamic_rules.c | 35 +- ompi/mca/coll/tuned/coll_tuned_forced.c | 62 -- ompi/mca/coll/tuned/coll_tuned_forced.h | 71 --- ompi/mca/coll/tuned/coll_tuned_gather.c | 58 +- ompi/mca/coll/tuned/coll_tuned_module.c | 299 ++++----- ompi/mca/coll/tuned/coll_tuned_reduce.c | 158 ++--- 11 files changed, 964 insertions(+), 1108 deletions(-) delete mode 100644 ompi/mca/coll/tuned/coll_tuned_forced.c delete mode 100644 ompi/mca/coll/tuned/coll_tuned_forced.h diff --git a/ompi/mca/coll/tuned/Makefile.am b/ompi/mca/coll/tuned/Makefile.am index bfbd1d9f4f..7e607257dd 100644 --- a/ompi/mca/coll/tuned/Makefile.am +++ b/ompi/mca/coll/tuned/Makefile.am @@ -2,7 +2,7 @@ # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University +# Copyright (c) 2004-2009 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. 
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -22,14 +22,12 @@ sources = \ coll_tuned_util.h \ coll_tuned_dynamic_file.h \ coll_tuned_dynamic_rules.h \ - coll_tuned_forced.h \ coll_tuned_topo.c \ coll_tuned_util.c \ coll_tuned_decision_fixed.c \ coll_tuned_decision_dynamic.c \ coll_tuned_dynamic_file.c \ coll_tuned_dynamic_rules.c \ - coll_tuned_forced.c \ coll_tuned_allreduce.c \ coll_tuned_alltoall.c \ coll_tuned_alltoallv.c \ diff --git a/ompi/mca/coll/tuned/coll_tuned.h b/ompi/mca/coll/tuned/coll_tuned.h index 19c858de38..144236df6a 100644 --- a/ompi/mca/coll/tuned/coll_tuned.h +++ b/ompi/mca/coll/tuned/coll_tuned.h @@ -2,7 +2,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2009 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -34,12 +34,26 @@ /* also need the dynamic rule structures */ #include "coll_tuned_dynamic_rules.h" -/* need the forced user choice structures */ -#include "coll_tuned_forced.h" - /* some fixed value index vars to simplify certain operations */ -typedef enum COLLTYPE {ALLGATHER, ALLGATHERV, ALLREDUCE, ALLTOALL, ALLTOALLV, ALLTOALLW, BARRIER, BCAST, -EXSCAN, GATHER, GATHERV, REDUCE, REDUCESCATTER, SCAN, SCATTER, SCATTERV, COLLCOUNT} COLLTYPE_T; +typedef enum COLLTYPE { + ALLGATHER = 0, /* 0 */ + ALLGATHERV, /* 1 */ + ALLREDUCE, /* 2 */ + ALLTOALL, /* 3 */ + ALLTOALLV, /* 4 */ + ALLTOALLW, /* 5 */ + BARRIER, /* 6 */ + BCAST, /* 7 */ + EXSCAN, /* 8 */ + GATHER, /* 9 */ + GATHERV, /* 10 */ + REDUCE, /* 11 */ + REDUCESCATTER, /* 12 */ + SCAN, /* 13 */ + SCATTER, /* 14 */ + SCATTERV, /* 15 */ + COLLCOUNT /* 16 end counter keep it as last element */ +} COLLTYPE_T; /* defined arg lists to simply auto inclusion of user overriding decision functions */ #define ALLGATHER_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module @@ -60,343 +74,357 @@ EXSCAN, GATHER, GATHERV, REDUCE, REDUCESCATTER, SCAN, SCATTER, SCATTERV, COLLCOU #define SCATTERV_ARGS void *sbuf, int *scounts, int *disps, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module /* end defined arg lists to simply auto inclusion of user overriding decision functions */ -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +BEGIN_C_DECLS - /* these are the same across all modules and are loaded at component query time */ - extern int ompi_coll_tuned_stream; - extern int ompi_coll_tuned_priority; - extern int ompi_coll_tuned_preallocate_memory_comm_size_limit; - extern int 
ompi_coll_tuned_use_dynamic_rules; - extern char* ompi_coll_tuned_dynamic_rules_filename; - extern int ompi_coll_tuned_init_tree_fanout; - extern int ompi_coll_tuned_init_chain_fanout; - extern int ompi_coll_tuned_init_max_requests; +/* these are the same across all modules and are loaded at component query time */ +extern int ompi_coll_tuned_stream; +extern int ompi_coll_tuned_priority; +extern int ompi_coll_tuned_preallocate_memory_comm_size_limit; +extern int ompi_coll_tuned_use_dynamic_rules; +extern char* ompi_coll_tuned_dynamic_rules_filename; +extern int ompi_coll_tuned_init_tree_fanout; +extern int ompi_coll_tuned_init_chain_fanout; +extern int ompi_coll_tuned_init_max_requests; - /* forced algorithm choices */ - /* the indices to the MCA params so that modules can look them up at open / comm create time */ - extern coll_tuned_force_algorithm_mca_param_indices_t ompi_coll_tuned_forced_params[COLLCOUNT]; - /* the actual max algorithm values (readonly), loaded at component open */ - extern int ompi_coll_tuned_forced_max_algorithms[COLLCOUNT]; - - /* - * coll API functions - */ - - /* API functions */ - - int ompi_coll_tuned_init_query(bool enable_progress_threads, - bool enable_mpi_threads); - - mca_coll_base_module_t * - ompi_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority); - - /* API functions of decision functions and any implementations */ - - /* - * Note this gets long as we have to have a prototype for each - * MPI collective 4 times.. 2 for the comm type and 2 for each decision - * type. 
- * we might cut down the decision prototypes by conditional compiling - */ - - /* All Gather */ - int ompi_coll_tuned_allgather_intra_dec_fixed(ALLGATHER_ARGS); - int ompi_coll_tuned_allgather_intra_dec_dynamic(ALLGATHER_ARGS); - int ompi_coll_tuned_allgather_intra_do_forced(ALLGATHER_ARGS); - int ompi_coll_tuned_allgather_intra_do_this(ALLGATHER_ARGS, int algorithm, int faninout, int segsize); - int ompi_coll_tuned_allgather_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); - int ompi_coll_tuned_allgather_intra_bruck(ALLGATHER_ARGS); - int ompi_coll_tuned_allgather_intra_recursivedoubling(ALLGATHER_ARGS); - int ompi_coll_tuned_allgather_intra_ring(ALLGATHER_ARGS); - int ompi_coll_tuned_allgather_intra_neighborexchange(ALLGATHER_ARGS); - int ompi_coll_tuned_allgather_intra_basic_linear(ALLGATHER_ARGS); - int ompi_coll_tuned_allgather_intra_two_procs(ALLGATHER_ARGS); - int ompi_coll_tuned_allgather_inter_dec_fixed(ALLGATHER_ARGS); - int ompi_coll_tuned_allgather_inter_dec_dynamic(ALLGATHER_ARGS); - - /* All GatherV */ - int ompi_coll_tuned_allgatherv_intra_dec_fixed(ALLGATHERV_ARGS); - int ompi_coll_tuned_allgatherv_intra_dec_dynamic(ALLGATHERV_ARGS); - int ompi_coll_tuned_allgatherv_intra_do_forced(ALLGATHERV_ARGS); - int ompi_coll_tuned_allgatherv_intra_do_this(ALLGATHERV_ARGS, int algorithm, int faninout, int segsize); - int ompi_coll_tuned_allgatherv_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); - int ompi_coll_tuned_allgatherv_intra_bruck(ALLGATHERV_ARGS); - int ompi_coll_tuned_allgatherv_intra_ring(ALLGATHERV_ARGS); - int ompi_coll_tuned_allgatherv_intra_neighborexchange(ALLGATHERV_ARGS); - int ompi_coll_tuned_allgatherv_intra_basic_default(ALLGATHERV_ARGS); - int ompi_coll_tuned_allgatherv_intra_two_procs(ALLGATHERV_ARGS); - int ompi_coll_tuned_allgatherv_inter_dec_fixed(ALLGATHERV_ARGS); - int ompi_coll_tuned_allgatherv_inter_dec_dynamic(ALLGATHERV_ARGS); - - /* All 
Reduce */ - int ompi_coll_tuned_allreduce_intra_dec_fixed(ALLREDUCE_ARGS); - int ompi_coll_tuned_allreduce_intra_dec_dynamic(ALLREDUCE_ARGS); - int ompi_coll_tuned_allreduce_intra_do_forced(ALLREDUCE_ARGS); - int ompi_coll_tuned_allreduce_intra_do_this(ALLREDUCE_ARGS, int algorithm, int faninout, int segsize); - int ompi_coll_tuned_allreduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); - int ompi_coll_tuned_allreduce_intra_nonoverlapping(ALLREDUCE_ARGS); - int ompi_coll_tuned_allreduce_intra_recursivedoubling(ALLREDUCE_ARGS); - int ompi_coll_tuned_allreduce_intra_ring(ALLREDUCE_ARGS); - int ompi_coll_tuned_allreduce_intra_ring_segmented(ALLREDUCE_ARGS, uint32_t segsize); - int ompi_coll_tuned_allreduce_intra_basic_linear(ALLREDUCE_ARGS); - int ompi_coll_tuned_allreduce_inter_dec_fixed(ALLREDUCE_ARGS); - int ompi_coll_tuned_allreduce_inter_dec_dynamic(ALLREDUCE_ARGS); - - /* AlltoAll */ - int ompi_coll_tuned_alltoall_intra_dec_fixed(ALLTOALL_ARGS); - int ompi_coll_tuned_alltoall_intra_dec_dynamic(ALLTOALL_ARGS); - int ompi_coll_tuned_alltoall_intra_do_forced(ALLTOALL_ARGS); - int ompi_coll_tuned_alltoall_intra_do_this(ALLTOALL_ARGS, int algorithm, int faninout, int segsize, int max_requests); - int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); - int ompi_coll_tuned_alltoall_intra_pairwise(ALLTOALL_ARGS); - int ompi_coll_tuned_alltoall_intra_bruck(ALLTOALL_ARGS); - int ompi_coll_tuned_alltoall_intra_basic_linear(ALLTOALL_ARGS); - int ompi_coll_tuned_alltoall_intra_linear_sync(ALLTOALL_ARGS, int max_requests); - int ompi_coll_tuned_alltoall_intra_two_procs(ALLTOALL_ARGS); - int ompi_coll_tuned_alltoall_inter_dec_fixed(ALLTOALL_ARGS); - int ompi_coll_tuned_alltoall_inter_dec_dynamic(ALLTOALL_ARGS); - - /* AlltoAllV */ - int ompi_coll_tuned_alltoallv_intra_dec_fixed(ALLTOALLV_ARGS); - int ompi_coll_tuned_alltoallv_intra_dec_dynamic(ALLTOALLV_ARGS); - 
int ompi_coll_tuned_alltoallv_intra_do_forced(ALLTOALLV_ARGS); - int ompi_coll_tuned_alltoallv_intra_do_this(ALLTOALLV_ARGS, int algorithm); - int ompi_coll_tuned_alltoallv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); - int ompi_coll_tuned_alltoallv_intra_pairwise(ALLTOALLV_ARGS); - int ompi_coll_tuned_alltoallv_intra_basic_linear(ALLTOALLV_ARGS); - int ompi_coll_tuned_alltoallv_inter_dec_fixed(ALLTOALLV_ARGS); - int ompi_coll_tuned_alltoallv_inter_dec_dynamic(ALLTOALLV_ARGS); - - /* AlltoAllW */ - int ompi_coll_tuned_alltoallw_intra_dec_fixed(ALLTOALLW_ARGS); - int ompi_coll_tuned_alltoallw_intra_dec_dynamic(ALLTOALLW_ARGS); - int ompi_coll_tuned_alltoallw_inter_dec_fixed(ALLTOALLW_ARGS); - int ompi_coll_tuned_alltoallw_inter_dec_dynamic(ALLTOALLW_ARGS); - - /* Barrier */ - int ompi_coll_tuned_barrier_intra_dec_fixed(BARRIER_ARGS); - int ompi_coll_tuned_barrier_intra_dec_dynamic(BARRIER_ARGS); - int ompi_coll_tuned_barrier_intra_do_forced(BARRIER_ARGS); - int ompi_coll_tuned_barrier_intra_do_this(BARRIER_ARGS, int algorithm, int faninout, int segsize); - int ompi_coll_tuned_barrier_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); - int ompi_coll_tuned_barrier_inter_dec_fixed(BARRIER_ARGS); - int ompi_coll_tuned_barrier_inter_dec_dynamic(BARRIER_ARGS); - int ompi_coll_tuned_barrier_intra_doublering(BARRIER_ARGS); - int ompi_coll_tuned_barrier_intra_recursivedoubling(BARRIER_ARGS); - int ompi_coll_tuned_barrier_intra_bruck(BARRIER_ARGS); - int ompi_coll_tuned_barrier_intra_two_procs(BARRIER_ARGS); - int ompi_coll_tuned_barrier_intra_linear(BARRIER_ARGS); - int ompi_coll_tuned_barrier_intra_tree(BARRIER_ARGS); - - /* Bcast */ - int ompi_coll_tuned_bcast_intra_generic( BCAST_ARGS, uint32_t count_by_segment, ompi_coll_tree_t* tree ); - int ompi_coll_tuned_bcast_intra_dec_fixed(BCAST_ARGS); - int ompi_coll_tuned_bcast_intra_dec_dynamic(BCAST_ARGS); - int 
ompi_coll_tuned_bcast_intra_do_forced(BCAST_ARGS); - int ompi_coll_tuned_bcast_intra_do_this(BCAST_ARGS, int algorithm, int faninout, int segsize); - int ompi_coll_tuned_bcast_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); - int ompi_coll_tuned_bcast_intra_basic_linear(BCAST_ARGS); - int ompi_coll_tuned_bcast_intra_chain(BCAST_ARGS, uint32_t segsize, int32_t chains); - int ompi_coll_tuned_bcast_intra_pipeline(BCAST_ARGS, uint32_t segsize); - int ompi_coll_tuned_bcast_intra_binomial(BCAST_ARGS, uint32_t segsize); - int ompi_coll_tuned_bcast_intra_bintree(BCAST_ARGS, uint32_t segsize); - int ompi_coll_tuned_bcast_intra_split_bintree(BCAST_ARGS, uint32_t segsize); - int ompi_coll_tuned_bcast_inter_dec_fixed(BCAST_ARGS); - int ompi_coll_tuned_bcast_inter_dec_dynamic(BCAST_ARGS); - - /* Exscan */ - int ompi_coll_tuned_exscan_intra_dec_fixed(EXSCAN_ARGS); - int ompi_coll_tuned_exscan_intra_dec_dynamic(EXSCAN_ARGS); - int ompi_coll_tuned_exscan_inter_dec_fixed(EXSCAN_ARGS); - int ompi_coll_tuned_exscan_inter_dec_dynamic(EXSCAN_ARGS); - - /* Gather */ - int ompi_coll_tuned_gather_intra_dec_fixed(GATHER_ARGS); - int ompi_coll_tuned_gather_intra_dec_dynamic(GATHER_ARGS); - int ompi_coll_tuned_gather_intra_do_forced(GATHER_ARGS); - int ompi_coll_tuned_gather_intra_do_this(GATHER_ARGS, int algorithm, int faninout, int segsize); - int ompi_coll_tuned_gather_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); - int ompi_coll_tuned_gather_intra_basic_linear(GATHER_ARGS); - int ompi_coll_tuned_gather_intra_binomial(GATHER_ARGS); - int ompi_coll_tuned_gather_intra_linear_sync(GATHER_ARGS, int first_segment_size); - int ompi_coll_tuned_gather_inter_dec_fixed(GATHER_ARGS); - int ompi_coll_tuned_gather_inter_dec_dynamic(GATHER_ARGS); - - /* GatherV */ - int ompi_coll_tuned_gatherv_intra_dec_fixed(GATHERV_ARGS); - int ompi_coll_tuned_gatherv_intra_dec_dynamic(GATHER_ARGS); - int 
ompi_coll_tuned_gatherv_inter_dec_fixed(GATHER_ARGS); - int ompi_coll_tuned_gatherv_inter_dec_dynamic(GATHER_ARGS); - - /* Reduce */ - int ompi_coll_tuned_reduce_generic( REDUCE_ARGS, ompi_coll_tree_t* tree, int count_by_segment, int max_outstanding_reqs ); - int ompi_coll_tuned_reduce_intra_dec_fixed(REDUCE_ARGS); - int ompi_coll_tuned_reduce_intra_dec_dynamic(REDUCE_ARGS); - int ompi_coll_tuned_reduce_intra_do_forced(REDUCE_ARGS); - int ompi_coll_tuned_reduce_intra_do_this(REDUCE_ARGS, int algorithm, int faninout, int segsize, int max_oustanding_reqs); - int ompi_coll_tuned_reduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); - int ompi_coll_tuned_reduce_intra_basic_linear(REDUCE_ARGS); - int ompi_coll_tuned_reduce_intra_chain(REDUCE_ARGS, uint32_t segsize, int fanout, int max_outstanding_reqs ); - int ompi_coll_tuned_reduce_intra_pipeline(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs ); - int ompi_coll_tuned_reduce_intra_binary(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs ); - int ompi_coll_tuned_reduce_intra_binomial(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs ); - int ompi_coll_tuned_reduce_intra_in_order_binary(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs ); - int ompi_coll_tuned_reduce_inter_dec_fixed(REDUCE_ARGS); - int ompi_coll_tuned_reduce_inter_dec_dynamic(REDUCE_ARGS); - - /* Reduce_scatter */ - int ompi_coll_tuned_reduce_scatter_intra_dec_fixed(REDUCESCATTER_ARGS); - int ompi_coll_tuned_reduce_scatter_intra_dec_dynamic(REDUCESCATTER_ARGS); - int ompi_coll_tuned_reduce_scatter_intra_do_forced(REDUCESCATTER_ARGS); - int ompi_coll_tuned_reduce_scatter_intra_do_this(REDUCESCATTER_ARGS, int algorithm, int faninout, int segsize); - int ompi_coll_tuned_reduce_scatter_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); - int ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(REDUCESCATTER_ARGS); - int 
ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(REDUCESCATTER_ARGS); - int ompi_coll_tuned_reduce_scatter_intra_ring(REDUCESCATTER_ARGS); - - int ompi_coll_tuned_reduce_scatter_inter_dec_fixed(REDUCESCATTER_ARGS); - int ompi_coll_tuned_reduce_scatter_inter_dec_dynamic(REDUCESCATTER_ARGS); - - /* Scan */ - int ompi_coll_tuned_scan_intra_dec_fixed(SCAN_ARGS); - int ompi_coll_tuned_scan_intra_dec_dynamic(SCAN_ARGS); - int ompi_coll_tuned_scan_inter_dec_fixed(SCAN_ARGS); - int ompi_coll_tuned_scan_inter_dec_dynamic(SCAN_ARGS); - - /* Scatter */ - int ompi_coll_tuned_scatter_intra_dec_fixed(SCATTER_ARGS); - int ompi_coll_tuned_scatter_intra_dec_dynamic(SCATTER_ARGS); - int ompi_coll_tuned_scatter_intra_do_forced(SCATTER_ARGS); - int ompi_coll_tuned_scatter_intra_do_this(SCATTER_ARGS, int algorithm, int faninout, int segsize); - int ompi_coll_tuned_scatter_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); - int ompi_coll_tuned_scatter_intra_basic_linear(SCATTER_ARGS); - int ompi_coll_tuned_scatter_intra_binomial(SCATTER_ARGS); - int ompi_coll_tuned_scatter_inter_dec_fixed(SCATTER_ARGS); - int ompi_coll_tuned_scatter_inter_dec_dynamic(SCATTER_ARGS); - - /* ScatterV */ - int ompi_coll_tuned_scatterv_intra_dec_fixed(SCATTERV_ARGS); - int ompi_coll_tuned_scatterv_intra_dec_dynamic(SCATTERV_ARGS); - int ompi_coll_tuned_scatterv_inter_dec_fixed(SCATTERV_ARGS); - int ompi_coll_tuned_scatterv_inter_dec_dynamic(SCATTERV_ARGS); - - int mca_coll_tuned_ft_event(int state); +/* forced algorithm choices */ +/* this structure is for storing the indexes to the forced algorithm mca params... 
*/ +/* we get these at component query (so that registered values appear in ompi_info) */ +struct coll_tuned_force_algorithm_mca_param_indices_t { + int algorithm_param_index; /* which algorithm you want to force */ + int segsize_param_index; /* segsize to use (if supported), 0 = no segmentation */ + int tree_fanout_param_index; /* tree fanout/in to use */ + int chain_fanout_param_index; /* K-chain fanout/in to use */ + int max_requests_param_index; /* Maximum number of outstanding send or recv requests */ +}; +typedef struct coll_tuned_force_algorithm_mca_param_indices_t coll_tuned_force_algorithm_mca_param_indices_t; - /* Utility functions */ +/* the following type is for storing actual value obtained from the MCA on each tuned module */ +/* via their mca param indices lookup in the component */ +/* this structure is stored once per collective type per communicator... */ +struct coll_tuned_force_algorithm_params_t { + int algorithm; /* which algorithm you want to force */ + int segsize; /* segsize to use (if supported), 0 = no segmentation */ + int tree_fanout; /* tree fanout/in to use */ + int chain_fanout; /* K-chain fanout/in to use */ + int max_requests; /* Maximum number of outstanding send or recv requests */ +}; +typedef struct coll_tuned_force_algorithm_params_t coll_tuned_force_algorithm_params_t; - static inline void ompi_coll_tuned_free_reqs(ompi_request_t **reqs, int count) - { +/* the indices to the MCA params so that modules can look them up at open / comm create time */ +extern coll_tuned_force_algorithm_mca_param_indices_t ompi_coll_tuned_forced_params[COLLCOUNT]; +/* the actual max algorithm values (readonly), loaded at component open */ +extern int ompi_coll_tuned_forced_max_algorithms[COLLCOUNT]; + +/* + * coll API functions + */ + +/* API functions */ + +int ompi_coll_tuned_init_query(bool enable_progress_threads, + bool enable_mpi_threads); + +mca_coll_base_module_t * +ompi_coll_tuned_comm_query(struct ompi_communicator_t *comm, int 
*priority); + +/* API functions of decision functions and any implementations */ + +/* + * Note this gets long as we have to have a prototype for each + * MPI collective 4 times.. 2 for the comm type and 2 for each decision + * type. + * we might cut down the decision prototypes by conditional compiling + */ + +/* All Gather */ +int ompi_coll_tuned_allgather_intra_dec_fixed(ALLGATHER_ARGS); +int ompi_coll_tuned_allgather_intra_dec_dynamic(ALLGATHER_ARGS); +int ompi_coll_tuned_allgather_intra_do_forced(ALLGATHER_ARGS); +int ompi_coll_tuned_allgather_intra_do_this(ALLGATHER_ARGS, int algorithm, int faninout, int segsize); +int ompi_coll_tuned_allgather_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); +int ompi_coll_tuned_allgather_intra_bruck(ALLGATHER_ARGS); +int ompi_coll_tuned_allgather_intra_recursivedoubling(ALLGATHER_ARGS); +int ompi_coll_tuned_allgather_intra_ring(ALLGATHER_ARGS); +int ompi_coll_tuned_allgather_intra_neighborexchange(ALLGATHER_ARGS); +int ompi_coll_tuned_allgather_intra_basic_linear(ALLGATHER_ARGS); +int ompi_coll_tuned_allgather_intra_two_procs(ALLGATHER_ARGS); +int ompi_coll_tuned_allgather_inter_dec_fixed(ALLGATHER_ARGS); +int ompi_coll_tuned_allgather_inter_dec_dynamic(ALLGATHER_ARGS); + +/* All GatherV */ +int ompi_coll_tuned_allgatherv_intra_dec_fixed(ALLGATHERV_ARGS); +int ompi_coll_tuned_allgatherv_intra_dec_dynamic(ALLGATHERV_ARGS); +int ompi_coll_tuned_allgatherv_intra_do_forced(ALLGATHERV_ARGS); +int ompi_coll_tuned_allgatherv_intra_do_this(ALLGATHERV_ARGS, int algorithm, int faninout, int segsize); +int ompi_coll_tuned_allgatherv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); +int ompi_coll_tuned_allgatherv_intra_bruck(ALLGATHERV_ARGS); +int ompi_coll_tuned_allgatherv_intra_ring(ALLGATHERV_ARGS); +int ompi_coll_tuned_allgatherv_intra_neighborexchange(ALLGATHERV_ARGS); +int ompi_coll_tuned_allgatherv_intra_basic_default(ALLGATHERV_ARGS); +int 
ompi_coll_tuned_allgatherv_intra_two_procs(ALLGATHERV_ARGS); +int ompi_coll_tuned_allgatherv_inter_dec_fixed(ALLGATHERV_ARGS); +int ompi_coll_tuned_allgatherv_inter_dec_dynamic(ALLGATHERV_ARGS); + +/* All Reduce */ +int ompi_coll_tuned_allreduce_intra_dec_fixed(ALLREDUCE_ARGS); +int ompi_coll_tuned_allreduce_intra_dec_dynamic(ALLREDUCE_ARGS); +int ompi_coll_tuned_allreduce_intra_do_forced(ALLREDUCE_ARGS); +int ompi_coll_tuned_allreduce_intra_do_this(ALLREDUCE_ARGS, int algorithm, int faninout, int segsize); +int ompi_coll_tuned_allreduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); +int ompi_coll_tuned_allreduce_intra_nonoverlapping(ALLREDUCE_ARGS); +int ompi_coll_tuned_allreduce_intra_recursivedoubling(ALLREDUCE_ARGS); +int ompi_coll_tuned_allreduce_intra_ring(ALLREDUCE_ARGS); +int ompi_coll_tuned_allreduce_intra_ring_segmented(ALLREDUCE_ARGS, uint32_t segsize); +int ompi_coll_tuned_allreduce_intra_basic_linear(ALLREDUCE_ARGS); +int ompi_coll_tuned_allreduce_inter_dec_fixed(ALLREDUCE_ARGS); +int ompi_coll_tuned_allreduce_inter_dec_dynamic(ALLREDUCE_ARGS); + +/* AlltoAll */ +int ompi_coll_tuned_alltoall_intra_dec_fixed(ALLTOALL_ARGS); +int ompi_coll_tuned_alltoall_intra_dec_dynamic(ALLTOALL_ARGS); +int ompi_coll_tuned_alltoall_intra_do_forced(ALLTOALL_ARGS); +int ompi_coll_tuned_alltoall_intra_do_this(ALLTOALL_ARGS, int algorithm, int faninout, int segsize, int max_requests); +int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); +int ompi_coll_tuned_alltoall_intra_pairwise(ALLTOALL_ARGS); +int ompi_coll_tuned_alltoall_intra_bruck(ALLTOALL_ARGS); +int ompi_coll_tuned_alltoall_intra_basic_linear(ALLTOALL_ARGS); +int ompi_coll_tuned_alltoall_intra_linear_sync(ALLTOALL_ARGS, int max_requests); +int ompi_coll_tuned_alltoall_intra_two_procs(ALLTOALL_ARGS); +int ompi_coll_tuned_alltoall_inter_dec_fixed(ALLTOALL_ARGS); +int 
ompi_coll_tuned_alltoall_inter_dec_dynamic(ALLTOALL_ARGS); + +/* AlltoAllV */ +int ompi_coll_tuned_alltoallv_intra_dec_fixed(ALLTOALLV_ARGS); +int ompi_coll_tuned_alltoallv_intra_dec_dynamic(ALLTOALLV_ARGS); +int ompi_coll_tuned_alltoallv_intra_do_forced(ALLTOALLV_ARGS); +int ompi_coll_tuned_alltoallv_intra_do_this(ALLTOALLV_ARGS, int algorithm); +int ompi_coll_tuned_alltoallv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); +int ompi_coll_tuned_alltoallv_intra_pairwise(ALLTOALLV_ARGS); +int ompi_coll_tuned_alltoallv_intra_basic_linear(ALLTOALLV_ARGS); +int ompi_coll_tuned_alltoallv_inter_dec_fixed(ALLTOALLV_ARGS); +int ompi_coll_tuned_alltoallv_inter_dec_dynamic(ALLTOALLV_ARGS); + +/* AlltoAllW */ +int ompi_coll_tuned_alltoallw_intra_dec_fixed(ALLTOALLW_ARGS); +int ompi_coll_tuned_alltoallw_intra_dec_dynamic(ALLTOALLW_ARGS); +int ompi_coll_tuned_alltoallw_inter_dec_fixed(ALLTOALLW_ARGS); +int ompi_coll_tuned_alltoallw_inter_dec_dynamic(ALLTOALLW_ARGS); + +/* Barrier */ +int ompi_coll_tuned_barrier_intra_dec_fixed(BARRIER_ARGS); +int ompi_coll_tuned_barrier_intra_dec_dynamic(BARRIER_ARGS); +int ompi_coll_tuned_barrier_intra_do_forced(BARRIER_ARGS); +int ompi_coll_tuned_barrier_intra_do_this(BARRIER_ARGS, int algorithm, int faninout, int segsize); +int ompi_coll_tuned_barrier_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); +int ompi_coll_tuned_barrier_inter_dec_fixed(BARRIER_ARGS); +int ompi_coll_tuned_barrier_inter_dec_dynamic(BARRIER_ARGS); +int ompi_coll_tuned_barrier_intra_doublering(BARRIER_ARGS); +int ompi_coll_tuned_barrier_intra_recursivedoubling(BARRIER_ARGS); +int ompi_coll_tuned_barrier_intra_bruck(BARRIER_ARGS); +int ompi_coll_tuned_barrier_intra_two_procs(BARRIER_ARGS); +int ompi_coll_tuned_barrier_intra_linear(BARRIER_ARGS); +int ompi_coll_tuned_barrier_intra_tree(BARRIER_ARGS); + +/* Bcast */ +int ompi_coll_tuned_bcast_intra_generic( BCAST_ARGS, uint32_t 
count_by_segment, ompi_coll_tree_t* tree ); +int ompi_coll_tuned_bcast_intra_dec_fixed(BCAST_ARGS); +int ompi_coll_tuned_bcast_intra_dec_dynamic(BCAST_ARGS); +int ompi_coll_tuned_bcast_intra_do_forced(BCAST_ARGS); +int ompi_coll_tuned_bcast_intra_do_this(BCAST_ARGS, int algorithm, int faninout, int segsize); +int ompi_coll_tuned_bcast_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); +int ompi_coll_tuned_bcast_intra_basic_linear(BCAST_ARGS); +int ompi_coll_tuned_bcast_intra_chain(BCAST_ARGS, uint32_t segsize, int32_t chains); +int ompi_coll_tuned_bcast_intra_pipeline(BCAST_ARGS, uint32_t segsize); +int ompi_coll_tuned_bcast_intra_binomial(BCAST_ARGS, uint32_t segsize); +int ompi_coll_tuned_bcast_intra_bintree(BCAST_ARGS, uint32_t segsize); +int ompi_coll_tuned_bcast_intra_split_bintree(BCAST_ARGS, uint32_t segsize); +int ompi_coll_tuned_bcast_inter_dec_fixed(BCAST_ARGS); +int ompi_coll_tuned_bcast_inter_dec_dynamic(BCAST_ARGS); + +/* Exscan */ +int ompi_coll_tuned_exscan_intra_dec_fixed(EXSCAN_ARGS); +int ompi_coll_tuned_exscan_intra_dec_dynamic(EXSCAN_ARGS); +int ompi_coll_tuned_exscan_inter_dec_fixed(EXSCAN_ARGS); +int ompi_coll_tuned_exscan_inter_dec_dynamic(EXSCAN_ARGS); + +/* Gather */ +int ompi_coll_tuned_gather_intra_dec_fixed(GATHER_ARGS); +int ompi_coll_tuned_gather_intra_dec_dynamic(GATHER_ARGS); +int ompi_coll_tuned_gather_intra_do_forced(GATHER_ARGS); +int ompi_coll_tuned_gather_intra_do_this(GATHER_ARGS, int algorithm, int faninout, int segsize); +int ompi_coll_tuned_gather_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); +int ompi_coll_tuned_gather_intra_basic_linear(GATHER_ARGS); +int ompi_coll_tuned_gather_intra_binomial(GATHER_ARGS); +int ompi_coll_tuned_gather_intra_linear_sync(GATHER_ARGS, int first_segment_size); +int ompi_coll_tuned_gather_inter_dec_fixed(GATHER_ARGS); +int ompi_coll_tuned_gather_inter_dec_dynamic(GATHER_ARGS); + +/* GatherV */ +int 
ompi_coll_tuned_gatherv_intra_dec_fixed(GATHERV_ARGS); +int ompi_coll_tuned_gatherv_intra_dec_dynamic(GATHER_ARGS); +int ompi_coll_tuned_gatherv_inter_dec_fixed(GATHER_ARGS); +int ompi_coll_tuned_gatherv_inter_dec_dynamic(GATHER_ARGS); + +/* Reduce */ +int ompi_coll_tuned_reduce_generic( REDUCE_ARGS, ompi_coll_tree_t* tree, int count_by_segment, int max_outstanding_reqs ); +int ompi_coll_tuned_reduce_intra_dec_fixed(REDUCE_ARGS); +int ompi_coll_tuned_reduce_intra_dec_dynamic(REDUCE_ARGS); +int ompi_coll_tuned_reduce_intra_do_forced(REDUCE_ARGS); +int ompi_coll_tuned_reduce_intra_do_this(REDUCE_ARGS, int algorithm, int faninout, int segsize, int max_oustanding_reqs); +int ompi_coll_tuned_reduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); +int ompi_coll_tuned_reduce_intra_basic_linear(REDUCE_ARGS); +int ompi_coll_tuned_reduce_intra_chain(REDUCE_ARGS, uint32_t segsize, int fanout, int max_outstanding_reqs ); +int ompi_coll_tuned_reduce_intra_pipeline(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs ); +int ompi_coll_tuned_reduce_intra_binary(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs ); +int ompi_coll_tuned_reduce_intra_binomial(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs ); +int ompi_coll_tuned_reduce_intra_in_order_binary(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs ); +int ompi_coll_tuned_reduce_inter_dec_fixed(REDUCE_ARGS); +int ompi_coll_tuned_reduce_inter_dec_dynamic(REDUCE_ARGS); + +/* Reduce_scatter */ +int ompi_coll_tuned_reduce_scatter_intra_dec_fixed(REDUCESCATTER_ARGS); +int ompi_coll_tuned_reduce_scatter_intra_dec_dynamic(REDUCESCATTER_ARGS); +int ompi_coll_tuned_reduce_scatter_intra_do_forced(REDUCESCATTER_ARGS); +int ompi_coll_tuned_reduce_scatter_intra_do_this(REDUCESCATTER_ARGS, int algorithm, int faninout, int segsize); +int ompi_coll_tuned_reduce_scatter_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); +int 
ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(REDUCESCATTER_ARGS); +int ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(REDUCESCATTER_ARGS); +int ompi_coll_tuned_reduce_scatter_intra_ring(REDUCESCATTER_ARGS); + +int ompi_coll_tuned_reduce_scatter_inter_dec_fixed(REDUCESCATTER_ARGS); +int ompi_coll_tuned_reduce_scatter_inter_dec_dynamic(REDUCESCATTER_ARGS); + +/* Scan */ +int ompi_coll_tuned_scan_intra_dec_fixed(SCAN_ARGS); +int ompi_coll_tuned_scan_intra_dec_dynamic(SCAN_ARGS); +int ompi_coll_tuned_scan_inter_dec_fixed(SCAN_ARGS); +int ompi_coll_tuned_scan_inter_dec_dynamic(SCAN_ARGS); + +/* Scatter */ +int ompi_coll_tuned_scatter_intra_dec_fixed(SCATTER_ARGS); +int ompi_coll_tuned_scatter_intra_dec_dynamic(SCATTER_ARGS); +int ompi_coll_tuned_scatter_intra_do_forced(SCATTER_ARGS); +int ompi_coll_tuned_scatter_intra_do_this(SCATTER_ARGS, int algorithm, int faninout, int segsize); +int ompi_coll_tuned_scatter_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); +int ompi_coll_tuned_scatter_intra_basic_linear(SCATTER_ARGS); +int ompi_coll_tuned_scatter_intra_binomial(SCATTER_ARGS); +int ompi_coll_tuned_scatter_inter_dec_fixed(SCATTER_ARGS); +int ompi_coll_tuned_scatter_inter_dec_dynamic(SCATTER_ARGS); + +/* ScatterV */ +int ompi_coll_tuned_scatterv_intra_dec_fixed(SCATTERV_ARGS); +int ompi_coll_tuned_scatterv_intra_dec_dynamic(SCATTERV_ARGS); +int ompi_coll_tuned_scatterv_inter_dec_fixed(SCATTERV_ARGS); +int ompi_coll_tuned_scatterv_inter_dec_dynamic(SCATTERV_ARGS); + +int mca_coll_tuned_ft_event(int state); + + +/* Utility functions */ + +static inline void ompi_coll_tuned_free_reqs(ompi_request_t **reqs, int count) +{ int i; for (i = 0; i < count; ++i) ompi_request_free(&reqs[i]); - } +} - struct mca_coll_tuned_component_t { +struct mca_coll_tuned_component_t { /** Base coll component */ mca_coll_base_component_2_0_0_t super; - + /** MCA parameter: Priority of this component */ int tuned_priority; - + /** 
global stuff that I need the component to store */ - + /* MCA parameters first */ - + /* cached decision table stuff (moved from MCW module) */ - ompi_coll_alg_rule_t *all_base_rules; - }; - /** - * Convenience typedef - */ - typedef struct mca_coll_tuned_component_t mca_coll_tuned_component_t; + ompi_coll_alg_rule_t *all_base_rules; +}; +/** + * Convenience typedef + */ +typedef struct mca_coll_tuned_component_t mca_coll_tuned_component_t; - /** - * Global component instance - */ - OMPI_MODULE_DECLSPEC extern mca_coll_tuned_component_t mca_coll_tuned_component; +/** + * Global component instance + */ +OMPI_MODULE_DECLSPEC extern mca_coll_tuned_component_t mca_coll_tuned_component; - /* - * Data structure for hanging data off the communicator - * i.e. per module instance - */ - struct mca_coll_tuned_comm_t { +/* + * Data structure for hanging data off the communicator + * i.e. per module instance + */ +struct mca_coll_tuned_comm_t { /* standard data for requests and PML usage */ - + /* Precreate space for requests * Note this does not effect basic, * but if in wrong context can confuse a debugger * this is controlled by an MCA param */ - + ompi_request_t **mcct_reqs; int mcct_num_reqs; - + /* * tuned topo information caching per communicator * * for each communicator we cache the topo information so we can * reuse without regenerating if we change the root, [or fanout] * then regenerate and recache this information - * */ - + /* general tree with n fan out */ ompi_coll_tree_t *cached_ntree; int cached_ntree_root; int cached_ntree_fanout; - + /* binary tree */ ompi_coll_tree_t *cached_bintree; int cached_bintree_root; - + /* binomial tree */ ompi_coll_tree_t *cached_bmtree; int cached_bmtree_root; - + /* binomial tree */ ompi_coll_tree_t *cached_in_order_bmtree; int cached_in_order_bmtree_root; - + /* chained tree (fanout followed by pipelines) */ ompi_coll_tree_t *cached_chain; int cached_chain_root; int cached_chain_fanout; - + /* pipeline */ ompi_coll_tree_t 
*cached_pipeline; int cached_pipeline_root; - + /* in-order binary tree (root of the in-order binary tree is rank 0) */ ompi_coll_tree_t *cached_in_order_bintree; - /* extra data required by the decision functions */ - ompi_coll_alg_rule_t *all_base_rules; /* stored only on MCW, all other coms ref it */ /* moving to the component */ ompi_coll_com_rule_t *com_rules[COLLCOUNT]; /* the communicator rules for each MPI collective for ONLY my comsize */ /* for forced algorithms we store the information on the module */ /* previously we only had one shared copy, ops, it really is per comm/module */ coll_tuned_force_algorithm_params_t user_forced[COLLCOUNT]; - }; - /** - * Convenience typedef - */ - typedef struct mca_coll_tuned_comm_t mca_coll_tuned_comm_t; +}; +typedef struct mca_coll_tuned_comm_t mca_coll_tuned_comm_t; - struct mca_coll_tuned_module_t { +struct mca_coll_tuned_module_t { mca_coll_base_module_t super; - + mca_coll_tuned_comm_t *tuned_data; - }; - typedef struct mca_coll_tuned_module_t mca_coll_tuned_module_t; - OBJ_CLASS_DECLARATION(mca_coll_tuned_module_t); +}; +typedef struct mca_coll_tuned_module_t mca_coll_tuned_module_t; +OBJ_CLASS_DECLARATION(mca_coll_tuned_module_t); -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS #define COLL_TUNED_UPDATE_BINTREE( OMPI_COMM, TUNED_MODULE, ROOT ) \ do { \ diff --git a/ompi/mca/coll/tuned/coll_tuned_component.c b/ompi/mca/coll/tuned/coll_tuned_component.c index 521a0f67a3..e38c194bd0 100644 --- a/ompi/mca/coll/tuned/coll_tuned_component.c +++ b/ompi/mca/coll/tuned/coll_tuned_component.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2009 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -31,8 +31,7 @@ #include "mpi.h" #include "ompi/mca/coll/coll.h" #include "coll_tuned.h" - - +#include "coll_tuned_dynamic_file.h" /* * Public string showing the coll ompi_tuned component version number @@ -58,7 +57,6 @@ coll_tuned_force_algorithm_mca_param_indices_t ompi_coll_tuned_forced_params[COL /* max algorithm values */ int ompi_coll_tuned_forced_max_algorithms[COLLCOUNT]; - /* * Local function */ @@ -71,14 +69,10 @@ static int tuned_close(void); */ mca_coll_tuned_component_t mca_coll_tuned_component = { - /* First, fill in the super */ - { - /* First, the mca_component_t struct containing meta information about the component itself */ - { MCA_COLL_BASE_VERSION_2_0_0, @@ -107,13 +101,14 @@ mca_coll_tuned_component_t mca_coll_tuned_component = { 0, /* Tuned component specific information */ - /* Note some of this WAS in the module */ NULL /* ompi_coll_alg_rule_t ptr */ }; static int tuned_open(void) { + int rc; + #if OPAL_ENABLE_DEBUG { int param; @@ -177,6 +172,18 @@ static int tuned_open(void) "Filename of configuration file that contains the dynamic (@runtime) decision function rules", false, false, ompi_coll_tuned_dynamic_rules_filename, &ompi_coll_tuned_dynamic_rules_filename); + if( ompi_coll_tuned_dynamic_rules_filename ) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:component_open Reading collective rules file [%s]", + ompi_coll_tuned_dynamic_rules_filename)); + rc = ompi_coll_tuned_read_rules_config_file( ompi_coll_tuned_dynamic_rules_filename, + &(mca_coll_tuned_component.all_base_rules), COLLCOUNT); + if( rc >= 0 ) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_open Read %d valid rules\n", rc)); + } else { + OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_open Reading collective rules file failed\n")); + mca_coll_tuned_component.all_base_rules = NULL; + } + } ompi_coll_tuned_allreduce_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLREDUCE]); 
ompi_coll_tuned_alltoall_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLTOALL]); ompi_coll_tuned_allgather_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLGATHER]); @@ -206,6 +213,11 @@ static int tuned_close(void) OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:component_close: done!")); + if( NULL != mca_coll_tuned_component.all_base_rules ) { + ompi_coll_tuned_free_all_rules(mca_coll_tuned_component.all_base_rules, COLLCOUNT); + mca_coll_tuned_component.all_base_rules = NULL; + } + return OMPI_SUCCESS; } @@ -227,45 +239,36 @@ mca_coll_tuned_module_destruct(mca_coll_tuned_module_t *module) data = module->tuned_data; if (NULL != data) { #if OPAL_ENABLE_DEBUG - /* Reset the reqs to NULL/0 -- they'll be freed as part of freeing - the generel c_coll_selected_data */ - data->mcct_reqs = NULL; - data->mcct_num_reqs = 0; + /* Reset the reqs to NULL/0 -- they'll be freed as part of freeing + the generel c_coll_selected_data */ + data->mcct_reqs = NULL; + data->mcct_num_reqs = 0; #endif - /* free any cached information that has been allocated */ - if (data->cached_ntree) { /* destroy general tree if defined */ - ompi_coll_tuned_topo_destroy_tree (&data->cached_ntree); - } - if (data->cached_bintree) { /* destroy bintree if defined */ - ompi_coll_tuned_topo_destroy_tree (&data->cached_bintree); - } - if (data->cached_bmtree) { /* destroy bmtree if defined */ - ompi_coll_tuned_topo_destroy_tree (&data->cached_bmtree); - } - if (data->cached_in_order_bmtree) { /* destroy bmtree if defined */ - ompi_coll_tuned_topo_destroy_tree (&data->cached_in_order_bmtree); - } - if (data->cached_chain) { /* destroy general chain if defined */ - ompi_coll_tuned_topo_destroy_tree (&data->cached_chain); - } - if (data->cached_pipeline) { /* destroy pipeline if defined */ - ompi_coll_tuned_topo_destroy_tree (&data->cached_pipeline); - } - if (data->cached_in_order_bintree) { /* destroy in order bintree if defined */ - ompi_coll_tuned_topo_destroy_tree 
(&data->cached_in_order_bintree); - } + /* free any cached information that has been allocated */ + if (data->cached_ntree) { /* destroy general tree if defined */ + ompi_coll_tuned_topo_destroy_tree (&data->cached_ntree); + } + if (data->cached_bintree) { /* destroy bintree if defined */ + ompi_coll_tuned_topo_destroy_tree (&data->cached_bintree); + } + if (data->cached_bmtree) { /* destroy bmtree if defined */ + ompi_coll_tuned_topo_destroy_tree (&data->cached_bmtree); + } + if (data->cached_in_order_bmtree) { /* destroy bmtree if defined */ + ompi_coll_tuned_topo_destroy_tree (&data->cached_in_order_bmtree); + } + if (data->cached_chain) { /* destroy general chain if defined */ + ompi_coll_tuned_topo_destroy_tree (&data->cached_chain); + } + if (data->cached_pipeline) { /* destroy pipeline if defined */ + ompi_coll_tuned_topo_destroy_tree (&data->cached_pipeline); + } + if (data->cached_in_order_bintree) { /* destroy in order bintree if defined */ + ompi_coll_tuned_topo_destroy_tree (&data->cached_in_order_bintree); + } -#if 0 /* FIXME: */ - /* if any algorithm rules are cached on the communicator, only free them if its MCW */ - /* as this is the only place they are allocated by reading the decision configure file */ - if ((ompi_coll_tuned_use_dynamic_rules)&&(&ompi_mpi_comm_world==comm)) { - if (comm->data->all_base_rules) { - ompi_coll_tuned_free_all_rules (comm->data->all_base_rules, COLLCOUNT); - } - } -#endif - free(data); + free(data); } } diff --git a/ompi/mca/coll/tuned/coll_tuned_decision_dynamic.c b/ompi/mca/coll/tuned/coll_tuned_decision_dynamic.c index 29e457251b..129f7530aa 100644 --- a/ompi/mca/coll/tuned/coll_tuned_decision_dynamic.c +++ b/ompi/mca/coll/tuned/coll_tuned_decision_dynamic.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
- * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2009 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -56,7 +56,7 @@ ompi_coll_tuned_allreduce_intra_dec_dynamic (void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; mca_coll_tuned_comm_t *data = tuned_module->tuned_data; @@ -76,19 +76,19 @@ ompi_coll_tuned_allreduce_intra_dec_dynamic (void *sbuf, void *rbuf, int count, dsize, &faninout, &segsize, &ignoreme); if (alg) { - /* we have found a valid choice from the file based rules for this message size */ + /* we have found a valid choice from the file based rules for this message size */ return ompi_coll_tuned_allreduce_intra_do_this (sbuf, rbuf, count, dtype, op, - comm, module, + comm, module, alg, faninout, segsize); } /* found a method */ } /*end if any com rules to check */ if (data->user_forced[ALLREDUCE].algorithm) { return ompi_coll_tuned_allreduce_intra_do_forced (sbuf, rbuf, count, dtype, op, - comm, module); + comm, module); } return ompi_coll_tuned_allreduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, - comm, module); + comm, module); } /* @@ -104,7 +104,7 @@ int ompi_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; mca_coll_tuned_comm_t *data = tuned_module->tuned_data; @@ -126,23 +126,22 @@ int ompi_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount, dsize, &faninout, &segsize, &max_requests); if (alg) { - /* 
we have found a valid choice from the file based rules for this message size */ + /* we have found a valid choice from the file based rules for this message size */ return ompi_coll_tuned_alltoall_intra_do_this (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module, + rbuf, rcount, rdtype, + comm, module, alg, faninout, segsize, max_requests); } /* found a method */ } /*end if any com rules to check */ - if (data->user_forced[ALLTOALL].algorithm) { return ompi_coll_tuned_alltoall_intra_do_forced (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); + rbuf, rcount, rdtype, + comm, module); } return ompi_coll_tuned_alltoall_intra_dec_fixed (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); + rbuf, rcount, rdtype, + comm, module); } /* @@ -163,16 +162,27 @@ int ompi_coll_tuned_alltoallv_intra_dec_dynamic(void *sbuf, int *scounts, int *s OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_alltoallv_intra_dec_dynamic")); - /* - * BEGIN - File Based Rules - * - * Here is where we would check to see if we have some file based - * rules. Currently, we do not, so move on to seeing if the user - * specified a specific algorithm. If not, then use the fixed - * decision code to decide. - * - * END - File Based Rules + /** + * check to see if we have some filebased rules. As we don't have global + * knowledge about the total amount of data, use the first available rule. + * This allow the users to specify the alltoallv algorithm to be used only + * based on the communicator size. 
*/ + if (data->com_rules[ALLTOALLV]) { + int alg, faninout, segsize, max_requests; + + alg = ompi_coll_tuned_get_target_method_params (data->com_rules[ALLTOALLV], + 0, &faninout, &segsize, &max_requests); + + if (alg) { + /* we have found a valid choice from the file based rules for this message size */ + return ompi_coll_tuned_alltoallv_intra_do_this (sbuf, scounts, sdisps, sdtype, + rbuf, rcounts, rdisps, rdtype, + comm, module, + alg); + } /* found a method */ + } /*end if any com rules to check */ + if (data->user_forced[ALLTOALLV].algorithm) { return ompi_coll_tuned_alltoallv_intra_do_forced(sbuf, scounts, sdisps, sdtype, rbuf, rcounts, rdisps, rdtype, @@ -191,7 +201,7 @@ int ompi_coll_tuned_alltoallv_intra_dec_dynamic(void *sbuf, int *scounts, int *s * Returns: - MPI_SUCCESS or error code (passed from the barrier implementation) */ int ompi_coll_tuned_barrier_intra_dec_dynamic(struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; mca_coll_tuned_comm_t *data = tuned_module->tuned_data; @@ -207,7 +217,7 @@ int ompi_coll_tuned_barrier_intra_dec_dynamic(struct ompi_communicator_t *comm, 0, &faninout, &segsize, &ignoreme); if (alg) { - /* we have found a valid choice from the file based rules for this message size */ + /* we have found a valid choice from the file based rules for this message size */ return ompi_coll_tuned_barrier_intra_do_this (comm, module, alg, faninout, segsize); } /* found a method */ @@ -229,7 +239,7 @@ int ompi_coll_tuned_barrier_intra_dec_dynamic(struct ompi_communicator_t *comm, int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; mca_coll_tuned_comm_t *data = 
tuned_module->tuned_data; @@ -249,9 +259,9 @@ int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count, dsize, &faninout, &segsize, &ignoreme); if (alg) { - /* we have found a valid choice from the file based rules for this message size */ + /* we have found a valid choice from the file based rules for this message size */ return ompi_coll_tuned_bcast_intra_do_this (buff, count, datatype, root, - comm, module, + comm, module, alg, faninout, segsize); } /* found a method */ } /*end if any com rules to check */ @@ -259,10 +269,10 @@ int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count, if (data->user_forced[BCAST].algorithm) { return ompi_coll_tuned_bcast_intra_do_forced (buff, count, datatype, root, - comm, module); + comm, module); } return ompi_coll_tuned_bcast_intra_dec_fixed (buff, count, datatype, root, - comm, module); + comm, module); } /* @@ -277,7 +287,7 @@ int ompi_coll_tuned_reduce_intra_dec_dynamic( void *sendbuf, void *recvbuf, int count, struct ompi_datatype_t* datatype, struct ompi_op_t* op, int root, struct ompi_communicator_t* comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; mca_coll_tuned_comm_t *data = tuned_module->tuned_data; @@ -298,10 +308,10 @@ int ompi_coll_tuned_reduce_intra_dec_dynamic( void *sendbuf, void *recvbuf, dsize, &faninout, &segsize, &max_requests); if (alg) { - /* we have found a valid choice from the file based rules for this message size */ + /* we have found a valid choice from the file based rules for this message size */ return ompi_coll_tuned_reduce_intra_do_this (sendbuf, recvbuf, count, datatype, op, root, - comm, module, + comm, module, alg, faninout, segsize, max_requests); @@ -310,12 +320,12 @@ int ompi_coll_tuned_reduce_intra_dec_dynamic( void *sendbuf, void *recvbuf, if (data->user_forced[REDUCE].algorithm) { return ompi_coll_tuned_reduce_intra_do_forced (sendbuf, recvbuf, count, datatype, - 
op, root, - comm, module); + op, root, + comm, module); } return ompi_coll_tuned_reduce_intra_dec_fixed (sendbuf, recvbuf, count, datatype, - op, root, - comm, module); + op, root, + comm, module); } /* @@ -332,7 +342,7 @@ int ompi_coll_tuned_reduce_scatter_intra_dec_dynamic(void *sbuf, void *rbuf, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; mca_coll_tuned_comm_t *data = tuned_module->tuned_data; @@ -354,10 +364,10 @@ int ompi_coll_tuned_reduce_scatter_intra_dec_dynamic(void *sbuf, void *rbuf, dsize, &faninout, &segsize, &ignoreme); if (alg) { - /* we have found a valid choice from the file based rules for this message size */ + /* we have found a valid choice from the file based rules for this message size */ return ompi_coll_tuned_reduce_scatter_intra_do_this (sbuf, rbuf, rcounts, - dtype, op, - comm, module, + dtype, op, + comm, module, alg, faninout, segsize); } /* found a method */ @@ -369,8 +379,8 @@ int ompi_coll_tuned_reduce_scatter_intra_dec_dynamic(void *sbuf, void *rbuf, comm, module); } return ompi_coll_tuned_reduce_scatter_intra_dec_fixed (sbuf, rbuf, rcounts, - dtype, op, - comm, module); + dtype, op, + comm, module); } /* @@ -387,49 +397,49 @@ int ompi_coll_tuned_allgather_intra_dec_dynamic(void *sbuf, int scount, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; mca_coll_tuned_comm_t *data = tuned_module->tuned_data; OPAL_OUTPUT((ompi_coll_tuned_stream, - "ompi_coll_tuned_allgather_intra_dec_dynamic")); + "ompi_coll_tuned_allgather_intra_dec_dynamic")); if (data->com_rules[ALLGATHER]) { - /* We have file based rules: - - calculate message size and other necessary information */ - int 
comsize; - int alg, faninout, segsize, ignoreme; - size_t dsize; + /* We have file based rules: + - calculate message size and other necessary information */ + int comsize; + int alg, faninout, segsize, ignoreme; + size_t dsize; - ompi_datatype_type_size (sdtype, &dsize); - comsize = ompi_comm_size(comm); - dsize *= comsize * scount; + ompi_datatype_type_size (sdtype, &dsize); + comsize = ompi_comm_size(comm); + dsize *= comsize * scount; - alg = ompi_coll_tuned_get_target_method_params (data->com_rules[ALLGATHER], - dsize, &faninout, &segsize, &ignoreme); - if (alg) { - /* we have found a valid choice from the file based rules for - this message size */ - return ompi_coll_tuned_allgather_intra_do_this (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module, - alg, faninout, segsize); - } + alg = ompi_coll_tuned_get_target_method_params (data->com_rules[ALLGATHER], + dsize, &faninout, &segsize, &ignoreme); + if (alg) { + /* we have found a valid choice from the file based rules for + this message size */ + return ompi_coll_tuned_allgather_intra_do_this (sbuf, scount, sdtype, + rbuf, rcount, rdtype, + comm, module, + alg, faninout, segsize); + } } /* We do not have file based rules */ if (data->user_forced[ALLGATHER].algorithm) { - /* User-forced algorithm */ - return ompi_coll_tuned_allgather_intra_do_forced (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); + /* User-forced algorithm */ + return ompi_coll_tuned_allgather_intra_do_forced (sbuf, scount, sdtype, + rbuf, rcount, rdtype, + comm, module); } /* Use default decision */ return ompi_coll_tuned_allgather_intra_dec_fixed (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); + rbuf, rcount, rdtype, + comm, module); } /* @@ -447,100 +457,146 @@ int ompi_coll_tuned_allgatherv_intra_dec_dynamic(void *sbuf, int scount, int *rdispls, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { 
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; mca_coll_tuned_comm_t *data = tuned_module->tuned_data; OPAL_OUTPUT((ompi_coll_tuned_stream, - "ompi_coll_tuned_allgatherv_intra_dec_dynamic")); + "ompi_coll_tuned_allgatherv_intra_dec_dynamic")); if (data->com_rules[ALLGATHERV]) { - /* We have file based rules: - - calculate message size and other necessary information */ - int comsize, i; - int alg, faninout, segsize, ignoreme; - size_t dsize, total_size; + /* We have file based rules: + - calculate message size and other necessary information */ + int comsize, i; + int alg, faninout, segsize, ignoreme; + size_t dsize, total_size; - comsize = ompi_comm_size(comm); - ompi_datatype_type_size (sdtype, &dsize); - total_size = 0; - for (i = 0; i < comsize; i++) { total_size += dsize * rcounts[i]; } + comsize = ompi_comm_size(comm); + ompi_datatype_type_size (sdtype, &dsize); + total_size = 0; + for (i = 0; i < comsize; i++) { total_size += dsize * rcounts[i]; } - alg = ompi_coll_tuned_get_target_method_params (data->com_rules[ALLGATHERV], - total_size, &faninout, &segsize, &ignoreme); - if (alg) { - /* we have found a valid choice from the file based rules for - this message size */ - return ompi_coll_tuned_allgatherv_intra_do_this (sbuf, scount, sdtype, - rbuf, rcounts, - rdispls, rdtype, - comm, module, - alg, faninout, segsize); - } + alg = ompi_coll_tuned_get_target_method_params (data->com_rules[ALLGATHERV], + total_size, &faninout, &segsize, &ignoreme); + if (alg) { + /* we have found a valid choice from the file based rules for + this message size */ + return ompi_coll_tuned_allgatherv_intra_do_this (sbuf, scount, sdtype, + rbuf, rcounts, + rdispls, rdtype, + comm, module, + alg, faninout, segsize); + } } /* We do not have file based rules */ if (data->user_forced[ALLGATHERV].algorithm) { - /* User-forced algorithm */ - return ompi_coll_tuned_allgatherv_intra_do_forced (sbuf, scount, sdtype, - rbuf, rcounts, - rdispls, rdtype, - comm, 
module); + /* User-forced algorithm */ + return ompi_coll_tuned_allgatherv_intra_do_forced (sbuf, scount, sdtype, + rbuf, rcounts, + rdispls, rdtype, + comm, module); } /* Use default decision */ return ompi_coll_tuned_allgatherv_intra_dec_fixed (sbuf, scount, sdtype, - rbuf, rcounts, - rdispls, rdtype, - comm, module); + rbuf, rcounts, + rdispls, rdtype, + comm, module); } int ompi_coll_tuned_gather_intra_dec_dynamic(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, + struct ompi_datatype_t *rdtype, + int root, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) { mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; mca_coll_tuned_comm_t *data = tuned_module->tuned_data; OPAL_OUTPUT((ompi_coll_tuned_stream, - "ompi_coll_tuned_gather_intra_dec_dynamic")); + "ompi_coll_tuned_gather_intra_dec_dynamic")); + + /** + * check to see if we have some filebased rules. 
+ */ + if (data->com_rules[GATHER]) { + int comsize, alg, faninout, segsize, max_requests; + size_t dsize; + + comsize = ompi_comm_size(comm); + ompi_datatype_type_size (sdtype, &dsize); + dsize *= comsize; + + alg = ompi_coll_tuned_get_target_method_params (data->com_rules[GATHER], + dsize, &faninout, &segsize, &max_requests); + + if (alg) { + /* we have found a valid choice from the file based rules for this message size */ + return ompi_coll_tuned_gather_intra_do_this (sbuf, scount, sdtype, + rbuf, rcount, rdtype, + root, comm, module, + alg, faninout, segsize); + } /* found a method */ + } /*end if any com rules to check */ if (data->user_forced[GATHER].algorithm) { - return ompi_coll_tuned_gather_intra_do_forced (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - root, comm, module); + return ompi_coll_tuned_gather_intra_do_forced (sbuf, scount, sdtype, + rbuf, rcount, rdtype, + root, comm, module); } return ompi_coll_tuned_gather_intra_dec_fixed (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - root, comm, module); + rbuf, rcount, rdtype, + root, comm, module); } int ompi_coll_tuned_scatter_intra_dec_dynamic(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, + struct ompi_datatype_t *rdtype, + int root, struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) { mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; mca_coll_tuned_comm_t *data = tuned_module->tuned_data; OPAL_OUTPUT((ompi_coll_tuned_stream, - "ompi_coll_tuned_scatter_intra_dec_dynamic")); + "ompi_coll_tuned_scatter_intra_dec_dynamic")); + + /** + * check to see if we have some filebased rules. 
+ */ + if (data->com_rules[SCATTER]) { + int comsize, alg, faninout, segsize, max_requests; + size_t dsize; + + comsize = ompi_comm_size(comm); + ompi_datatype_type_size (sdtype, &dsize); + dsize *= comsize; + + alg = ompi_coll_tuned_get_target_method_params (data->com_rules[SCATTER], + dsize, &faninout, &segsize, &max_requests); + + if (alg) { + /* we have found a valid choice from the file based rules for this message size */ + return ompi_coll_tuned_scatter_intra_do_this (sbuf, scount, sdtype, + rbuf, rcount, rdtype, + root, comm, module, + alg, faninout, segsize); + } /* found a method */ + } /*end if any com rules to check */ if (data->user_forced[SCATTER].algorithm) { - return ompi_coll_tuned_scatter_intra_do_forced (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - root, comm, module); + return ompi_coll_tuned_scatter_intra_do_forced (sbuf, scount, sdtype, + rbuf, rcount, rdtype, + root, comm, module); } return ompi_coll_tuned_scatter_intra_dec_fixed (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - root, comm, module); + rbuf, rcount, rdtype, + root, comm, module); } diff --git a/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c b/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c index 933d7bb628..380239d903 100644 --- a/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c +++ b/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2009 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -40,7 +40,7 @@ ompi_coll_tuned_allreduce_intra_dec_fixed (void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { size_t dsize, block_dsize; int comm_size = ompi_comm_size(comm); @@ -58,22 +58,22 @@ ompi_coll_tuned_allreduce_intra_dec_fixed (void *sbuf, void *rbuf, int count, block_dsize = dsize * count; if (block_dsize < intermediate_message) { - return (ompi_coll_tuned_allreduce_intra_recursivedoubling (sbuf, rbuf, - count, dtype, - op, comm, module)); + return (ompi_coll_tuned_allreduce_intra_recursivedoubling (sbuf, rbuf, + count, dtype, + op, comm, module)); } if( ompi_op_is_commute(op) && (count > comm_size) ) { - const size_t segment_size = 1 << 20; /* 1 MB */ - if ((comm_size * segment_size >= block_dsize)) { - return (ompi_coll_tuned_allreduce_intra_ring (sbuf, rbuf, count, dtype, - op, comm, module)); - } else { - return (ompi_coll_tuned_allreduce_intra_ring_segmented (sbuf, rbuf, - count, dtype, - op, comm, module, - segment_size)); - } + const size_t segment_size = 1 << 20; /* 1 MB */ + if ((comm_size * segment_size >= block_dsize)) { + return (ompi_coll_tuned_allreduce_intra_ring (sbuf, rbuf, count, dtype, + op, comm, module)); + } else { + return (ompi_coll_tuned_allreduce_intra_ring_segmented (sbuf, rbuf, + count, dtype, + op, comm, module, + segment_size)); + } } return (ompi_coll_tuned_allreduce_intra_nonoverlapping (sbuf, rbuf, count, @@ -93,7 +93,7 @@ int ompi_coll_tuned_alltoall_intra_dec_fixed(void *sbuf, int scount, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { int communicator_size; size_t dsize, block_dsize; @@ -106,8 +106,8 @@ int ompi_coll_tuned_alltoall_intra_dec_fixed(void *sbuf, int scount, /* special case */ if 
(communicator_size==2) { return ompi_coll_tuned_alltoall_intra_two_procs(sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); + rbuf, rcount, rdtype, + comm, module); } /* Decision function based on measurement on Grig cluster at @@ -118,19 +118,19 @@ int ompi_coll_tuned_alltoall_intra_dec_fixed(void *sbuf, int scount, block_dsize = dsize * scount; if ((block_dsize < 200) && (communicator_size > 12)) { - return ompi_coll_tuned_alltoall_intra_bruck(sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); + return ompi_coll_tuned_alltoall_intra_bruck(sbuf, scount, sdtype, + rbuf, rcount, rdtype, + comm, module); } else if (block_dsize < 3000) { - return ompi_coll_tuned_alltoall_intra_basic_linear(sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); + return ompi_coll_tuned_alltoall_intra_basic_linear(sbuf, scount, sdtype, + rbuf, rcount, rdtype, + comm, module); } return ompi_coll_tuned_alltoall_intra_pairwise (sbuf, scount, sdtype, rbuf, rcount, rdtype, - comm, module); + comm, module); #if 0 /* previous decision */ @@ -179,7 +179,7 @@ int ompi_coll_tuned_alltoallv_intra_dec_fixed(void *sbuf, int *scounts, int *sdi * Returns: - MPI_SUCCESS or error code (passed from the barrier implementation) */ int ompi_coll_tuned_barrier_intra_dec_fixed(struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { int communicator_size = ompi_comm_size(comm); @@ -219,10 +219,10 @@ int ompi_coll_tuned_barrier_intra_dec_fixed(struct ompi_communicator_t *comm, int ompi_coll_tuned_bcast_intra_dec_fixed(void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { /* Decision function based on MX results for - messages up to 36MB and communicator sizes up to 64 nodes */ + messages up to 36MB and communicator sizes up to 64 nodes */ const size_t small_message_size = 2048; const size_t intermediate_message_size = 
370728; const double a_p16 = 3.2118e-6; /* [1 / byte] */ @@ -249,56 +249,56 @@ int ompi_coll_tuned_bcast_intra_dec_fixed(void *buff, int count, /* Handle messages of small and intermediate size, and single-element broadcasts */ if ((message_size < small_message_size) || (count <= 1)) { - /* Binomial without segmentation */ - segsize = 0; - return ompi_coll_tuned_bcast_intra_binomial (buff, count, datatype, - root, comm, module, - segsize); + /* Binomial without segmentation */ + segsize = 0; + return ompi_coll_tuned_bcast_intra_binomial (buff, count, datatype, + root, comm, module, + segsize); } else if (message_size < intermediate_message_size) { - /* SplittedBinary with 1KB segments */ - segsize = 1024; - return ompi_coll_tuned_bcast_intra_split_bintree(buff, count, datatype, - root, comm, module, - segsize); + /* SplittedBinary with 1KB segments */ + segsize = 1024; + return ompi_coll_tuned_bcast_intra_split_bintree(buff, count, datatype, + root, comm, module, + segsize); } /* Handle large message sizes */ else if (communicator_size < (a_p128 * message_size + b_p128)) { - /* Pipeline with 128KB segments */ - segsize = 1024 << 7; - return ompi_coll_tuned_bcast_intra_pipeline (buff, count, datatype, - root, comm, module, - segsize); + /* Pipeline with 128KB segments */ + segsize = 1024 << 7; + return ompi_coll_tuned_bcast_intra_pipeline (buff, count, datatype, + root, comm, module, + segsize); } else if (communicator_size < 13) { - /* Split Binary with 8KB segments */ - segsize = 1024 << 3; - return ompi_coll_tuned_bcast_intra_split_bintree(buff, count, datatype, - root, comm, module, - segsize); + /* Split Binary with 8KB segments */ + segsize = 1024 << 3; + return ompi_coll_tuned_bcast_intra_split_bintree(buff, count, datatype, + root, comm, module, + segsize); } else if (communicator_size < (a_p64 * message_size + b_p64)) { - /* Pipeline with 64KB segments */ - segsize = 1024 << 6; - return ompi_coll_tuned_bcast_intra_pipeline (buff, count, datatype, - root, 
comm, module, - segsize); + /* Pipeline with 64KB segments */ + segsize = 1024 << 6; + return ompi_coll_tuned_bcast_intra_pipeline (buff, count, datatype, + root, comm, module, + segsize); } else if (communicator_size < (a_p16 * message_size + b_p16)) { - /* Pipeline with 16KB segments */ - segsize = 1024 << 4; - return ompi_coll_tuned_bcast_intra_pipeline (buff, count, datatype, - root, comm, module, - segsize); + /* Pipeline with 16KB segments */ + segsize = 1024 << 4; + return ompi_coll_tuned_bcast_intra_pipeline (buff, count, datatype, + root, comm, module, + segsize); } /* Pipeline with 8KB segments */ segsize = 1024 << 3; return ompi_coll_tuned_bcast_intra_pipeline (buff, count, datatype, - root, comm, module, - segsize); + root, comm, module, + segsize); #if 0 /* this is based on gige measurements */ @@ -340,7 +340,7 @@ int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf, int count, struct ompi_datatype_t* datatype, struct ompi_op_t* op, int root, struct ompi_communicator_t* comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { int communicator_size, segsize = 0; size_t message_size, dsize; @@ -370,10 +370,10 @@ int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf, return ompi_coll_tuned_reduce_intra_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm, module); } return ompi_coll_tuned_reduce_intra_in_order_binary (sendbuf, recvbuf, count, datatype, op, root, comm, module, - 0, max_requests); + 0, max_requests); } - OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_reduce_intra_dec_fixed" + OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_reduce_intra_dec_fixed " "root %d rank %d com_size %d msg_length %lu", root, ompi_comm_rank(comm), communicator_size, (unsigned long)message_size)); @@ -385,17 +385,17 @@ int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf, /* Binomial_0K */ segsize = 0; return ompi_coll_tuned_reduce_intra_binomial(sendbuf, recvbuf, 
count, datatype, op, root, comm, module, - segsize, max_requests); + segsize, max_requests); } else if (communicator_size > (a1 * message_size + b1)) { /* Binomial_1K */ segsize = 1024; return ompi_coll_tuned_reduce_intra_binomial(sendbuf, recvbuf, count, datatype, op, root, comm, module, - segsize, max_requests); + segsize, max_requests); } else if (communicator_size > (a2 * message_size + b2)) { /* Pipeline_1K */ segsize = 1024; return ompi_coll_tuned_reduce_intra_pipeline (sendbuf, recvbuf, count, datatype, op, root, comm, module, - segsize, max_requests); + segsize, max_requests); } else if (communicator_size > (a3 * message_size + b3)) { /* Binary_32K */ segsize = 32*1024; @@ -410,7 +410,7 @@ int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf, segsize = 64*1024; } return ompi_coll_tuned_reduce_intra_pipeline (sendbuf, recvbuf, count, datatype, op, root, comm, module, - segsize, max_requests); + segsize, max_requests); #if 0 /* for small messages use linear algorithm */ @@ -433,11 +433,11 @@ int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf, /* later swap this for a binary tree */ /* fanout = 2; */ return ompi_coll_tuned_reduce_intra_chain (sendbuf, recvbuf, count, datatype, op, root, comm, module, - segsize, fanout, max_requests); + segsize, fanout, max_requests); } segsize = 1024; return ompi_coll_tuned_reduce_intra_pipeline (sendbuf, recvbuf, count, datatype, op, root, comm, module, - segsize, max_requests); + segsize, max_requests); #endif /* 0 */ } @@ -457,51 +457,51 @@ int ompi_coll_tuned_reduce_scatter_intra_dec_fixed( void *sbuf, void *rbuf, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { - int comm_size, i, pow2; - size_t total_message_size, dsize; - const double a = 0.0012; - const double b = 8.0; - const size_t small_message_size = 12 * 1024; - const size_t large_message_size = 256 * 1024; - bool 
zerocounts = false; + int comm_size, i, pow2; + size_t total_message_size, dsize; + const double a = 0.0012; + const double b = 8.0; + const size_t small_message_size = 12 * 1024; + const size_t large_message_size = 256 * 1024; + bool zerocounts = false; - OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_reduce_scatter_intra_dec_fixed")); + OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_reduce_scatter_intra_dec_fixed")); - comm_size = ompi_comm_size(comm); - /* We need data size for decision function */ - ompi_datatype_type_size(dtype, &dsize); - total_message_size = 0; - for (i = 0; i < comm_size; i++) { - total_message_size += rcounts[i]; - if (0 == rcounts[i]) { - zerocounts = true; - } - } + comm_size = ompi_comm_size(comm); + /* We need data size for decision function */ + ompi_datatype_type_size(dtype, &dsize); + total_message_size = 0; + for (i = 0; i < comm_size; i++) { + total_message_size += rcounts[i]; + if (0 == rcounts[i]) { + zerocounts = true; + } + } - if( !ompi_op_is_commute(op) || (zerocounts)) { - return ompi_coll_tuned_reduce_scatter_intra_nonoverlapping (sbuf, rbuf, rcounts, - dtype, op, - comm, module); - } + if( !ompi_op_is_commute(op) || (zerocounts)) { + return ompi_coll_tuned_reduce_scatter_intra_nonoverlapping (sbuf, rbuf, rcounts, + dtype, op, + comm, module); + } - total_message_size *= dsize; + total_message_size *= dsize; - /* compute the nearest power of 2 */ - for (pow2 = 1; pow2 < comm_size; pow2 <<= 1); + /* compute the nearest power of 2 */ + for (pow2 = 1; pow2 < comm_size; pow2 <<= 1); - if ((total_message_size <= small_message_size) || - ((total_message_size <= large_message_size) && (pow2 == comm_size)) || - (comm_size >= a * total_message_size + b)) { - return - ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(sbuf, rbuf, rcounts, - dtype, op, - comm, module); - } - return ompi_coll_tuned_reduce_scatter_intra_ring(sbuf, rbuf, rcounts, - dtype, op, - comm, module); + if ((total_message_size <= 
small_message_size) || + ((total_message_size <= large_message_size) && (pow2 == comm_size)) || + (comm_size >= a * total_message_size + b)) { + return + ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(sbuf, rbuf, rcounts, + dtype, op, + comm, module); + } + return ompi_coll_tuned_reduce_scatter_intra_ring(sbuf, rbuf, rcounts, + dtype, op, + comm, module); } /* @@ -520,80 +520,80 @@ int ompi_coll_tuned_allgather_intra_dec_fixed(void *sbuf, int scount, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - int communicator_size, pow2_size; - size_t dsize, total_dsize; + int communicator_size, pow2_size; + size_t dsize, total_dsize; - communicator_size = ompi_comm_size(comm); + communicator_size = ompi_comm_size(comm); - /* Special case for 2 processes */ - if (communicator_size == 2) { - return ompi_coll_tuned_allgather_intra_two_procs (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); + /* Special case for 2 processes */ + if (communicator_size == 2) { + return ompi_coll_tuned_allgather_intra_two_procs (sbuf, scount, sdtype, + rbuf, rcount, rdtype, + comm, module); } - /* Determine complete data size */ - ompi_datatype_type_size(sdtype, &dsize); - total_dsize = dsize * scount * communicator_size; + /* Determine complete data size */ + ompi_datatype_type_size(sdtype, &dsize); + total_dsize = dsize * scount * communicator_size; - OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_allgather_intra_dec_fixed" - " rank %d com_size %d msg_length %lu", - ompi_comm_rank(comm), communicator_size, (unsigned long)total_dsize)); + OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_allgather_intra_dec_fixed" + " rank %d com_size %d msg_length %lu", + ompi_comm_rank(comm), communicator_size, (unsigned long)total_dsize)); - for (pow2_size = 1; pow2_size < communicator_size; pow2_size <<=1); + for (pow2_size = 1; pow2_size < communicator_size; pow2_size <<=1); - /* Decision based on MX 2Gb results from Grig cluster at - The University 
of Tennesse, Knoxville - - if total message size is less than 50KB use either bruck or - recursive doubling for non-power of two and power of two nodes, - respectively. - - else use ring and neighbor exchange algorithms for odd and even - number of nodes, respectively. - */ - if (total_dsize < 50000) { - if (pow2_size == communicator_size) { - return ompi_coll_tuned_allgather_intra_recursivedoubling(sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); - } else { - return ompi_coll_tuned_allgather_intra_bruck(sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); - } - } else { - if (communicator_size % 2) { - return ompi_coll_tuned_allgather_intra_ring(sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); - } else { - return ompi_coll_tuned_allgather_intra_neighborexchange(sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); - } - } + /* Decision based on MX 2Gb results from Grig cluster at + The University of Tennesse, Knoxville + - if total message size is less than 50KB use either bruck or + recursive doubling for non-power of two and power of two nodes, + respectively. + - else use ring and neighbor exchange algorithms for odd and even + number of nodes, respectively. + */ + if (total_dsize < 50000) { + if (pow2_size == communicator_size) { + return ompi_coll_tuned_allgather_intra_recursivedoubling(sbuf, scount, sdtype, + rbuf, rcount, rdtype, + comm, module); + } else { + return ompi_coll_tuned_allgather_intra_bruck(sbuf, scount, sdtype, + rbuf, rcount, rdtype, + comm, module); + } + } else { + if (communicator_size % 2) { + return ompi_coll_tuned_allgather_intra_ring(sbuf, scount, sdtype, + rbuf, rcount, rdtype, + comm, module); + } else { + return ompi_coll_tuned_allgather_intra_neighborexchange(sbuf, scount, sdtype, + rbuf, rcount, rdtype, + comm, module); + } + } #if defined(USE_MPICH2_DECISION) - /* Decision as in MPICH-2 - presented in Thakur et.al. 
"Optimization of Collective Communication - Operations in MPICH", International Journal of High Performance Computing - Applications, Vol. 19, No. 1, 49-66 (2005) - - for power-of-two processes and small and medium size messages - (up to 512KB) use recursive doubling - - for non-power-of-two processes and small messages (80KB) use bruck, - - for everything else use ring. + /* Decision as in MPICH-2 + presented in Thakur et.al. "Optimization of Collective Communication + Operations in MPICH", International Journal of High Performance Computing + Applications, Vol. 19, No. 1, 49-66 (2005) + - for power-of-two processes and small and medium size messages + (up to 512KB) use recursive doubling + - for non-power-of-two processes and small messages (80KB) use bruck, + - for everything else use ring. */ - if ((pow2_size == communicator_size) && (total_dsize < 524288)) { - return ompi_coll_tuned_allgather_intra_recursivedoubling(sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); - } else if (total_dsize <= 81920) { - return ompi_coll_tuned_allgather_intra_bruck(sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); - } - return ompi_coll_tuned_allgather_intra_ring(sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); + if ((pow2_size == communicator_size) && (total_dsize < 524288)) { + return ompi_coll_tuned_allgather_intra_recursivedoubling(sbuf, scount, sdtype, + rbuf, rcount, rdtype, + comm, module); + } else if (total_dsize <= 81920) { + return ompi_coll_tuned_allgather_intra_bruck(sbuf, scount, sdtype, + rbuf, rcount, rdtype, + comm, module); + } + return ompi_coll_tuned_allgather_intra_ring(sbuf, scount, sdtype, + rbuf, rcount, rdtype, + comm, module); #endif /* defined(USE_MPICH2_DECISION) */ } @@ -612,7 +612,7 @@ int ompi_coll_tuned_allgatherv_intra_dec_fixed(void *sbuf, int scount, int *rdispls, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { int i; 
int communicator_size; @@ -639,22 +639,22 @@ int ompi_coll_tuned_allgatherv_intra_dec_fixed(void *sbuf, int scount, " rank %d com_size %d msg_length %lu", ompi_comm_rank(comm), communicator_size, (unsigned long)total_dsize)); - /* Decision based on allgather decision. */ - if (total_dsize < 50000) { - return ompi_coll_tuned_allgatherv_intra_bruck(sbuf, scount, sdtype, - rbuf, rcounts, rdispls, rdtype, - comm, module); - } else { - if (communicator_size % 2) { - return ompi_coll_tuned_allgatherv_intra_ring(sbuf, scount, sdtype, - rbuf, rcounts, rdispls, rdtype, - comm, module); - } else { - return ompi_coll_tuned_allgatherv_intra_neighborexchange(sbuf, scount, sdtype, - rbuf, rcounts, rdispls, rdtype, - comm, module); - } - } + /* Decision based on allgather decision. */ + if (total_dsize < 50000) { + return ompi_coll_tuned_allgatherv_intra_bruck(sbuf, scount, sdtype, + rbuf, rcounts, rdispls, rdtype, + comm, module); + } else { + if (communicator_size % 2) { + return ompi_coll_tuned_allgatherv_intra_ring(sbuf, scount, sdtype, + rbuf, rcounts, rdispls, rdtype, + comm, module); + } else { + return ompi_coll_tuned_allgatherv_intra_neighborexchange(sbuf, scount, sdtype, + rbuf, rcounts, rdispls, rdtype, + comm, module); + } + } } /* @@ -667,12 +667,12 @@ int ompi_coll_tuned_allgatherv_intra_dec_fixed(void *sbuf, int scount, */ int ompi_coll_tuned_gather_intra_dec_fixed(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - int root, + struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, + struct ompi_datatype_t *rdtype, + int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { const int large_segment_size = 32768; const int small_segment_size = 1024; @@ -688,7 +688,7 @@ int ompi_coll_tuned_gather_intra_dec_fixed(void *sbuf, int scount, size_t dsize, block_size; OPAL_OUTPUT((ompi_coll_tuned_stream, - "ompi_coll_tuned_gather_intra_dec_fixed")); 
+ "ompi_coll_tuned_gather_intra_dec_fixed")); communicator_size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); @@ -724,8 +724,8 @@ int ompi_coll_tuned_gather_intra_dec_fixed(void *sbuf, int scount, } /* Otherwise, use basic linear */ return ompi_coll_tuned_gather_intra_basic_linear (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - root, comm, module); + rbuf, rcount, rdtype, + root, comm, module); } /* @@ -738,11 +738,11 @@ int ompi_coll_tuned_gather_intra_dec_fixed(void *sbuf, int scount, */ int ompi_coll_tuned_scatter_intra_dec_fixed(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, + struct ompi_datatype_t *rdtype, + int root, struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) { const size_t small_block_size = 300; const int small_comm_size = 10; @@ -750,7 +750,7 @@ int ompi_coll_tuned_scatter_intra_dec_fixed(void *sbuf, int scount, size_t dsize, block_size; OPAL_OUTPUT((ompi_coll_tuned_stream, - "ompi_coll_tuned_scatter_intra_dec_fixed")); + "ompi_coll_tuned_scatter_intra_dec_fixed")); communicator_size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); @@ -770,6 +770,6 @@ int ompi_coll_tuned_scatter_intra_dec_fixed(void *sbuf, int scount, root, comm, module); } return ompi_coll_tuned_scatter_intra_basic_linear (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - root, comm, module); + rbuf, rcount, rdtype, + root, comm, module); } diff --git a/ompi/mca/coll/tuned/coll_tuned_dynamic_rules.c b/ompi/mca/coll/tuned/coll_tuned_dynamic_rules.c index 91d71d9ff6..ae50910ff0 100644 --- a/ompi/mca/coll/tuned/coll_tuned_dynamic_rules.c +++ b/ompi/mca/coll/tuned/coll_tuned_dynamic_rules.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2009 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -314,18 +314,12 @@ ompi_coll_com_rule_t* ompi_coll_tuned_get_com_rule_ptr (ompi_coll_alg_rule_t* ru best_com_p = com_p = alg_p->com_rules; i = best = 0; - while (in_com_sizes) { - /* OPAL_OUTPUT((ompi_coll_tuned_stream,"checking comsize %d against alg_id %d com_id %d index %d com_size %d", */ - /* mpi_comsize, com_p->alg_rule_id, com_p->com_rule_id, i, com_p->mpi_comsize)); */ - if (com_p->mpi_comsize <= mpi_comsize) { - best = i; - best_com_p = com_p; - /* OPAL_OUTPUT((ompi_coll_tuned_stream(":ok\n")); */ - } - else { - /* OPAL_OUTPUT((ompi_coll_tuned_stream(":nop\n")); */ + while( i < alg_p->n_com_sizes ) { + if (com_p->mpi_comsize > mpi_comsize) { break; } + best = i; + best_com_p = com_p; /* go to the next entry */ com_p++; i++; @@ -359,26 +353,11 @@ int ompi_coll_tuned_get_target_method_params (ompi_coll_com_rule_t* base_com_rul ompi_coll_msg_rule_t* best_msg_p = (ompi_coll_msg_rule_t*) NULL; int i, best; - if (!base_com_rule) { + /* No rule or zero rules */ + if( (NULL == base_com_rule) || (0 == base_com_rule->n_msg_sizes)) { return (0); } - if (!result_topo_faninout) { - return (0); - } - - if (!result_segsize) { - return (0); - } - - if (!max_requests) { - return (0); - } - - if (!base_com_rule->n_msg_sizes) { /* check for count of message sizes */ - return (0); /* no msg sizes so no rule */ - } - /* ok have some msg sizes, now to find the one closest to my mpi_msgsize */ /* make a copy of the first msg rule */ diff --git a/ompi/mca/coll/tuned/coll_tuned_forced.c b/ompi/mca/coll/tuned/coll_tuned_forced.c deleted file mode 100644 index b30947e6e5..0000000000 --- a/ompi/mca/coll/tuned/coll_tuned_forced.c +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of 
Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "mpi.h" -#include "opal/mca/mca.h" -#include "ompi/constants.h" -#include "coll_tuned.h" - -/* need to include our own topo prototypes so we can malloc data on the comm correctly */ -#include "coll_tuned_topo.h" - -/* also need the dynamic rule structures */ -#include "coll_tuned_forced.h" - -#include "coll_tuned_util.h" - -#include -#include - -/* We put all routines that handle the MCA user forced algorithm and parameter choices here */ -/* recheck the setting of forced, called on module create (i.e. for each new comm) */ - -int ompi_coll_tuned_forced_getvalues (coll_tuned_force_algorithm_mca_param_indices_t mca_params, - coll_tuned_force_algorithm_params_t *forced_values) -{ - mca_base_param_lookup_int (mca_params.algorithm_param_index, &(forced_values->algorithm)); - mca_base_param_lookup_int (mca_params.segsize_param_index, &(forced_values->segsize)); - mca_base_param_lookup_int (mca_params.tree_fanout_param_index, &(forced_values->tree_fanout)); - mca_base_param_lookup_int (mca_params.chain_fanout_param_index, &(forced_values->chain_fanout)); - mca_base_param_lookup_int (mca_params.max_requests_param_index, &(forced_values->max_requests)); - - return (MPI_SUCCESS); -} - - -/* special version of above just for barrier which only has one option available (at the moment...) 
*/ -int ompi_coll_tuned_forced_getvalues_barrier (coll_tuned_force_algorithm_mca_param_indices_t mca_params, - coll_tuned_force_algorithm_params_t *forced_values) -{ - mca_base_param_lookup_int (mca_params.algorithm_param_index, &(forced_values->algorithm)); - - return (MPI_SUCCESS); -} - - diff --git a/ompi/mca/coll/tuned/coll_tuned_forced.h b/ompi/mca/coll/tuned/coll_tuned_forced.h deleted file mode 100644 index b02897af77..0000000000 --- a/ompi/mca/coll/tuned/coll_tuned_forced.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_COLL_TUNED_FORCED_H_HAS_BEEN_INCLUDED -#define MCA_COLL_TUNED_FORCED_H_HAS_BEEN_INCLUDED - -#include "ompi_config.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - - -/* this structure is for storing the indexes to the forced algorithm mca params... 
*/ -/* we get these at component query (so that registered values appear in ompi_infoi) */ - -struct coll_tuned_force_algorithm_mca_param_indices_t { - int algorithm_param_index; /* which algorithm you want to force */ - int segsize_param_index; /* segsize to use (if supported), 0 = no segmentation */ - int tree_fanout_param_index; /* tree fanout/in to use */ - int chain_fanout_param_index; /* K-chain fanout/in to use */ - int max_requests_param_index; /* Maximum number of outstanding send or recv requests */ -}; - -typedef struct coll_tuned_force_algorithm_mca_param_indices_t coll_tuned_force_algorithm_mca_param_indices_t; - - -/* the following type is for storing actual value obtained from the MCA on each tuned module */ -/* via their mca param indices lookup in the component */ -/* this structure is stored once per collective type per communicator... */ -struct coll_tuned_force_algorithm_params_t { - int algorithm; /* which algorithm you want to force */ - int segsize; /* segsize to use (if supported), 0 = no segmentation */ - int tree_fanout; /* tree fanout/in to use */ - int chain_fanout; /* K-chain fanout/in to use */ - int max_requests; /* Maximum number of outstanding send or recv requests */ -}; - -typedef struct coll_tuned_force_algorithm_params_t coll_tuned_force_algorithm_params_t; - - -/* prototypes */ - -int ompi_coll_tuned_forced_getvalues (coll_tuned_force_algorithm_mca_param_indices_t mca_params, - coll_tuned_force_algorithm_params_t *forced_values); - -/* barrier has less options than any other collective so it gets its own special function */ -int ompi_coll_tuned_forced_getvalues_barrier (coll_tuned_force_algorithm_mca_param_indices_t mca_params, - coll_tuned_force_algorithm_params_t *forced_values); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif /* MCA_COLL_TUNED_FORCED_H_HAS_BEEN_INCLUDED */ - - diff --git a/ompi/mca/coll/tuned/coll_tuned_gather.c b/ompi/mca/coll/tuned/coll_tuned_gather.c index d21bb63e0e..160ce59a7c 
100644 --- a/ompi/mca/coll/tuned/coll_tuned_gather.c +++ b/ompi/mca/coll/tuned/coll_tuned_gather.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2009 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -440,49 +440,49 @@ ompi_coll_tuned_gather_intra_check_forced_init(coll_tuned_force_algorithm_mca_pa ompi_coll_tuned_forced_max_algorithms[GATHER] = max_alg; rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version, - "gather_algorithm_count", - "Number of gather algorithms available", - false, true, max_alg, NULL); + "gather_algorithm_count", + "Number of gather algorithms available", + false, true, max_alg, NULL); mca_param_indices->algorithm_param_index - = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, - "gather_algorithm", - "Which gather algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 binomial, 3 linear with synchronization.", - false, false, 0, NULL); + = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, + "gather_algorithm", + "Which gather algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 binomial, 3 linear with synchronization.", + false, false, 0, NULL); if (mca_param_indices->algorithm_param_index < 0) { return mca_param_indices->algorithm_param_index; } mca_base_param_lookup_int(mca_param_indices->algorithm_param_index, &(requested_alg)); if( 0 > requested_alg || requested_alg > max_alg ) { - if( 0 == ompi_comm_rank( MPI_COMM_WORLD ) ) { - opal_output( 0, "Gather algorithm #%d is not available (range [0..%d]). 
Switching back to ignore(0)\n", - requested_alg, max_alg ); - } - mca_base_param_set_int( mca_param_indices->algorithm_param_index, 0); + if( 0 == ompi_comm_rank( MPI_COMM_WORLD ) ) { + opal_output( 0, "Gather algorithm #%d is not available (range [0..%d]). Switching back to default(0)\n", + requested_alg, max_alg ); + } + mca_base_param_set_int( mca_param_indices->algorithm_param_index, 0); } mca_param_indices->segsize_param_index - = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, - "gather_algorithm_segmentsize", - "Segment size in bytes used by default for gather algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation. Currently, available algorithms do not support segmentation.", - false, false, 0, NULL); + = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, + "gather_algorithm_segmentsize", + "Segment size in bytes used by default for gather algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation. Currently, available algorithms do not support segmentation.", + false, false, 0, NULL); mca_param_indices->tree_fanout_param_index - = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, - "gather_algorithm_tree_fanout", - "Fanout for n-tree used for gather algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation. Currently, available algorithms do not support n-tree topologies.", - false, false, - ompi_coll_tuned_init_tree_fanout, /* get system wide default */ - NULL); + = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, + "gather_algorithm_tree_fanout", + "Fanout for n-tree used for gather algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation. 
Currently, available algorithms do not support n-tree topologies.", + false, false, + ompi_coll_tuned_init_tree_fanout, /* get system wide default */ + NULL); mca_param_indices->chain_fanout_param_index - = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, - "gather_algorithm_chain_fanout", - "Fanout for chains used for gather algorithms. Only has meaning if algorithm is forced and supports chain topo based operation. Currently, available algorithms do not support chain topologies.", - false, false, - ompi_coll_tuned_init_chain_fanout, /* get system wide default */ - NULL); + = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, + "gather_algorithm_chain_fanout", + "Fanout for chains used for gather algorithms. Only has meaning if algorithm is forced and supports chain topo based operation. Currently, available algorithms do not support chain topologies.", + false, false, + ompi_coll_tuned_init_chain_fanout, /* get system wide default */ + NULL); return (MPI_SUCCESS); } diff --git a/ompi/mca/coll/tuned/coll_tuned_module.c b/ompi/mca/coll/tuned/coll_tuned_module.c index 29256bbe1d..2f37d7452c 100644 --- a/ompi/mca/coll/tuned/coll_tuned_module.c +++ b/ompi/mca/coll/tuned/coll_tuned_module.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2009 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -31,7 +31,6 @@ #include "coll_tuned_topo.h" #include "coll_tuned_dynamic_rules.h" #include "coll_tuned_dynamic_file.h" -#include "coll_tuned_forced.h" static int tuned_module_enable(mca_coll_base_module_t *module, struct ompi_communicator_t *comm); @@ -61,27 +60,21 @@ ompi_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority) OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:module_tuned query called")); + /** + * No support for inter-communicator yet. + */ + if (OMPI_COMM_IS_INTER(comm)) { + *priority = 0; + return NULL; + } + /** * If it is inter-communicator and size is less than 2 we have specialized modules * to handle the intra collective communications. */ if (OMPI_COMM_IS_INTRA(comm) && ompi_comm_size(comm) < 2) { - *priority = 0; - return NULL; - } - - if (OMPI_COMM_IS_INTER(comm)) { -#if 0 - if (ompi_coll_tuned_use_dynamic_rules) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using inter_dynamic")); - } else { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using inter_fixed")); - - } -#endif - /* tuned does not support inter-communicator yet */ - *priority = 0; - return NULL; + *priority = 0; + return NULL; } tuned_module = OBJ_NEW(mca_coll_tuned_module_t); @@ -99,72 +92,86 @@ ompi_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority) tuned_module->super.ft_event = mca_coll_tuned_ft_event; if (ompi_coll_tuned_use_dynamic_rules) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using intra_dynamic")); - - tuned_module->super.coll_allgather = ompi_coll_tuned_allgather_intra_dec_dynamic; - tuned_module->super.coll_allgatherv = ompi_coll_tuned_allgatherv_intra_dec_dynamic; - tuned_module->super.coll_allreduce = ompi_coll_tuned_allreduce_intra_dec_dynamic; - tuned_module->super.coll_alltoall = ompi_coll_tuned_alltoall_intra_dec_dynamic; - tuned_module->super.coll_alltoallv = 
ompi_coll_tuned_alltoallv_intra_dec_dynamic; - tuned_module->super.coll_alltoallw = NULL; - tuned_module->super.coll_barrier = ompi_coll_tuned_barrier_intra_dec_dynamic; - tuned_module->super.coll_bcast = ompi_coll_tuned_bcast_intra_dec_dynamic; - tuned_module->super.coll_exscan = NULL; - tuned_module->super.coll_gather = ompi_coll_tuned_gather_intra_dec_dynamic; - tuned_module->super.coll_gatherv = NULL; - tuned_module->super.coll_reduce = ompi_coll_tuned_reduce_intra_dec_dynamic; - tuned_module->super.coll_reduce_scatter = ompi_coll_tuned_reduce_scatter_intra_dec_dynamic; - tuned_module->super.coll_scan = NULL; - tuned_module->super.coll_scatter = ompi_coll_tuned_scatter_intra_dec_dynamic; - tuned_module->super.coll_scatterv = NULL; + OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using intra_dynamic")); + tuned_module->super.coll_allgather = ompi_coll_tuned_allgather_intra_dec_dynamic; + tuned_module->super.coll_allgatherv = ompi_coll_tuned_allgatherv_intra_dec_dynamic; + tuned_module->super.coll_allreduce = ompi_coll_tuned_allreduce_intra_dec_dynamic; + tuned_module->super.coll_alltoall = ompi_coll_tuned_alltoall_intra_dec_dynamic; + tuned_module->super.coll_alltoallv = ompi_coll_tuned_alltoallv_intra_dec_dynamic; + tuned_module->super.coll_alltoallw = NULL; + tuned_module->super.coll_barrier = ompi_coll_tuned_barrier_intra_dec_dynamic; + tuned_module->super.coll_bcast = ompi_coll_tuned_bcast_intra_dec_dynamic; + tuned_module->super.coll_exscan = NULL; + tuned_module->super.coll_gather = ompi_coll_tuned_gather_intra_dec_dynamic; + tuned_module->super.coll_gatherv = NULL; + tuned_module->super.coll_reduce = ompi_coll_tuned_reduce_intra_dec_dynamic; + tuned_module->super.coll_reduce_scatter = ompi_coll_tuned_reduce_scatter_intra_dec_dynamic; + tuned_module->super.coll_scan = NULL; + tuned_module->super.coll_scatter = ompi_coll_tuned_scatter_intra_dec_dynamic; + tuned_module->super.coll_scatterv = NULL; } else { - 
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using intra_fixed")); - - tuned_module->super.coll_allgather = ompi_coll_tuned_allgather_intra_dec_fixed; - tuned_module->super.coll_allgatherv = ompi_coll_tuned_allgatherv_intra_dec_fixed; - tuned_module->super.coll_allreduce = ompi_coll_tuned_allreduce_intra_dec_fixed; - tuned_module->super.coll_alltoall = ompi_coll_tuned_alltoall_intra_dec_fixed; - tuned_module->super.coll_alltoallv = ompi_coll_tuned_alltoallv_intra_dec_fixed; - tuned_module->super.coll_alltoallw = NULL; - tuned_module->super.coll_barrier = ompi_coll_tuned_barrier_intra_dec_fixed; - tuned_module->super.coll_bcast = ompi_coll_tuned_bcast_intra_dec_fixed; - tuned_module->super.coll_exscan = NULL; - tuned_module->super.coll_gather = ompi_coll_tuned_gather_intra_dec_fixed; - tuned_module->super.coll_gatherv = NULL; - tuned_module->super.coll_reduce = ompi_coll_tuned_reduce_intra_dec_fixed; - tuned_module->super.coll_reduce_scatter = ompi_coll_tuned_reduce_scatter_intra_dec_fixed; - tuned_module->super.coll_scan = NULL; - tuned_module->super.coll_scatter = ompi_coll_tuned_scatter_intra_dec_fixed; - tuned_module->super.coll_scatterv = NULL; + OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using intra_fixed")); + tuned_module->super.coll_allgather = ompi_coll_tuned_allgather_intra_dec_fixed; + tuned_module->super.coll_allgatherv = ompi_coll_tuned_allgatherv_intra_dec_fixed; + tuned_module->super.coll_allreduce = ompi_coll_tuned_allreduce_intra_dec_fixed; + tuned_module->super.coll_alltoall = ompi_coll_tuned_alltoall_intra_dec_fixed; + tuned_module->super.coll_alltoallv = ompi_coll_tuned_alltoallv_intra_dec_fixed; + tuned_module->super.coll_alltoallw = NULL; + tuned_module->super.coll_barrier = ompi_coll_tuned_barrier_intra_dec_fixed; + tuned_module->super.coll_bcast = ompi_coll_tuned_bcast_intra_dec_fixed; + tuned_module->super.coll_exscan = NULL; + tuned_module->super.coll_gather = ompi_coll_tuned_gather_intra_dec_fixed; + 
tuned_module->super.coll_gatherv = NULL; + tuned_module->super.coll_reduce = ompi_coll_tuned_reduce_intra_dec_fixed; + tuned_module->super.coll_reduce_scatter = ompi_coll_tuned_reduce_scatter_intra_dec_fixed; + tuned_module->super.coll_scan = NULL; + tuned_module->super.coll_scatter = ompi_coll_tuned_scatter_intra_dec_fixed; + tuned_module->super.coll_scatterv = NULL; } return &(tuned_module->super); } +/* We put all routines that handle the MCA user forced algorithm and parameter choices here */ +/* recheck the setting of forced, called on module create (i.e. for each new comm) */ + +static int +ompi_coll_tuned_forced_getvalues( enum COLLTYPE type, + coll_tuned_force_algorithm_params_t *forced_values ) +{ + coll_tuned_force_algorithm_mca_param_indices_t* mca_params; + + mca_params = &(ompi_coll_tuned_forced_params[type]); + + mca_base_param_lookup_int (mca_params->algorithm_param_index, &(forced_values->algorithm)); + if( BARRIER != type ) { + mca_base_param_lookup_int (mca_params->segsize_param_index, &(forced_values->segsize)); + mca_base_param_lookup_int (mca_params->tree_fanout_param_index, &(forced_values->tree_fanout)); + mca_base_param_lookup_int (mca_params->chain_fanout_param_index, &(forced_values->chain_fanout)); + mca_base_param_lookup_int (mca_params->max_requests_param_index, &(forced_values->max_requests)); + } + return (MPI_SUCCESS); +} /* * Init module on the communicator */ static int -tuned_module_enable(mca_coll_base_module_t *module, - struct ompi_communicator_t *comm) +tuned_module_enable( mca_coll_base_module_t *module, + struct ompi_communicator_t *comm ) { - int size; + int size, i; mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t *) module; mca_coll_tuned_comm_t *data = NULL; - /* fanout parameters */ - int rc=0; - int i; - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init called.")); - /* This routine will become more complex and might have to be */ - /* broken into more sections/function calls */ - - /* Order 
of operations: + /* This routine will become more complex and might have to be + * broken into more sections/function calls + * + * Order of operations: * alloc memory for nb reqs (in case we fall through) * add decision rules if using dynamic rules * compact rules using communicator size info etc @@ -174,15 +181,13 @@ tuned_module_enable(mca_coll_base_module_t *module, */ /* Allocate the data that hangs off the communicator */ - if (OMPI_COMM_IS_INTER(comm)) { size = ompi_comm_remote_size(comm); } else { size = ompi_comm_size(comm); } - - /* + /** * we still malloc data as it is used by the TUNED modules * if we don't allocate it and fall back to a BASIC module routine then confuses debuggers * we place any special info after the default data @@ -195,12 +200,9 @@ tuned_module_enable(mca_coll_base_module_t *module, */ /* if we within the memory/size limit, allow preallocated data */ - - - if (size<=ompi_coll_tuned_preallocate_memory_comm_size_limit) { + if( size <= ompi_coll_tuned_preallocate_memory_comm_size_limit ) { data = (mca_coll_tuned_comm_t*)malloc(sizeof(struct mca_coll_tuned_comm_t) + - (sizeof(ompi_request_t *) * size * 2)); - + (sizeof(ompi_request_t *) * size * 2)); if (NULL == data) { return OMPI_ERROR; } @@ -208,7 +210,6 @@ tuned_module_enable(mca_coll_base_module_t *module, data->mcct_num_reqs = size * 2; } else { data = (mca_coll_tuned_comm_t*)malloc(sizeof(struct mca_coll_tuned_comm_t)); - if (NULL == data) { return OMPI_ERROR; } @@ -216,138 +217,62 @@ tuned_module_enable(mca_coll_base_module_t *module, data->mcct_num_reqs = 0; } - - /* + /** * If using dynamic and you are MPI_COMM_WORLD and you want to use a parameter file.. 
* then this effects how much storage space you need * (This is a basic version of what will go into V2) - * */ - - size = ompi_comm_size(comm); /* find size so we can (A) decide if to access the file directly */ - /* (B) so we can get our very own customised ompi_coll_com_rule_t ptr */ - /* which only has rules in it for our com size */ - - /* actually if they are below a threadhold, they all open it */ - /* have to build a collective in here.. but just for MCW.. */ - /* but we have to make sure we have the same rules everywhere :( */ - /* if using dynamic rules make sure all overrides are NULL before we start override anything accidently */ if (ompi_coll_tuned_use_dynamic_rules) { - /* base rules */ - data->all_base_rules = (ompi_coll_alg_rule_t*) NULL; + OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init MCW & Dynamic")); - /* each collective rule for my com size */ - for (i=0;icom_rules[i] = (ompi_coll_com_rule_t*) NULL; + /** + * next dynamic state, recheck all forced rules as well + * warning, we should check to make sure this is really an INTRA comm here... 
+ */ + ompi_coll_tuned_forced_getvalues( ALLGATHER, &(data->user_forced[ALLGATHER])); + ompi_coll_tuned_forced_getvalues( ALLGATHERV, &(data->user_forced[ALLGATHERV])); + ompi_coll_tuned_forced_getvalues( ALLREDUCE, &(data->user_forced[ALLREDUCE])); + ompi_coll_tuned_forced_getvalues( ALLTOALL, &(data->user_forced[ALLTOALL])); + ompi_coll_tuned_forced_getvalues( ALLTOALLV, &(data->user_forced[ALLTOALLV])); + ompi_coll_tuned_forced_getvalues( ALLTOALLW, &(data->user_forced[ALLTOALLW])); + ompi_coll_tuned_forced_getvalues( BARRIER, &(data->user_forced[BARRIER])); + ompi_coll_tuned_forced_getvalues( BCAST, &(data->user_forced[BCAST])); + ompi_coll_tuned_forced_getvalues( EXSCAN, &(data->user_forced[EXSCAN])); + ompi_coll_tuned_forced_getvalues( GATHER, &(data->user_forced[GATHER])); + ompi_coll_tuned_forced_getvalues( GATHERV, &(data->user_forced[GATHERV])); + ompi_coll_tuned_forced_getvalues( REDUCE, &(data->user_forced[REDUCE])); + ompi_coll_tuned_forced_getvalues( REDUCESCATTER, &(data->user_forced[REDUCESCATTER])); + ompi_coll_tuned_forced_getvalues( SCAN, &(data->user_forced[SCAN])); + ompi_coll_tuned_forced_getvalues( SCATTER, &(data->user_forced[SCATTER])); + ompi_coll_tuned_forced_getvalues( SCATTERV, &(data->user_forced[SCATTERV])); + + if( NULL != mca_coll_tuned_component.all_base_rules ) { + /* extract our customized communicator sized rule set, for each collective */ + for( i = 0; i < COLLCOUNT; i++ ) { + data->com_rules[i] = ompi_coll_tuned_get_com_rule_ptr( mca_coll_tuned_component.all_base_rules, + i, size ); + } } } - /* next dynamic state, recheck all forced rules as well */ - /* warning, we should check to make sure this is really an INTRA comm here... 
*/ - if (ompi_coll_tuned_use_dynamic_rules) { - ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLREDUCE], &(data->user_forced[ALLREDUCE])); - ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLTOALL], &(data->user_forced[ALLTOALL])); - ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLGATHER], &(data->user_forced[ALLGATHER])); - ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLGATHERV], &(data->user_forced[ALLGATHERV])); - ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLTOALLV], &(data->user_forced[ALLTOALLV])); - ompi_coll_tuned_forced_getvalues_barrier (ompi_coll_tuned_forced_params[BARRIER], &(data->user_forced[BARRIER])); - ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[BCAST], &(data->user_forced[BCAST])); - ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[REDUCE], &(data->user_forced[REDUCE])); - ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[REDUCESCATTER], &(data->user_forced[REDUCESCATTER])); - ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[GATHER], &(data->user_forced[GATHER])); - ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[SCATTER], &(data->user_forced[SCATTER])); - } - - - if (&ompi_mpi_comm_world.comm==comm) { - if (ompi_coll_tuned_use_dynamic_rules) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init MCW & Dynamic")); - if (ompi_coll_tuned_dynamic_rules_filename) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init Opening [%s]", - ompi_coll_tuned_dynamic_rules_filename)); - rc = ompi_coll_tuned_read_rules_config_file (ompi_coll_tuned_dynamic_rules_filename, - &(data->all_base_rules), COLLCOUNT); - if (rc>=0) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init Read %d valid rules\n", rc)); - /* at this point we all have a base set of rules */ - /* now we can get our customized communicator sized rule set, for each collective */ - for 
(i=0;icom_rules[i] = ompi_coll_tuned_get_com_rule_ptr (data->all_base_rules, i, size); - } - } - else { /* failed to read config file, thus make sure its a NULL... */ - data->all_base_rules = (ompi_coll_alg_rule_t*) NULL; - } - } /* end if a config filename exists */ - } /* end if dynamic_rules */ - } /* end if MCW */ - - /* ok, if using dynamic rules, not MCW and we are just any rank and a base set of rules exist.. ref them */ - /* order of eval is important here, if we are MCW ompi_mpi_comm_world.c_coll_selected_data is NULL still.. */ - -#if 0 /* FIXME: don't know how to deal with this */ - if ((ompi_coll_tuned_use_dynamic_rules)&&(!(&ompi_mpi_comm_world==comm))&& - ((ompi_mpi_comm_world.c_coll_selected_data)->all_base_rules)) { - - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init NOT MCW & Dynamic")); - - /* this will, erm fail if MCW doesn't exist which it should! */ - data->all_base_rules = (ompi_mpi_comm_world.c_coll_selected_data)->all_base_rules; - - /* at this point we all have a base set of rules if they exist atall */ - /* now we can get our customized communicator sized rule set, for each collective */ - for (i=0;icom_rules[i] = ompi_coll_tuned_get_com_rule_ptr (data->all_base_rules, i, size); - } - } -#endif - - /* - * now for the cached topo functions - * guess the initial topologies to use rank 0 as root - */ - /* general n fan out tree */ - data->cached_ntree = ompi_coll_tuned_topo_build_tree (ompi_coll_tuned_init_tree_fanout, - comm, 0); - data->cached_ntree_root = 0; - data->cached_ntree_fanout = ompi_coll_tuned_init_tree_fanout; - + data->cached_ntree = NULL; /* binary tree */ - data->cached_bintree = ompi_coll_tuned_topo_build_tree (2, comm, 0); - data->cached_bintree_root = 0; - + data->cached_bintree = NULL; /* binomial tree */ - data->cached_bmtree = ompi_coll_tuned_topo_build_bmtree (comm, 0); - data->cached_bmtree_root = 0; - + data->cached_bmtree = NULL; /* binomial tree */ - data->cached_in_order_bmtree = 
ompi_coll_tuned_topo_build_in_order_bmtree (comm, 0); - data->cached_in_order_bmtree_root = 0; - /* - * chains (fanout followed by pipelines) - * are more difficuilt as the fan out really really depends on message size [sometimes].. - * as size gets larger fan-out gets smaller [usually] - * - * will probably change how we cache this later, for now a midsize - * GEF - */ - data->cached_chain = ompi_coll_tuned_topo_build_chain (ompi_coll_tuned_init_chain_fanout, - comm, 0); - data->cached_chain_root = 0; - data->cached_chain_fanout = ompi_coll_tuned_init_chain_fanout; - + data->cached_in_order_bmtree = NULL; + /* chains (fanout followed by pipelines) */ + data->cached_chain = NULL; /* standard pipeline */ - data->cached_pipeline = ompi_coll_tuned_topo_build_chain (1, comm, 0); - data->cached_pipeline_root = 0; - + data->cached_pipeline = NULL; /* in-order binary tree */ - data->cached_in_order_bintree = ompi_coll_tuned_topo_build_in_order_bintree(comm); + data->cached_in_order_bintree = NULL; /* All done */ - tuned_module->tuned_data = data; OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init Tuned is in use")); diff --git a/ompi/mca/coll/tuned/coll_tuned_reduce.c b/ompi/mca/coll/tuned/coll_tuned_reduce.c index 9cdaec127c..56eca6ae4a 100644 --- a/ompi/mca/coll/tuned/coll_tuned_reduce.c +++ b/ompi/mca/coll/tuned/coll_tuned_reduce.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2009 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -43,7 +43,7 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_count, ompi_datatype_t* datatype, ompi_op_t* op, int root, ompi_communicator_t* comm, - mca_coll_base_module_t *module, + mca_coll_base_module_t *module, ompi_coll_tree_t* tree, int count_by_segment, int max_outstanding_reqs ) { @@ -79,7 +79,7 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c if( tree->tree_nextsize > 0 ) { ptrdiff_t true_lower_bound, true_extent, real_segment_size; ompi_datatype_get_true_extent( datatype, &true_lower_bound, - &true_extent ); + &true_extent ); /* handle non existant recv buffer (i.e. its NULL) and protect the recv buffer on non-root nodes */ @@ -88,18 +88,18 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c /* Allocate temporary accumulator buffer. */ accumbuf_free = (char*)malloc(true_extent + (original_count - 1) * extent); - if (accumbuf_free == NULL) { + if (accumbuf_free == NULL) { line = __LINE__; ret = -1; goto error_hndl; } accumbuf = accumbuf_free - lower_bound; - } + } /* If this is a non-commutative operation we must copy sendbuf to the accumbuf, in order to simplfy the loops */ if (!ompi_op_is_commute(op)) { - ompi_datatype_copy_content_same_ddt(datatype, original_count, - (char*)accumbuf, - (char*)sendtmpbuf); + ompi_datatype_copy_content_same_ddt(datatype, original_count, + (char*)accumbuf, + (char*)sendtmpbuf); } /* Allocate two buffers for incoming segments */ real_segment_size = true_extent + (count_by_segment - 1) * extent; @@ -232,11 +232,11 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c the number of segments we have two options: - send all segments using blocking send to the parent, or - avoid overflooding the parent nodes by limiting the number of - outstanding requests to max_oustanding_reqs. + outstanding requests to max_oustanding_reqs. 
TODO/POSSIBLE IMPROVEMENT: If there is a way to determine the eager size for the current communication, synchronization should be used only when the message/segment size is smaller than the eager size. - */ + */ else { /* If the number of segments is less than a maximum number of oustanding @@ -266,9 +266,9 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c /* Otherwise, introduce flow control: - post max_outstanding_reqs non-blocking synchronous send, - for remaining segments - - wait for a ssend to complete, and post the next one. + - wait for a ssend to complete, and post the next one. - wait for all outstanding sends to complete. - */ + */ else { int creq = 0; @@ -346,7 +346,7 @@ int ompi_coll_tuned_reduce_intra_chain( void *sendbuf, void *recvbuf, int count, ompi_datatype_t* datatype, ompi_op_t* op, int root, ompi_communicator_t* comm, - mca_coll_base_module_t *module, + mca_coll_base_module_t *module, uint32_t segsize, int fanout, int max_outstanding_reqs ) { @@ -376,7 +376,7 @@ int ompi_coll_tuned_reduce_intra_pipeline( void *sendbuf, void *recvbuf, int count, ompi_datatype_t* datatype, ompi_op_t* op, int root, ompi_communicator_t* comm, - mca_coll_base_module_t *module, + mca_coll_base_module_t *module, uint32_t segsize, int max_outstanding_reqs ) { @@ -407,7 +407,7 @@ int ompi_coll_tuned_reduce_intra_binary( void *sendbuf, void *recvbuf, int count, ompi_datatype_t* datatype, ompi_op_t* op, int root, ompi_communicator_t* comm, - mca_coll_base_module_t *module, + mca_coll_base_module_t *module, uint32_t segsize, int max_outstanding_reqs ) { @@ -438,7 +438,7 @@ int ompi_coll_tuned_reduce_intra_binomial( void *sendbuf, void *recvbuf, int count, ompi_datatype_t* datatype, ompi_op_t* op, int root, ompi_communicator_t* comm, - mca_coll_base_module_t *module, + mca_coll_base_module_t *module, uint32_t segsize, int max_outstanding_reqs ) { @@ -477,7 +477,7 @@ int ompi_coll_tuned_reduce_intra_in_order_binary( void *sendbuf, void 
*recvbuf, ompi_datatype_t* datatype, ompi_op_t* op, int root, ompi_communicator_t* comm, - mca_coll_base_module_t *module, + mca_coll_base_module_t *module, uint32_t segsize, int max_outstanding_reqs ) { @@ -514,57 +514,57 @@ int ompi_coll_tuned_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf, use_this_sendbuf = sendbuf; use_this_recvbuf = recvbuf; if (io_root != root) { - ptrdiff_t tlb, text, lb, ext; - char *tmpbuf = NULL; + ptrdiff_t tlb, text, lb, ext; + char *tmpbuf = NULL; - ompi_datatype_get_extent(datatype, &lb, &ext); - ompi_datatype_get_true_extent(datatype, &tlb, &text); + ompi_datatype_get_extent(datatype, &lb, &ext); + ompi_datatype_get_true_extent(datatype, &tlb, &text); - if ((root == rank) && (MPI_IN_PLACE == sendbuf)) { - tmpbuf = (char *) malloc(text + (count - 1) * ext); - if (NULL == tmpbuf) { - return MPI_ERR_INTERN; - } - ompi_datatype_copy_content_same_ddt(datatype, count, - (char*)tmpbuf, - (char*)recvbuf); - use_this_sendbuf = tmpbuf; - } else if (io_root == rank) { - tmpbuf = (char *) malloc(text + (count - 1) * ext); - if (NULL == tmpbuf) { - return MPI_ERR_INTERN; - } - use_this_recvbuf = tmpbuf; - } + if ((root == rank) && (MPI_IN_PLACE == sendbuf)) { + tmpbuf = (char *) malloc(text + (count - 1) * ext); + if (NULL == tmpbuf) { + return MPI_ERR_INTERN; + } + ompi_datatype_copy_content_same_ddt(datatype, count, + (char*)tmpbuf, + (char*)recvbuf); + use_this_sendbuf = tmpbuf; + } else if (io_root == rank) { + tmpbuf = (char *) malloc(text + (count - 1) * ext); + if (NULL == tmpbuf) { + return MPI_ERR_INTERN; + } + use_this_recvbuf = tmpbuf; + } } /* Use generic reduce with in-order binary tree topology and io_root */ ret = ompi_coll_tuned_reduce_generic( use_this_sendbuf, use_this_recvbuf, count, datatype, - op, io_root, comm, module, + op, io_root, comm, module, data->cached_in_order_bintree, segcount, max_outstanding_reqs ); if (MPI_SUCCESS != ret) { return ret; } /* Clean up */ if (io_root != root) { - if (root == rank) { - 
/* Receive result from rank io_root to recvbuf */ - ret = MCA_PML_CALL(recv(recvbuf, count, datatype, io_root, - MCA_COLL_BASE_TAG_REDUCE, comm, - MPI_STATUS_IGNORE)); - if (MPI_SUCCESS != ret) { return ret; } - if (MPI_IN_PLACE == sendbuf) { - free(use_this_sendbuf); - } + if (root == rank) { + /* Receive result from rank io_root to recvbuf */ + ret = MCA_PML_CALL(recv(recvbuf, count, datatype, io_root, + MCA_COLL_BASE_TAG_REDUCE, comm, + MPI_STATUS_IGNORE)); + if (MPI_SUCCESS != ret) { return ret; } + if (MPI_IN_PLACE == sendbuf) { + free(use_this_sendbuf); + } - } else if (io_root == rank) { - /* Send result from use_this_recvbuf to root */ - ret = MCA_PML_CALL(send(use_this_recvbuf, count, datatype, root, - MCA_COLL_BASE_TAG_REDUCE, - MCA_PML_BASE_SEND_STANDARD, comm)); - if (MPI_SUCCESS != ret) { return ret; } - free(use_this_recvbuf); - } + } else if (io_root == rank) { + /* Send result from use_this_recvbuf to root */ + ret = MCA_PML_CALL(send(use_this_recvbuf, count, datatype, root, + MCA_COLL_BASE_TAG_REDUCE, + MCA_PML_BASE_SEND_STANDARD, comm)); + if (MPI_SUCCESS != ret) { return ret; } + free(use_this_recvbuf); + } } return MPI_SUCCESS; @@ -596,8 +596,8 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) { int i, rank, err, size; ptrdiff_t true_lb, true_extent, lb, extent; @@ -650,7 +650,7 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count, if (rank == (size - 1)) { err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)rbuf, - (char*)sbuf); + (char*)sbuf); } else { err = MCA_PML_CALL(recv(rbuf, count, dtype, size - 1, MCA_COLL_BASE_TAG_REDUCE, comm, @@ -688,7 +688,7 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count, if (NULL != inplace_temp) { err = 
ompi_datatype_copy_content_same_ddt(dtype, count, (char*)sbuf, - inplace_temp); + inplace_temp); free(inplace_temp); } if (NULL != free_buffer) { @@ -789,7 +789,7 @@ int ompi_coll_tuned_reduce_intra_do_forced(void *sbuf, void* rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; mca_coll_tuned_comm_t *data = tuned_module->tuned_data; @@ -808,19 +808,19 @@ int ompi_coll_tuned_reduce_intra_do_forced(void *sbuf, void* rbuf, int count, case (1): return ompi_coll_tuned_reduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, root, comm, module); case (2): return ompi_coll_tuned_reduce_intra_chain (sbuf, rbuf, count, dtype, - op, root, comm, module, - segsize, chain_fanout, max_requests); + op, root, comm, module, + segsize, chain_fanout, max_requests); case (3): return ompi_coll_tuned_reduce_intra_pipeline (sbuf, rbuf, count, dtype, - op, root, comm, module, - segsize, max_requests); + op, root, comm, module, + segsize, max_requests); case (4): return ompi_coll_tuned_reduce_intra_binary (sbuf, rbuf, count, dtype, - op, root, comm, module, - segsize, max_requests); + op, root, comm, module, + segsize, max_requests); case (5): return ompi_coll_tuned_reduce_intra_binomial (sbuf, rbuf, count, dtype, - op, root, comm, module, - segsize, max_requests); + op, root, comm, module, + segsize, max_requests); case (6): return ompi_coll_tuned_reduce_intra_in_order_binary(sbuf, rbuf, count, dtype, - op, root, comm, module, + op, root, comm, module, segsize, max_requests); default: OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?", @@ -834,7 +834,7 @@ int ompi_coll_tuned_reduce_intra_do_this(void *sbuf, void* rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct 
ompi_communicator_t *comm, - mca_coll_base_module_t *module, + mca_coll_base_module_t *module, int algorithm, int faninout, int segsize, int max_requests ) { @@ -843,23 +843,23 @@ int ompi_coll_tuned_reduce_intra_do_this(void *sbuf, void* rbuf, int count, switch (algorithm) { case (0): return ompi_coll_tuned_reduce_intra_dec_fixed (sbuf, rbuf, count, dtype, - op, root, comm, module); + op, root, comm, module); case (1): return ompi_coll_tuned_reduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, root, comm, module); case (2): return ompi_coll_tuned_reduce_intra_chain (sbuf, rbuf, count, dtype, - op, root, comm, module, + op, root, comm, module, segsize, faninout, max_requests); case (3): return ompi_coll_tuned_reduce_intra_pipeline (sbuf, rbuf, count, dtype, - op, root, comm, module, - segsize, max_requests); + op, root, comm, module, + segsize, max_requests); case (4): return ompi_coll_tuned_reduce_intra_binary (sbuf, rbuf, count, dtype, - op, root, comm, module, - segsize, max_requests); + op, root, comm, module, + segsize, max_requests); case (5): return ompi_coll_tuned_reduce_intra_binomial (sbuf, rbuf, count, dtype, - op, root, comm, module, - segsize, max_requests); + op, root, comm, module, + segsize, max_requests); case (6): return ompi_coll_tuned_reduce_intra_in_order_binary(sbuf, rbuf, count, dtype, - op, root, comm, module, + op, root, comm, module, segsize, max_requests); default: OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",