A few small changes that just expanded in the name of neatness...
(1) As pointed out by Torsten after Jeff's comment yesterday that there are 15 collectives: nope, I have 16, but I miscounted them in my #defines (I had two #11s). Replaced them with an enum. (2) Added a read-only MCA param for how many backend algorithms are available per collective (used by the benchmarker/STS). This allowed me to remove the tuned internal query functions and replace them with ompi_coll_tuned_forced_max_algorithms[COLL]. (3) I was reading the user-forced MCA params for the collectives on each comm create (module init), but I then put the values into a global set of variables (like ompi_coll_tuned_reduce_forced_choice). To fix this and make the code neater: (a) The component looks up the MCA param indices on open, if dynamic_rules is set, via the ompi_coll_tuned_COLLECTIVE_intra_check_forced_init () call. (b) Replaced the ompi_coll_tuned_COLLECTIVE_forced_choice/segsize/etc. globals with a struct that is now cached on the module data hung off the communicator, i.e. done right. (c) On module init, if dynamic rules are enabled, we call a general getvalues routine (in coll_tuned_forced.c) to get the CURRENT values using the MCA param indices and then put them on the module's data segment. A shorter version of getvalues exists for barrier, which only needs the algorithm choice. This commit was SVN r9663.
Этот коммит содержится в:
родитель
345551cb36
Коммит
c31a5ad4b3
@ -22,12 +22,14 @@ sources = \
|
||||
coll_tuned_util.h \
|
||||
coll_tuned_dynamic_file.h \
|
||||
coll_tuned_dynamic_rules.h \
|
||||
coll_tuned_forced.h \
|
||||
coll_tuned_topo.c \
|
||||
coll_tuned_util.c \
|
||||
coll_tuned_decision_fixed.c \
|
||||
coll_tuned_decision_dynamic.c \
|
||||
coll_tuned_dynamic_file.c \
|
||||
coll_tuned_dynamic_rules.c \
|
||||
coll_tuned_forced.c \
|
||||
coll_tuned_allreduce.c \
|
||||
coll_tuned_alltoall.c \
|
||||
coll_tuned_barrier.c \
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -33,24 +33,12 @@
|
||||
/* also need the dynamic rule structures */
|
||||
#include "coll_tuned_dynamic_rules.h"
|
||||
|
||||
/* need the forced user choice structures */
|
||||
#include "coll_tuned_forced.h"
|
||||
|
||||
/* some fixed value index vars to simplify certain operations */
|
||||
#define ALLGATHER 0
|
||||
#define ALLGATHERV 1
|
||||
#define ALLREDUCE 2
|
||||
#define ALLTOALL 3
|
||||
#define ALLTOALLV 4
|
||||
#define ALLTOALLW 5
|
||||
#define BARRIER 6
|
||||
#define BCAST 7
|
||||
#define EXSCAN 8
|
||||
#define GATHER 9
|
||||
#define GATHERV 10
|
||||
#define REDUCE 11
|
||||
#define REDUCESCATTER 11
|
||||
#define SCAN 12
|
||||
#define SCATTER 13
|
||||
#define SCATTERV 14
|
||||
#define COLLCOUNT 15
|
||||
typedef enum COLLTYPE {ALLGATHER, ALLGATHERV, ALLREDUCE, ALLTOALL, ALLTOALLV, ALLTOALLW, BARRIER, BCAST,
|
||||
EXSCAN, GATHER, GATHERV, REDUCE, REDUCESCATTER, SCAN, SCATTER, SCATTERV, COLLCOUNT} COLLTYPE_T;
|
||||
|
||||
/* defined arg lists to simplify auto inclusion of user overriding decision functions */
|
||||
#define ALLGATHER_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm
|
||||
@ -81,6 +69,7 @@ extern "C" {
|
||||
|
||||
/* OMPI_COMP_EXPORT extern const mca_coll_base_component_1_0_0_t mca_coll_tuned_component; */
|
||||
|
||||
/* these are the same across all modules and are loaded at component query time */
|
||||
OMPI_COMP_EXPORT extern int ompi_coll_tuned_stream;
|
||||
OMPI_COMP_EXPORT extern int ompi_coll_tuned_priority;
|
||||
OMPI_COMP_EXPORT extern int ompi_coll_tuned_preallocate_memory_comm_size_limit;
|
||||
@ -90,27 +79,12 @@ OMPI_COMP_EXPORT extern int ompi_coll_tuned_init_tree_fanout;
|
||||
OMPI_COMP_EXPORT extern int ompi_coll_tuned_init_chain_fanout;
|
||||
|
||||
/* forced algorithm choices */
|
||||
OMPI_COMP_EXPORT extern int ompi_coll_tuned_allreduce_forced_choice;
|
||||
OMPI_COMP_EXPORT extern int ompi_coll_tuned_allreduce_forced_segsize;
|
||||
OMPI_COMP_EXPORT extern int ompi_coll_tuned_allreduce_forced_tree_fanout;
|
||||
OMPI_COMP_EXPORT extern int ompi_coll_tuned_allreduce_forced_chain_fanout;
|
||||
/* the indices to the MCA params so that modules can look them up at open / comm create time */
|
||||
OMPI_COMP_EXPORT extern coll_tuned_force_algorithm_mca_param_indices_t ompi_coll_tuned_forced_params[COLLCOUNT];
|
||||
/* the actual max algorithm values (readonly), loaded at component open */
|
||||
OMPI_COMP_EXPORT extern int ompi_coll_tuned_forced_max_algorithms[COLLCOUNT];
|
||||
|
||||
OMPI_COMP_EXPORT extern int ompi_coll_tuned_alltoall_forced_choice;
|
||||
OMPI_COMP_EXPORT extern int ompi_coll_tuned_alltoall_forced_segsize;
|
||||
OMPI_COMP_EXPORT extern int ompi_coll_tuned_alltoall_forced_tree_fanout;
|
||||
OMPI_COMP_EXPORT extern int ompi_coll_tuned_alltoall_forced_chain_fanout;
|
||||
|
||||
OMPI_COMP_EXPORT extern int ompi_coll_tuned_barrier_forced_choice;
|
||||
|
||||
OMPI_COMP_EXPORT extern int ompi_coll_tuned_bcast_forced_choice;
|
||||
OMPI_COMP_EXPORT extern int ompi_coll_tuned_bcast_forced_segsize;
|
||||
OMPI_COMP_EXPORT extern int ompi_coll_tuned_bcast_forced_tree_fanout;
|
||||
OMPI_COMP_EXPORT extern int ompi_coll_tuned_bcast_forced_chain_fanout;
|
||||
|
||||
OMPI_COMP_EXPORT extern int ompi_coll_tuned_reduce_forced_choice;
|
||||
OMPI_COMP_EXPORT extern int ompi_coll_tuned_reduce_forced_segsize;
|
||||
OMPI_COMP_EXPORT extern int ompi_coll_tuned_reduce_forced_tree_fanout;
|
||||
OMPI_COMP_EXPORT extern int ompi_coll_tuned_reduce_forced_chain_fanout;
|
||||
|
||||
/*
|
||||
* coll API functions
|
||||
@ -156,9 +130,8 @@ OMPI_COMP_EXPORT extern int ompi_coll_tuned_reduce_forced_chain_fanout;
|
||||
int ompi_coll_tuned_allreduce_intra_dec_fixed(ALLREDUCE_ARGS);
|
||||
int ompi_coll_tuned_allreduce_intra_dec_dynamic(ALLREDUCE_ARGS);
|
||||
int ompi_coll_tuned_allreduce_intra_do_forced(ALLREDUCE_ARGS);
|
||||
int ompi_coll_tuned_allreduce_intra_do_this(ALLREDUCE_ARGS, int choice, int faninout, int segsize);
|
||||
int ompi_coll_tuned_allreduce_intra_check_forced(void);
|
||||
int ompi_coll_tuned_allreduce_intra_query(void);
|
||||
int ompi_coll_tuned_allreduce_intra_do_this(ALLREDUCE_ARGS, int algorithm, int faninout, int segsize);
|
||||
int ompi_coll_tuned_allreduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
|
||||
int ompi_coll_tuned_allreduce_intra_nonoverlapping(ALLREDUCE_ARGS);
|
||||
int ompi_coll_tuned_allreduce_intra_basic_linear(ALLREDUCE_ARGS);
|
||||
int ompi_coll_tuned_allreduce_inter_dec_fixed(ALLREDUCE_ARGS);
|
||||
@ -168,9 +141,8 @@ OMPI_COMP_EXPORT extern int ompi_coll_tuned_reduce_forced_chain_fanout;
|
||||
int ompi_coll_tuned_alltoall_intra_dec_fixed(ALLTOALL_ARGS);
|
||||
int ompi_coll_tuned_alltoall_intra_dec_dynamic(ALLTOALL_ARGS);
|
||||
int ompi_coll_tuned_alltoall_intra_do_forced(ALLTOALL_ARGS);
|
||||
int ompi_coll_tuned_alltoall_intra_do_this(ALLTOALL_ARGS, int choice, int faninout, int segsize);
|
||||
int ompi_coll_tuned_alltoall_intra_check_forced(void);
|
||||
int ompi_coll_tuned_alltoall_intra_query (void);
|
||||
int ompi_coll_tuned_alltoall_intra_do_this(ALLTOALL_ARGS, int algorithm, int faninout, int segsize);
|
||||
int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
|
||||
int ompi_coll_tuned_alltoall_intra_pairwise(ALLTOALL_ARGS);
|
||||
int ompi_coll_tuned_alltoall_intra_bruck(ALLTOALL_ARGS);
|
||||
int ompi_coll_tuned_alltoall_intra_basic_linear(ALLTOALL_ARGS);
|
||||
@ -194,14 +166,10 @@ OMPI_COMP_EXPORT extern int ompi_coll_tuned_reduce_forced_chain_fanout;
|
||||
int ompi_coll_tuned_barrier_intra_dec_fixed(BARRIER_ARGS);
|
||||
int ompi_coll_tuned_barrier_intra_dec_dynamic(BARRIER_ARGS);
|
||||
int ompi_coll_tuned_barrier_intra_do_forced(BARRIER_ARGS);
|
||||
int ompi_coll_tuned_barrier_intra_do_this(BARRIER_ARGS, int choice, int faninout, int segsize);
|
||||
|
||||
int ompi_coll_tuned_barrier_intra_check_forced(void);
|
||||
int ompi_coll_tuned_barrier_intra_query (void);
|
||||
|
||||
int ompi_coll_tuned_barrier_intra_do_this(BARRIER_ARGS, int algorithm, int faninout, int segsize);
|
||||
int ompi_coll_tuned_barrier_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
|
||||
int ompi_coll_tuned_barrier_inter_dec_fixed(BARRIER_ARGS);
|
||||
int ompi_coll_tuned_barrier_inter_dec_dynamic(BARRIER_ARGS);
|
||||
|
||||
int ompi_coll_tuned_barrier_intra_doublering(BARRIER_ARGS);
|
||||
int ompi_coll_tuned_barrier_intra_recursivedoubling(BARRIER_ARGS);
|
||||
int ompi_coll_tuned_barrier_intra_bruck(BARRIER_ARGS);
|
||||
@ -212,9 +180,8 @@ OMPI_COMP_EXPORT extern int ompi_coll_tuned_reduce_forced_chain_fanout;
|
||||
int ompi_coll_tuned_bcast_intra_dec_fixed(BCAST_ARGS);
|
||||
int ompi_coll_tuned_bcast_intra_dec_dynamic(BCAST_ARGS);
|
||||
int ompi_coll_tuned_bcast_intra_do_forced(BCAST_ARGS);
|
||||
int ompi_coll_tuned_bcast_intra_do_this(BCAST_ARGS, int choice, int faninout, int segsize);
|
||||
int ompi_coll_tuned_bcast_intra_check_forced(void);
|
||||
int ompi_coll_tuned_bcast_intra_query (void);
|
||||
int ompi_coll_tuned_bcast_intra_do_this(BCAST_ARGS, int algorithm, int faninout, int segsize);
|
||||
int ompi_coll_tuned_bcast_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
|
||||
int ompi_coll_tuned_bcast_intra_basic_linear(BCAST_ARGS);
|
||||
int ompi_coll_tuned_bcast_intra_chain(BCAST_ARGS, uint32_t segsize, int32_t chains);
|
||||
int ompi_coll_tuned_bcast_intra_pipeline(BCAST_ARGS, uint32_t segsize);
|
||||
@ -246,9 +213,8 @@ OMPI_COMP_EXPORT extern int ompi_coll_tuned_reduce_forced_chain_fanout;
|
||||
int ompi_coll_tuned_reduce_intra_dec_fixed(REDUCE_ARGS);
|
||||
int ompi_coll_tuned_reduce_intra_dec_dynamic(REDUCE_ARGS);
|
||||
int ompi_coll_tuned_reduce_intra_do_forced(REDUCE_ARGS);
|
||||
int ompi_coll_tuned_reduce_intra_do_this(REDUCE_ARGS, int choice, int faninout, int segsize);
|
||||
int ompi_coll_tuned_reduce_intra_check_forced(void);
|
||||
int ompi_coll_tuned_reduce_intra_query (void);
|
||||
int ompi_coll_tuned_reduce_intra_do_this(REDUCE_ARGS, int algorithm, int faninout, int segsize);
|
||||
int ompi_coll_tuned_reduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
|
||||
int ompi_coll_tuned_reduce_intra_basic_linear(REDUCE_ARGS);
|
||||
int ompi_coll_tuned_reduce_intra_chain(REDUCE_ARGS, uint32_t segsize, int fanout);
|
||||
int ompi_coll_tuned_reduce_intra_pipeline(REDUCE_ARGS, uint32_t segsize);
|
||||
@ -372,6 +338,10 @@ struct mca_coll_base_comm_t {
|
||||
ompi_coll_alg_rule_t *all_base_rules; /* stored only on MCW, all other coms ref it */
|
||||
/* moving to the component */
|
||||
ompi_coll_com_rule_t *com_rules[COLLCOUNT]; /* the communicator rules for each MPI collective for ONLY my comsize */
|
||||
|
||||
/* for forced algorithms we store the information on the module */
|
||||
/* previously we only had one shared copy; oops, it really is per comm/module */
|
||||
coll_tuned_force_algorithm_params_t user_forced[COLLCOUNT];
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -135,43 +135,55 @@ ompi_coll_tuned_allreduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
|
||||
/* publish details of each algorithm and if its forced/fixed/locked in */
|
||||
/* as you add methods/algorithms you must update this and the query/map routines */
|
||||
|
||||
int ompi_coll_tuned_allreduce_intra_check_forced ( )
|
||||
/* this routine is called by the component only */
|
||||
/* this makes sure that the mca parameters are set to their initial values and perms */
|
||||
/* modules do not call this; they call the forced_getvalues routine instead */
|
||||
|
||||
int ompi_coll_tuned_allreduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
|
||||
{
|
||||
int rc;
|
||||
int max_alg = 2;
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
ompi_coll_tuned_forced_max_algorithms[ALLREDUCE] = max_alg;
|
||||
|
||||
rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version,
|
||||
"allreduce_algorithm_count",
|
||||
"Number of allreduce algorithms available",
|
||||
false, true, max_alg, NULL);
|
||||
|
||||
|
||||
mca_param_indices->algorithm_param_index = mca_base_param_reg_int(
|
||||
&mca_coll_tuned_component.super.collm_version,
|
||||
"allreduce_algorithm",
|
||||
"Which allreduce algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 nonoverlapping (tuned reduce + tuned bcast)",
|
||||
false, false, ompi_coll_tuned_allreduce_forced_choice,
|
||||
&ompi_coll_tuned_allreduce_forced_choice);
|
||||
"Which allreduce algorithm is used. Can be locked down to any of: 0 ignore, 1 basic linear, 2 nonoverlapping (tuned reduce + tuned bcast)",
|
||||
false, false, 0, NULL);
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
|
||||
|
||||
mca_param_indices->segsize_param_index = mca_base_param_reg_int(
|
||||
&mca_coll_tuned_component.super.collm_version,
|
||||
"allreduce_algorithm_segmentsize",
|
||||
"Segment size in bytes used by default for allreduce algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
|
||||
false, false, ompi_coll_tuned_allreduce_forced_segsize,
|
||||
&ompi_coll_tuned_allreduce_forced_segsize);
|
||||
false, false, 0, NULL);
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
mca_param_indices->tree_fanout_param_index = mca_base_param_reg_int(
|
||||
&mca_coll_tuned_component.super.collm_version,
|
||||
"allreduce_algorithm_tree_fanout",
|
||||
"Fanout for n-tree used for allreduce algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
|
||||
false, false,
|
||||
ompi_coll_tuned_init_tree_fanout, /* get system wide default */
|
||||
&ompi_coll_tuned_allreduce_forced_tree_fanout);
|
||||
false, false, ompi_coll_tuned_init_tree_fanout, /* get system wide default */
|
||||
NULL);
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
mca_param_indices->chain_fanout_param_index = mca_base_param_reg_int(
|
||||
&mca_coll_tuned_component.super.collm_version,
|
||||
"allreduce_algorithm_chain_fanout",
|
||||
"Fanout for chains used for allreduce algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
|
||||
false, false,
|
||||
ompi_coll_tuned_init_chain_fanout, /* get system wide default */
|
||||
&ompi_coll_tuned_allreduce_forced_chain_fanout);
|
||||
NULL);
|
||||
|
||||
return (MPI_SUCCESS);
|
||||
}
|
||||
|
||||
int ompi_coll_tuned_allreduce_intra_query ( )
|
||||
{
|
||||
return (2); /* 2 algorithms available */
|
||||
}
|
||||
|
||||
|
||||
int ompi_coll_tuned_allreduce_intra_do_forced(void *sbuf, void *rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
@ -179,15 +191,16 @@ int ompi_coll_tuned_allreduce_intra_do_forced(void *sbuf, void *rbuf, int count,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_forced selected algorithm %d",
|
||||
ompi_coll_tuned_allreduce_forced_choice));
|
||||
comm->c_coll_selected_data->user_forced[ALLREDUCE].algorithm));
|
||||
|
||||
switch (ompi_coll_tuned_allreduce_forced_choice) {
|
||||
switch (comm->c_coll_selected_data->user_forced[ALLREDUCE].algorithm) {
|
||||
case (0): return ompi_coll_tuned_allreduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, comm);
|
||||
case (1): return ompi_coll_tuned_allreduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, comm);
|
||||
case (2): return ompi_coll_tuned_allreduce_intra_nonoverlapping (sbuf, rbuf, count, dtype, op, comm);
|
||||
default:
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
|
||||
ompi_coll_tuned_allreduce_forced_choice, ompi_coll_tuned_allreduce_intra_query()));
|
||||
comm->c_coll_selected_data->user_forced[ALLREDUCE].algorithm,
|
||||
ompi_coll_tuned_forced_max_algorithms[ALLREDUCE]));
|
||||
return (MPI_ERR_ARG);
|
||||
} /* switch */
|
||||
|
||||
@ -198,18 +211,18 @@ int ompi_coll_tuned_allreduce_intra_do_this(void *sbuf, void *rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm,
|
||||
int choice, int faninout, int segsize)
|
||||
int algorithm, int faninout, int segsize)
|
||||
{
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_this algorithm %d topo fan in/out %d segsize %d",
|
||||
choice, faninout, segsize));
|
||||
algorithm, faninout, segsize));
|
||||
|
||||
switch (choice) {
|
||||
switch (algorithm) {
|
||||
case (0): return ompi_coll_tuned_allreduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, comm);
|
||||
case (1): return ompi_coll_tuned_allreduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, comm);
|
||||
case (2): return ompi_coll_tuned_allreduce_intra_nonoverlapping (sbuf, rbuf, count, dtype, op, comm);
|
||||
default:
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
|
||||
choice, ompi_coll_tuned_allreduce_intra_query()));
|
||||
algorithm, ompi_coll_tuned_forced_max_algorithms[ALLREDUCE]));
|
||||
return (MPI_ERR_ARG);
|
||||
} /* switch */
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -440,44 +440,51 @@ int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount,
|
||||
/* publish details of each algorithm and if its forced/fixed/locked in */
|
||||
/* as you add methods/algorithms you must update this and the query/map routines */
|
||||
|
||||
int ompi_coll_tuned_alltoall_intra_check_forced ( )
|
||||
{
|
||||
/* this routine is called by the component only */
|
||||
/* this makes sure that the mca parameters are set to their initial values and perms */
|
||||
/* modules do not call this; they call the forced_getvalues routine instead */
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
|
||||
{
|
||||
int rc;
|
||||
int max_alg = 4;
|
||||
|
||||
ompi_coll_tuned_forced_max_algorithms[ALLTOALL] = max_alg;
|
||||
|
||||
rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version,
|
||||
"alltoall_algorithm_count",
|
||||
"Number of alltoall algorithms available",
|
||||
false, true, max_alg, NULL);
|
||||
|
||||
|
||||
mca_param_indices->algorithm_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"alltoall_algorithm",
|
||||
"Which alltoall algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 pairwise, 3: modified bruck, 4: two proc only.",
|
||||
false, false, ompi_coll_tuned_alltoall_forced_choice,
|
||||
&ompi_coll_tuned_alltoall_forced_choice);
|
||||
false, false, 0, NULL);
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
mca_param_indices->segsize_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"alltoall_algorithm_segmentsize",
|
||||
"Segment size in bytes used by default for alltoall algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
|
||||
false, false, ompi_coll_tuned_alltoall_forced_segsize,
|
||||
&ompi_coll_tuned_alltoall_forced_segsize);
|
||||
false, false, 0, NULL);
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
mca_param_indices->tree_fanout_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"alltoall_algorithm_tree_fanout",
|
||||
"Fanout for n-tree used for alltoall algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
|
||||
false, false,
|
||||
ompi_coll_tuned_init_tree_fanout, /* get system wide default */
|
||||
&ompi_coll_tuned_alltoall_forced_tree_fanout);
|
||||
NULL);
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
mca_param_indices->chain_fanout_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"alltoall_algorithm_chain_fanout",
|
||||
"Fanout for chains used for alltoall algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
|
||||
false, false,
|
||||
ompi_coll_tuned_init_chain_fanout, /* get system wide default */
|
||||
&ompi_coll_tuned_alltoall_forced_chain_fanout);
|
||||
NULL);
|
||||
|
||||
return (MPI_SUCCESS);
|
||||
}
|
||||
|
||||
|
||||
int ompi_coll_tuned_alltoall_intra_query ( )
|
||||
{
|
||||
return (4); /* 4 algorithms available */
|
||||
}
|
||||
|
||||
|
||||
int ompi_coll_tuned_alltoall_intra_do_forced(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
@ -485,9 +492,10 @@ int ompi_coll_tuned_alltoall_intra_do_forced(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_forced selected algorithm %d", ompi_coll_tuned_alltoall_forced_choice));
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_forced selected algorithm %d",
|
||||
comm->c_coll_selected_data->user_forced[ALLTOALL].algorithm));
|
||||
|
||||
switch (ompi_coll_tuned_alltoall_forced_choice) {
|
||||
switch (comm->c_coll_selected_data->user_forced[ALLTOALL].algorithm) {
|
||||
case (0): return ompi_coll_tuned_alltoall_intra_dec_fixed (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
|
||||
case (1): return ompi_coll_tuned_alltoall_intra_basic_linear (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
|
||||
case (2): return ompi_coll_tuned_alltoall_intra_pairwise (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
|
||||
@ -495,7 +503,7 @@ switch (ompi_coll_tuned_alltoall_forced_choice) {
|
||||
case (4): return ompi_coll_tuned_alltoall_intra_two_procs (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
|
||||
default:
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
|
||||
ompi_coll_tuned_alltoall_forced_choice, ompi_coll_tuned_alltoall_intra_query()));
|
||||
comm->c_coll_selected_data->user_forced[ALLTOALL].algorithm, ompi_coll_tuned_forced_max_algorithms[ALLTOALL]));
|
||||
return (MPI_ERR_ARG);
|
||||
} /* switch */
|
||||
|
||||
@ -507,12 +515,12 @@ int ompi_coll_tuned_alltoall_intra_do_this(void *sbuf, int scount,
|
||||
void* rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
int choice, int faninout, int segsize)
|
||||
int algorithm, int faninout, int segsize)
|
||||
{
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_this selected algorithm %d topo faninout %d segsize %d",
|
||||
choice, faninout, segsize));
|
||||
algorithm, faninout, segsize));
|
||||
|
||||
switch (choice) {
|
||||
switch (algorithm) {
|
||||
case (0): return ompi_coll_tuned_alltoall_intra_dec_fixed (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
|
||||
case (1): return ompi_coll_tuned_alltoall_intra_basic_linear (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
|
||||
case (2): return ompi_coll_tuned_alltoall_intra_pairwise (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
|
||||
@ -520,7 +528,7 @@ switch (choice) {
|
||||
case (4): return ompi_coll_tuned_alltoall_intra_two_procs (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
|
||||
default:
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
|
||||
choice, ompi_coll_tuned_alltoall_intra_query()));
|
||||
algorithm, ompi_coll_tuned_forced_max_algorithms[ALLTOALL]));
|
||||
return (MPI_ERR_ARG);
|
||||
} /* switch */
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -325,32 +325,38 @@ static int ompi_coll_tuned_barrier_intra_basic_linear(struct ompi_communicator_t
|
||||
/* publish details of each algorithm and if its forced/fixed/locked in */
|
||||
/* as you add methods/algorithms you must update this and the query/map routines */
|
||||
|
||||
int ompi_coll_tuned_barrier_intra_check_forced ( )
|
||||
{
|
||||
/* this routine is called by the component only */
|
||||
/* this makes sure that the mca parameters are set to their initial values and perms */
|
||||
/* modules do not call this; they call the forced_getvalues routine instead */
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
int ompi_coll_tuned_barrier_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
|
||||
{
|
||||
int rc;
|
||||
int max_alg = 5;
|
||||
|
||||
ompi_coll_tuned_forced_max_algorithms[BARRIER] = max_alg;
|
||||
|
||||
rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version,
|
||||
"barrier_algorithm_count",
|
||||
"Number of barrier algorithms available",
|
||||
false, true, max_alg, NULL);
|
||||
|
||||
mca_param_indices->algorithm_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"barrier_algorithm",
|
||||
"Which barrier algorithm is used. Can be locked down to choice of: 0 ignore, 1 linear, 2 double ring, 3: recursive doubling 4: bruck, 5: two proc only, 6: step based bmtree",
|
||||
false, false, ompi_coll_tuned_barrier_forced_choice,
|
||||
&ompi_coll_tuned_barrier_forced_choice);
|
||||
"Which barrier algorithm is used. Can be locked down to choice of: 0 ignore, 1 linear, 2 double ring, 3: recursive doubling 4: bruck, 5: two proc only",
|
||||
false, false, 0, NULL);
|
||||
|
||||
return (MPI_SUCCESS);
|
||||
}
|
||||
|
||||
|
||||
|
||||
int ompi_coll_tuned_barrier_intra_query ( )
|
||||
{
|
||||
return (5); /* 4 algorithms available */
|
||||
/* 2 to do */
|
||||
}
|
||||
|
||||
|
||||
int ompi_coll_tuned_barrier_intra_do_forced(struct ompi_communicator_t *comm)
|
||||
{
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_forced selected algorithm %d", ompi_coll_tuned_barrier_forced_choice));
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_forced selected algorithm %d",
|
||||
comm->c_coll_selected_data->user_forced[BARRIER].algorithm));
|
||||
|
||||
switch (ompi_coll_tuned_barrier_forced_choice) {
|
||||
switch (comm->c_coll_selected_data->user_forced[BARRIER].algorithm) {
|
||||
case (0): return ompi_coll_tuned_barrier_intra_dec_fixed (comm);
|
||||
case (1): return ompi_coll_tuned_barrier_intra_basic_linear (comm);
|
||||
case (2): return ompi_coll_tuned_barrier_intra_doublering (comm);
|
||||
@ -360,18 +366,18 @@ switch (ompi_coll_tuned_barrier_forced_choice) {
|
||||
/* case (6): return ompi_coll_tuned_barrier_intra_bmtree_step (comm); */
|
||||
default:
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
|
||||
ompi_coll_tuned_barrier_forced_choice, ompi_coll_tuned_barrier_intra_query()));
|
||||
comm->c_coll_selected_data->user_forced[BARRIER].algorithm, ompi_coll_tuned_forced_max_algorithms[BARRIER]));
|
||||
return (MPI_ERR_ARG);
|
||||
} /* switch */
|
||||
|
||||
}
|
||||
|
||||
|
||||
int ompi_coll_tuned_barrier_intra_do_this (struct ompi_communicator_t *comm, int choice, int faninout, int segsize)
|
||||
int ompi_coll_tuned_barrier_intra_do_this (struct ompi_communicator_t *comm, int algorithm, int faninout, int segsize)
|
||||
{
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_this selected algorithm %d topo fanin/out%d", choice, faninout));
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_this selected algorithm %d topo fanin/out%d", algorithm, faninout));
|
||||
|
||||
switch (choice) {
|
||||
switch (algorithm) {
|
||||
case (0): return ompi_coll_tuned_barrier_intra_dec_fixed (comm);
|
||||
case (1): return ompi_coll_tuned_barrier_intra_basic_linear (comm);
|
||||
case (2): return ompi_coll_tuned_barrier_intra_doublering (comm);
|
||||
@ -381,7 +387,7 @@ switch (choice) {
|
||||
/* case (6): return ompi_coll_tuned_barrier_intra_bmtree_step (comm); */
|
||||
default:
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
|
||||
choice, ompi_coll_tuned_barrier_intra_query()));
|
||||
algorithm, ompi_coll_tuned_forced_max_algorithms[BARRIER]));
|
||||
return (MPI_ERR_ARG);
|
||||
} /* switch */
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -773,69 +773,81 @@ ompi_coll_tuned_bcast_intra_basic_linear (void *buff, int count,
|
||||
|
||||
/* copied function (with appropriate renaming) ends here */
|
||||
|
||||
/* The following are used by dynamic and forced rules */
|
||||
|
||||
/* publish details of each algorithm and if its forced/fixed/locked in */
|
||||
/* as you add methods/algorithms you must update this and the query/map routines */
|
||||
|
||||
/* this routine is called by the component only */
|
||||
/* this makes sure that the mca parameters are set to their initial values and perms */
|
||||
/* modules do not call this; they call the forced_getvalues routine instead */
|
||||
|
||||
|
||||
int ompi_coll_tuned_bcast_intra_check_forced ( )
|
||||
int ompi_coll_tuned_bcast_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
|
||||
{
|
||||
int rc;
|
||||
int max_alg = 6;
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
ompi_coll_tuned_forced_max_algorithms[BCAST] = max_alg;
|
||||
|
||||
rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version,
|
||||
"bcast_algorithm_count",
|
||||
"Number of bcast algorithms available",
|
||||
false, true, max_alg, NULL);
|
||||
|
||||
|
||||
mca_param_indices->algorithm_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"bcast_algorithm",
|
||||
"Which bcast algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 chain, 3: pipeline, 4: split binary tree, 5: binary tree, 6: BM tree.",
|
||||
false, false, ompi_coll_tuned_bcast_forced_choice,
|
||||
&ompi_coll_tuned_bcast_forced_choice);
|
||||
false, false, 0, NULL);
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
mca_param_indices->segsize_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"bcast_algorithm_segmentsize",
|
||||
"Segment size in bytes used by default for bcast algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
|
||||
false, false, ompi_coll_tuned_bcast_forced_segsize,
|
||||
&ompi_coll_tuned_bcast_forced_segsize);
|
||||
false, false, 0, NULL);
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
mca_param_indices->tree_fanout_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"bcast_algorithm_tree_fanout",
|
||||
"Fanout for n-tree used for bcast algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
|
||||
false, false,
|
||||
ompi_coll_tuned_init_tree_fanout, /* get system wide default */
|
||||
&ompi_coll_tuned_bcast_forced_tree_fanout);
|
||||
NULL);
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
mca_param_indices->chain_fanout_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"bcast_algorithm_chain_fanout",
|
||||
"Fanout for chains used for bcast algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
|
||||
false, false,
|
||||
ompi_coll_tuned_init_chain_fanout, /* get system wide default */
|
||||
&ompi_coll_tuned_bcast_forced_chain_fanout);
|
||||
NULL);
|
||||
|
||||
return (MPI_SUCCESS);
|
||||
}
|
||||
|
||||
|
||||
int ompi_coll_tuned_bcast_intra_query ( )
|
||||
{
|
||||
return (5); /* 5 algorithms available */
|
||||
/* 1 left to implement + NEC version */
|
||||
}
|
||||
|
||||
|
||||
int ompi_coll_tuned_bcast_intra_do_forced(void *buf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_forced algorithm %d", ompi_coll_tuned_bcast_forced_choice));
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_forced algorithm %d",
|
||||
comm->c_coll_selected_data->user_forced[BCAST].algorithm));
|
||||
|
||||
switch (ompi_coll_tuned_bcast_forced_choice) {
|
||||
switch (comm->c_coll_selected_data->user_forced[BCAST].algorithm) {
|
||||
case (0): return ompi_coll_tuned_bcast_intra_dec_fixed (buf, count, dtype, root, comm);
|
||||
case (1): return ompi_coll_tuned_bcast_intra_basic_linear (buf, count, dtype, root, comm);
|
||||
case (2): return ompi_coll_tuned_bcast_intra_chain (buf, count, dtype, root, comm, ompi_coll_tuned_bcast_forced_segsize, ompi_coll_tuned_bcast_forced_chain_fanout );
|
||||
case (3): return ompi_coll_tuned_bcast_intra_pipeline (buf, count, dtype, root, comm, ompi_coll_tuned_bcast_forced_segsize);
|
||||
case (4): return ompi_coll_tuned_bcast_intra_split_bintree (buf, count, dtype, root, comm, ompi_coll_tuned_bcast_forced_segsize);
|
||||
case (5): return ompi_coll_tuned_bcast_intra_bintree (buf, count, dtype, root, comm, ompi_coll_tuned_bcast_forced_segsize);
|
||||
case (2): return ompi_coll_tuned_bcast_intra_chain (buf, count, dtype, root, comm,
|
||||
comm->c_coll_selected_data->user_forced[BCAST].segsize,
|
||||
comm->c_coll_selected_data->user_forced[BCAST].chain_fanout );
|
||||
case (3): return ompi_coll_tuned_bcast_intra_pipeline (buf, count, dtype, root, comm,
|
||||
comm->c_coll_selected_data->user_forced[BCAST].segsize);
|
||||
case (4): return ompi_coll_tuned_bcast_intra_split_bintree (buf, count, dtype, root, comm,
|
||||
comm->c_coll_selected_data->user_forced[BCAST].segsize);
|
||||
case (5): return ompi_coll_tuned_bcast_intra_bintree (buf, count, dtype, root, comm,
|
||||
comm->c_coll_selected_data->user_forced[BCAST].segsize);
|
||||
/* case (6): return ompi_coll_tuned_bcast_intra_bmtree (buf, count, dtype, root, comm,
|
||||
* ompi_coll_tuned_bcast_forced_segsize); */
|
||||
default:
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
|
||||
ompi_coll_tuned_bcast_forced_choice, ompi_coll_tuned_bcast_intra_query()));
|
||||
comm->c_coll_selected_data->user_forced[BCAST].algorithm, ompi_coll_tuned_forced_max_algorithms[BCAST]));
|
||||
return (MPI_ERR_ARG);
|
||||
} /* switch */
|
||||
|
||||
@ -846,13 +858,13 @@ int ompi_coll_tuned_bcast_intra_do_this(void *buf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
int choice, int faninout, int segsize)
|
||||
int algorithm, int faninout, int segsize)
|
||||
|
||||
{
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_this algorithm %d topo faninout %d segsize %d",
|
||||
choice, faninout, segsize));
|
||||
algorithm, faninout, segsize));
|
||||
|
||||
switch (choice) {
|
||||
switch (algorithm) {
|
||||
case (0): return ompi_coll_tuned_bcast_intra_dec_fixed (buf, count, dtype, root, comm);
|
||||
case (1): return ompi_coll_tuned_bcast_intra_basic_linear (buf, count, dtype, root, comm);
|
||||
case (2): return ompi_coll_tuned_bcast_intra_chain (buf, count, dtype, root, comm, segsize, faninout );
|
||||
@ -863,7 +875,7 @@ switch (choice) {
|
||||
* segsize); */
|
||||
default:
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
|
||||
choice, ompi_coll_tuned_bcast_intra_query()));
|
||||
algorithm, ompi_coll_tuned_forced_max_algorithms[BCAST]));
|
||||
return (MPI_ERR_ARG);
|
||||
} /* switch */
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -49,27 +49,10 @@ int ompi_coll_tuned_init_tree_fanout = 4;
|
||||
int ompi_coll_tuned_init_chain_fanout = 4;
|
||||
|
||||
/* forced algorithm variables */
|
||||
int ompi_coll_tuned_allreduce_forced_choice = 0;
|
||||
int ompi_coll_tuned_allreduce_forced_segsize = 0;
|
||||
int ompi_coll_tuned_allreduce_forced_chain_fanout = 0;
|
||||
int ompi_coll_tuned_allreduce_forced_tree_fanout = 0;
|
||||
|
||||
int ompi_coll_tuned_alltoall_forced_choice = 0;
|
||||
int ompi_coll_tuned_alltoall_forced_segsize = 0;
|
||||
int ompi_coll_tuned_alltoall_forced_chain_fanout = 0;
|
||||
int ompi_coll_tuned_alltoall_forced_tree_fanout = 0;
|
||||
|
||||
int ompi_coll_tuned_barrier_forced_choice = 0;
|
||||
|
||||
int ompi_coll_tuned_bcast_forced_choice = 0;
|
||||
int ompi_coll_tuned_bcast_forced_segsize = 0;
|
||||
int ompi_coll_tuned_bcast_forced_chain_fanout = 0;
|
||||
int ompi_coll_tuned_bcast_forced_tree_fanout = 0;
|
||||
|
||||
int ompi_coll_tuned_reduce_forced_choice = 0;
|
||||
int ompi_coll_tuned_reduce_forced_segsize = 0;
|
||||
int ompi_coll_tuned_reduce_forced_chain_fanout = 0;
|
||||
int ompi_coll_tuned_reduce_forced_tree_fanout = 0;
|
||||
/* indices for the MCA parameters */
|
||||
coll_tuned_force_algorithm_mca_param_indices_t ompi_coll_tuned_forced_params[COLLCOUNT];
|
||||
/* max algorithm values */
|
||||
int ompi_coll_tuned_forced_max_algorithms[COLLCOUNT];
|
||||
|
||||
|
||||
/*
|
||||
@ -159,7 +142,7 @@ static int tuned_open(void)
|
||||
/* by default DISABLE dynamic rules and instead use fixed [if based] rules */
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"use_dynamic_rules",
|
||||
"Switch used to decide if we use static (if statements) or dynamic (built at runtime) decision function rules",
|
||||
"Switch used to decide if we use static (compiled/if statements) or dynamic (built at runtime) decision function rules",
|
||||
false, false, ompi_coll_tuned_use_dynamic_rules,
|
||||
&ompi_coll_tuned_use_dynamic_rules);
|
||||
|
||||
@ -197,16 +180,21 @@ static int tuned_open(void)
|
||||
}
|
||||
}
|
||||
|
||||
/* now check that the user hasn't overrode any of the decision functions */
|
||||
/* the user can do this before every comm dup/create if they like */
|
||||
/* now check that the user hasn't overridden any of the decision functions if dynamic rules are enabled */
|
||||
/* the user can redo this before every comm dup/create if they like */
|
||||
/* this is useful for benchmarking and user knows best tuning */
|
||||
/* as this is the component we only lookup the indices of the mca params */
|
||||
/* the actual values are looked up during comm create via module init */
|
||||
|
||||
/* intra functions first */
|
||||
ompi_coll_tuned_allreduce_intra_check_forced();
|
||||
ompi_coll_tuned_alltoall_intra_check_forced();
|
||||
ompi_coll_tuned_barrier_intra_check_forced();
|
||||
ompi_coll_tuned_bcast_intra_check_forced();
|
||||
ompi_coll_tuned_reduce_intra_check_forced();
|
||||
if (ompi_coll_tuned_use_dynamic_rules) {
|
||||
ompi_coll_tuned_allreduce_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLREDUCE]);
|
||||
ompi_coll_tuned_alltoall_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLTOALL]);
|
||||
/* ompi_coll_tuned_alltoall_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLTOALLV]); */
|
||||
ompi_coll_tuned_barrier_intra_check_forced_init(&ompi_coll_tuned_forced_params[BARRIER]);
|
||||
ompi_coll_tuned_bcast_intra_check_forced_init(&ompi_coll_tuned_forced_params[BCAST]);
|
||||
ompi_coll_tuned_reduce_intra_check_forced_init(&ompi_coll_tuned_forced_params[REDUCE]);
|
||||
}
|
||||
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:component_open: done!"));
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -80,7 +80,7 @@ ompi_coll_tuned_allreduce_intra_dec_dynamic (void *sbuf, void *rbuf, int count,
|
||||
} /* found a method */
|
||||
} /*end if any com rules to check */
|
||||
|
||||
if (ompi_coll_tuned_allreduce_forced_choice) {
|
||||
if (comm->c_coll_selected_data->user_forced[ALLREDUCE].algorithm) {
|
||||
return ompi_coll_tuned_allreduce_intra_do_forced (sbuf, rbuf, count, dtype, op, comm);
|
||||
}
|
||||
else {
|
||||
@ -127,7 +127,7 @@ int ompi_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount,
|
||||
} /*end if any com rules to check */
|
||||
|
||||
|
||||
if (ompi_coll_tuned_alltoall_forced_choice) {
|
||||
if (comm->c_coll_selected_data->user_forced[ALLTOALL].algorithm) {
|
||||
return ompi_coll_tuned_alltoall_intra_do_forced (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
|
||||
}
|
||||
else {
|
||||
@ -162,7 +162,7 @@ int ompi_coll_tuned_barrier_intra_dec_dynamic(struct ompi_communicator_t *comm)
|
||||
} /* found a method */
|
||||
} /*end if any com rules to check */
|
||||
|
||||
if (ompi_coll_tuned_barrier_forced_choice) {
|
||||
if (comm->c_coll_selected_data->user_forced[BARRIER].algorithm) {
|
||||
return ompi_coll_tuned_barrier_intra_do_forced (comm);
|
||||
}
|
||||
else {
|
||||
@ -205,7 +205,7 @@ int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count,
|
||||
} /*end if any com rules to check */
|
||||
|
||||
|
||||
if (ompi_coll_tuned_bcast_forced_choice) {
|
||||
if (comm->c_coll_selected_data->user_forced[BCAST].algorithm) {
|
||||
return ompi_coll_tuned_bcast_intra_do_forced (buff, count, datatype, root, comm);
|
||||
}
|
||||
else {
|
||||
@ -249,7 +249,7 @@ int ompi_coll_tuned_reduce_intra_dec_dynamic( void *sendbuf, void *recvbuf,
|
||||
} /* found a method */
|
||||
} /*end if any com rules to check */
|
||||
|
||||
if (ompi_coll_tuned_reduce_forced_choice) {
|
||||
if (comm->c_coll_selected_data->user_forced[REDUCE].algorithm) {
|
||||
return ompi_coll_tuned_reduce_intra_do_forced (sendbuf, recvbuf, count, datatype, op, root, comm);
|
||||
}
|
||||
else {
|
||||
|
65
ompi/mca/coll/tuned/coll_tuned_forced.c
Обычный файл
65
ompi/mca/coll/tuned/coll_tuned_forced.c
Обычный файл
@ -0,0 +1,65 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "mpi.h"
|
||||
#include "opal/mca/mca.h"
|
||||
#include "ompi/mca/coll/coll.h"
|
||||
#include "ompi/request/request.h"
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/datatype/datatype.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
#include "coll_tuned.h"
|
||||
|
||||
/* need to include our own topo prototypes so we can malloc data on the comm correctly */
|
||||
#include "coll_tuned_topo.h"
|
||||
|
||||
/* also need the forced algorithm parameter structures */
|
||||
#include "coll_tuned_forced.h"
|
||||
|
||||
#include "coll_tuned_util.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/* We put all routines that handle the MCA user forced algorithm and parameter choices here */
|
||||
/* recheck the setting of forced, called on module create (i.e. for each new comm) */
|
||||
|
||||
int ompi_coll_tuned_forced_getvalues (coll_tuned_force_algorithm_mca_param_indices_t mca_params,
|
||||
coll_tuned_force_algorithm_params_t *forced_values)
|
||||
{
|
||||
mca_base_param_lookup_int (mca_params.algorithm_param_index, &(forced_values->algorithm));
|
||||
mca_base_param_lookup_int (mca_params.segsize_param_index, &(forced_values->segsize));
|
||||
mca_base_param_lookup_int (mca_params.tree_fanout_param_index, &(forced_values->tree_fanout));
|
||||
mca_base_param_lookup_int (mca_params.chain_fanout_param_index, &(forced_values->chain_fanout));
|
||||
|
||||
return (MPI_SUCCESS);
|
||||
}
|
||||
|
||||
|
||||
/* special version of above just for barrier which only has one option available (at the moment...) */
|
||||
int ompi_coll_tuned_forced_getvalues_barrier (coll_tuned_force_algorithm_mca_param_indices_t mca_params,
|
||||
coll_tuned_force_algorithm_params_t *forced_values)
|
||||
{
|
||||
mca_base_param_lookup_int (mca_params.algorithm_param_index, &(forced_values->algorithm));
|
||||
|
||||
return (MPI_SUCCESS);
|
||||
}
|
||||
|
||||
|
71
ompi/mca/coll/tuned/coll_tuned_forced.h
Обычный файл
71
ompi/mca/coll/tuned/coll_tuned_forced.h
Обычный файл
@ -0,0 +1,71 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef MCA_COLL_TUNED_FORCED_H_HAS_BEEN_INCLUDED
|
||||
#define MCA_COLL_TUNED_FORCED_H_HAS_BEEN_INCLUDED
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/* this structure is for storing the indexes to the forced algorithm mca params... */
|
||||
/* we get these at component open (so that registered values appear in ompi_info) */
|
||||
|
||||
struct coll_tuned_force_algorithm_mca_param_indices_t {
|
||||
int algorithm_param_index; /* which algorithm you want to force */
|
||||
int segsize_param_index; /* segsize to use (if supported), 0 = no segmentation */
|
||||
int tree_fanout_param_index; /* tree fanout/in to use */
|
||||
int chain_fanout_param_index; /* K-chain fanout/in to use */
|
||||
};
|
||||
|
||||
typedef struct coll_tuned_force_algorithm_mca_param_indices_t coll_tuned_force_algorithm_mca_param_indices_t;
|
||||
|
||||
|
||||
/* the following type is for storing actual value obtained from the MCA on each tuned module */
|
||||
/* via their mca param indices lookup in the component */
|
||||
/* this structure is stored once per collective type per communicator... */
|
||||
struct coll_tuned_force_algorithm_params_t {
|
||||
int algorithm; /* which algorithm you want to force */
|
||||
int segsize; /* segsize to use (if supported), 0 = no segmentation */
|
||||
int tree_fanout; /* tree fanout/in to use */
|
||||
int chain_fanout; /* K-chain fanout/in to use */
|
||||
};
|
||||
|
||||
typedef struct coll_tuned_force_algorithm_params_t coll_tuned_force_algorithm_params_t;
|
||||
|
||||
|
||||
/* prototypes */
|
||||
|
||||
int ompi_coll_tuned_forced_getvalues (coll_tuned_force_algorithm_mca_param_indices_t mca_params,
|
||||
coll_tuned_force_algorithm_params_t *forced_values);
|
||||
|
||||
/* barrier has fewer options than any other collective so it gets its own special function */
|
||||
int ompi_coll_tuned_forced_getvalues_barrier (coll_tuned_force_algorithm_mca_param_indices_t mca_params,
|
||||
coll_tuned_force_algorithm_params_t *forced_values);
|
||||
|
||||
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
#endif /* MCA_COLL_TUNED_FORCED_H_HAS_BEEN_INCLUDED */
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -30,7 +30,7 @@
|
||||
#include "coll_tuned_topo.h"
|
||||
#include "coll_tuned_dynamic_rules.h"
|
||||
#include "coll_tuned_dynamic_file.h"
|
||||
|
||||
#include "coll_tuned_forced.h"
|
||||
|
||||
/*
|
||||
* Which set are we using?
|
||||
@ -332,6 +332,8 @@ ompi_coll_tuned_module_init(struct ompi_communicator_t *comm)
|
||||
*/
|
||||
|
||||
/* if we are within the memory/size limit, allow preallocated data */
|
||||
|
||||
|
||||
if (size<=ompi_coll_tuned_preallocate_memory_comm_size_limit) {
|
||||
data = malloc(sizeof(struct mca_coll_base_comm_t) +
|
||||
(sizeof(ompi_request_t *) * size * 2));
|
||||
@ -381,6 +383,17 @@ ompi_coll_tuned_module_init(struct ompi_communicator_t *comm)
|
||||
}
|
||||
}
|
||||
|
||||
/* next dynamic state, recheck all forced rules as well */
|
||||
/* warning, we should check to make sure this is really an INTRA comm here... */
|
||||
if (ompi_coll_tuned_use_dynamic_rules) {
|
||||
ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLREDUCE], &(data->user_forced[ALLREDUCE]));
|
||||
ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLTOALL], &(data->user_forced[ALLTOALL]));
|
||||
/* ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLTOALLV], &(data->user_forced[ALLTOALLV])); */
|
||||
ompi_coll_tuned_forced_getvalues_barrier (ompi_coll_tuned_forced_params[BARRIER], &(data->user_forced[BARRIER]));
|
||||
ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[BCAST], &(data->user_forced[BCAST]));
|
||||
ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[REDUCE], &(data->user_forced[REDUCE]));
|
||||
}
|
||||
|
||||
|
||||
if (&ompi_mpi_comm_world==comm) {
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -444,62 +444,70 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
|
||||
/* publish details of each algorithm and if its forced/fixed/locked in */
|
||||
/* as you add methods/algorithms you must update this and the query/map routines */
|
||||
|
||||
int ompi_coll_tuned_reduce_intra_check_forced ( )
|
||||
{
|
||||
/* this routine is called by the component only */
|
||||
/* this makes sure that the mca parameters are set to their initial values and perms */
|
||||
/* the module does not call this; it calls the forced_getvalues routine instead */
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
int ompi_coll_tuned_reduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
|
||||
{
|
||||
int rc;
|
||||
int max_alg = 3;
|
||||
|
||||
ompi_coll_tuned_forced_max_algorithms[REDUCE] = max_alg;
|
||||
|
||||
rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version,
|
||||
"reduce_algorithm_count",
|
||||
"Number of reduce algorithms available",
|
||||
false, true, max_alg, NULL);
|
||||
|
||||
|
||||
mca_param_indices->algorithm_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"reduce_algorithm",
|
||||
"Which reduce algorithm is used. Can be locked down to choice of: 0 ignore, 1 linear, 2 chain, 3 pipeline",
|
||||
false, false, ompi_coll_tuned_reduce_forced_choice,
|
||||
&ompi_coll_tuned_reduce_forced_choice);
|
||||
false, false, 0, NULL);
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
mca_param_indices->segsize_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"reduce_algorithm_segmentsize",
|
||||
"Segment size in bytes used by default for reduce algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
|
||||
false, false, ompi_coll_tuned_reduce_forced_segsize,
|
||||
&ompi_coll_tuned_reduce_forced_segsize);
|
||||
false, false, 0, NULL);
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
mca_param_indices->tree_fanout_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"reduce_algorithm_tree_fanout",
|
||||
"Fanout for n-tree used for reduce algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
|
||||
false, false,
|
||||
ompi_coll_tuned_init_tree_fanout, /* get system wide default */
|
||||
&ompi_coll_tuned_reduce_forced_tree_fanout);
|
||||
NULL);
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
mca_param_indices->chain_fanout_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"reduce_algorithm_chain_fanout",
|
||||
"Fanout for chains used for reduce algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
|
||||
false, false,
|
||||
ompi_coll_tuned_init_chain_fanout, /* get system wide default */
|
||||
&ompi_coll_tuned_reduce_forced_chain_fanout);
|
||||
NULL);
|
||||
|
||||
return (MPI_SUCCESS);
|
||||
}
|
||||
|
||||
|
||||
int ompi_coll_tuned_reduce_intra_query ( )
|
||||
{
|
||||
return (3); /* 3 algorithms available */
|
||||
}
|
||||
|
||||
|
||||
int ompi_coll_tuned_reduce_intra_do_forced(void *sbuf, void* rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op, int root,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_forced selected algorithm %d", ompi_coll_tuned_reduce_forced_choice));
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_forced selected algorithm %d",
|
||||
comm->c_coll_selected_data->user_forced[REDUCE].algorithm));
|
||||
|
||||
switch (ompi_coll_tuned_reduce_forced_choice) {
|
||||
switch (comm->c_coll_selected_data->user_forced[REDUCE].algorithm) {
|
||||
case (0): return ompi_coll_tuned_reduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, root, comm);
|
||||
case (1): return ompi_coll_tuned_reduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, root, comm);
|
||||
case (2): return ompi_coll_tuned_reduce_intra_chain (sbuf, rbuf, count, dtype, op, root, comm,
|
||||
ompi_coll_tuned_reduce_forced_segsize, ompi_coll_tuned_reduce_forced_chain_fanout);
|
||||
case (2): return ompi_coll_tuned_reduce_intra_chain (sbuf, rbuf, count, dtype, op, root, comm,
|
||||
comm->c_coll_selected_data->user_forced[REDUCE].segsize,
|
||||
comm->c_coll_selected_data->user_forced[REDUCE].chain_fanout);
|
||||
case (3): return ompi_coll_tuned_reduce_intra_pipeline (sbuf, rbuf, count, dtype, op, root, comm,
|
||||
ompi_coll_tuned_reduce_forced_segsize);
|
||||
comm->c_coll_selected_data->user_forced[REDUCE].segsize);
|
||||
default:
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
|
||||
ompi_coll_tuned_reduce_forced_choice, ompi_coll_tuned_reduce_intra_query()));
|
||||
comm->c_coll_selected_data->user_forced[REDUCE].algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCE]));
|
||||
return (MPI_ERR_ARG);
|
||||
} /* switch */
|
||||
|
||||
@ -510,12 +518,12 @@ int ompi_coll_tuned_reduce_intra_do_this(void *sbuf, void* rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op, int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
int choice, int faninout, int segsize)
|
||||
int algorithm, int faninout, int segsize)
|
||||
{
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_this selected algorithm %d topo faninout %d segsize %d",
|
||||
choice, faninout, segsize));
|
||||
algorithm, faninout, segsize));
|
||||
|
||||
switch (choice) {
|
||||
switch (algorithm) {
|
||||
case (0): return ompi_coll_tuned_reduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, root, comm);
|
||||
case (1): return ompi_coll_tuned_reduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, root, comm);
|
||||
case (2): return ompi_coll_tuned_reduce_intra_chain (sbuf, rbuf, count, dtype, op, root, comm,
|
||||
@ -524,7 +532,7 @@ switch (choice) {
|
||||
segsize);
|
||||
default:
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
|
||||
choice, ompi_coll_tuned_reduce_intra_query()));
|
||||
algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCE]));
|
||||
return (MPI_ERR_ARG);
|
||||
} /* switch */
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user