1
1

File based dynamic up and tested...

Lots of miscellaneous fixes: printfs converted to opal_output, fan in/out handled
correctly for forced operations, unused variables removed, corrected calculation of
'msgsize' for the decision functions (its meaning varies depending on the algorithm), etc.

This commit was SVN r8113.
Этот коммит содержится в:
Graham Fagg 2005-11-11 04:49:29 +00:00
родитель 878676218e
Коммит 877f7bbe6a
12 изменённых файлов: 444 добавлений и 138 удалений

Просмотреть файл

@ -172,11 +172,19 @@ OMPI_COMP_EXPORT extern int mca_coll_tuned_reduce_forced_chain_fanout;
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm);
/* Runs the allreduce algorithm selected by mca_coll_tuned_allreduce_forced_choice. */
int mca_coll_tuned_allreduce_intra_do_forced(void *sbuf, void *rbuf,
int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm);
/* Runs the allreduce algorithm numbered 'choice'. 'faninout' and 'segsize' are the
 * topology fan in/out and segment size that accompany that choice (the dynamic
 * decision function passes the values found in the file based rules). */
int mca_coll_tuned_allreduce_intra_do_this(void *sbuf, void *rbuf,
int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
int choice, int faninout, int segsize);
/* NOTE(review): definition not visible here; presumably registers/looks up the
 * forced-choice parameters - confirm against the implementation. */
int mca_coll_tuned_allreduce_intra_check_forced(void);
/* Its result is reported by do_this as the upper bound of the valid algorithm range. */
int mca_coll_tuned_allreduce_intra_query(void);
@ -214,11 +222,19 @@ OMPI_COMP_EXPORT extern int mca_coll_tuned_reduce_forced_chain_fanout;
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm);
/* Runs the alltoall algorithm selected by mca_coll_tuned_alltoall_forced_choice. */
int mca_coll_tuned_alltoall_intra_do_forced(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm);
/* Runs the alltoall algorithm numbered 'choice'. 'faninout' and 'segsize' are the
 * topology fan in/out and segment size that accompany that choice (the dynamic
 * decision function passes the values found in the file based rules). */
int mca_coll_tuned_alltoall_intra_do_this(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
int choice, int faninout, int segsize);
/* NOTE(review): definition not visible here; presumably registers/looks up the
 * forced-choice parameters - confirm against the implementation. */
int mca_coll_tuned_alltoall_intra_check_forced(void);
/* Its result is reported by do_this as the upper bound of the valid algorithm range. */
int mca_coll_tuned_alltoall_intra_query (void);
@ -308,7 +324,10 @@ OMPI_COMP_EXPORT extern int mca_coll_tuned_reduce_forced_chain_fanout;
int mca_coll_tuned_barrier_intra_dec_fixed(struct ompi_communicator_t *comm);
int mca_coll_tuned_barrier_intra_dec_dynamic(
struct ompi_communicator_t *comm);
/* Runs the barrier algorithm selected by mca_coll_tuned_barrier_forced_choice. */
int mca_coll_tuned_barrier_intra_do_forced(struct ompi_communicator_t *comm);
/* Runs the barrier algorithm numbered 'choice'; 'faninout' and 'segsize' accompany
 * the choice but no barrier algorithm is handed them. */
int mca_coll_tuned_barrier_intra_do_this(struct ompi_communicator_t *comm, int choice, int faninout, int segsize);
/* NOTE(review): definition not visible here; presumably registers/looks up the
 * forced-choice parameters - confirm against the implementation. */
int mca_coll_tuned_barrier_intra_check_forced(void);
/* Its result is reported by do_this as the upper bound of the valid algorithm range. */
int mca_coll_tuned_barrier_intra_query (void);
@ -331,10 +350,17 @@ OMPI_COMP_EXPORT extern int mca_coll_tuned_reduce_forced_chain_fanout;
struct ompi_datatype_t *datatype,
int root,
struct ompi_communicator_t *comm);
/* Runs the bcast algorithm selected by mca_coll_tuned_bcast_forced_choice. */
int mca_coll_tuned_bcast_intra_do_forced(void *buff, int count,
struct ompi_datatype_t *datatype,
int root,
struct ompi_communicator_t *comm);
/* Runs the bcast algorithm numbered 'choice'. 'segsize' is handed to the segmented
 * algorithms and 'faninout' to the chain algorithm. */
int mca_coll_tuned_bcast_intra_do_this(void *buff, int count,
struct ompi_datatype_t *datatype,
int root,
struct ompi_communicator_t *comm,
int choice, int faninout, int segsize);
/* NOTE(review): definition not visible here; presumably registers/looks up the
 * forced-choice parameters - confirm against the implementation. */
int mca_coll_tuned_bcast_intra_check_forced(void);
/* Its result is reported by do_this as the upper bound of the valid algorithm range. */
int mca_coll_tuned_bcast_intra_query (void);
@ -450,10 +476,17 @@ OMPI_COMP_EXPORT extern int mca_coll_tuned_reduce_forced_chain_fanout;
struct ompi_datatype_t *dtype,
struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm);
/* Called by the dynamic decision function when mca_coll_tuned_reduce_forced_choice
 * is non-zero. */
int mca_coll_tuned_reduce_intra_do_forced(void *sbuf, void* rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm);
/* Called by the dynamic decision function with the algorithm number, fan in/out
 * and segment size obtained from the file based rules. */
int mca_coll_tuned_reduce_intra_do_this(void *sbuf, void* rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm,
int choice, int faninout, int segsize);
/* NOTE(review): definitions of the next two are not visible here; presumably they
 * mirror the check_forced/query pair of the other collectives - confirm. */
int mca_coll_tuned_reduce_intra_check_forced(void);
int mca_coll_tuned_reduce_intra_query (void);

Просмотреть файл

@ -210,8 +210,8 @@ int mca_coll_tuned_allreduce_intra_do_forced(void *sbuf, void *rbuf, int count,
struct ompi_op_t *op,
struct ompi_communicator_t *comm)
{
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:allreduce_intra_do_forced selected algorithm %d",
mca_coll_tuned_allreduce_forced_choice));
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:allreduce_intra_do_forced selected algorithm %d",
mca_coll_tuned_allreduce_forced_choice));
switch (mca_coll_tuned_allreduce_forced_choice) {
case (0): return mca_coll_tuned_allreduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, comm);
@ -226,4 +226,26 @@ switch (mca_coll_tuned_allreduce_forced_choice) {
}
/*
 * Run one explicitly requested allreduce algorithm.
 *
 * choice   : 0 = fixed decision function, 1 = basic linear, 2 = nonoverlapping
 * faninout : topology fan in/out that came with the request (only reported here)
 * segsize  : segment size that came with the request (only reported here)
 *
 * Returns whatever the selected algorithm returns, or MPI_ERR_ARG when
 * 'choice' does not name a known algorithm.
 */
int mca_coll_tuned_allreduce_intra_do_this(void *sbuf, void *rbuf, int count,
                                           struct ompi_datatype_t *dtype,
                                           struct ompi_op_t *op,
                                           struct ompi_communicator_t *comm,
                                           int choice, int faninout, int segsize)
{
    OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:allreduce_intra_do_this algorithm %d topo fan in/out %d segsize %d",
                 choice, faninout, segsize));

    if (0 == choice) {
        return mca_coll_tuned_allreduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, comm);
    }
    if (1 == choice) {
        return mca_coll_tuned_allreduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, comm);
    }
    if (2 == choice) {
        return mca_coll_tuned_allreduce_intra_nonoverlapping (sbuf, rbuf, count, dtype, op, comm);
    }

    /* every other value is outside the range of known algorithms */
    OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:allreduce_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
                 choice, mca_coll_tuned_allreduce_intra_query()));
    return (MPI_ERR_ARG);
}

Просмотреть файл

@ -505,3 +505,28 @@ switch (mca_coll_tuned_alltoall_forced_choice) {
}
/*
 * Run one explicitly requested alltoall algorithm.
 *
 * choice   : 0 = fixed decision function, 1 = basic linear, 2 = pairwise,
 *            3 = bruck, 4 = two procs
 * faninout : topology fan in/out that came with the request (only reported here)
 * segsize  : segment size that came with the request (only reported here)
 *
 * Returns whatever the selected algorithm returns, or MPI_ERR_ARG when
 * 'choice' does not name a known algorithm.
 */
int mca_coll_tuned_alltoall_intra_do_this(void *sbuf, int scount,
                                          struct ompi_datatype_t *sdtype,
                                          void* rbuf, int rcount,
                                          struct ompi_datatype_t *rdtype,
                                          struct ompi_communicator_t *comm,
                                          int choice, int faninout, int segsize)
{
    int rc;

    OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:alltoall_intra_do_this selected algorithm %d topo faninout %d segsize %d",
                 choice, faninout, segsize));

    switch (choice) {
    case 0:
        rc = mca_coll_tuned_alltoall_intra_dec_fixed (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
        break;
    case 1:
        rc = mca_coll_tuned_alltoall_intra_basic_linear (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
        break;
    case 2:
        rc = mca_coll_tuned_alltoall_intra_pairwise (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
        break;
    case 3:
        rc = mca_coll_tuned_alltoall_intra_bruck (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
        break;
    case 4:
        rc = mca_coll_tuned_alltoall_intra_two_procs (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
        break;
    default:
        /* unknown algorithm number: report the valid range and refuse */
        OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:alltoall_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
                     choice, mca_coll_tuned_alltoall_intra_query()));
        rc = MPI_ERR_ARG;
        break;
    }

    return rc;
}

Просмотреть файл

@ -221,10 +221,79 @@ int mca_coll_tuned_barrier_intra_two_procs(struct ompi_communicator_t *comm)
}
int mca_coll_tuned_barrier_intra_linear(struct ompi_communicator_t *comm)
/*
* Linear functions are copied from the BASIC coll module
* they do not segment the message and are simple implementations
* but for some small number of nodes and/or small data sizes they
* are just as fast as tuned/tree based segmenting operations
* and as such may be selected by the decision functions
* These are copied into this module due to the way we select modules
* in V1. i.e. in V2 we will handle this differently and so will not
* have to duplicate code.
* GEF Oct05 after asking Jeff.
*/
/* copied function (with appropriate renaming) starts here */
/*
 * Linear barrier.
 *
 * Every rank other than 0 sends a zero-length message to rank 0 and then waits
 * for a zero-length message back from rank 0. Rank 0 first receives one message
 * from each of the other ranks (in any order) and only then sends the release
 * message to each of them in rank order.
 *
 * Returns MPI_SUCCESS, or the first error code returned by a PML send/recv.
 *
 * The placeholder "return OMPI_ERR_NOT_IMPLEMENTED;" that used to precede the
 * body has been dropped: it made the whole implementation unreachable.
 */
int mca_coll_tuned_barrier_intra_basic_linear(struct ompi_communicator_t *comm)
{
    int i;
    int err;
    int size = ompi_comm_size(comm);
    int rank = ompi_comm_rank(comm);

    /* All non-root send & receive zero-length message. */
    if (rank > 0) {
        err =
            MCA_PML_CALL(send
                         (NULL, 0, MPI_BYTE, 0, MCA_COLL_BASE_TAG_BARRIER,
                          MCA_PML_BASE_SEND_STANDARD, comm));
        if (MPI_SUCCESS != err) {
            return err;
        }

        err =
            MCA_PML_CALL(recv
                         (NULL, 0, MPI_BYTE, 0, MCA_COLL_BASE_TAG_BARRIER,
                          comm, MPI_STATUS_IGNORE));
        if (MPI_SUCCESS != err) {
            return err;
        }
    }

    /* The root collects and broadcasts the messages. */
    else {
        /* gather: arrival order does not matter, hence MPI_ANY_SOURCE */
        for (i = 1; i < size; ++i) {
            err = MCA_PML_CALL(recv(NULL, 0, MPI_BYTE, MPI_ANY_SOURCE,
                                    MCA_COLL_BASE_TAG_BARRIER,
                                    comm, MPI_STATUS_IGNORE));
            if (MPI_SUCCESS != err) {
                return err;
            }
        }

        /* release: nobody is answered before everybody has checked in */
        for (i = 1; i < size; ++i) {
            err =
                MCA_PML_CALL(send
                             (NULL, 0, MPI_BYTE, i,
                              MCA_COLL_BASE_TAG_BARRIER,
                              MCA_PML_BASE_SEND_STANDARD, comm));
            if (MPI_SUCCESS != err) {
                return err;
            }
        }
    }

    /* All done */
    return MPI_SUCCESS;
}
/* copied function (with appropriate renaming) ends here */
/* The following are used by dynamic and forced rules */
@ -247,7 +316,7 @@ return (MPI_SUCCESS);
/*
 * Report the highest algorithm number that can be requested for barrier.
 *
 * The value is printed by the forced/dynamic dispatchers as the upper end of
 * the valid "0-N" range, so it has to match the highest case they handle.
 */
int mca_coll_tuned_barrier_intra_query (void)
{
    /* choices 0-5 are dispatched; further algorithms are still to do */
    return (5);
}
@ -258,7 +327,7 @@ int mca_coll_tuned_barrier_intra_do_forced(struct ompi_communicator_t *comm)
switch (mca_coll_tuned_barrier_forced_choice) {
case (0): return mca_coll_tuned_barrier_intra_dec_fixed (comm);
/* case (1): return mca_coll_tuned_barrier_intra_basic_linear (comm); */
case (1): return mca_coll_tuned_barrier_intra_basic_linear (comm);
case (2): return mca_coll_tuned_barrier_intra_doublering (comm);
case (3): return mca_coll_tuned_barrier_intra_recursivedoubling (comm);
case (4): return mca_coll_tuned_barrier_intra_bruck (comm);
@ -272,3 +341,24 @@ switch (mca_coll_tuned_barrier_forced_choice) {
}
/*
 * Run one explicitly requested barrier algorithm.
 *
 * choice   : 0 = fixed decision function, 1 = basic linear, 2 = double ring,
 *            3 = recursive doubling, 4 = bruck, 5 = two procs
 * faninout : topology fan in/out that came with the request (only reported here)
 * segsize  : accepted for a uniform signature; not used by any barrier algorithm
 *
 * Returns whatever the selected algorithm returns, or MPI_ERR_ARG when
 * 'choice' does not name a known algorithm.
 */
int mca_coll_tuned_barrier_intra_do_this (struct ompi_communicator_t *comm, int choice, int faninout, int segsize)
{
    OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:barrier_intra_do_this selected algorithm %d topo fanin/out%d", choice, faninout));

    if (0 == choice) {
        return mca_coll_tuned_barrier_intra_dec_fixed (comm);
    }
    if (1 == choice) {
        return mca_coll_tuned_barrier_intra_basic_linear (comm);
    }
    if (2 == choice) {
        return mca_coll_tuned_barrier_intra_doublering (comm);
    }
    if (3 == choice) {
        return mca_coll_tuned_barrier_intra_recursivedoubling (comm);
    }
    if (4 == choice) {
        return mca_coll_tuned_barrier_intra_bruck (comm);
    }
    if (5 == choice) {
        return mca_coll_tuned_barrier_intra_two_procs (comm);
    }

    /* algorithm 6 (bmtree_step) is not enabled, so it is rejected with the rest */
    OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:barrier_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
                 choice, mca_coll_tuned_barrier_intra_query()));
    return (MPI_ERR_ARG);
}

Просмотреть файл

@ -841,3 +841,31 @@ switch (mca_coll_tuned_bcast_forced_choice) {
}
/*
 * Run one explicitly requested bcast algorithm.
 *
 * choice   : 0 = fixed decision function, 1 = basic linear, 2 = chain,
 *            3 = pipeline, 4 = split binary tree, 5 = binary tree
 * faninout : handed to the chain algorithm only
 * segsize  : handed to every algorithm from 2 upwards
 *
 * Returns whatever the selected algorithm returns, or MPI_ERR_ARG when
 * 'choice' does not name a known algorithm.
 */
int mca_coll_tuned_bcast_intra_do_this(void *buf, int count,
                                       struct ompi_datatype_t *dtype,
                                       int root,
                                       struct ompi_communicator_t *comm,
                                       int choice, int faninout, int segsize)
{
    int rc;

    OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:bcast_intra_do_this algorithm %d topo faninout %d segsize %d",
                 choice, faninout, segsize));

    switch (choice) {
    case 0:
        rc = mca_coll_tuned_bcast_intra_dec_fixed (buf, count, dtype, root, comm);
        break;
    case 1:
        rc = mca_coll_tuned_bcast_intra_basic_linear (buf, count, dtype, root, comm);
        break;
    case 2:
        rc = mca_coll_tuned_bcast_intra_chain (buf, count, dtype, root, comm, segsize, faninout );
        break;
    case 3:
        rc = mca_coll_tuned_bcast_intra_pipeline (buf, count, dtype, root, comm, segsize);
        break;
    case 4:
        rc = mca_coll_tuned_bcast_intra_split_bintree (buf, count, dtype, root, comm, segsize);
        break;
    case 5:
        rc = mca_coll_tuned_bcast_intra_bintree (buf, count, dtype, root, comm, segsize);
        break;
    default:
        /* algorithm 6 (bmtree) is not enabled, so it ends up here as well */
        OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:bcast_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
                     choice, mca_coll_tuned_bcast_intra_query()));
        rc = MPI_ERR_ARG;
        break;
    }

    return rc;
}

Просмотреть файл

@ -150,6 +150,7 @@ static int tuned_open(void)
false, false, mca_coll_tuned_use_dynamic_rules,
&mca_coll_tuned_use_dynamic_rules);
/* if dynamic rules allowed then look up dynamic rules config filename, else we leave it an empty filename (NULL) */
if (mca_coll_tuned_use_dynamic_rules) {
/* char *default_name; */

Просмотреть файл

@ -22,6 +22,8 @@
#include "include/constants.h"
#include "datatype/datatype.h"
#include "communicator/communicator.h"
#include "mca/base/mca_base_param.h"
#include "mca/coll/base/base.h"
#include "mca/coll/coll.h"
#include "mca/coll/base/coll_tags.h"
#include "coll_tuned.h"
@ -30,6 +32,19 @@
#include "coll_tuned.h"
/*
* Notes on evaluation rules and ordering
*
* The order is:
* use file based rules if presented (-coll_tuned_dynamic_rules_filename = rules)
* Else
* use forced rules (-coll_tuned_dynamic_ALG_intra_algorithm = algorithm-number)
* Else
* use fixed (compiled) rule set (or nested ifs)
*
*/
/*
* allreduce_intra
*
@ -43,17 +58,28 @@ mca_coll_tuned_allreduce_intra_dec_dynamic (void *sbuf, void *rbuf, int count,
struct ompi_op_t *op,
struct ompi_communicator_t *comm)
{
int i;
int size;
int rank;
int err;
int contig;
int dsize;
OPAL_OUTPUT((mca_coll_tuned_stream, "mca_coll_tuned_allreduce_intra_dec_dynamic"));
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
/* check to see if we have some filebased rules */
if (comm->c_coll_selected_data->com_rules[ALLREDUCE]) {
/* we do, so calc the message size or what ever we need and use this for the evaluation */
int alg, faninout, segsize;
/* int contig; */
int dsize;
ompi_ddt_type_size (dtype, &dsize);
dsize *= count;
alg = coll_tuned_get_target_method_params (comm->c_coll_selected_data->com_rules[ALLREDUCE],
dsize, &faninout, &segsize);
if (alg) { /* we have found a valid choice from the file based rules for this message size */
return mca_coll_tuned_allreduce_intra_do_this (sbuf, rbuf, count, dtype, op, comm,
alg, faninout, segsize);
} /* found a method */
} /*end if any com rules to check */
if (mca_coll_tuned_allreduce_forced_choice) {
return mca_coll_tuned_allreduce_intra_do_forced (sbuf, rbuf, count, dtype, op, comm);
@ -77,17 +103,30 @@ int mca_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm)
{
int i;
int size;
int rank;
int err;
int contig;
int dsize;
OPAL_OUTPUT((mca_coll_tuned_stream, "mca_coll_tuned_alltoall_intra_dec_dynamic"));
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
/* check to see if we have some filebased rules */
if (comm->c_coll_selected_data->com_rules[ALLTOALL]) {
/* we do, so calc the message size or what ever we need and use this for the evaluation */
int comsize;
int alg, faninout, segsize;
int dsize;
ompi_ddt_type_size (sdtype, &dsize);
comsize = ompi_comm_size(comm);
dsize *= comsize * scount;
alg = coll_tuned_get_target_method_params (comm->c_coll_selected_data->com_rules[ALLTOALL],
dsize, &faninout, &segsize);
if (alg) { /* we have found a valid choice from the file based rules for this message size */
return mca_coll_tuned_alltoall_intra_do_this (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm,
alg, faninout, segsize);
} /* found a method */
} /*end if any com rules to check */
if (mca_coll_tuned_alltoall_forced_choice) {
return mca_coll_tuned_alltoall_intra_do_forced (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
@ -106,17 +145,23 @@ int mca_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount,
*/
int mca_coll_tuned_barrier_intra_dec_dynamic(struct ompi_communicator_t *comm)
{
int i;
int size;
int rank;
int err;
int contig;
int dsize;
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_barrier_intra_dec_dynamic"));
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
/* check to see if we have some filebased rules */
if (comm->c_coll_selected_data->com_rules[BARRIER]) {
/* we do, so calc the message size or what ever we need and use this for the evaluation */
int alg, faninout, segsize;
alg = coll_tuned_get_target_method_params (comm->c_coll_selected_data->com_rules[BARRIER],
0, &faninout, &segsize);
if (alg) { /* we have found a valid choice from the file based rules for this message size */
return mca_coll_tuned_barrier_intra_do_this (comm,
alg, faninout, segsize);
} /* found a method */
} /*end if any com rules to check */
if (mca_coll_tuned_barrier_forced_choice) {
return mca_coll_tuned_barrier_intra_do_forced (comm);
@ -138,17 +183,28 @@ int mca_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count,
struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm)
{
int i;
int size;
int rank;
int err;
int contig;
int dsize;
OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:bcast_intra_dec_dynamic"));
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
/* check to see if we have some filebased rules */
if (comm->c_coll_selected_data->com_rules[BCAST]) {
/* we do, so calc the message size or what ever we need and use this for the evaluation */
int alg, faninout, segsize;
int dsize;
ompi_ddt_type_size (datatype, &dsize);
dsize *= count;
alg = coll_tuned_get_target_method_params (comm->c_coll_selected_data->com_rules[BCAST],
dsize, &faninout, &segsize);
if (alg) { /* we have found a valid choice from the file based rules for this message size */
return mca_coll_tuned_bcast_intra_do_this (buff, count, datatype, root, comm,
alg, faninout, segsize);
} /* found a method */
} /*end if any com rules to check */
if (mca_coll_tuned_bcast_forced_choice) {
return mca_coll_tuned_bcast_intra_do_forced (buff, count, datatype, root, comm);
@ -172,17 +228,28 @@ int mca_coll_tuned_reduce_intra_dec_dynamic( void *sendbuf, void *recvbuf,
struct ompi_op_t* op, int root,
struct ompi_communicator_t* comm)
{
int i;
int size;
int rank;
int err;
int contig;
int dsize;
OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:reduce_intra_dec_dynamic"));
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
/* check to see if we have some filebased rules */
if (comm->c_coll_selected_data->com_rules[REDUCE]) {
/* we do, so calc the message size or what ever we need and use this for the evaluation */
int alg, faninout, segsize;
int dsize;
int contig;
ompi_ddt_type_size (datatype, &dsize);
dsize *= count;
alg = coll_tuned_get_target_method_params (comm->c_coll_selected_data->com_rules[REDUCE],
dsize, &faninout, &segsize);
if (alg) { /* we have found a valid choice from the file based rules for this message size */
return mca_coll_tuned_reduce_intra_do_this (sendbuf, recvbuf, count, datatype, op, root, comm,
alg, faninout, segsize);
} /* found a method */
} /*end if any com rules to check */
if (mca_coll_tuned_reduce_forced_choice) {
return mca_coll_tuned_reduce_intra_do_forced (sendbuf, recvbuf, count, datatype, op, root, comm);

Просмотреть файл

@ -43,17 +43,13 @@ mca_coll_tuned_allreduce_intra_dec_fixed (void *sbuf, void *rbuf, int count,
struct ompi_op_t *op,
struct ompi_communicator_t *comm)
{
int i;
int size;
int rank;
int err;
int contig;
int dsize;
/* int size; */
/* int contig; */
/* int dsize; */
OPAL_OUTPUT((mca_coll_tuned_stream, "mca_coll_tuned_allreduce_intra_dec_fixed"));
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
/* size = ompi_comm_size(comm); */
return (mca_coll_tuned_allreduce_intra_nonoverlapping (sbuf, rbuf, count, dtype, op, comm));
@ -74,11 +70,9 @@ int mca_coll_tuned_alltoall_intra_dec_fixed(void *sbuf, int scount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm)
{
int i;
int size;
int rank;
int err;
int contig;
int dsize;
MPI_Aint sext;
long lb;
@ -123,17 +117,11 @@ int mca_coll_tuned_alltoall_intra_dec_fixed(void *sbuf, int scount,
*/
int mca_coll_tuned_barrier_intra_dec_fixed(struct ompi_communicator_t *comm)
{
int i;
int size;
int rank;
int err;
int contig;
int dsize;
OPAL_OUTPUT((mca_coll_tuned_stream, "mca_coll_tuned_barrier_intra_dec_fixed"));
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
if (2==size)
return mca_coll_tuned_barrier_intra_two_procs(comm);
@ -157,11 +145,9 @@ int mca_coll_tuned_bcast_intra_dec_fixed(void *buff, int count,
struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm)
{
int i;
int size;
int rank;
int err;
int contig;
int msgsize;
MPI_Aint ext;
long lb;
@ -227,11 +213,10 @@ int mca_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf,
struct ompi_op_t* op, int root,
struct ompi_communicator_t* comm)
{
int i;
int size;
int rank;
int err;
int contig;
/* int contig; */
int msgsize;
MPI_Aint ext;
long lb;

Просмотреть файл

@ -26,6 +26,7 @@
#include "mca/coll/coll.h"
#include "request/request.h"
#include "mca/pml/pml.h"
#include "coll_tuned.h"
/* need to include our own topo prototypes so we can malloc data on the comm correctly */
#include "coll_tuned_topo.h"
@ -64,9 +65,8 @@ int coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** rules
int NCS;
int CS;
int NMS;
int MS, ALG, SS;
int x, ci, ncs, cs, nms;
int i;
int MS, ALG, FANINOUT, SS;
int x, ncs, nms;
ompi_coll_alg_rule_t *alg_rules = (ompi_coll_alg_rule_t*) NULL; /* complete table of rules */
@ -81,23 +81,23 @@ int coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** rules
int total_msg_count = 0;
if (!fname) {
fprintf(stderr,"Gave NULL as rule table configuration file for tuned collectives... ignoring!\n");
OPAL_OUTPUT((mca_coll_tuned_stream,"Gave NULL as rule table configuration file for tuned collectives... ignoring!\n"));
return (-1);
}
if (!rules) {
fprintf(stderr,"Gave NULL as rule table result ptr!... ignoring!\n");
OPAL_OUTPUT((mca_coll_tuned_stream,"Gave NULL as rule table result ptr!... ignoring!\n"));
return (-2);
}
if (n_collectives<1) {
fprintf(stderr,"Gave %d as max number of collectives in the rule table configuration file for tuned collectives!... ignoring!\n", n_collectives);
OPAL_OUTPUT((mca_coll_tuned_stream,"Gave %d as max number of collectives in the rule table configuration file for tuned collectives!... ignoring!\n", n_collectives));
return (-3);
}
fptr = fopen (fname, "r");
if (!fptr) {
fprintf(stderr,"cannot read rules file [%s]\n", fname);
OPAL_OUTPUT((mca_coll_tuned_stream,"cannot read rules file [%s]\n", fname));
goto on_file_error;
}
@ -106,11 +106,11 @@ int coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** rules
X = getnext(fptr);
if (X<0) {
fprintf(stderr,"Could not read number of collectives in configuration file around line %d\n", fileline);
OPAL_OUTPUT((mca_coll_tuned_stream,"Could not read number of collectives in configuration file around line %d\n", fileline));
goto on_file_error;
}
if (X>n_collectives) {
fprintf(stderr,"Number of collectives in configuration file %d is greater than number of MPI collectives possible %d ??? error around line %d\n", X, n_collectives, fileline);
OPAL_OUTPUT((mca_coll_tuned_stream,"Number of collectives in configuration file %d is greater than number of MPI collectives possible %d ??? error around line %d\n", X, n_collectives, fileline));
goto on_file_error;
}
@ -118,16 +118,16 @@ int coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** rules
CI = getnext (fptr);
if (CI<0) {
fprintf(stderr,"Could not read next Collective id in configuration file around line %d\n", fileline);
OPAL_OUTPUT((mca_coll_tuned_stream,"Could not read next Collective id in configuration file around line %d\n", fileline));
goto on_file_error;
}
if (CI>=n_collectives) {
fprintf(stderr,"Collective id in configuration file %d is greater than MPI collectives possible %d. Error around line %d\n", CI, n_collectives, fileline);
OPAL_OUTPUT((mca_coll_tuned_stream,"Collective id in configuration file %d is greater than MPI collectives possible %d. Error around line %d\n", CI, n_collectives, fileline));
goto on_file_error;
}
if (alg_rules[CI].alg_rule_id != CI) {
printf("Internal error in handling collective ID %d\n", CI);
OPAL_OUTPUT((mca_coll_tuned_stream, "Internal error in handling collective ID %d\n", CI));
coll_tuned_free_all_rules (*rules, n_collectives);
return (-4);
}
@ -140,7 +140,7 @@ int coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** rules
NCS = getnext (fptr);
if (NCS<0) {
fprintf(stderr,"Couldnot read count of communicators for collective ID %d at around line %d\n", CI, fileline);
OPAL_OUTPUT((mca_coll_tuned_stream,"Could not read count of communicators for collective ID %d at around line %d\n", CI, fileline));
goto on_file_error;
}
@ -153,7 +153,7 @@ int coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** rules
CS = getnext (fptr);
if (CS<0) {
fprintf(stderr,"Couldnot read communicator size for collective ID %d com rule %d at around line %d\n", CI, ncs, fileline);
OPAL_OUTPUT((mca_coll_tuned_stream,"Could not read communicator size for collective ID %d com rule %d at around line %d\n", CI, ncs, fileline));
goto on_file_error;
}
@ -161,7 +161,7 @@ int coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** rules
NMS = getnext (fptr);
if (NMS<0) {
fprintf(stderr,"Couldnot read number of message sizes for collective ID %d com rule %d at around line %d\n", CI, ncs, fileline);
OPAL_OUTPUT((mca_coll_tuned_stream,"Could not read number of message sizes for collective ID %d com rule %d at around line %d\n", CI, ncs, fileline));
goto on_file_error;
}
@ -176,28 +176,35 @@ int coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** rules
MS = getnext (fptr);
if (MS<0) {
fprintf(stderr,"Couldnot read message size for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline);
OPAL_OUTPUT((mca_coll_tuned_stream,"Could not read message size for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
goto on_file_error;
}
msg_p->msg_size = MS;
ALG = getnext (fptr);
if (ALG<0) {
fprintf(stderr,"Couldnot read target algorithm method for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline);
OPAL_OUTPUT((mca_coll_tuned_stream,"Could not read target algorithm method for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
goto on_file_error;
}
msg_p->result_alg = ALG;
FANINOUT = getnext (fptr);
if (FANINOUT<0) {
OPAL_OUTPUT((mca_coll_tuned_stream,"Could not read fan in/out topo for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
goto on_file_error;
}
msg_p->result_topo_faninout = FANINOUT;
SS = getnext (fptr);
if (SS<0) {
fprintf(stderr,"Couldnot read target segment size for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline);
OPAL_OUTPUT((mca_coll_tuned_stream,"Could not read target segment size for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
goto on_file_error;
}
msg_p->result_segsize = SS;
if (!nms && MS) {
fprintf(stderr,"All algorithms must specify a rule for message size of zero upwards always first!\n");
fprintf(stderr,"Message size was %d for collective ID %d com rule %d msg rule %d at around line %d\n", MS, CI, ncs, nms, fileline);
OPAL_OUTPUT((mca_coll_tuned_stream,"All algorithms must specify a rule for message size of zero upwards always first!\n"));
OPAL_OUTPUT((mca_coll_tuned_stream,"Message size was %d for collective ID %d com rule %d msg rule %d at around line %d\n", MS, CI, ncs, nms, fileline));
goto on_file_error;
}
@ -215,12 +222,11 @@ int coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** rules
fclose (fptr);
printf("\nStats\n");
printf("Collectives with rules\t\t\t: %5d\n", total_alg_count);
printf("Communicator sizes with rules\t\t: %5d\n", total_com_count);
printf("Message sizes with rules\t\t: %5d\n", total_msg_count);
printf("Lines in configuration file read\t\t: %5d\n", fileline);
printf("\n");
OPAL_OUTPUT((mca_coll_tuned_stream,"\nConfigure file Stats\n"));
OPAL_OUTPUT((mca_coll_tuned_stream,"Collectives with rules\t\t\t: %5d\n", total_alg_count));
OPAL_OUTPUT((mca_coll_tuned_stream,"Communicator sizes with rules\t\t: %5d\n", total_com_count));
OPAL_OUTPUT((mca_coll_tuned_stream,"Message sizes with rules\t\t: %5d\n", total_msg_count));
OPAL_OUTPUT((mca_coll_tuned_stream,"Lines in configuration file read\t\t: %5d\n", fileline));
/* return the rules to the caller */
*rules = alg_rules;
@ -234,10 +240,10 @@ on_file_error:
/* we return back a verbose message and a count of -1 algorithms read */
/* draconian but its better than having a bad collective decision table */
fprintf(stderr,"read_rules_config_file: bad configure file [%s]. Read afar as line %d\n", fname, fileline);
fprintf(stderr,"Ignoring user supplied tuned collectives configuration decision file.\n");
fprintf(stderr,"Switching back to [compiled in] fixed decision table.\n");
fprintf(stderr,"Fix errors as listed above and try again.\n");
OPAL_OUTPUT((mca_coll_tuned_stream,"read_rules_config_file: bad configure file [%s]. Read afar as line %d\n", fname, fileline));
OPAL_OUTPUT((mca_coll_tuned_stream,"Ignoring user supplied tuned collectives configuration decision file.\n"));
OPAL_OUTPUT((mca_coll_tuned_stream,"Switching back to [compiled in] fixed decision table.\n"));
OPAL_OUTPUT((mca_coll_tuned_stream,"Fix errors as listed above and try again.\n"));
/* deallocate memory if allocated */
if (alg_rules) coll_tuned_free_all_rules (alg_rules, n_collectives);

Просмотреть файл

@ -23,7 +23,10 @@
#include "mca/mca.h"
#include "mca/coll/coll.h"
#include "request/request.h"
#include "mca/pml/pml.h"
#include "ompi/include/constants.h"
#include "datatype/datatype.h"
#include "communicator/communicator.h"
#include "coll_tuned.h"
/* need to include our own topo prototypes so we can malloc data on the comm correctly */
#include "coll_tuned_topo.h"
@ -35,6 +38,12 @@
#include <stdio.h>
#include "coll_tuned_util.h"
ompi_coll_alg_rule_t* coll_tuned_mk_alg_rules (int n_alg)
{
int i;
@ -83,9 +92,10 @@ ompi_coll_msg_rule_t* coll_tuned_mk_msg_rules (int n_msg_rules, int alg_rule_id,
msg_rules[i].alg_rule_id = alg_rule_id;
msg_rules[i].com_rule_id = com_rule_id;
msg_rules[i].msg_rule_id = i;
msg_rules[i].msg_size = 0; /* unknown */
msg_rules[i].result_alg = 0; /* unknown */
msg_rules[i].result_segsize = 0; /* unknown */
msg_rules[i].msg_size = 0; /* unknown */
msg_rules[i].result_alg = 0; /* unknown */
msg_rules[i].result_topo_faninout = 0; /* unknown */
msg_rules[i].result_segsize = 0; /* unknown */
}
return (msg_rules);
}
@ -100,14 +110,15 @@ ompi_coll_msg_rule_t* coll_tuned_mk_msg_rules (int n_msg_rules, int alg_rule_id,
/*
 * Write one message-size rule to the tuned collective output stream.
 *
 * The rule is reported in two parts: the identifiers it belongs to (algorithm
 * rule, communicator rule, communicator size, message rule) followed by the
 * mapping "message size -> algorithm, topology fan in/out, segment size".
 *
 * Everything goes through OPAL_OUTPUT only; the former printf/fprintf copies
 * of the same messages have been dropped so each rule is reported once.
 *
 * Returns 0 on success, -1 when msg_p is NULL.
 */
int coll_tuned_dump_msg_rule (ompi_coll_msg_rule_t* msg_p)
{
    if (!msg_p) {
        OPAL_OUTPUT((mca_coll_tuned_stream,"Message rule was a NULL ptr?!\n"));
        return (-1);
    }

    OPAL_OUTPUT((mca_coll_tuned_stream,"alg_id %3d\tcom_id %3d\tcom_size %3d\tmsg_id %3d\t", msg_p->alg_rule_id,
                 msg_p->com_rule_id, msg_p->mpi_comsize, msg_p->msg_rule_id));

    /* the cast keeps the %ld conversion correct whatever integer type the
     * segment size field is declared with */
    OPAL_OUTPUT((mca_coll_tuned_stream,"msg_size %6d -> algorithm %2d\ttopo in/out %2d\tsegsize %5ld\n",
                 msg_p->msg_size, msg_p->result_alg, msg_p->result_topo_faninout,
                 (long) msg_p->result_segsize));

    return (0);
}
@ -116,21 +127,20 @@ int coll_tuned_dump_msg_rule (ompi_coll_msg_rule_t* msg_p)
int coll_tuned_dump_com_rule (ompi_coll_com_rule_t* com_p)
{
int i;
ompi_coll_msg_rule_t* msg_p;
if (!com_p) {
fprintf(stderr,"Com rule was a NULL ptr?!\n");
OPAL_OUTPUT((mca_coll_tuned_stream,"Com rule was a NULL ptr?!\n"));
return (-1);
}
printf("alg_id %3d\tcom_id %3d\tcom_size %3d\t", com_p->alg_rule_id, com_p->com_rule_id, com_p->mpi_comsize);
OPAL_OUTPUT((mca_coll_tuned_stream, "alg_id %3d\tcom_id %3d\tcom_size %3d\t", com_p->alg_rule_id, com_p->com_rule_id, com_p->mpi_comsize));
if (!com_p->n_msg_sizes) {
printf("no msgsizes defined\n");
OPAL_OUTPUT((mca_coll_tuned_stream,"no msgsizes defined\n"));
return (0);
}
printf("number of message sizes %3d\n", com_p->n_msg_sizes);
OPAL_OUTPUT((mca_coll_tuned_stream,"number of message sizes %3d\n", com_p->n_msg_sizes));
for (i=0;i<com_p->n_msg_sizes;i++) {
coll_tuned_dump_msg_rule (&(com_p->msg_rules[i]));
@ -143,21 +153,20 @@ int coll_tuned_dump_com_rule (ompi_coll_com_rule_t* com_p)
int coll_tuned_dump_alg_rule (ompi_coll_alg_rule_t* alg_p)
{
int i;
ompi_coll_com_rule_t* com_p;
if (!alg_p) {
fprintf(stderr,"Algorithm rule was a NULL ptr?!\n");
OPAL_OUTPUT((mca_coll_tuned_stream,"Algorithm rule was a NULL ptr?!\n"));
return (-1);
}
printf("alg_id %3d\t", alg_p->alg_rule_id);
OPAL_OUTPUT((mca_coll_tuned_stream,"alg_id %3d\t", alg_p->alg_rule_id));
if (!alg_p->n_com_sizes) {
printf("no coms defined\n");
OPAL_OUTPUT((mca_coll_tuned_stream,"no coms defined\n"));
return (0);
}
printf("number of com sizes %3d\n", alg_p->n_com_sizes);
OPAL_OUTPUT((mca_coll_tuned_stream,"number of com sizes %3d\n", alg_p->n_com_sizes));
for (i=0;i<alg_p->n_com_sizes;i++) {
coll_tuned_dump_com_rule (&(alg_p->com_rules[i]));
@ -172,16 +181,17 @@ int coll_tuned_dump_all_rules (ompi_coll_alg_rule_t* alg_p, int n_rules)
int i;
if (!alg_p) {
fprintf(stderr,"Algorithm rule was a NULL ptr?!\n");
OPAL_OUTPUT((mca_coll_tuned_stream,"Algorithm rule was a NULL ptr?!\n"));
return (-1);
}
printf("Number of algorithm rules %3d\n", n_rules);
OPAL_OUTPUT((mca_coll_tuned_stream,"Number of algorithm rules %3d\n", n_rules));
for (i=0;i<n_rules;i++) {
coll_tuned_dump_alg_rule (&(alg_p[i]));
}
return (0);
}
@ -197,7 +207,7 @@ int coll_tuned_free_msg_rules_in_com_rule (ompi_coll_com_rule_t* com_p)
ompi_coll_msg_rule_t* msg_p;
if (!com_p) {
fprintf(stderr,"attempt to free NULL com_rule ptr\n");
OPAL_OUTPUT((mca_coll_tuned_stream,"attempt to free NULL com_rule ptr\n"));
return (-1);
}
@ -205,7 +215,7 @@ int coll_tuned_free_msg_rules_in_com_rule (ompi_coll_com_rule_t* com_p)
msg_p = com_p->msg_rules;
if (!msg_p) {
fprintf(stderr,"attempt to free NULL msg_rules when msg count was %d\n", com_p->n_msg_sizes);
OPAL_OUTPUT((mca_coll_tuned_stream,"attempt to free NULL msg_rules when msg count was %d\n", com_p->n_msg_sizes));
rc = -1; /* some error */
}
else {
@ -230,7 +240,7 @@ int coll_tuned_free_coms_in_alg_rule (ompi_coll_alg_rule_t* alg_p)
ompi_coll_com_rule_t* com_p;
if (!alg_p) {
fprintf(stderr,"attempt to free NULL alg_rule ptr\n");
OPAL_OUTPUT((mca_coll_tuned_stream,"attempt to free NULL alg_rule ptr\n"));
return (-1);
}
@ -238,7 +248,7 @@ int coll_tuned_free_coms_in_alg_rule (ompi_coll_alg_rule_t* alg_p)
com_p = alg_p->com_rules;
if (!com_p) {
fprintf(stderr,"attempt to free NULL com_rules when com count was %d\n", alg_p->n_com_sizes);
OPAL_OUTPUT((mca_coll_tuned_stream,"attempt to free NULL com_rules when com count was %d\n", alg_p->n_com_sizes));
}
else {
/* ok, memory exists for the com rules so free their message rules first */
@ -275,7 +285,7 @@ int coll_tuned_free_all_rules (ompi_coll_alg_rule_t* alg_p, int n_algs)
/*
* query functions
* i.e. the functions that get me the algorithm and segment size fast
* i.e. the functions that get me the algorithm, topo fanin/out and segment size fast
* and also get the rules that are needed by each communicator as needed
*
*/
@ -313,15 +323,15 @@ ompi_coll_com_rule_t* coll_tuned_get_com_rule_ptr (ompi_coll_alg_rule_t* rules,
i = best = 0;
while (i<alg_p->n_com_sizes) {
printf("checking comsize %d against alg_id %d com_id %d index %d com_size %d",
mpi_comsize, com_p->alg_rule_id, com_p->com_rule_id, i, com_p->mpi_comsize);
/* OPAL_OUTPUT((mca_coll_tuned_stream,"checking comsize %d against alg_id %d com_id %d index %d com_size %d", */
/* mpi_comsize, com_p->alg_rule_id, com_p->com_rule_id, i, com_p->mpi_comsize)); */
if (com_p->mpi_comsize <= mpi_comsize) {
best = i;
best_com_p = com_p;
printf(":ok\n");
/* OPAL_OUTPUT((mca_coll_tuned_stream(":ok\n")); */
}
else {
printf(":nop\n");
/* OPAL_OUTPUT((mca_coll_tuned_stream(":nop\n")); */
break;
}
/* go to the next entry */
@ -329,7 +339,7 @@ ompi_coll_com_rule_t* coll_tuned_get_com_rule_ptr (ompi_coll_alg_rule_t* rules,
i++;
}
printf("Selected the following com rule id %d\n", best_com_p->com_rule_id);
OPAL_OUTPUT((mca_coll_tuned_stream,"Selected the following com rule id %d\n", best_com_p->com_rule_id));
coll_tuned_dump_com_rule (best_com_p);
return (best_com_p);
@ -338,19 +348,20 @@ ompi_coll_com_rule_t* coll_tuned_get_com_rule_ptr (ompi_coll_alg_rule_t* rules,
/*
* This function takes a com_rule ptr (from the communicators coll tuned data structure)
* (Which is chosen for a particular MPI collective)
* and a (total_)msg_size and it returns (0) and a algorithm to use and a recommended segment size
* and a (total_)msg_size and it returns (0) and an algorithm to use and a recommended topo faninout and segment size
* all based on the user supplied rules
*
* Just like the above functions it uses a less than or equal msg size
* (hence config file must have a default defined for '0' if we reach this point)
* else if no rules match we return '0' + '0' or used fixed decision table with no segmentation
* else if no rules match we return '0' + '0,0' or use the fixed decision table with no topo change and no segmentation
* of users data.. shame.
*
* On error return 0 so we default to fixed rules anyway :)
*
*/
int coll_tuned_get_target_method_params (ompi_coll_com_rule_t* base_com_rule, int mpi_msgsize, int* result_segsize)
int coll_tuned_get_target_method_params (ompi_coll_com_rule_t* base_com_rule, int mpi_msgsize, int *result_topo_faninout,
int* result_segsize)
{
ompi_coll_msg_rule_t* msg_p = (ompi_coll_msg_rule_t*) NULL;
ompi_coll_msg_rule_t* best_msg_p = (ompi_coll_msg_rule_t*) NULL;
@ -360,6 +371,10 @@ int coll_tuned_get_target_method_params (ompi_coll_com_rule_t* base_com_rule, in
return (0);
}
if (!result_topo_faninout) {
return (0);
}
if (!result_segsize) {
return (0);
}
@ -375,15 +390,15 @@ int coll_tuned_get_target_method_params (ompi_coll_com_rule_t* base_com_rule, in
i = best = 0;
while (i<base_com_rule->n_msg_sizes) {
printf("checking mpi_msgsize %d against com_id %d msg_id %d index %d msg_size %d",
mpi_msgsize, msg_p->com_rule_id, msg_p->msg_rule_id, i, msg_p->msg_size);
/* OPAL_OUTPUT((mca_coll_tuned_stream,"checking mpi_msgsize %d against com_id %d msg_id %d index %d msg_size %d", */
/* mpi_msgsize, msg_p->com_rule_id, msg_p->msg_rule_id, i, msg_p->msg_size)); */
if (msg_p->msg_size <= mpi_msgsize) {
best = i;
best_msg_p = msg_p;
printf(":ok\n");
/* OPAL_OUTPUT((mca_coll_tuned_stream(":ok\n")); */
}
else {
printf(":nop\n");
/* OPAL_OUTPUT((mca_coll_tuned_stream(":nop\n")); */
break;
}
/* go to the next entry */
@ -391,11 +406,16 @@ int coll_tuned_get_target_method_params (ompi_coll_com_rule_t* base_com_rule, in
i++;
}
printf("Selected the following msg rule id %d\n", best_msg_p->msg_rule_id);
OPAL_OUTPUT((mca_coll_tuned_stream,"Selected the following msg rule id %d\n", best_msg_p->msg_rule_id));
coll_tuned_dump_msg_rule (best_msg_p);
/* return the topology fan in/out */
*result_topo_faninout = best_msg_p->result_topo_faninout;
/* return the segment size */
*result_segsize = best_msg_p->result_segsize;
return (best_msg_p->result_alg);
/* return the algorithm/method to use */
return (best_msg_p->result_alg);
}

Просмотреть файл

@ -46,8 +46,9 @@ typedef struct msg_rule_s {
int msg_size; /* message size */
/* RESULT */
int result_alg; /* result algorithm to use */
long result_segsize; /* result segment size to use */
int result_alg; /* result algorithm to use */
int result_topo_faninout; /* result topology fan in/out to use (if applicable) */
long result_segsize; /* result segment size to use */
} ompi_coll_msg_rule_t;
@ -97,8 +98,11 @@ int coll_tuned_free_all_rules (ompi_coll_alg_rule_t* alg_p, int n_algs);
/* the IMPORTANT routines, i.e. the ones that do stuff for everyday communicators and collective calls */
ompi_coll_com_rule_t* coll_tuned_get_com_rule_ptr (ompi_coll_alg_rule_t* rules, int alg_id, int mpi_comsize);
int coll_tuned_get_target_method_params (ompi_coll_com_rule_t* base_com_rule, int mpi_msgsize, int* result_segsize);
int coll_tuned_get_target_method_params (ompi_coll_com_rule_t* base_com_rule, int mpi_msgsize,
int* result_topo_faninout, int* result_segsize);

Просмотреть файл

@ -537,3 +537,28 @@ switch (mca_coll_tuned_reduce_forced_choice) {
}
/*
 * Run a reduce using an explicitly requested algorithm.
 *
 * sbuf, rbuf, count, dtype, op, root, comm
 *            passed through unchanged to the selected reduce routine.
 * choice     algorithm selector:
 *              0 - fixed decision function
 *              1 - basic linear
 *              2 - chain (receives segsize and faninout)
 *              3 - pipeline (receives segsize)
 * faninout   topology fan in/out, only handed to the chain algorithm.
 * segsize    segment size, handed to the chain and pipeline algorithms.
 *
 * Returns whatever the selected routine returns; any other value of
 * 'choice' is reported on the tuned output stream and yields MPI_ERR_ARG.
 */
int mca_coll_tuned_reduce_intra_do_this(void *sbuf, void* rbuf, int count,
                                        struct ompi_datatype_t *dtype,
                                        struct ompi_op_t *op, int root,
                                        struct ompi_communicator_t *comm,
                                        int choice, int faninout, int segsize)
{
    int rc;

    OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:reduce_intra_do_this selected algorithm %d topo faninout %d segsize %d",
                 choice, faninout, segsize));

    if (0 == choice) {
        rc = mca_coll_tuned_reduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, root, comm);
    } else if (1 == choice) {
        rc = mca_coll_tuned_reduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, root, comm);
    } else if (2 == choice) {
        /* chain is the only algorithm that consumes the fan in/out value */
        rc = mca_coll_tuned_reduce_intra_chain (sbuf, rbuf, count, dtype, op, root, comm,
                                                segsize, faninout);
    } else if (3 == choice) {
        rc = mca_coll_tuned_reduce_intra_pipeline (sbuf, rbuf, count, dtype, op, root, comm,
                                                   segsize);
    } else {
        /* unknown selector: report the requested value together with the query result */
        OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:reduce_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
                     choice, mca_coll_tuned_reduce_intra_query()));
        rc = MPI_ERR_ARG;
    }

    return (rc);
}