File based dynamic up and tested...
Lots of misc fixes: printfs -> opal_output, handles fanin/out correctly for forced ops, removes unused vars, corrects the calculation of 'msgsize' for the decision functions (its meaning varies depending on the algorithm), etc. This commit was SVN r8113.
Этот коммит содержится в:
родитель
878676218e
Коммит
877f7bbe6a
@ -172,11 +172,19 @@ OMPI_COMP_EXPORT extern int mca_coll_tuned_reduce_forced_chain_fanout;
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm);
|
||||
|
||||
int mca_coll_tuned_allreduce_intra_do_forced(void *sbuf, void *rbuf,
|
||||
int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm);
|
||||
int mca_coll_tuned_allreduce_intra_do_this(void *sbuf, void *rbuf,
|
||||
int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm,
|
||||
int choice, int faninout, int segsize);
|
||||
|
||||
int mca_coll_tuned_allreduce_intra_check_forced(void);
|
||||
int mca_coll_tuned_allreduce_intra_query(void);
|
||||
|
||||
@ -214,11 +222,19 @@ OMPI_COMP_EXPORT extern int mca_coll_tuned_reduce_forced_chain_fanout;
|
||||
void* rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm);
|
||||
|
||||
int mca_coll_tuned_alltoall_intra_do_forced(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void* rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm);
|
||||
int mca_coll_tuned_alltoall_intra_do_this(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void* rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
int choice, int faninout, int segsize);
|
||||
|
||||
int mca_coll_tuned_alltoall_intra_check_forced(void);
|
||||
int mca_coll_tuned_alltoall_intra_query (void);
|
||||
|
||||
@ -308,7 +324,10 @@ OMPI_COMP_EXPORT extern int mca_coll_tuned_reduce_forced_chain_fanout;
|
||||
int mca_coll_tuned_barrier_intra_dec_fixed(struct ompi_communicator_t *comm);
|
||||
int mca_coll_tuned_barrier_intra_dec_dynamic(
|
||||
struct ompi_communicator_t *comm);
|
||||
|
||||
int mca_coll_tuned_barrier_intra_do_forced(struct ompi_communicator_t *comm);
|
||||
int mca_coll_tuned_barrier_intra_do_this(struct ompi_communicator_t *comm, int choice, int faninout, int segsize);
|
||||
|
||||
int mca_coll_tuned_barrier_intra_check_forced(void);
|
||||
int mca_coll_tuned_barrier_intra_query (void);
|
||||
|
||||
@ -331,10 +350,17 @@ OMPI_COMP_EXPORT extern int mca_coll_tuned_reduce_forced_chain_fanout;
|
||||
struct ompi_datatype_t *datatype,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm);
|
||||
|
||||
int mca_coll_tuned_bcast_intra_do_forced(void *buff, int count,
|
||||
struct ompi_datatype_t *datatype,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm);
|
||||
int mca_coll_tuned_bcast_intra_do_this(void *buff, int count,
|
||||
struct ompi_datatype_t *datatype,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
int choice, int faninout, int segsize);
|
||||
|
||||
int mca_coll_tuned_bcast_intra_check_forced(void);
|
||||
int mca_coll_tuned_bcast_intra_query (void);
|
||||
|
||||
@ -450,10 +476,17 @@ OMPI_COMP_EXPORT extern int mca_coll_tuned_reduce_forced_chain_fanout;
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op, int root,
|
||||
struct ompi_communicator_t *comm);
|
||||
|
||||
int mca_coll_tuned_reduce_intra_do_forced(void *sbuf, void* rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op, int root,
|
||||
struct ompi_communicator_t *comm);
|
||||
int mca_coll_tuned_reduce_intra_do_this(void *sbuf, void* rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op, int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
int choice, int faninout, int segsize);
|
||||
|
||||
int mca_coll_tuned_reduce_intra_check_forced(void);
|
||||
int mca_coll_tuned_reduce_intra_query (void);
|
||||
|
||||
|
@ -210,8 +210,8 @@ int mca_coll_tuned_allreduce_intra_do_forced(void *sbuf, void *rbuf, int count,
|
||||
struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:allreduce_intra_do_forced selected algorithm %d",
|
||||
mca_coll_tuned_allreduce_forced_choice));
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:allreduce_intra_do_forced selected algorithm %d",
|
||||
mca_coll_tuned_allreduce_forced_choice));
|
||||
|
||||
switch (mca_coll_tuned_allreduce_forced_choice) {
|
||||
case (0): return mca_coll_tuned_allreduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, comm);
|
||||
@ -226,4 +226,26 @@ switch (mca_coll_tuned_allreduce_forced_choice) {
|
||||
}
|
||||
|
||||
|
||||
int mca_coll_tuned_allreduce_intra_do_this(void *sbuf, void *rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm,
|
||||
int choice, int faninout, int segsize)
|
||||
{
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:allreduce_intra_do_this algorithm %d topo fan in/out %d segsize %d",
|
||||
choice, faninout, segsize));
|
||||
|
||||
switch (choice) {
|
||||
case (0): return mca_coll_tuned_allreduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, comm);
|
||||
case (1): return mca_coll_tuned_allreduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, comm);
|
||||
case (2): return mca_coll_tuned_allreduce_intra_nonoverlapping (sbuf, rbuf, count, dtype, op, comm);
|
||||
default:
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:allreduce_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
|
||||
choice, mca_coll_tuned_allreduce_intra_query()));
|
||||
return (MPI_ERR_ARG);
|
||||
} /* switch */
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
@ -505,3 +505,28 @@ switch (mca_coll_tuned_alltoall_forced_choice) {
|
||||
|
||||
}
|
||||
|
||||
|
||||
int mca_coll_tuned_alltoall_intra_do_this(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void* rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
int choice, int faninout, int segsize)
|
||||
{
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:alltoall_intra_do_this selected algorithm %d topo faninout %d segsize %d",
|
||||
choice, faninout, segsize));
|
||||
|
||||
switch (choice) {
|
||||
case (0): return mca_coll_tuned_alltoall_intra_dec_fixed (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
|
||||
case (1): return mca_coll_tuned_alltoall_intra_basic_linear (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
|
||||
case (2): return mca_coll_tuned_alltoall_intra_pairwise (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
|
||||
case (3): return mca_coll_tuned_alltoall_intra_bruck (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
|
||||
case (4): return mca_coll_tuned_alltoall_intra_two_procs (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
|
||||
default:
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:alltoall_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
|
||||
choice, mca_coll_tuned_alltoall_intra_query()));
|
||||
return (MPI_ERR_ARG);
|
||||
} /* switch */
|
||||
|
||||
}
|
||||
|
||||
|
@ -221,10 +221,79 @@ int mca_coll_tuned_barrier_intra_two_procs(struct ompi_communicator_t *comm)
|
||||
}
|
||||
|
||||
|
||||
int mca_coll_tuned_barrier_intra_linear(struct ompi_communicator_t *comm)
|
||||
/*
|
||||
* Linear functions are copied from the BASIC coll module
|
||||
* they do not segment the message and are simple implementations
|
||||
* but for some small number of nodes and/or small data sizes they
|
||||
* are just as fast as tuned/tree based segmenting operations
|
||||
* and as such may be selected by the decision functions
|
||||
* These are copied into this module due to the way we select modules
|
||||
* in V1. i.e. in V2 we will handle this differently and so will not
|
||||
* have to duplicate code.
|
||||
* GEF Oct05 after asking Jeff.
|
||||
*/
|
||||
|
||||
/* copied function (with appropriate renaming) starts here */
|
||||
|
||||
int mca_coll_tuned_barrier_intra_basic_linear(struct ompi_communicator_t *comm)
|
||||
{
|
||||
return OMPI_ERR_NOT_IMPLEMENTED;
|
||||
int i;
|
||||
int err;
|
||||
int size = ompi_comm_size(comm);
|
||||
int rank = ompi_comm_rank(comm);
|
||||
|
||||
/* All non-root send & receive zero-length message. */
|
||||
|
||||
if (rank > 0) {
|
||||
err =
|
||||
MCA_PML_CALL(send
|
||||
(NULL, 0, MPI_BYTE, 0, MCA_COLL_BASE_TAG_BARRIER,
|
||||
MCA_PML_BASE_SEND_STANDARD, comm));
|
||||
if (MPI_SUCCESS != err) {
|
||||
return err;
|
||||
}
|
||||
|
||||
err =
|
||||
MCA_PML_CALL(recv
|
||||
(NULL, 0, MPI_BYTE, 0, MCA_COLL_BASE_TAG_BARRIER,
|
||||
comm, MPI_STATUS_IGNORE));
|
||||
if (MPI_SUCCESS != err) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
/* The root collects and broadcasts the messages. */
|
||||
|
||||
else {
|
||||
for (i = 1; i < size; ++i) {
|
||||
err = MCA_PML_CALL(recv(NULL, 0, MPI_BYTE, MPI_ANY_SOURCE,
|
||||
MCA_COLL_BASE_TAG_BARRIER,
|
||||
comm, MPI_STATUS_IGNORE));
|
||||
if (MPI_SUCCESS != err) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for (i = 1; i < size; ++i) {
|
||||
err =
|
||||
MCA_PML_CALL(send
|
||||
(NULL, 0, MPI_BYTE, i,
|
||||
MCA_COLL_BASE_TAG_BARRIER,
|
||||
MCA_PML_BASE_SEND_STANDARD, comm));
|
||||
if (MPI_SUCCESS != err) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* All done */
|
||||
|
||||
return MPI_SUCCESS;
|
||||
|
||||
}
|
||||
/* copied function (with appropriate renaming) ends here */
|
||||
|
||||
|
||||
/* The following are used by dynamic and forced rules */
|
||||
|
||||
@ -247,7 +316,7 @@ return (MPI_SUCCESS);
|
||||
|
||||
/*
 * Report the highest algorithm number accepted for the barrier collective.
 *
 * The value is used as the upper bound in the "only 0-%d is valid" debug
 * message of the barrier dispatchers.  Selector 0 means the fixed decision
 * function and selectors 1-5 are the concrete barrier algorithms handled by
 * mca_coll_tuned_barrier_intra_do_this, so the bound is 5 (the previous
 * value of 4 and its "4 algorithms available" remark were stale).
 *
 * NOTE(review): the original carried a "2 to do" remark, presumably meaning
 * two further algorithms are still planned -- confirm before extending.
 */
int mca_coll_tuned_barrier_intra_query (void)
{
    return (5);
}
|
||||
|
||||
@ -258,7 +327,7 @@ int mca_coll_tuned_barrier_intra_do_forced(struct ompi_communicator_t *comm)
|
||||
|
||||
switch (mca_coll_tuned_barrier_forced_choice) {
|
||||
case (0): return mca_coll_tuned_barrier_intra_dec_fixed (comm);
|
||||
/* case (1): return mca_coll_tuned_barrier_intra_basic_linear (comm); */
|
||||
case (1): return mca_coll_tuned_barrier_intra_basic_linear (comm);
|
||||
case (2): return mca_coll_tuned_barrier_intra_doublering (comm);
|
||||
case (3): return mca_coll_tuned_barrier_intra_recursivedoubling (comm);
|
||||
case (4): return mca_coll_tuned_barrier_intra_bruck (comm);
|
||||
@ -272,3 +341,24 @@ switch (mca_coll_tuned_barrier_forced_choice) {
|
||||
|
||||
}
|
||||
|
||||
|
||||
int mca_coll_tuned_barrier_intra_do_this (struct ompi_communicator_t *comm, int choice, int faninout, int segsize)
|
||||
{
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:barrier_intra_do_this selected algorithm %d topo fanin/out%d", choice, faninout));
|
||||
|
||||
switch (choice) {
|
||||
case (0): return mca_coll_tuned_barrier_intra_dec_fixed (comm);
|
||||
case (1): return mca_coll_tuned_barrier_intra_basic_linear (comm);
|
||||
case (2): return mca_coll_tuned_barrier_intra_doublering (comm);
|
||||
case (3): return mca_coll_tuned_barrier_intra_recursivedoubling (comm);
|
||||
case (4): return mca_coll_tuned_barrier_intra_bruck (comm);
|
||||
case (5): return mca_coll_tuned_barrier_intra_two_procs (comm);
|
||||
/* case (6): return mca_coll_tuned_barrier_intra_bmtree_step (comm); */
|
||||
default:
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:barrier_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
|
||||
choice, mca_coll_tuned_barrier_intra_query()));
|
||||
return (MPI_ERR_ARG);
|
||||
} /* switch */
|
||||
|
||||
}
|
||||
|
||||
|
@ -841,3 +841,31 @@ switch (mca_coll_tuned_bcast_forced_choice) {
|
||||
|
||||
}
|
||||
|
||||
|
||||
int mca_coll_tuned_bcast_intra_do_this(void *buf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
int choice, int faninout, int segsize)
|
||||
|
||||
{
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:bcast_intra_do_this algorithm %d topo faninout %d segsize %d",
|
||||
choice, faninout, segsize));
|
||||
|
||||
switch (choice) {
|
||||
case (0): return mca_coll_tuned_bcast_intra_dec_fixed (buf, count, dtype, root, comm);
|
||||
case (1): return mca_coll_tuned_bcast_intra_basic_linear (buf, count, dtype, root, comm);
|
||||
case (2): return mca_coll_tuned_bcast_intra_chain (buf, count, dtype, root, comm, segsize, faninout );
|
||||
case (3): return mca_coll_tuned_bcast_intra_pipeline (buf, count, dtype, root, comm, segsize);
|
||||
case (4): return mca_coll_tuned_bcast_intra_split_bintree (buf, count, dtype, root, comm, segsize);
|
||||
case (5): return mca_coll_tuned_bcast_intra_bintree (buf, count, dtype, root, comm, segsize);
|
||||
/* case (6): return mca_coll_tuned_bcast_intra_bmtree (buf, count, dtype, root, comm,
|
||||
* segsize); */
|
||||
default:
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:bcast_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
|
||||
choice, mca_coll_tuned_bcast_intra_query()));
|
||||
return (MPI_ERR_ARG);
|
||||
} /* switch */
|
||||
|
||||
}
|
||||
|
||||
|
@ -150,6 +150,7 @@ static int tuned_open(void)
|
||||
false, false, mca_coll_tuned_use_dynamic_rules,
|
||||
&mca_coll_tuned_use_dynamic_rules);
|
||||
|
||||
|
||||
/* if dynamic rules allowed then look up dynamic rules config filename, else we leave it an empty filename (NULL) */
|
||||
if (mca_coll_tuned_use_dynamic_rules) {
|
||||
/* char *default_name; */
|
||||
|
@ -22,6 +22,8 @@
|
||||
#include "include/constants.h"
|
||||
#include "datatype/datatype.h"
|
||||
#include "communicator/communicator.h"
|
||||
#include "mca/base/mca_base_param.h"
|
||||
#include "mca/coll/base/base.h"
|
||||
#include "mca/coll/coll.h"
|
||||
#include "mca/coll/base/coll_tags.h"
|
||||
#include "coll_tuned.h"
|
||||
@ -30,6 +32,19 @@
|
||||
|
||||
#include "coll_tuned.h"
|
||||
|
||||
|
||||
/*
|
||||
* Notes on evaluation rules and ordering
|
||||
*
|
||||
* The order is:
|
||||
* use file based rules if presented (-coll_tuned_dynamic_rules_filename = rules)
|
||||
* Else
|
||||
* use forced rules (-coll_tuned_dynamic_ALG_intra_algorithm = algorithm-number)
|
||||
* Else
|
||||
* use fixed (compiled) rule set (or nested ifs)
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* allreduce_intra
|
||||
*
|
||||
@ -43,17 +58,28 @@ mca_coll_tuned_allreduce_intra_dec_dynamic (void *sbuf, void *rbuf, int count,
|
||||
struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
int i;
|
||||
int size;
|
||||
int rank;
|
||||
int err;
|
||||
int contig;
|
||||
int dsize;
|
||||
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream, "mca_coll_tuned_allreduce_intra_dec_dynamic"));
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
/* check to see if we have some filebased rules */
|
||||
if (comm->c_coll_selected_data->com_rules[ALLREDUCE]) {
|
||||
|
||||
/* we do, so calc the message size or what ever we need and use this for the evaluation */
|
||||
int alg, faninout, segsize;
|
||||
/* int contig; */
|
||||
int dsize;
|
||||
|
||||
ompi_ddt_type_size (dtype, &dsize);
|
||||
dsize *= count;
|
||||
|
||||
alg = coll_tuned_get_target_method_params (comm->c_coll_selected_data->com_rules[ALLREDUCE],
|
||||
dsize, &faninout, &segsize);
|
||||
|
||||
if (alg) { /* we have found a valid choice from the file based rules for this message size */
|
||||
return mca_coll_tuned_allreduce_intra_do_this (sbuf, rbuf, count, dtype, op, comm,
|
||||
alg, faninout, segsize);
|
||||
} /* found a method */
|
||||
} /*end if any com rules to check */
|
||||
|
||||
if (mca_coll_tuned_allreduce_forced_choice) {
|
||||
return mca_coll_tuned_allreduce_intra_do_forced (sbuf, rbuf, count, dtype, op, comm);
|
||||
@ -77,17 +103,30 @@ int mca_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
int i;
|
||||
int size;
|
||||
int rank;
|
||||
int err;
|
||||
int contig;
|
||||
int dsize;
|
||||
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream, "mca_coll_tuned_alltoall_intra_dec_dynamic"));
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
/* check to see if we have some filebased rules */
|
||||
if (comm->c_coll_selected_data->com_rules[ALLTOALL]) {
|
||||
|
||||
/* we do, so calc the message size or what ever we need and use this for the evaluation */
|
||||
int comsize;
|
||||
int alg, faninout, segsize;
|
||||
int dsize;
|
||||
|
||||
ompi_ddt_type_size (sdtype, &dsize);
|
||||
comsize = ompi_comm_size(comm);
|
||||
dsize *= comsize * scount;
|
||||
|
||||
alg = coll_tuned_get_target_method_params (comm->c_coll_selected_data->com_rules[ALLTOALL],
|
||||
dsize, &faninout, &segsize);
|
||||
|
||||
if (alg) { /* we have found a valid choice from the file based rules for this message size */
|
||||
return mca_coll_tuned_alltoall_intra_do_this (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm,
|
||||
alg, faninout, segsize);
|
||||
} /* found a method */
|
||||
} /*end if any com rules to check */
|
||||
|
||||
|
||||
if (mca_coll_tuned_alltoall_forced_choice) {
|
||||
return mca_coll_tuned_alltoall_intra_do_forced (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
|
||||
@ -106,17 +145,23 @@ int mca_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount,
|
||||
*/
|
||||
int mca_coll_tuned_barrier_intra_dec_dynamic(struct ompi_communicator_t *comm)
|
||||
{
|
||||
int i;
|
||||
int size;
|
||||
int rank;
|
||||
int err;
|
||||
int contig;
|
||||
int dsize;
|
||||
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_barrier_intra_dec_dynamic"));
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
/* check to see if we have some filebased rules */
|
||||
if (comm->c_coll_selected_data->com_rules[BARRIER]) {
|
||||
|
||||
/* we do, so calc the message size or what ever we need and use this for the evaluation */
|
||||
int alg, faninout, segsize;
|
||||
|
||||
alg = coll_tuned_get_target_method_params (comm->c_coll_selected_data->com_rules[BARRIER],
|
||||
0, &faninout, &segsize);
|
||||
|
||||
if (alg) { /* we have found a valid choice from the file based rules for this message size */
|
||||
return mca_coll_tuned_barrier_intra_do_this (comm,
|
||||
alg, faninout, segsize);
|
||||
} /* found a method */
|
||||
} /*end if any com rules to check */
|
||||
|
||||
if (mca_coll_tuned_barrier_forced_choice) {
|
||||
return mca_coll_tuned_barrier_intra_do_forced (comm);
|
||||
@ -138,17 +183,28 @@ int mca_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count,
|
||||
struct ompi_datatype_t *datatype, int root,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
int i;
|
||||
int size;
|
||||
int rank;
|
||||
int err;
|
||||
int contig;
|
||||
int dsize;
|
||||
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:bcast_intra_dec_dynamic"));
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
/* check to see if we have some filebased rules */
|
||||
if (comm->c_coll_selected_data->com_rules[BCAST]) {
|
||||
|
||||
/* we do, so calc the message size or what ever we need and use this for the evaluation */
|
||||
int alg, faninout, segsize;
|
||||
int dsize;
|
||||
|
||||
ompi_ddt_type_size (datatype, &dsize);
|
||||
dsize *= count;
|
||||
|
||||
alg = coll_tuned_get_target_method_params (comm->c_coll_selected_data->com_rules[BCAST],
|
||||
dsize, &faninout, &segsize);
|
||||
|
||||
if (alg) { /* we have found a valid choice from the file based rules for this message size */
|
||||
return mca_coll_tuned_bcast_intra_do_this (buff, count, datatype, root, comm,
|
||||
alg, faninout, segsize);
|
||||
} /* found a method */
|
||||
} /*end if any com rules to check */
|
||||
|
||||
|
||||
if (mca_coll_tuned_bcast_forced_choice) {
|
||||
return mca_coll_tuned_bcast_intra_do_forced (buff, count, datatype, root, comm);
|
||||
@ -172,17 +228,28 @@ int mca_coll_tuned_reduce_intra_dec_dynamic( void *sendbuf, void *recvbuf,
|
||||
struct ompi_op_t* op, int root,
|
||||
struct ompi_communicator_t* comm)
|
||||
{
|
||||
int i;
|
||||
int size;
|
||||
int rank;
|
||||
int err;
|
||||
int contig;
|
||||
int dsize;
|
||||
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:reduce_intra_dec_dynamic"));
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
/* check to see if we have some filebased rules */
|
||||
if (comm->c_coll_selected_data->com_rules[REDUCE]) {
|
||||
|
||||
/* we do, so calc the message size or what ever we need and use this for the evaluation */
|
||||
int alg, faninout, segsize;
|
||||
int dsize;
|
||||
int contig;
|
||||
|
||||
ompi_ddt_type_size (datatype, &dsize);
|
||||
dsize *= count;
|
||||
|
||||
alg = coll_tuned_get_target_method_params (comm->c_coll_selected_data->com_rules[REDUCE],
|
||||
dsize, &faninout, &segsize);
|
||||
|
||||
if (alg) { /* we have found a valid choice from the file based rules for this message size */
|
||||
return mca_coll_tuned_reduce_intra_do_this (sendbuf, recvbuf, count, datatype, op, root, comm,
|
||||
alg, faninout, segsize);
|
||||
} /* found a method */
|
||||
} /*end if any com rules to check */
|
||||
|
||||
if (mca_coll_tuned_reduce_forced_choice) {
|
||||
return mca_coll_tuned_reduce_intra_do_forced (sendbuf, recvbuf, count, datatype, op, root, comm);
|
||||
|
@ -43,17 +43,13 @@ mca_coll_tuned_allreduce_intra_dec_fixed (void *sbuf, void *rbuf, int count,
|
||||
struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
int i;
|
||||
int size;
|
||||
int rank;
|
||||
int err;
|
||||
int contig;
|
||||
int dsize;
|
||||
/* int size; */
|
||||
/* int contig; */
|
||||
/* int dsize; */
|
||||
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream, "mca_coll_tuned_allreduce_intra_dec_fixed"));
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
/* size = ompi_comm_size(comm); */
|
||||
|
||||
return (mca_coll_tuned_allreduce_intra_nonoverlapping (sbuf, rbuf, count, dtype, op, comm));
|
||||
|
||||
@ -74,11 +70,9 @@ int mca_coll_tuned_alltoall_intra_dec_fixed(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
int i;
|
||||
int size;
|
||||
int rank;
|
||||
int err;
|
||||
int contig;
|
||||
int dsize;
|
||||
MPI_Aint sext;
|
||||
long lb;
|
||||
@ -123,17 +117,11 @@ int mca_coll_tuned_alltoall_intra_dec_fixed(void *sbuf, int scount,
|
||||
*/
|
||||
int mca_coll_tuned_barrier_intra_dec_fixed(struct ompi_communicator_t *comm)
|
||||
{
|
||||
int i;
|
||||
int size;
|
||||
int rank;
|
||||
int err;
|
||||
int contig;
|
||||
int dsize;
|
||||
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream, "mca_coll_tuned_barrier_intra_dec_fixed"));
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
if (2==size)
|
||||
return mca_coll_tuned_barrier_intra_two_procs(comm);
|
||||
@ -157,11 +145,9 @@ int mca_coll_tuned_bcast_intra_dec_fixed(void *buff, int count,
|
||||
struct ompi_datatype_t *datatype, int root,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
int i;
|
||||
int size;
|
||||
int rank;
|
||||
int err;
|
||||
int contig;
|
||||
int msgsize;
|
||||
MPI_Aint ext;
|
||||
long lb;
|
||||
@ -227,11 +213,10 @@ int mca_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf,
|
||||
struct ompi_op_t* op, int root,
|
||||
struct ompi_communicator_t* comm)
|
||||
{
|
||||
int i;
|
||||
int size;
|
||||
int rank;
|
||||
int err;
|
||||
int contig;
|
||||
/* int contig; */
|
||||
int msgsize;
|
||||
MPI_Aint ext;
|
||||
long lb;
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include "mca/coll/coll.h"
|
||||
#include "request/request.h"
|
||||
#include "mca/pml/pml.h"
|
||||
#include "coll_tuned.h"
|
||||
|
||||
/* need to include our own topo prototypes so we can malloc data on the comm correctly */
|
||||
#include "coll_tuned_topo.h"
|
||||
@ -64,9 +65,8 @@ int coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** rules
|
||||
int NCS;
|
||||
int CS;
|
||||
int NMS;
|
||||
int MS, ALG, SS;
|
||||
int x, ci, ncs, cs, nms;
|
||||
int i;
|
||||
int MS, ALG, FANINOUT, SS;
|
||||
int x, ncs, nms;
|
||||
|
||||
ompi_coll_alg_rule_t *alg_rules = (ompi_coll_alg_rule_t*) NULL; /* complete table of rules */
|
||||
|
||||
@ -81,23 +81,23 @@ int coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** rules
|
||||
int total_msg_count = 0;
|
||||
|
||||
if (!fname) {
|
||||
fprintf(stderr,"Gave NULL as rule table configuration file for tuned collectives... ignoring!\n");
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"Gave NULL as rule table configuration file for tuned collectives... ignoring!\n"));
|
||||
return (-1);
|
||||
}
|
||||
|
||||
if (!rules) {
|
||||
fprintf(stderr,"Gave NULL as rule table result ptr!... ignoring!\n");
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"Gave NULL as rule table result ptr!... ignoring!\n"));
|
||||
return (-2);
|
||||
}
|
||||
|
||||
if (n_collectives<1) {
|
||||
fprintf(stderr,"Gave %d as max number of collectives in the rule table configuration file for tuned collectives!... ignoring!\n", n_collectives);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"Gave %d as max number of collectives in the rule table configuration file for tuned collectives!... ignoring!\n", n_collectives));
|
||||
return (-3);
|
||||
}
|
||||
|
||||
fptr = fopen (fname, "r");
|
||||
if (!fptr) {
|
||||
fprintf(stderr,"cannot read rules file [%s]\n", fname);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"cannot read rules file [%s]\n", fname));
|
||||
goto on_file_error;
|
||||
}
|
||||
|
||||
@ -106,11 +106,11 @@ int coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** rules
|
||||
|
||||
X = getnext(fptr);
|
||||
if (X<0) {
|
||||
fprintf(stderr,"Could not read number of collectives in configuration file around line %d\n", fileline);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"Could not read number of collectives in configuration file around line %d\n", fileline));
|
||||
goto on_file_error;
|
||||
}
|
||||
if (X>n_collectives) {
|
||||
fprintf(stderr,"Number of collectives in configuration file %d is greater than number of MPI collectives possible %d ??? error around line %d\n", X, n_collectives, fileline);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"Number of collectives in configuration file %d is greater than number of MPI collectives possible %d ??? error around line %d\n", X, n_collectives, fileline));
|
||||
goto on_file_error;
|
||||
}
|
||||
|
||||
@ -118,16 +118,16 @@ int coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** rules
|
||||
|
||||
CI = getnext (fptr);
|
||||
if (CI<0) {
|
||||
fprintf(stderr,"Could not read next Collective id in configuration file around line %d\n", fileline);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"Could not read next Collective id in configuration file around line %d\n", fileline));
|
||||
goto on_file_error;
|
||||
}
|
||||
if (CI>=n_collectives) {
|
||||
fprintf(stderr,"Collective id in configuration file %d is greater than MPI collectives possible %d. Error around line %d\n", CI, n_collectives, fileline);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"Collective id in configuration file %d is greater than MPI collectives possible %d. Error around line %d\n", CI, n_collectives, fileline));
|
||||
goto on_file_error;
|
||||
}
|
||||
|
||||
if (alg_rules[CI].alg_rule_id != CI) {
|
||||
printf("Internal error in handling collective ID %d\n", CI);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream, "Internal error in handling collective ID %d\n", CI));
|
||||
coll_tuned_free_all_rules (*rules, n_collectives);
|
||||
return (-4);
|
||||
}
|
||||
@ -140,7 +140,7 @@ int coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** rules
|
||||
|
||||
NCS = getnext (fptr);
|
||||
if (NCS<0) {
|
||||
fprintf(stderr,"Couldnot read count of communicators for collective ID %d at around line %d\n", CI, fileline);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"Could not read count of communicators for collective ID %d at around line %d\n", CI, fileline));
|
||||
goto on_file_error;
|
||||
}
|
||||
|
||||
@ -153,7 +153,7 @@ int coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** rules
|
||||
|
||||
CS = getnext (fptr);
|
||||
if (CS<0) {
|
||||
fprintf(stderr,"Couldnot read communicator size for collective ID %d com rule %d at around line %d\n", CI, ncs, fileline);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"Could not read communicator size for collective ID %d com rule %d at around line %d\n", CI, ncs, fileline));
|
||||
goto on_file_error;
|
||||
}
|
||||
|
||||
@ -161,7 +161,7 @@ int coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** rules
|
||||
|
||||
NMS = getnext (fptr);
|
||||
if (NMS<0) {
|
||||
fprintf(stderr,"Couldnot read number of message sizes for collective ID %d com rule %d at around line %d\n", CI, ncs, fileline);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"Could not read number of message sizes for collective ID %d com rule %d at around line %d\n", CI, ncs, fileline));
|
||||
goto on_file_error;
|
||||
}
|
||||
|
||||
@ -176,28 +176,35 @@ int coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** rules
|
||||
|
||||
MS = getnext (fptr);
|
||||
if (MS<0) {
|
||||
fprintf(stderr,"Couldnot read message size for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"Could not read message size for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
|
||||
goto on_file_error;
|
||||
}
|
||||
msg_p->msg_size = MS;
|
||||
|
||||
ALG = getnext (fptr);
|
||||
if (ALG<0) {
|
||||
fprintf(stderr,"Couldnot read target algorithm method for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"Could not read target algorithm method for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
|
||||
goto on_file_error;
|
||||
}
|
||||
msg_p->result_alg = ALG;
|
||||
|
||||
FANINOUT = getnext (fptr);
|
||||
if (FANINOUT<0) {
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"Could not read fan in/out topo for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
|
||||
goto on_file_error;
|
||||
}
|
||||
msg_p->result_topo_faninout = FANINOUT;
|
||||
|
||||
SS = getnext (fptr);
|
||||
if (SS<0) {
|
||||
fprintf(stderr,"Couldnot read target segment size for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"Could not read target segment size for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
|
||||
goto on_file_error;
|
||||
}
|
||||
msg_p->result_segsize = SS;
|
||||
|
||||
if (!nms && MS) {
|
||||
fprintf(stderr,"All algorithms must specify a rule for message size of zero upwards always first!\n");
|
||||
fprintf(stderr,"Message size was %d for collective ID %d com rule %d msg rule %d at around line %d\n", MS, CI, ncs, nms, fileline);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"All algorithms must specify a rule for message size of zero upwards always first!\n"));
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"Message size was %d for collective ID %d com rule %d msg rule %d at around line %d\n", MS, CI, ncs, nms, fileline));
|
||||
goto on_file_error;
|
||||
}
|
||||
|
||||
@ -215,12 +222,11 @@ int coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** rules
|
||||
|
||||
fclose (fptr);
|
||||
|
||||
printf("\nStats\n");
|
||||
printf("Collectives with rules\t\t\t: %5d\n", total_alg_count);
|
||||
printf("Communicator sizes with rules\t\t: %5d\n", total_com_count);
|
||||
printf("Message sizes with rules\t\t: %5d\n", total_msg_count);
|
||||
printf("Lines in configuration file read\t\t: %5d\n", fileline);
|
||||
printf("\n");
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"\nConfigure file Stats\n"));
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"Collectives with rules\t\t\t: %5d\n", total_alg_count));
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"Communicator sizes with rules\t\t: %5d\n", total_com_count));
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"Message sizes with rules\t\t: %5d\n", total_msg_count));
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"Lines in configuration file read\t\t: %5d\n", fileline));
|
||||
|
||||
/* return the rules to the caller */
|
||||
*rules = alg_rules;
|
||||
@ -234,10 +240,10 @@ on_file_error:
|
||||
/* we return back a verbose message and a count of -1 algorithms read */
|
||||
/* draconian but it's better than having a bad collective decision table */
|
||||
|
||||
fprintf(stderr,"read_rules_config_file: bad configure file [%s]. Read afar as line %d\n", fname, fileline);
|
||||
fprintf(stderr,"Ignoring user supplied tuned collectives configuration decision file.\n");
|
||||
fprintf(stderr,"Switching back to [compiled in] fixed decision table.\n");
|
||||
fprintf(stderr,"Fix errors as listed above and try again.\n");
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"read_rules_config_file: bad configure file [%s]. Read afar as line %d\n", fname, fileline));
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"Ignoring user supplied tuned collectives configuration decision file.\n"));
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"Switching back to [compiled in] fixed decision table.\n"));
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"Fix errors as listed above and try again.\n"));
|
||||
|
||||
/* deallocate memory if allocated */
|
||||
if (alg_rules) coll_tuned_free_all_rules (alg_rules, n_collectives);
|
||||
|
@ -23,7 +23,10 @@
|
||||
#include "mca/mca.h"
|
||||
#include "mca/coll/coll.h"
|
||||
#include "request/request.h"
|
||||
#include "mca/pml/pml.h"
|
||||
#include "ompi/include/constants.h"
|
||||
#include "datatype/datatype.h"
|
||||
#include "communicator/communicator.h"
|
||||
#include "coll_tuned.h"
|
||||
|
||||
/* need to include our own topo prototypes so we can malloc data on the comm correctly */
|
||||
#include "coll_tuned_topo.h"
|
||||
@ -35,6 +38,12 @@
|
||||
#include <stdio.h>
|
||||
|
||||
|
||||
|
||||
|
||||
#include "coll_tuned_util.h"
|
||||
|
||||
|
||||
|
||||
ompi_coll_alg_rule_t* coll_tuned_mk_alg_rules (int n_alg)
|
||||
{
|
||||
int i;
|
||||
@ -83,9 +92,10 @@ ompi_coll_msg_rule_t* coll_tuned_mk_msg_rules (int n_msg_rules, int alg_rule_id,
|
||||
msg_rules[i].alg_rule_id = alg_rule_id;
|
||||
msg_rules[i].com_rule_id = com_rule_id;
|
||||
msg_rules[i].msg_rule_id = i;
|
||||
msg_rules[i].msg_size = 0; /* unknown */
|
||||
msg_rules[i].result_alg = 0; /* unknown */
|
||||
msg_rules[i].result_segsize = 0; /* unknown */
|
||||
msg_rules[i].msg_size = 0; /* unknown */
|
||||
msg_rules[i].result_alg = 0; /* unknown */
|
||||
msg_rules[i].result_topo_faninout = 0; /* unknown */
|
||||
msg_rules[i].result_segsize = 0; /* unknown */
|
||||
}
|
||||
return (msg_rules);
|
||||
}
|
||||
@ -100,14 +110,15 @@ ompi_coll_msg_rule_t* coll_tuned_mk_msg_rules (int n_msg_rules, int alg_rule_id,
|
||||
int coll_tuned_dump_msg_rule (ompi_coll_msg_rule_t* msg_p)
|
||||
{
|
||||
if (!msg_p) {
|
||||
fprintf(stderr,"Message rule was a NULL ptr?!\n");
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"Message rule was a NULL ptr?!\n"));
|
||||
return (-1);
|
||||
}
|
||||
|
||||
printf("alg_id %3d\tcom_id %3d\tcom_size %3d\tmsg_id %3d\t", msg_p->alg_rule_id, msg_p->com_rule_id,
|
||||
msg_p->mpi_comsize, msg_p->msg_rule_id);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"alg_id %3d\tcom_id %3d\tcom_size %3d\tmsg_id %3d\t", msg_p->alg_rule_id,
|
||||
msg_p->com_rule_id, msg_p->mpi_comsize, msg_p->msg_rule_id));
|
||||
|
||||
printf("msg_size %6d -> algorithm %2d\tsegsize %5d\n", msg_p->msg_size, msg_p->result_alg, msg_p->result_segsize);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"msg_size %6d -> algorithm %2d\ttopo in/out %2d\tsegsize %5ld\n",
|
||||
msg_p->msg_size, msg_p->result_alg, msg_p->result_topo_faninout, msg_p->result_segsize));
|
||||
|
||||
return (0);
|
||||
}
|
||||
@ -116,21 +127,20 @@ int coll_tuned_dump_msg_rule (ompi_coll_msg_rule_t* msg_p)
|
||||
int coll_tuned_dump_com_rule (ompi_coll_com_rule_t* com_p)
|
||||
{
|
||||
int i;
|
||||
ompi_coll_msg_rule_t* msg_p;
|
||||
|
||||
if (!com_p) {
|
||||
fprintf(stderr,"Com rule was a NULL ptr?!\n");
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"Com rule was a NULL ptr?!\n"));
|
||||
return (-1);
|
||||
}
|
||||
|
||||
printf("alg_id %3d\tcom_id %3d\tcom_size %3d\t", com_p->alg_rule_id, com_p->com_rule_id, com_p->mpi_comsize);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream, "alg_id %3d\tcom_id %3d\tcom_size %3d\t", com_p->alg_rule_id, com_p->com_rule_id, com_p->mpi_comsize));
|
||||
|
||||
if (!com_p->n_msg_sizes) {
|
||||
printf("no msgsizes defined\n");
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"no msgsizes defined\n"));
|
||||
return (0);
|
||||
}
|
||||
|
||||
printf("number of message sizes %3d\n", com_p->n_msg_sizes);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"number of message sizes %3d\n", com_p->n_msg_sizes));
|
||||
|
||||
for (i=0;i<com_p->n_msg_sizes;i++) {
|
||||
coll_tuned_dump_msg_rule (&(com_p->msg_rules[i]));
|
||||
@ -143,21 +153,20 @@ int coll_tuned_dump_com_rule (ompi_coll_com_rule_t* com_p)
|
||||
int coll_tuned_dump_alg_rule (ompi_coll_alg_rule_t* alg_p)
|
||||
{
|
||||
int i;
|
||||
ompi_coll_com_rule_t* com_p;
|
||||
|
||||
if (!alg_p) {
|
||||
fprintf(stderr,"Algorithm rule was a NULL ptr?!\n");
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"Algorithm rule was a NULL ptr?!\n"));
|
||||
return (-1);
|
||||
}
|
||||
|
||||
printf("alg_id %3d\t", alg_p->alg_rule_id);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"alg_id %3d\t", alg_p->alg_rule_id));
|
||||
|
||||
if (!alg_p->n_com_sizes) {
|
||||
printf("no coms defined\n");
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"no coms defined\n"));
|
||||
return (0);
|
||||
}
|
||||
|
||||
printf("number of com sizes %3d\n", alg_p->n_com_sizes);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"number of com sizes %3d\n", alg_p->n_com_sizes));
|
||||
|
||||
for (i=0;i<alg_p->n_com_sizes;i++) {
|
||||
coll_tuned_dump_com_rule (&(alg_p->com_rules[i]));
|
||||
@ -172,16 +181,17 @@ int coll_tuned_dump_all_rules (ompi_coll_alg_rule_t* alg_p, int n_rules)
|
||||
int i;
|
||||
|
||||
if (!alg_p) {
|
||||
fprintf(stderr,"Algorithm rule was a NULL ptr?!\n");
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"Algorithm rule was a NULL ptr?!\n"));
|
||||
return (-1);
|
||||
}
|
||||
|
||||
printf("Number of algorithm rules %3d\n", n_rules);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"Number of algorithm rules %3d\n", n_rules));
|
||||
|
||||
for (i=0;i<n_rules;i++) {
|
||||
coll_tuned_dump_alg_rule (&(alg_p[i]));
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
||||
@ -197,7 +207,7 @@ int coll_tuned_free_msg_rules_in_com_rule (ompi_coll_com_rule_t* com_p)
|
||||
ompi_coll_msg_rule_t* msg_p;
|
||||
|
||||
if (!com_p) {
|
||||
fprintf(stderr,"attempt to free NULL com_rule ptr\n");
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"attempt to free NULL com_rule ptr\n"));
|
||||
return (-1);
|
||||
}
|
||||
|
||||
@ -205,7 +215,7 @@ int coll_tuned_free_msg_rules_in_com_rule (ompi_coll_com_rule_t* com_p)
|
||||
msg_p = com_p->msg_rules;
|
||||
|
||||
if (!msg_p) {
|
||||
fprintf(stderr,"attempt to free NULL msg_rules when msg count was %d\n", com_p->n_msg_sizes);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"attempt to free NULL msg_rules when msg count was %d\n", com_p->n_msg_sizes));
|
||||
rc = -1; /* some error */
|
||||
}
|
||||
else {
|
||||
@ -230,7 +240,7 @@ int coll_tuned_free_coms_in_alg_rule (ompi_coll_alg_rule_t* alg_p)
|
||||
ompi_coll_com_rule_t* com_p;
|
||||
|
||||
if (!alg_p) {
|
||||
fprintf(stderr,"attempt to free NULL alg_rule ptr\n");
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"attempt to free NULL alg_rule ptr\n"));
|
||||
return (-1);
|
||||
}
|
||||
|
||||
@ -238,7 +248,7 @@ int coll_tuned_free_coms_in_alg_rule (ompi_coll_alg_rule_t* alg_p)
|
||||
com_p = alg_p->com_rules;
|
||||
|
||||
if (!com_p) {
|
||||
fprintf(stderr,"attempt to free NULL com_rules when com count was %d\n", alg_p->n_com_sizes);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"attempt to free NULL com_rules when com count was %d\n", alg_p->n_com_sizes));
|
||||
}
|
||||
else {
|
||||
/* ok, memory exists for the com rules so free their message rules first */
|
||||
@ -275,7 +285,7 @@ int coll_tuned_free_all_rules (ompi_coll_alg_rule_t* alg_p, int n_algs)
|
||||
|
||||
/*
|
||||
* query functions
|
||||
* i.e. the functions that get me the algorithm and segment size fast
|
||||
* i.e. the functions that get me the algorithm, topo fanin/out and segment size fast
|
||||
* and also get the rules that are needed by each communicator as needed
|
||||
*
|
||||
*/
|
||||
@ -313,15 +323,15 @@ ompi_coll_com_rule_t* coll_tuned_get_com_rule_ptr (ompi_coll_alg_rule_t* rules,
|
||||
i = best = 0;
|
||||
|
||||
while (i<alg_p->n_com_sizes) {
|
||||
printf("checking comsize %d against alg_id %d com_id %d index %d com_size %d",
|
||||
mpi_comsize, com_p->alg_rule_id, com_p->com_rule_id, i, com_p->mpi_comsize);
|
||||
/* OPAL_OUTPUT((mca_coll_tuned_stream,"checking comsize %d against alg_id %d com_id %d index %d com_size %d", */
|
||||
/* mpi_comsize, com_p->alg_rule_id, com_p->com_rule_id, i, com_p->mpi_comsize)); */
|
||||
if (com_p->mpi_comsize <= mpi_comsize) {
|
||||
best = i;
|
||||
best_com_p = com_p;
|
||||
printf(":ok\n");
|
||||
/* OPAL_OUTPUT((mca_coll_tuned_stream(":ok\n")); */
|
||||
}
|
||||
else {
|
||||
printf(":nop\n");
|
||||
/* OPAL_OUTPUT((mca_coll_tuned_stream(":nop\n")); */
|
||||
break;
|
||||
}
|
||||
/* go to the next entry */
|
||||
@ -329,7 +339,7 @@ ompi_coll_com_rule_t* coll_tuned_get_com_rule_ptr (ompi_coll_alg_rule_t* rules,
|
||||
i++;
|
||||
}
|
||||
|
||||
printf("Selected the following com rule id %d\n", best_com_p->com_rule_id);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"Selected the following com rule id %d\n", best_com_p->com_rule_id));
|
||||
coll_tuned_dump_com_rule (best_com_p);
|
||||
|
||||
return (best_com_p);
|
||||
@ -338,19 +348,20 @@ ompi_coll_com_rule_t* coll_tuned_get_com_rule_ptr (ompi_coll_alg_rule_t* rules,
|
||||
/*
|
||||
* This function takes a com_rule ptr (from the communicators coll tuned data structure)
|
||||
* (Which is chosen for a particular MPI collective)
|
||||
* and a (total_)msg_size and it returns (0) and a algorithm to use and a recommended segment size
|
||||
* and a (total_)msg_size and it returns (0) and an algorithm to use and a recommended topo faninout and segment size
|
||||
* all based on the user supplied rules
|
||||
*
|
||||
* Just like the above functions it uses a less than or equal msg size
|
||||
* (hence config file must have a default defined for '0' if we reach this point)
|
||||
* else if no rules match we return '0' + '0' or used fixed decision table with no segmentation
|
||||
* else if no rules match we return '0' + '0,0' or used fixed decision table with no topo change and no segmentation
|
||||
* of users data.. shame.
|
||||
*
|
||||
* On error return 0 so we default to fixed rules anyway :)
|
||||
*
|
||||
*/
|
||||
|
||||
int coll_tuned_get_target_method_params (ompi_coll_com_rule_t* base_com_rule, int mpi_msgsize, int* result_segsize)
|
||||
int coll_tuned_get_target_method_params (ompi_coll_com_rule_t* base_com_rule, int mpi_msgsize, int *result_topo_faninout,
|
||||
int* result_segsize)
|
||||
{
|
||||
ompi_coll_msg_rule_t* msg_p = (ompi_coll_msg_rule_t*) NULL;
|
||||
ompi_coll_msg_rule_t* best_msg_p = (ompi_coll_msg_rule_t*) NULL;
|
||||
@ -360,6 +371,10 @@ int coll_tuned_get_target_method_params (ompi_coll_com_rule_t* base_com_rule, in
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (!result_topo_faninout) {
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (!result_segsize) {
|
||||
return (0);
|
||||
}
|
||||
@ -375,15 +390,15 @@ int coll_tuned_get_target_method_params (ompi_coll_com_rule_t* base_com_rule, in
|
||||
i = best = 0;
|
||||
|
||||
while (i<base_com_rule->n_msg_sizes) {
|
||||
printf("checking mpi_msgsize %d against com_id %d msg_id %d index %d msg_size %d",
|
||||
mpi_msgsize, msg_p->com_rule_id, msg_p->msg_rule_id, i, msg_p->msg_size);
|
||||
/* OPAL_OUTPUT((mca_coll_tuned_stream,"checking mpi_msgsize %d against com_id %d msg_id %d index %d msg_size %d", */
|
||||
/* mpi_msgsize, msg_p->com_rule_id, msg_p->msg_rule_id, i, msg_p->msg_size)); */
|
||||
if (msg_p->msg_size <= mpi_msgsize) {
|
||||
best = i;
|
||||
best_msg_p = msg_p;
|
||||
printf(":ok\n");
|
||||
/* OPAL_OUTPUT((mca_coll_tuned_stream(":ok\n")); */
|
||||
}
|
||||
else {
|
||||
printf(":nop\n");
|
||||
/* OPAL_OUTPUT((mca_coll_tuned_stream(":nop\n")); */
|
||||
break;
|
||||
}
|
||||
/* go to the next entry */
|
||||
@ -391,11 +406,16 @@ int coll_tuned_get_target_method_params (ompi_coll_com_rule_t* base_com_rule, in
|
||||
i++;
|
||||
}
|
||||
|
||||
printf("Selected the following msg rule id %d\n", best_msg_p->msg_rule_id);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"Selected the following msg rule id %d\n", best_msg_p->msg_rule_id));
|
||||
coll_tuned_dump_msg_rule (best_msg_p);
|
||||
|
||||
/* return the topology fan in/out */
|
||||
*result_topo_faninout = best_msg_p->result_topo_faninout;
|
||||
|
||||
/* return the segment size */
|
||||
*result_segsize = best_msg_p->result_segsize;
|
||||
return (best_msg_p->result_alg);
|
||||
|
||||
/* return the algorithm/method to use */
|
||||
return (best_msg_p->result_alg);
|
||||
}
|
||||
|
||||
|
@ -46,8 +46,9 @@ typedef struct msg_rule_s {
|
||||
int msg_size; /* message size */
|
||||
|
||||
/* RESULT */
|
||||
int result_alg; /* result algorithm to use */
|
||||
long result_segsize; /* result segment size to use */
|
||||
int result_alg; /* result algorithm to use */
|
||||
int result_topo_faninout; /* result topology fan in/out to use (if applicable) */
|
||||
long result_segsize; /* result segment size to use */
|
||||
|
||||
} ompi_coll_msg_rule_t;
|
||||
|
||||
@ -97,8 +98,11 @@ int coll_tuned_free_all_rules (ompi_coll_alg_rule_t* alg_p, int n_algs);
|
||||
|
||||
|
||||
/* the IMPORTANT routines, i.e. the ones that do stuff for everyday communicators and collective calls */
|
||||
|
||||
ompi_coll_com_rule_t* coll_tuned_get_com_rule_ptr (ompi_coll_alg_rule_t* rules, int alg_id, int mpi_comsize);
|
||||
int coll_tuned_get_target_method_params (ompi_coll_com_rule_t* base_com_rule, int mpi_msgsize, int* result_segsize);
|
||||
|
||||
int coll_tuned_get_target_method_params (ompi_coll_com_rule_t* base_com_rule, int mpi_msgsize,
|
||||
int* result_topo_faninout, int* result_segsize);
|
||||
|
||||
|
||||
|
||||
|
@ -537,3 +537,28 @@ switch (mca_coll_tuned_reduce_forced_choice) {
|
||||
|
||||
}
|
||||
|
||||
|
||||
/*
 * Run one specific intra-communicator reduce implementation, selected by
 * the caller rather than by the forced-choice MCA parameters.
 *
 * sbuf, rbuf, count, dtype, op, root, comm
 *              are passed through unchanged to the selected implementation.
 * choice       algorithm id: 0 = fixed decision function, 1 = basic linear,
 *              2 = chain, 3 = pipeline.
 * faninout     topology fan in/out; only forwarded to the chain algorithm.
 * segsize      segment size; forwarded to the chain and pipeline algorithms.
 *
 * Returns whatever the selected implementation returns, or MPI_ERR_ARG when
 * choice does not match any case below.
 */
int mca_coll_tuned_reduce_intra_do_this(void *sbuf, void* rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op, int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
int choice, int faninout, int segsize)
|
||||
{
|
||||
/* record the requested algorithm and its tuning parameters on the tuned output stream */
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:reduce_intra_do_this selected algorithm %d topo faninout %d segsize %d",
|
||||
choice, faninout, segsize));
|
||||
|
||||
switch (choice) {
|
||||
/* 0 and 1 take neither faninout nor segsize */
case (0): return mca_coll_tuned_reduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, root, comm);
|
||||
case (1): return mca_coll_tuned_reduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, root, comm);
|
||||
/* note the argument order: chain is called with segsize first, then the fan in/out */
case (2): return mca_coll_tuned_reduce_intra_chain (sbuf, rbuf, count, dtype, op, root, comm,
|
||||
segsize, faninout);
|
||||
/* pipeline only receives the segment size; faninout is not used here */
case (3): return mca_coll_tuned_reduce_intra_pipeline (sbuf, rbuf, count, dtype, op, root, comm,
|
||||
segsize);
|
||||
default:
|
||||
/* unknown id: report it together with the upper bound printed from
   mca_coll_tuned_reduce_intra_query() (presumably the highest valid
   algorithm id -- confirm against its definition) */
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:reduce_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
|
||||
choice, mca_coll_tuned_reduce_intra_query()));
|
||||
return (MPI_ERR_ARG);
|
||||
} /* switch */
|
||||
|
||||
}
|
||||
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user