1
1

Code cleaning of the tuned module.

Этот коммит содержится в:
George Bosilca 2016-10-25 11:30:11 -04:00
родитель 582f290519
Коммит 253eb80e26
13 изменённых файлов: 76 добавлений и 479 удалений

Просмотреть файл

@ -93,77 +93,66 @@ ompi_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority);
/* All Gather */
int ompi_coll_tuned_allgather_intra_dec_fixed(ALLGATHER_ARGS);
int ompi_coll_tuned_allgather_intra_dec_dynamic(ALLGATHER_ARGS);
int ompi_coll_tuned_allgather_intra_do_forced(ALLGATHER_ARGS);
int ompi_coll_tuned_allgather_intra_do_this(ALLGATHER_ARGS, int algorithm, int faninout, int segsize);
int ompi_coll_tuned_allgather_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
/* All GatherV */
int ompi_coll_tuned_allgatherv_intra_dec_fixed(ALLGATHERV_ARGS);
int ompi_coll_tuned_allgatherv_intra_dec_dynamic(ALLGATHERV_ARGS);
int ompi_coll_tuned_allgatherv_intra_do_forced(ALLGATHERV_ARGS);
int ompi_coll_tuned_allgatherv_intra_do_this(ALLGATHERV_ARGS, int algorithm, int faninout, int segsize);
int ompi_coll_tuned_allgatherv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
/* All Reduce */
int ompi_coll_tuned_allreduce_intra_dec_fixed(ALLREDUCE_ARGS);
int ompi_coll_tuned_allreduce_intra_dec_dynamic(ALLREDUCE_ARGS);
int ompi_coll_tuned_allreduce_intra_do_forced(ALLREDUCE_ARGS);
int ompi_coll_tuned_allreduce_intra_do_this(ALLREDUCE_ARGS, int algorithm, int faninout, int segsize);
int ompi_coll_tuned_allreduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
/* AlltoAll */
int ompi_coll_tuned_alltoall_intra_dec_fixed(ALLTOALL_ARGS);
int ompi_coll_tuned_alltoall_intra_dec_dynamic(ALLTOALL_ARGS);
int ompi_coll_tuned_alltoall_intra_do_forced(ALLTOALL_ARGS);
int ompi_coll_tuned_alltoall_intra_do_this(ALLTOALL_ARGS, int algorithm, int faninout, int segsize, int max_requests);
int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
/* AlltoAllV */
int ompi_coll_tuned_alltoallv_intra_dec_fixed(ALLTOALLV_ARGS);
int ompi_coll_tuned_alltoallv_intra_dec_dynamic(ALLTOALLV_ARGS);
int ompi_coll_tuned_alltoallv_intra_do_forced(ALLTOALLV_ARGS);
int ompi_coll_tuned_alltoallv_intra_do_this(ALLTOALLV_ARGS, int algorithm);
int ompi_coll_tuned_alltoallv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
/* Barrier */
int ompi_coll_tuned_barrier_intra_dec_fixed(BARRIER_ARGS);
int ompi_coll_tuned_barrier_intra_dec_dynamic(BARRIER_ARGS);
int ompi_coll_tuned_barrier_intra_do_forced(BARRIER_ARGS);
int ompi_coll_tuned_barrier_intra_do_this(BARRIER_ARGS, int algorithm, int faninout, int segsize);
int ompi_coll_tuned_barrier_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
/* Bcast */
int ompi_coll_tuned_bcast_intra_dec_fixed(BCAST_ARGS);
int ompi_coll_tuned_bcast_intra_dec_dynamic(BCAST_ARGS);
int ompi_coll_tuned_bcast_intra_do_forced(BCAST_ARGS);
int ompi_coll_tuned_bcast_intra_do_this(BCAST_ARGS, int algorithm, int faninout, int segsize);
int ompi_coll_tuned_bcast_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
/* Gather */
int ompi_coll_tuned_gather_intra_dec_fixed(GATHER_ARGS);
int ompi_coll_tuned_gather_intra_dec_dynamic(GATHER_ARGS);
int ompi_coll_tuned_gather_intra_do_forced(GATHER_ARGS);
int ompi_coll_tuned_gather_intra_do_this(GATHER_ARGS, int algorithm, int faninout, int segsize);
int ompi_coll_tuned_gather_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
/* Reduce */
int ompi_coll_tuned_reduce_intra_dec_fixed(REDUCE_ARGS);
int ompi_coll_tuned_reduce_intra_dec_dynamic(REDUCE_ARGS);
int ompi_coll_tuned_reduce_intra_do_forced(REDUCE_ARGS);
int ompi_coll_tuned_reduce_intra_do_this(REDUCE_ARGS, int algorithm, int faninout, int segsize, int max_oustanding_reqs);
int ompi_coll_tuned_reduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
/* Reduce_scatter */
int ompi_coll_tuned_reduce_scatter_intra_dec_fixed(REDUCESCATTER_ARGS);
int ompi_coll_tuned_reduce_scatter_intra_dec_dynamic(REDUCESCATTER_ARGS);
int ompi_coll_tuned_reduce_scatter_intra_do_forced(REDUCESCATTER_ARGS);
int ompi_coll_tuned_reduce_scatter_intra_do_this(REDUCESCATTER_ARGS, int algorithm, int faninout, int segsize);
int ompi_coll_tuned_reduce_scatter_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
/* Scatter */
int ompi_coll_tuned_scatter_intra_dec_fixed(SCATTER_ARGS);
int ompi_coll_tuned_scatter_intra_dec_dynamic(SCATTER_ARGS);
int ompi_coll_tuned_scatter_intra_do_forced(SCATTER_ARGS);
int ompi_coll_tuned_scatter_intra_do_this(SCATTER_ARGS, int algorithm, int faninout, int segsize);
int ompi_coll_tuned_scatter_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);

Просмотреть файл

@ -120,57 +120,6 @@ ompi_coll_tuned_allgather_intra_check_forced_init(coll_tuned_force_algorithm_mca
return (MPI_SUCCESS);
}
int ompi_coll_tuned_allgather_intra_do_forced(const void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgather_intra_do_forced selected algorithm %d",
tuned_module->user_forced[ALLGATHER].algorithm));
switch (tuned_module->user_forced[ALLGATHER].algorithm) {
case (0):
return ompi_coll_tuned_allgather_intra_dec_fixed(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (1):
return ompi_coll_base_allgather_intra_basic_linear(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (2):
return ompi_coll_base_allgather_intra_bruck(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (3):
return ompi_coll_base_allgather_intra_recursivedoubling(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (4):
return ompi_coll_base_allgather_intra_ring(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (5):
return ompi_coll_base_allgather_intra_neighborexchange(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (6):
return ompi_coll_base_allgather_intra_two_procs(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgather_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
tuned_module->user_forced[ALLGATHER].algorithm,
ompi_coll_tuned_forced_max_algorithms[ALLGATHER]));
return (MPI_ERR_ARG);
}
int ompi_coll_tuned_allgather_intra_do_this(const void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,

Просмотреть файл

@ -119,54 +119,6 @@ ompi_coll_tuned_allgatherv_intra_check_forced_init(coll_tuned_force_algorithm_mc
return (MPI_SUCCESS);
}
int ompi_coll_tuned_allgatherv_intra_do_forced(const void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, const int *rcounts,
const int *rdispls,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgatherv_intra_do_forced selected algorithm %d",
tuned_module->user_forced[ALLGATHERV].algorithm));
switch (tuned_module->user_forced[ALLGATHERV].algorithm) {
case (0):
return ompi_coll_tuned_allgatherv_intra_dec_fixed(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (1):
return ompi_coll_base_allgatherv_intra_basic_default(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (2):
return ompi_coll_base_allgatherv_intra_bruck(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (3):
return ompi_coll_base_allgatherv_intra_ring(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (4):
return ompi_coll_base_allgatherv_intra_neighborexchange(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (5):
return ompi_coll_base_allgatherv_intra_two_procs(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgatherv_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
tuned_module->user_forced[ALLGATHERV].algorithm,
ompi_coll_tuned_forced_max_algorithms[ALLGATHERV]));
return (MPI_ERR_ARG);
}
int ompi_coll_tuned_allgatherv_intra_do_this(const void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, const int *rcounts,

Просмотреть файл

@ -119,40 +119,6 @@ int ompi_coll_tuned_allreduce_intra_check_forced_init (coll_tuned_force_algorith
return (MPI_SUCCESS);
}
int ompi_coll_tuned_allreduce_intra_do_forced(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_forced selected algorithm %d, segment size %d",
tuned_module->user_forced[ALLREDUCE].algorithm,
tuned_module->user_forced[ALLREDUCE].segsize));
switch (tuned_module->user_forced[ALLREDUCE].algorithm) {
case (0):
return ompi_coll_tuned_allreduce_intra_dec_fixed(sbuf, rbuf, count, dtype, op, comm, module);
case (1):
return ompi_coll_base_allreduce_intra_basic_linear(sbuf, rbuf, count, dtype, op, comm, module);
case (2):
return ompi_coll_base_allreduce_intra_nonoverlapping(sbuf, rbuf, count, dtype, op, comm, module);
case (3):
return ompi_coll_base_allreduce_intra_recursivedoubling(sbuf, rbuf, count, dtype, op, comm, module);
case (4):
return ompi_coll_base_allreduce_intra_ring(sbuf, rbuf, count, dtype, op, comm, module);
case (5):
return ompi_coll_base_allreduce_intra_ring_segmented(sbuf, rbuf, count, dtype, op, comm, module, tuned_module->user_forced[ALLREDUCE].segsize);
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
tuned_module->user_forced[ALLREDUCE].algorithm,
ompi_coll_tuned_forced_max_algorithms[ALLREDUCE]));
return (MPI_ERR_ARG);
}
int ompi_coll_tuned_allreduce_intra_do_this(const void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,

Просмотреть файл

@ -139,41 +139,6 @@ int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm
return (MPI_SUCCESS);
}
int ompi_coll_tuned_alltoall_intra_do_forced(const void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_forced selected algorithm %d",
tuned_module->user_forced[ALLTOALL].algorithm));
switch (tuned_module->user_forced[ALLTOALL].algorithm) {
case (0):
return ompi_coll_tuned_alltoall_intra_dec_fixed (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
case (1):
return ompi_coll_base_alltoall_intra_basic_linear (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
case (2):
return ompi_coll_base_alltoall_intra_pairwise (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
case (3):
return ompi_coll_base_alltoall_intra_bruck (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
case (4):
return ompi_coll_base_alltoall_intra_linear_sync (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module,
tuned_module->user_forced[ALLTOALL].max_requests);
case (5):
return ompi_coll_base_alltoall_intra_two_procs (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
tuned_module->user_forced[ALLTOALL].algorithm, ompi_coll_tuned_forced_max_algorithms[ALLTOALL]));
return (MPI_ERR_ARG);
}
int ompi_coll_tuned_alltoall_intra_do_this(const void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,

Просмотреть файл

@ -84,43 +84,6 @@ int ompi_coll_tuned_alltoallv_intra_check_forced_init(coll_tuned_force_algorithm
return (MPI_SUCCESS);
}
int ompi_coll_tuned_alltoallv_intra_do_forced(const void *sbuf, const int *scounts, const int *sdisps,
struct ompi_datatype_t *sdtype,
void* rbuf, const int *rcounts, const int *rdisps,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:alltoallv_intra_do_forced selected algorithm %d",
tuned_module->user_forced[ALLTOALLV].algorithm));
switch (tuned_module->user_forced[ALLTOALLV].algorithm) {
case (0):
return ompi_coll_tuned_alltoallv_intra_dec_fixed(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps, rdtype,
comm, module);
case (1):
return ompi_coll_base_alltoallv_intra_basic_linear(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps, rdtype,
comm, module);
case (2):
return ompi_coll_base_alltoallv_intra_pairwise(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps, rdtype,
comm, module);
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:alltoallv_intra_do_forced attempt to "
"select algorithm %d when only 0-%d is valid.",
tuned_module->user_forced[ALLTOALLV].algorithm,
ompi_coll_tuned_forced_max_algorithms[ALLTOALLV]));
return (MPI_ERR_ARG);
}
/* If the user selects dynamic rules and specifies the algorithm to
* use, then this function is called. */
int ompi_coll_tuned_alltoallv_intra_do_this(const void *sbuf, const int *scounts, const int *sdisps,

Просмотреть файл

@ -85,33 +85,6 @@ int ompi_coll_tuned_barrier_intra_check_forced_init (coll_tuned_force_algorithm_
return (MPI_SUCCESS);
}
int ompi_coll_tuned_barrier_intra_do_forced(struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:barrier_intra_do_forced selected algorithm %d",
tuned_module->user_forced[BARRIER].algorithm));
switch (tuned_module->user_forced[BARRIER].algorithm) {
case (0): return ompi_coll_tuned_barrier_intra_dec_fixed(comm, module);
case (1): return ompi_coll_base_barrier_intra_basic_linear(comm, module);
case (2): return ompi_coll_base_barrier_intra_doublering(comm, module);
case (3): return ompi_coll_base_barrier_intra_recursivedoubling(comm, module);
case (4): return ompi_coll_base_barrier_intra_bruck(comm, module);
case (5): return ompi_coll_base_barrier_intra_two_procs(comm, module);
case (6): return ompi_coll_base_barrier_intra_tree(comm, module);
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
tuned_module->user_forced[BARRIER].algorithm,
ompi_coll_tuned_forced_max_algorithms[BARRIER]));
return (MPI_ERR_ARG);
}
int ompi_coll_tuned_barrier_intra_do_this (struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm, int faninout, int segsize)

Просмотреть файл

@ -118,46 +118,12 @@ int ompi_coll_tuned_bcast_intra_check_forced_init (coll_tuned_force_algorithm_mc
return (MPI_SUCCESS);
}
int ompi_coll_tuned_bcast_intra_do_forced(void *buf, int count,
struct ompi_datatype_t *dtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_forced algorithm %d",
tuned_module->user_forced[BCAST].algorithm));
switch (tuned_module->user_forced[BCAST].algorithm) {
case (0): return ompi_coll_tuned_bcast_intra_dec_fixed( buf, count, dtype, root, comm, module );
case (1): return ompi_coll_base_bcast_intra_basic_linear( buf, count, dtype, root, comm, module );
case (2): return ompi_coll_base_bcast_intra_chain( buf, count, dtype, root, comm, module,
tuned_module->user_forced[BCAST].segsize,
tuned_module->user_forced[BCAST].chain_fanout );
case (3): return ompi_coll_base_bcast_intra_pipeline( buf, count, dtype, root, comm, module,
tuned_module->user_forced[BCAST].segsize );
case (4): return ompi_coll_base_bcast_intra_split_bintree( buf, count, dtype, root, comm, module,
tuned_module->user_forced[BCAST].segsize );
case (5): return ompi_coll_base_bcast_intra_bintree( buf, count, dtype, root, comm, module,
tuned_module->user_forced[BCAST].segsize );
case (6): return ompi_coll_base_bcast_intra_binomial( buf, count, dtype, root, comm, module,
tuned_module->user_forced[BCAST].segsize );
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
tuned_module->user_forced[BCAST].algorithm, ompi_coll_tuned_forced_max_algorithms[BCAST]));
return (MPI_ERR_ARG);
}
int ompi_coll_tuned_bcast_intra_do_this(void *buf, int count,
struct ompi_datatype_t *dtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm, int faninout, int segsize)
{
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_this algorithm %d topo faninout %d segsize %d",
algorithm, faninout, segsize));

Просмотреть файл

@ -81,8 +81,10 @@ ompi_coll_tuned_allreduce_intra_dec_dynamic (const void *sbuf, void *rbuf, int c
} /*end if any com rules to check */
if (tuned_module->user_forced[ALLREDUCE].algorithm) {
return ompi_coll_tuned_allreduce_intra_do_forced (sbuf, rbuf, count, dtype, op,
comm, module);
return ompi_coll_tuned_allreduce_intra_do_this(sbuf, rbuf, count, dtype, op, comm, module,
tuned_module->user_forced[ALLREDUCE].algorithm,
tuned_module->user_forced[ALLREDUCE].tree_fanout,
tuned_module->user_forced[ALLREDUCE].segsize);
}
return ompi_coll_tuned_allreduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op,
comm, module);
@ -131,9 +133,13 @@ int ompi_coll_tuned_alltoall_intra_dec_dynamic(const void *sbuf, int scount,
} /*end if any com rules to check */
if (tuned_module->user_forced[ALLTOALL].algorithm) {
return ompi_coll_tuned_alltoall_intra_do_forced (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
return ompi_coll_tuned_alltoall_intra_do_this(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module,
tuned_module->user_forced[ALLTOALL].algorithm,
tuned_module->user_forced[ALLTOALL].tree_fanout,
tuned_module->user_forced[ALLTOALL].segsize,
tuned_module->user_forced[ALLTOALL].max_requests);
}
return ompi_coll_tuned_alltoall_intra_dec_fixed (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
@ -179,9 +185,10 @@ int ompi_coll_tuned_alltoallv_intra_dec_dynamic(const void *sbuf, const int *sco
} /*end if any com rules to check */
if (tuned_module->user_forced[ALLTOALLV].algorithm) {
return ompi_coll_tuned_alltoallv_intra_do_forced(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps, rdtype,
comm, module);
return ompi_coll_tuned_alltoallv_intra_do_this(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps, rdtype,
comm, module,
tuned_module->user_forced[ALLTOALLV].algorithm);
}
return ompi_coll_tuned_alltoallv_intra_dec_fixed(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps, rdtype,
@ -218,7 +225,10 @@ int ompi_coll_tuned_barrier_intra_dec_dynamic(struct ompi_communicator_t *comm,
} /*end if any com rules to check */
if (tuned_module->user_forced[BARRIER].algorithm) {
return ompi_coll_tuned_barrier_intra_do_forced (comm, module);
return ompi_coll_tuned_barrier_intra_do_this(comm, module,
tuned_module->user_forced[BARRIER].algorithm,
tuned_module->user_forced[BARRIER].tree_fanout,
tuned_module->user_forced[BARRIER].segsize);
}
return ompi_coll_tuned_barrier_intra_dec_fixed (comm, module);
}
@ -230,8 +240,8 @@ int ompi_coll_tuned_barrier_intra_dec_dynamic(struct ompi_communicator_t *comm,
* Accepts: - same arguments as MPI_Bcast()
* Returns: - MPI_SUCCESS or error code (passed from the bcast implementation)
*/
int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count,
struct ompi_datatype_t *datatype, int root,
int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buf, int count,
struct ompi_datatype_t *dtype, int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
@ -245,7 +255,7 @@ int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count,
int alg, faninout, segsize, ignoreme;
size_t dsize;
ompi_datatype_type_size (datatype, &dsize);
ompi_datatype_type_size (dtype, &dsize);
dsize *= count;
alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[BCAST],
@ -253,7 +263,7 @@ int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count,
if (alg) {
/* we have found a valid choice from the file based rules for this message size */
return ompi_coll_tuned_bcast_intra_do_this (buff, count, datatype, root,
return ompi_coll_tuned_bcast_intra_do_this (buf, count, dtype, root,
comm, module,
alg, faninout, segsize);
} /* found a method */
@ -261,10 +271,13 @@ int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count,
if (tuned_module->user_forced[BCAST].algorithm) {
return ompi_coll_tuned_bcast_intra_do_forced (buff, count, datatype, root,
comm, module);
return ompi_coll_tuned_bcast_intra_do_this(buf, count, dtype,
root, comm, module,
tuned_module->user_forced[BCAST].algorithm,
tuned_module->user_forced[BCAST].chain_fanout,
tuned_module->user_forced[BCAST].segsize);
}
return ompi_coll_tuned_bcast_intra_dec_fixed (buff, count, datatype, root,
return ompi_coll_tuned_bcast_intra_dec_fixed (buf, count, dtype, root,
comm, module);
}
@ -276,8 +289,8 @@ int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count,
* Returns: - MPI_SUCCESS or error code (passed from the reduce implementation)
*
*/
int ompi_coll_tuned_reduce_intra_dec_dynamic( const void *sendbuf, void *recvbuf,
int count, struct ompi_datatype_t* datatype,
int ompi_coll_tuned_reduce_intra_dec_dynamic( const void *sbuf, void *rbuf,
int count, struct ompi_datatype_t* dtype,
struct ompi_op_t* op, int root,
struct ompi_communicator_t* comm,
mca_coll_base_module_t *module)
@ -293,7 +306,7 @@ int ompi_coll_tuned_reduce_intra_dec_dynamic( const void *sendbuf, void *recvbuf
int alg, faninout, segsize, max_requests;
size_t dsize;
ompi_datatype_type_size (datatype, &dsize);
ompi_datatype_type_size(dtype, &dsize);
dsize *= count;
alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[REDUCE],
@ -301,23 +314,23 @@ int ompi_coll_tuned_reduce_intra_dec_dynamic( const void *sendbuf, void *recvbuf
if (alg) {
/* we have found a valid choice from the file based rules for this message size */
return ompi_coll_tuned_reduce_intra_do_this (sendbuf, recvbuf, count, datatype,
op, root,
comm, module,
return ompi_coll_tuned_reduce_intra_do_this (sbuf, rbuf, count, dtype,
op, root, comm, module,
alg, faninout,
segsize,
max_requests);
segsize, max_requests);
} /* found a method */
} /*end if any com rules to check */
if (tuned_module->user_forced[REDUCE].algorithm) {
return ompi_coll_tuned_reduce_intra_do_forced (sendbuf, recvbuf, count, datatype,
op, root,
comm, module);
return ompi_coll_tuned_reduce_intra_do_this(sbuf, rbuf, count, dtype,
op, root, comm, module,
tuned_module->user_forced[REDUCE].algorithm,
tuned_module->user_forced[REDUCE].chain_fanout,
tuned_module->user_forced[REDUCE].segsize,
tuned_module->user_forced[REDUCE].max_requests);
}
return ompi_coll_tuned_reduce_intra_dec_fixed (sendbuf, recvbuf, count, datatype,
op, root,
comm, module);
return ompi_coll_tuned_reduce_intra_dec_fixed (sbuf, rbuf, count, dtype,
op, root, comm, module);
}
/*
@ -356,22 +369,21 @@ int ompi_coll_tuned_reduce_scatter_intra_dec_dynamic(const void *sbuf, void *rbu
&segsize, &ignoreme);
if (alg) {
/* we have found a valid choice from the file based rules for this message size */
return ompi_coll_tuned_reduce_scatter_intra_do_this (sbuf, rbuf, rcounts,
dtype, op,
comm, module,
alg, faninout,
segsize);
return ompi_coll_tuned_reduce_scatter_intra_do_this (sbuf, rbuf, rcounts, dtype,
op, comm, module,
alg, faninout, segsize);
} /* found a method */
} /*end if any com rules to check */
if (tuned_module->user_forced[REDUCESCATTER].algorithm) {
return ompi_coll_tuned_reduce_scatter_intra_do_forced (sbuf, rbuf, rcounts,
dtype, op,
comm, module);
return ompi_coll_tuned_reduce_scatter_intra_do_this(sbuf, rbuf, rcounts, dtype,
op, comm, module,
tuned_module->user_forced[REDUCESCATTER].algorithm,
tuned_module->user_forced[REDUCESCATTER].chain_fanout,
tuned_module->user_forced[REDUCESCATTER].segsize);
}
return ompi_coll_tuned_reduce_scatter_intra_dec_fixed (sbuf, rbuf, rcounts,
dtype, op,
comm, module);
dtype, op, comm, module);
}
/*
@ -421,9 +433,12 @@ int ompi_coll_tuned_allgather_intra_dec_dynamic(const void *sbuf, int scount,
/* We do not have file based rules */
if (tuned_module->user_forced[ALLGATHER].algorithm) {
/* User-forced algorithm */
return ompi_coll_tuned_allgather_intra_do_forced (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
return ompi_coll_tuned_allgather_intra_do_this(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module,
tuned_module->user_forced[ALLGATHER].algorithm,
tuned_module->user_forced[ALLGATHER].tree_fanout,
tuned_module->user_forced[ALLGATHER].segsize);
}
/* Use default decision */
@ -482,10 +497,12 @@ int ompi_coll_tuned_allgatherv_intra_dec_dynamic(const void *sbuf, int scount,
/* We do not have file based rules */
if (tuned_module->user_forced[ALLGATHERV].algorithm) {
/* User-forced algorithm */
return ompi_coll_tuned_allgatherv_intra_do_forced (sbuf, scount, sdtype,
rbuf, rcounts,
rdispls, rdtype,
comm, module);
return ompi_coll_tuned_allgatherv_intra_do_this(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module,
tuned_module->user_forced[ALLGATHERV].algorithm,
tuned_module->user_forced[ALLGATHERV].tree_fanout,
tuned_module->user_forced[ALLGATHERV].segsize);
}
/* Use default decision */
@ -532,9 +549,12 @@ int ompi_coll_tuned_gather_intra_dec_dynamic(const void *sbuf, int scount,
} /*end if any com rules to check */
if (tuned_module->user_forced[GATHER].algorithm) {
return ompi_coll_tuned_gather_intra_do_forced (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
return ompi_coll_tuned_gather_intra_do_this(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module,
tuned_module->user_forced[GATHER].algorithm,
tuned_module->user_forced[GATHER].tree_fanout,
tuned_module->user_forced[GATHER].segsize);
}
return ompi_coll_tuned_gather_intra_dec_fixed (sbuf, scount, sdtype,
@ -578,9 +598,12 @@ int ompi_coll_tuned_scatter_intra_dec_dynamic(const void *sbuf, int scount,
} /*end if any com rules to check */
if (tuned_module->user_forced[SCATTER].algorithm) {
return ompi_coll_tuned_scatter_intra_do_forced (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
return ompi_coll_tuned_scatter_intra_do_this(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module,
tuned_module->user_forced[SCATTER].algorithm,
tuned_module->user_forced[SCATTER].chain_fanout,
tuned_module->user_forced[SCATTER].segsize);
}
return ompi_coll_tuned_scatter_intra_dec_fixed (sbuf, scount, sdtype,

Просмотреть файл

@ -119,47 +119,6 @@ ompi_coll_tuned_gather_intra_check_forced_init(coll_tuned_force_algorithm_mca_pa
return (MPI_SUCCESS);
}
int
ompi_coll_tuned_gather_intra_do_forced(const void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:gather_intra_do_forced selected algorithm %d",
tuned_module->user_forced[GATHER].algorithm));
switch (tuned_module->user_forced[GATHER].algorithm) {
case (0):
return ompi_coll_tuned_gather_intra_dec_fixed(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (1):
return ompi_coll_base_gather_intra_basic_linear(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (2):
return ompi_coll_base_gather_intra_binomial(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (3):
return ompi_coll_base_gather_intra_linear_sync(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module,
tuned_module->user_forced[GATHER].segsize);
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:gather_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
tuned_module->user_forced[GATHER].algorithm,
ompi_coll_tuned_forced_max_algorithms[GATHER]));
return (MPI_ERR_ARG);
}
int
ompi_coll_tuned_gather_intra_do_this(const void *sbuf, int scount,
struct ompi_datatype_t *sdtype,

Просмотреть файл

@ -142,50 +142,6 @@ int ompi_coll_tuned_reduce_intra_check_forced_init (coll_tuned_force_algorithm_m
return (MPI_SUCCESS);
}
int ompi_coll_tuned_reduce_intra_do_forced(const void *sbuf, void* rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
const int segsize = tuned_module->user_forced[REDUCE].segsize;
const int chain_fanout = tuned_module->user_forced[REDUCE].chain_fanout;
const int max_requests = tuned_module->user_forced[REDUCE].max_requests;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_forced selected algorithm %d",
tuned_module->user_forced[REDUCE].algorithm));
switch (tuned_module->user_forced[REDUCE].algorithm) {
case (0): return ompi_coll_tuned_reduce_intra_dec_fixed(sbuf, rbuf, count, dtype,
op, root, comm, module);
case (1): return ompi_coll_base_reduce_intra_basic_linear(sbuf, rbuf, count, dtype,
op, root, comm, module);
case (2): return ompi_coll_base_reduce_intra_chain(sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, chain_fanout, max_requests);
case (3): return ompi_coll_base_reduce_intra_pipeline(sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, max_requests);
case (4): return ompi_coll_base_reduce_intra_binary(sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, max_requests);
case (5): return ompi_coll_base_reduce_intra_binomial(sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, max_requests);
case (6): return ompi_coll_base_reduce_intra_in_order_binary(sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, max_requests);
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
tuned_module->user_forced[REDUCE].algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCE]));
return (MPI_ERR_ARG);
}
int ompi_coll_tuned_reduce_intra_do_this(const void *sbuf, void* rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op, int root,

Просмотреть файл

@ -119,35 +119,6 @@ int ompi_coll_tuned_reduce_scatter_intra_check_forced_init (coll_tuned_force_alg
return (MPI_SUCCESS);
}
int ompi_coll_tuned_reduce_scatter_intra_do_forced(const void *sbuf, void* rbuf,
const int *rcounts,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_scatter_intra_do_forced selected algorithm %d",
tuned_module->user_forced[REDUCESCATTER].algorithm));
switch (tuned_module->user_forced[REDUCESCATTER].algorithm) {
case (0): return ompi_coll_tuned_reduce_scatter_intra_dec_fixed(sbuf, rbuf, rcounts,
dtype, op, comm, module);
case (1): return ompi_coll_base_reduce_scatter_intra_nonoverlapping(sbuf, rbuf, rcounts,
dtype, op, comm, module);
case (2): return ompi_coll_base_reduce_scatter_intra_basic_recursivehalving(sbuf, rbuf, rcounts,
dtype, op, comm, module);
case (3): return ompi_coll_base_reduce_scatter_intra_ring(sbuf, rbuf, rcounts,
dtype, op, comm, module);
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_scatter_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
tuned_module->user_forced[REDUCESCATTER].algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCESCATTER]));
return (MPI_ERR_ARG);
}
int ompi_coll_tuned_reduce_scatter_intra_do_this(const void *sbuf, void* rbuf,
const int *rcounts,
struct ompi_datatype_t *dtype,

Просмотреть файл

@ -117,41 +117,6 @@ ompi_coll_tuned_scatter_intra_check_forced_init(coll_tuned_force_algorithm_mca_p
return (MPI_SUCCESS);
}
int
ompi_coll_tuned_scatter_intra_do_forced(const void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:scatter_intra_do_forced selected algorithm %d",
tuned_module->user_forced[SCATTER].algorithm));
switch (tuned_module->user_forced[SCATTER].algorithm) {
case (0):
return ompi_coll_tuned_scatter_intra_dec_fixed(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (1):
return ompi_coll_base_scatter_intra_basic_linear(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (2):
return ompi_coll_base_scatter_intra_binomial(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:scatter_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
tuned_module->user_forced[SCATTER].algorithm, ompi_coll_tuned_forced_max_algorithms[SCATTER]));
return MPI_ERR_ARG;
}
int
ompi_coll_tuned_scatter_intra_do_this(const void *sbuf, int scount,
struct ompi_datatype_t *sdtype,