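Code cleanup of the tuned module: remove the per-collective *_intra_do_forced dispatchers and have the *_intra_dec_dynamic functions call *_intra_do_this directly with the user-forced parameters.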
Этот коммит содержится в:
родитель
582f290519
Коммит
253eb80e26
@ -93,77 +93,66 @@ ompi_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority);
|
||||
/* All Gather */
|
||||
int ompi_coll_tuned_allgather_intra_dec_fixed(ALLGATHER_ARGS);
|
||||
int ompi_coll_tuned_allgather_intra_dec_dynamic(ALLGATHER_ARGS);
|
||||
int ompi_coll_tuned_allgather_intra_do_forced(ALLGATHER_ARGS);
|
||||
int ompi_coll_tuned_allgather_intra_do_this(ALLGATHER_ARGS, int algorithm, int faninout, int segsize);
|
||||
int ompi_coll_tuned_allgather_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
|
||||
|
||||
/* All GatherV */
|
||||
int ompi_coll_tuned_allgatherv_intra_dec_fixed(ALLGATHERV_ARGS);
|
||||
int ompi_coll_tuned_allgatherv_intra_dec_dynamic(ALLGATHERV_ARGS);
|
||||
int ompi_coll_tuned_allgatherv_intra_do_forced(ALLGATHERV_ARGS);
|
||||
int ompi_coll_tuned_allgatherv_intra_do_this(ALLGATHERV_ARGS, int algorithm, int faninout, int segsize);
|
||||
int ompi_coll_tuned_allgatherv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
|
||||
|
||||
/* All Reduce */
|
||||
int ompi_coll_tuned_allreduce_intra_dec_fixed(ALLREDUCE_ARGS);
|
||||
int ompi_coll_tuned_allreduce_intra_dec_dynamic(ALLREDUCE_ARGS);
|
||||
int ompi_coll_tuned_allreduce_intra_do_forced(ALLREDUCE_ARGS);
|
||||
int ompi_coll_tuned_allreduce_intra_do_this(ALLREDUCE_ARGS, int algorithm, int faninout, int segsize);
|
||||
int ompi_coll_tuned_allreduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
|
||||
|
||||
/* AlltoAll */
|
||||
int ompi_coll_tuned_alltoall_intra_dec_fixed(ALLTOALL_ARGS);
|
||||
int ompi_coll_tuned_alltoall_intra_dec_dynamic(ALLTOALL_ARGS);
|
||||
int ompi_coll_tuned_alltoall_intra_do_forced(ALLTOALL_ARGS);
|
||||
int ompi_coll_tuned_alltoall_intra_do_this(ALLTOALL_ARGS, int algorithm, int faninout, int segsize, int max_requests);
|
||||
int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
|
||||
|
||||
/* AlltoAllV */
|
||||
int ompi_coll_tuned_alltoallv_intra_dec_fixed(ALLTOALLV_ARGS);
|
||||
int ompi_coll_tuned_alltoallv_intra_dec_dynamic(ALLTOALLV_ARGS);
|
||||
int ompi_coll_tuned_alltoallv_intra_do_forced(ALLTOALLV_ARGS);
|
||||
int ompi_coll_tuned_alltoallv_intra_do_this(ALLTOALLV_ARGS, int algorithm);
|
||||
int ompi_coll_tuned_alltoallv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
|
||||
|
||||
/* Barrier */
|
||||
int ompi_coll_tuned_barrier_intra_dec_fixed(BARRIER_ARGS);
|
||||
int ompi_coll_tuned_barrier_intra_dec_dynamic(BARRIER_ARGS);
|
||||
int ompi_coll_tuned_barrier_intra_do_forced(BARRIER_ARGS);
|
||||
int ompi_coll_tuned_barrier_intra_do_this(BARRIER_ARGS, int algorithm, int faninout, int segsize);
|
||||
int ompi_coll_tuned_barrier_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
|
||||
|
||||
/* Bcast */
|
||||
int ompi_coll_tuned_bcast_intra_dec_fixed(BCAST_ARGS);
|
||||
int ompi_coll_tuned_bcast_intra_dec_dynamic(BCAST_ARGS);
|
||||
int ompi_coll_tuned_bcast_intra_do_forced(BCAST_ARGS);
|
||||
int ompi_coll_tuned_bcast_intra_do_this(BCAST_ARGS, int algorithm, int faninout, int segsize);
|
||||
int ompi_coll_tuned_bcast_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
|
||||
|
||||
/* Gather */
|
||||
int ompi_coll_tuned_gather_intra_dec_fixed(GATHER_ARGS);
|
||||
int ompi_coll_tuned_gather_intra_dec_dynamic(GATHER_ARGS);
|
||||
int ompi_coll_tuned_gather_intra_do_forced(GATHER_ARGS);
|
||||
int ompi_coll_tuned_gather_intra_do_this(GATHER_ARGS, int algorithm, int faninout, int segsize);
|
||||
int ompi_coll_tuned_gather_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
|
||||
|
||||
/* Reduce */
|
||||
int ompi_coll_tuned_reduce_intra_dec_fixed(REDUCE_ARGS);
|
||||
int ompi_coll_tuned_reduce_intra_dec_dynamic(REDUCE_ARGS);
|
||||
int ompi_coll_tuned_reduce_intra_do_forced(REDUCE_ARGS);
|
||||
int ompi_coll_tuned_reduce_intra_do_this(REDUCE_ARGS, int algorithm, int faninout, int segsize, int max_oustanding_reqs);
|
||||
int ompi_coll_tuned_reduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
|
||||
|
||||
/* Reduce_scatter */
|
||||
int ompi_coll_tuned_reduce_scatter_intra_dec_fixed(REDUCESCATTER_ARGS);
|
||||
int ompi_coll_tuned_reduce_scatter_intra_dec_dynamic(REDUCESCATTER_ARGS);
|
||||
int ompi_coll_tuned_reduce_scatter_intra_do_forced(REDUCESCATTER_ARGS);
|
||||
int ompi_coll_tuned_reduce_scatter_intra_do_this(REDUCESCATTER_ARGS, int algorithm, int faninout, int segsize);
|
||||
int ompi_coll_tuned_reduce_scatter_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
|
||||
|
||||
/* Scatter */
|
||||
int ompi_coll_tuned_scatter_intra_dec_fixed(SCATTER_ARGS);
|
||||
int ompi_coll_tuned_scatter_intra_dec_dynamic(SCATTER_ARGS);
|
||||
int ompi_coll_tuned_scatter_intra_do_forced(SCATTER_ARGS);
|
||||
int ompi_coll_tuned_scatter_intra_do_this(SCATTER_ARGS, int algorithm, int faninout, int segsize);
|
||||
int ompi_coll_tuned_scatter_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
|
||||
|
||||
|
@ -120,57 +120,6 @@ ompi_coll_tuned_allgather_intra_check_forced_init(coll_tuned_force_algorithm_mca
|
||||
return (MPI_SUCCESS);
|
||||
}
|
||||
|
||||
int ompi_coll_tuned_allgather_intra_do_forced(const void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void* rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
|
||||
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,
|
||||
"coll:tuned:allgather_intra_do_forced selected algorithm %d",
|
||||
tuned_module->user_forced[ALLGATHER].algorithm));
|
||||
|
||||
switch (tuned_module->user_forced[ALLGATHER].algorithm) {
|
||||
case (0):
|
||||
return ompi_coll_tuned_allgather_intra_dec_fixed(sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
comm, module);
|
||||
case (1):
|
||||
return ompi_coll_base_allgather_intra_basic_linear(sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
comm, module);
|
||||
case (2):
|
||||
return ompi_coll_base_allgather_intra_bruck(sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
comm, module);
|
||||
case (3):
|
||||
return ompi_coll_base_allgather_intra_recursivedoubling(sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
comm, module);
|
||||
case (4):
|
||||
return ompi_coll_base_allgather_intra_ring(sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
comm, module);
|
||||
case (5):
|
||||
return ompi_coll_base_allgather_intra_neighborexchange(sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
comm, module);
|
||||
case (6):
|
||||
return ompi_coll_base_allgather_intra_two_procs(sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
comm, module);
|
||||
} /* switch */
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,
|
||||
"coll:tuned:allgather_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
|
||||
tuned_module->user_forced[ALLGATHER].algorithm,
|
||||
ompi_coll_tuned_forced_max_algorithms[ALLGATHER]));
|
||||
return (MPI_ERR_ARG);
|
||||
}
|
||||
|
||||
|
||||
int ompi_coll_tuned_allgather_intra_do_this(const void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void* rbuf, int rcount,
|
||||
|
@ -119,54 +119,6 @@ ompi_coll_tuned_allgatherv_intra_check_forced_init(coll_tuned_force_algorithm_mc
|
||||
return (MPI_SUCCESS);
|
||||
}
|
||||
|
||||
int ompi_coll_tuned_allgatherv_intra_do_forced(const void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, const int *rcounts,
|
||||
const int *rdispls,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
|
||||
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,
|
||||
"coll:tuned:allgatherv_intra_do_forced selected algorithm %d",
|
||||
tuned_module->user_forced[ALLGATHERV].algorithm));
|
||||
|
||||
switch (tuned_module->user_forced[ALLGATHERV].algorithm) {
|
||||
case (0):
|
||||
return ompi_coll_tuned_allgatherv_intra_dec_fixed(sbuf, scount, sdtype,
|
||||
rbuf, rcounts, rdispls, rdtype,
|
||||
comm, module);
|
||||
case (1):
|
||||
return ompi_coll_base_allgatherv_intra_basic_default(sbuf, scount, sdtype,
|
||||
rbuf, rcounts, rdispls, rdtype,
|
||||
comm, module);
|
||||
case (2):
|
||||
return ompi_coll_base_allgatherv_intra_bruck(sbuf, scount, sdtype,
|
||||
rbuf, rcounts, rdispls, rdtype,
|
||||
comm, module);
|
||||
case (3):
|
||||
return ompi_coll_base_allgatherv_intra_ring(sbuf, scount, sdtype,
|
||||
rbuf, rcounts, rdispls, rdtype,
|
||||
comm, module);
|
||||
case (4):
|
||||
return ompi_coll_base_allgatherv_intra_neighborexchange(sbuf, scount, sdtype,
|
||||
rbuf, rcounts, rdispls, rdtype,
|
||||
comm, module);
|
||||
case (5):
|
||||
return ompi_coll_base_allgatherv_intra_two_procs(sbuf, scount, sdtype,
|
||||
rbuf, rcounts, rdispls, rdtype,
|
||||
comm, module);
|
||||
} /* switch */
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,
|
||||
"coll:tuned:allgatherv_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
|
||||
tuned_module->user_forced[ALLGATHERV].algorithm,
|
||||
ompi_coll_tuned_forced_max_algorithms[ALLGATHERV]));
|
||||
return (MPI_ERR_ARG);
|
||||
}
|
||||
|
||||
|
||||
int ompi_coll_tuned_allgatherv_intra_do_this(const void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, const int *rcounts,
|
||||
|
@ -119,40 +119,6 @@ int ompi_coll_tuned_allreduce_intra_check_forced_init (coll_tuned_force_algorith
|
||||
return (MPI_SUCCESS);
|
||||
}
|
||||
|
||||
|
||||
int ompi_coll_tuned_allreduce_intra_do_forced(const void *sbuf, void *rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
|
||||
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_forced selected algorithm %d, segment size %d",
|
||||
tuned_module->user_forced[ALLREDUCE].algorithm,
|
||||
tuned_module->user_forced[ALLREDUCE].segsize));
|
||||
|
||||
switch (tuned_module->user_forced[ALLREDUCE].algorithm) {
|
||||
case (0):
|
||||
return ompi_coll_tuned_allreduce_intra_dec_fixed(sbuf, rbuf, count, dtype, op, comm, module);
|
||||
case (1):
|
||||
return ompi_coll_base_allreduce_intra_basic_linear(sbuf, rbuf, count, dtype, op, comm, module);
|
||||
case (2):
|
||||
return ompi_coll_base_allreduce_intra_nonoverlapping(sbuf, rbuf, count, dtype, op, comm, module);
|
||||
case (3):
|
||||
return ompi_coll_base_allreduce_intra_recursivedoubling(sbuf, rbuf, count, dtype, op, comm, module);
|
||||
case (4):
|
||||
return ompi_coll_base_allreduce_intra_ring(sbuf, rbuf, count, dtype, op, comm, module);
|
||||
case (5):
|
||||
return ompi_coll_base_allreduce_intra_ring_segmented(sbuf, rbuf, count, dtype, op, comm, module, tuned_module->user_forced[ALLREDUCE].segsize);
|
||||
} /* switch */
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
|
||||
tuned_module->user_forced[ALLREDUCE].algorithm,
|
||||
ompi_coll_tuned_forced_max_algorithms[ALLREDUCE]));
|
||||
return (MPI_ERR_ARG);
|
||||
}
|
||||
|
||||
|
||||
int ompi_coll_tuned_allreduce_intra_do_this(const void *sbuf, void *rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
|
@ -139,41 +139,6 @@ int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm
|
||||
return (MPI_SUCCESS);
|
||||
}
|
||||
|
||||
|
||||
|
||||
int ompi_coll_tuned_alltoall_intra_do_forced(const void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void* rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
|
||||
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_forced selected algorithm %d",
|
||||
tuned_module->user_forced[ALLTOALL].algorithm));
|
||||
|
||||
switch (tuned_module->user_forced[ALLTOALL].algorithm) {
|
||||
case (0):
|
||||
return ompi_coll_tuned_alltoall_intra_dec_fixed (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
|
||||
case (1):
|
||||
return ompi_coll_base_alltoall_intra_basic_linear (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
|
||||
case (2):
|
||||
return ompi_coll_base_alltoall_intra_pairwise (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
|
||||
case (3):
|
||||
return ompi_coll_base_alltoall_intra_bruck (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
|
||||
case (4):
|
||||
return ompi_coll_base_alltoall_intra_linear_sync (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module,
|
||||
tuned_module->user_forced[ALLTOALL].max_requests);
|
||||
case (5):
|
||||
return ompi_coll_base_alltoall_intra_two_procs (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
|
||||
} /* switch */
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
|
||||
tuned_module->user_forced[ALLTOALL].algorithm, ompi_coll_tuned_forced_max_algorithms[ALLTOALL]));
|
||||
return (MPI_ERR_ARG);
|
||||
}
|
||||
|
||||
|
||||
int ompi_coll_tuned_alltoall_intra_do_this(const void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void* rbuf, int rcount,
|
||||
|
@ -84,43 +84,6 @@ int ompi_coll_tuned_alltoallv_intra_check_forced_init(coll_tuned_force_algorithm
|
||||
return (MPI_SUCCESS);
|
||||
}
|
||||
|
||||
|
||||
|
||||
int ompi_coll_tuned_alltoallv_intra_do_forced(const void *sbuf, const int *scounts, const int *sdisps,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void* rbuf, const int *rcounts, const int *rdisps,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
|
||||
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,
|
||||
"coll:tuned:alltoallv_intra_do_forced selected algorithm %d",
|
||||
tuned_module->user_forced[ALLTOALLV].algorithm));
|
||||
|
||||
switch (tuned_module->user_forced[ALLTOALLV].algorithm) {
|
||||
case (0):
|
||||
return ompi_coll_tuned_alltoallv_intra_dec_fixed(sbuf, scounts, sdisps, sdtype,
|
||||
rbuf, rcounts, rdisps, rdtype,
|
||||
comm, module);
|
||||
case (1):
|
||||
return ompi_coll_base_alltoallv_intra_basic_linear(sbuf, scounts, sdisps, sdtype,
|
||||
rbuf, rcounts, rdisps, rdtype,
|
||||
comm, module);
|
||||
case (2):
|
||||
return ompi_coll_base_alltoallv_intra_pairwise(sbuf, scounts, sdisps, sdtype,
|
||||
rbuf, rcounts, rdisps, rdtype,
|
||||
comm, module);
|
||||
} /* switch */
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,
|
||||
"coll:tuned:alltoallv_intra_do_forced attempt to "
|
||||
"select algorithm %d when only 0-%d is valid.",
|
||||
tuned_module->user_forced[ALLTOALLV].algorithm,
|
||||
ompi_coll_tuned_forced_max_algorithms[ALLTOALLV]));
|
||||
return (MPI_ERR_ARG);
|
||||
}
|
||||
|
||||
/* If the user selects dynamic rules and specifies the algorithm to
|
||||
* use, then this function is called. */
|
||||
int ompi_coll_tuned_alltoallv_intra_do_this(const void *sbuf, const int *scounts, const int *sdisps,
|
||||
|
@ -85,33 +85,6 @@ int ompi_coll_tuned_barrier_intra_check_forced_init (coll_tuned_force_algorithm_
|
||||
return (MPI_SUCCESS);
|
||||
}
|
||||
|
||||
|
||||
|
||||
int ompi_coll_tuned_barrier_intra_do_forced(struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
|
||||
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,
|
||||
"coll:tuned:barrier_intra_do_forced selected algorithm %d",
|
||||
tuned_module->user_forced[BARRIER].algorithm));
|
||||
|
||||
switch (tuned_module->user_forced[BARRIER].algorithm) {
|
||||
case (0): return ompi_coll_tuned_barrier_intra_dec_fixed(comm, module);
|
||||
case (1): return ompi_coll_base_barrier_intra_basic_linear(comm, module);
|
||||
case (2): return ompi_coll_base_barrier_intra_doublering(comm, module);
|
||||
case (3): return ompi_coll_base_barrier_intra_recursivedoubling(comm, module);
|
||||
case (4): return ompi_coll_base_barrier_intra_bruck(comm, module);
|
||||
case (5): return ompi_coll_base_barrier_intra_two_procs(comm, module);
|
||||
case (6): return ompi_coll_base_barrier_intra_tree(comm, module);
|
||||
} /* switch */
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
|
||||
tuned_module->user_forced[BARRIER].algorithm,
|
||||
ompi_coll_tuned_forced_max_algorithms[BARRIER]));
|
||||
return (MPI_ERR_ARG);
|
||||
}
|
||||
|
||||
|
||||
int ompi_coll_tuned_barrier_intra_do_this (struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module,
|
||||
int algorithm, int faninout, int segsize)
|
||||
|
@ -118,46 +118,12 @@ int ompi_coll_tuned_bcast_intra_check_forced_init (coll_tuned_force_algorithm_mc
|
||||
return (MPI_SUCCESS);
|
||||
}
|
||||
|
||||
|
||||
int ompi_coll_tuned_bcast_intra_do_forced(void *buf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
|
||||
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_forced algorithm %d",
|
||||
tuned_module->user_forced[BCAST].algorithm));
|
||||
|
||||
switch (tuned_module->user_forced[BCAST].algorithm) {
|
||||
case (0): return ompi_coll_tuned_bcast_intra_dec_fixed( buf, count, dtype, root, comm, module );
|
||||
case (1): return ompi_coll_base_bcast_intra_basic_linear( buf, count, dtype, root, comm, module );
|
||||
case (2): return ompi_coll_base_bcast_intra_chain( buf, count, dtype, root, comm, module,
|
||||
tuned_module->user_forced[BCAST].segsize,
|
||||
tuned_module->user_forced[BCAST].chain_fanout );
|
||||
case (3): return ompi_coll_base_bcast_intra_pipeline( buf, count, dtype, root, comm, module,
|
||||
tuned_module->user_forced[BCAST].segsize );
|
||||
case (4): return ompi_coll_base_bcast_intra_split_bintree( buf, count, dtype, root, comm, module,
|
||||
tuned_module->user_forced[BCAST].segsize );
|
||||
case (5): return ompi_coll_base_bcast_intra_bintree( buf, count, dtype, root, comm, module,
|
||||
tuned_module->user_forced[BCAST].segsize );
|
||||
case (6): return ompi_coll_base_bcast_intra_binomial( buf, count, dtype, root, comm, module,
|
||||
tuned_module->user_forced[BCAST].segsize );
|
||||
} /* switch */
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
|
||||
tuned_module->user_forced[BCAST].algorithm, ompi_coll_tuned_forced_max_algorithms[BCAST]));
|
||||
return (MPI_ERR_ARG);
|
||||
}
|
||||
|
||||
|
||||
int ompi_coll_tuned_bcast_intra_do_this(void *buf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module,
|
||||
int algorithm, int faninout, int segsize)
|
||||
|
||||
{
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_this algorithm %d topo faninout %d segsize %d",
|
||||
algorithm, faninout, segsize));
|
||||
|
@ -81,8 +81,10 @@ ompi_coll_tuned_allreduce_intra_dec_dynamic (const void *sbuf, void *rbuf, int c
|
||||
} /*end if any com rules to check */
|
||||
|
||||
if (tuned_module->user_forced[ALLREDUCE].algorithm) {
|
||||
return ompi_coll_tuned_allreduce_intra_do_forced (sbuf, rbuf, count, dtype, op,
|
||||
comm, module);
|
||||
return ompi_coll_tuned_allreduce_intra_do_this(sbuf, rbuf, count, dtype, op, comm, module,
|
||||
tuned_module->user_forced[ALLREDUCE].algorithm,
|
||||
tuned_module->user_forced[ALLREDUCE].tree_fanout,
|
||||
tuned_module->user_forced[ALLREDUCE].segsize);
|
||||
}
|
||||
return ompi_coll_tuned_allreduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op,
|
||||
comm, module);
|
||||
@ -131,9 +133,13 @@ int ompi_coll_tuned_alltoall_intra_dec_dynamic(const void *sbuf, int scount,
|
||||
} /*end if any com rules to check */
|
||||
|
||||
if (tuned_module->user_forced[ALLTOALL].algorithm) {
|
||||
return ompi_coll_tuned_alltoall_intra_do_forced (sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
comm, module);
|
||||
return ompi_coll_tuned_alltoall_intra_do_this(sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
comm, module,
|
||||
tuned_module->user_forced[ALLTOALL].algorithm,
|
||||
tuned_module->user_forced[ALLTOALL].tree_fanout,
|
||||
tuned_module->user_forced[ALLTOALL].segsize,
|
||||
tuned_module->user_forced[ALLTOALL].max_requests);
|
||||
}
|
||||
return ompi_coll_tuned_alltoall_intra_dec_fixed (sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
@ -179,9 +185,10 @@ int ompi_coll_tuned_alltoallv_intra_dec_dynamic(const void *sbuf, const int *sco
|
||||
} /*end if any com rules to check */
|
||||
|
||||
if (tuned_module->user_forced[ALLTOALLV].algorithm) {
|
||||
return ompi_coll_tuned_alltoallv_intra_do_forced(sbuf, scounts, sdisps, sdtype,
|
||||
rbuf, rcounts, rdisps, rdtype,
|
||||
comm, module);
|
||||
return ompi_coll_tuned_alltoallv_intra_do_this(sbuf, scounts, sdisps, sdtype,
|
||||
rbuf, rcounts, rdisps, rdtype,
|
||||
comm, module,
|
||||
tuned_module->user_forced[ALLTOALLV].algorithm);
|
||||
}
|
||||
return ompi_coll_tuned_alltoallv_intra_dec_fixed(sbuf, scounts, sdisps, sdtype,
|
||||
rbuf, rcounts, rdisps, rdtype,
|
||||
@ -218,7 +225,10 @@ int ompi_coll_tuned_barrier_intra_dec_dynamic(struct ompi_communicator_t *comm,
|
||||
} /*end if any com rules to check */
|
||||
|
||||
if (tuned_module->user_forced[BARRIER].algorithm) {
|
||||
return ompi_coll_tuned_barrier_intra_do_forced (comm, module);
|
||||
return ompi_coll_tuned_barrier_intra_do_this(comm, module,
|
||||
tuned_module->user_forced[BARRIER].algorithm,
|
||||
tuned_module->user_forced[BARRIER].tree_fanout,
|
||||
tuned_module->user_forced[BARRIER].segsize);
|
||||
}
|
||||
return ompi_coll_tuned_barrier_intra_dec_fixed (comm, module);
|
||||
}
|
||||
@ -230,8 +240,8 @@ int ompi_coll_tuned_barrier_intra_dec_dynamic(struct ompi_communicator_t *comm,
|
||||
* Accepts: - same arguments as MPI_Bcast()
|
||||
* Returns: - MPI_SUCCESS or error code (passed from the bcast implementation)
|
||||
*/
|
||||
int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count,
|
||||
struct ompi_datatype_t *datatype, int root,
|
||||
int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buf, int count,
|
||||
struct ompi_datatype_t *dtype, int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
@ -245,7 +255,7 @@ int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count,
|
||||
int alg, faninout, segsize, ignoreme;
|
||||
size_t dsize;
|
||||
|
||||
ompi_datatype_type_size (datatype, &dsize);
|
||||
ompi_datatype_type_size (dtype, &dsize);
|
||||
dsize *= count;
|
||||
|
||||
alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[BCAST],
|
||||
@ -253,7 +263,7 @@ int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count,
|
||||
|
||||
if (alg) {
|
||||
/* we have found a valid choice from the file based rules for this message size */
|
||||
return ompi_coll_tuned_bcast_intra_do_this (buff, count, datatype, root,
|
||||
return ompi_coll_tuned_bcast_intra_do_this (buf, count, dtype, root,
|
||||
comm, module,
|
||||
alg, faninout, segsize);
|
||||
} /* found a method */
|
||||
@ -261,10 +271,13 @@ int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count,
|
||||
|
||||
|
||||
if (tuned_module->user_forced[BCAST].algorithm) {
|
||||
return ompi_coll_tuned_bcast_intra_do_forced (buff, count, datatype, root,
|
||||
comm, module);
|
||||
return ompi_coll_tuned_bcast_intra_do_this(buf, count, dtype,
|
||||
root, comm, module,
|
||||
tuned_module->user_forced[BCAST].algorithm,
|
||||
tuned_module->user_forced[BCAST].chain_fanout,
|
||||
tuned_module->user_forced[BCAST].segsize);
|
||||
}
|
||||
return ompi_coll_tuned_bcast_intra_dec_fixed (buff, count, datatype, root,
|
||||
return ompi_coll_tuned_bcast_intra_dec_fixed (buf, count, dtype, root,
|
||||
comm, module);
|
||||
}
|
||||
|
||||
@ -276,8 +289,8 @@ int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count,
|
||||
* Returns: - MPI_SUCCESS or error code (passed from the reduce implementation)
|
||||
*
|
||||
*/
|
||||
int ompi_coll_tuned_reduce_intra_dec_dynamic( const void *sendbuf, void *recvbuf,
|
||||
int count, struct ompi_datatype_t* datatype,
|
||||
int ompi_coll_tuned_reduce_intra_dec_dynamic( const void *sbuf, void *rbuf,
|
||||
int count, struct ompi_datatype_t* dtype,
|
||||
struct ompi_op_t* op, int root,
|
||||
struct ompi_communicator_t* comm,
|
||||
mca_coll_base_module_t *module)
|
||||
@ -293,7 +306,7 @@ int ompi_coll_tuned_reduce_intra_dec_dynamic( const void *sendbuf, void *recvbuf
|
||||
int alg, faninout, segsize, max_requests;
|
||||
size_t dsize;
|
||||
|
||||
ompi_datatype_type_size (datatype, &dsize);
|
||||
ompi_datatype_type_size(dtype, &dsize);
|
||||
dsize *= count;
|
||||
|
||||
alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[REDUCE],
|
||||
@ -301,23 +314,23 @@ int ompi_coll_tuned_reduce_intra_dec_dynamic( const void *sendbuf, void *recvbuf
|
||||
|
||||
if (alg) {
|
||||
/* we have found a valid choice from the file based rules for this message size */
|
||||
return ompi_coll_tuned_reduce_intra_do_this (sendbuf, recvbuf, count, datatype,
|
||||
op, root,
|
||||
comm, module,
|
||||
return ompi_coll_tuned_reduce_intra_do_this (sbuf, rbuf, count, dtype,
|
||||
op, root, comm, module,
|
||||
alg, faninout,
|
||||
segsize,
|
||||
max_requests);
|
||||
segsize, max_requests);
|
||||
} /* found a method */
|
||||
} /*end if any com rules to check */
|
||||
|
||||
if (tuned_module->user_forced[REDUCE].algorithm) {
|
||||
return ompi_coll_tuned_reduce_intra_do_forced (sendbuf, recvbuf, count, datatype,
|
||||
op, root,
|
||||
comm, module);
|
||||
return ompi_coll_tuned_reduce_intra_do_this(sbuf, rbuf, count, dtype,
|
||||
op, root, comm, module,
|
||||
tuned_module->user_forced[REDUCE].algorithm,
|
||||
tuned_module->user_forced[REDUCE].chain_fanout,
|
||||
tuned_module->user_forced[REDUCE].segsize,
|
||||
tuned_module->user_forced[REDUCE].max_requests);
|
||||
}
|
||||
return ompi_coll_tuned_reduce_intra_dec_fixed (sendbuf, recvbuf, count, datatype,
|
||||
op, root,
|
||||
comm, module);
|
||||
return ompi_coll_tuned_reduce_intra_dec_fixed (sbuf, rbuf, count, dtype,
|
||||
op, root, comm, module);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -356,22 +369,21 @@ int ompi_coll_tuned_reduce_scatter_intra_dec_dynamic(const void *sbuf, void *rbu
|
||||
&segsize, &ignoreme);
|
||||
if (alg) {
|
||||
/* we have found a valid choice from the file based rules for this message size */
|
||||
return ompi_coll_tuned_reduce_scatter_intra_do_this (sbuf, rbuf, rcounts,
|
||||
dtype, op,
|
||||
comm, module,
|
||||
alg, faninout,
|
||||
segsize);
|
||||
return ompi_coll_tuned_reduce_scatter_intra_do_this (sbuf, rbuf, rcounts, dtype,
|
||||
op, comm, module,
|
||||
alg, faninout, segsize);
|
||||
} /* found a method */
|
||||
} /*end if any com rules to check */
|
||||
|
||||
if (tuned_module->user_forced[REDUCESCATTER].algorithm) {
|
||||
return ompi_coll_tuned_reduce_scatter_intra_do_forced (sbuf, rbuf, rcounts,
|
||||
dtype, op,
|
||||
comm, module);
|
||||
return ompi_coll_tuned_reduce_scatter_intra_do_this(sbuf, rbuf, rcounts, dtype,
|
||||
op, comm, module,
|
||||
tuned_module->user_forced[REDUCESCATTER].algorithm,
|
||||
tuned_module->user_forced[REDUCESCATTER].chain_fanout,
|
||||
tuned_module->user_forced[REDUCESCATTER].segsize);
|
||||
}
|
||||
return ompi_coll_tuned_reduce_scatter_intra_dec_fixed (sbuf, rbuf, rcounts,
|
||||
dtype, op,
|
||||
comm, module);
|
||||
dtype, op, comm, module);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -421,9 +433,12 @@ int ompi_coll_tuned_allgather_intra_dec_dynamic(const void *sbuf, int scount,
|
||||
/* We do not have file based rules */
|
||||
if (tuned_module->user_forced[ALLGATHER].algorithm) {
|
||||
/* User-forced algorithm */
|
||||
return ompi_coll_tuned_allgather_intra_do_forced (sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
comm, module);
|
||||
return ompi_coll_tuned_allgather_intra_do_this(sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
comm, module,
|
||||
tuned_module->user_forced[ALLGATHER].algorithm,
|
||||
tuned_module->user_forced[ALLGATHER].tree_fanout,
|
||||
tuned_module->user_forced[ALLGATHER].segsize);
|
||||
}
|
||||
|
||||
/* Use default decision */
|
||||
@ -482,10 +497,12 @@ int ompi_coll_tuned_allgatherv_intra_dec_dynamic(const void *sbuf, int scount,
|
||||
/* We do not have file based rules */
|
||||
if (tuned_module->user_forced[ALLGATHERV].algorithm) {
|
||||
/* User-forced algorithm */
|
||||
return ompi_coll_tuned_allgatherv_intra_do_forced (sbuf, scount, sdtype,
|
||||
rbuf, rcounts,
|
||||
rdispls, rdtype,
|
||||
comm, module);
|
||||
return ompi_coll_tuned_allgatherv_intra_do_this(sbuf, scount, sdtype,
|
||||
rbuf, rcounts, rdispls, rdtype,
|
||||
comm, module,
|
||||
tuned_module->user_forced[ALLGATHERV].algorithm,
|
||||
tuned_module->user_forced[ALLGATHERV].tree_fanout,
|
||||
tuned_module->user_forced[ALLGATHERV].segsize);
|
||||
}
|
||||
|
||||
/* Use default decision */
|
||||
@ -532,9 +549,12 @@ int ompi_coll_tuned_gather_intra_dec_dynamic(const void *sbuf, int scount,
|
||||
} /*end if any com rules to check */
|
||||
|
||||
if (tuned_module->user_forced[GATHER].algorithm) {
|
||||
return ompi_coll_tuned_gather_intra_do_forced (sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
root, comm, module);
|
||||
return ompi_coll_tuned_gather_intra_do_this(sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
root, comm, module,
|
||||
tuned_module->user_forced[GATHER].algorithm,
|
||||
tuned_module->user_forced[GATHER].tree_fanout,
|
||||
tuned_module->user_forced[GATHER].segsize);
|
||||
}
|
||||
|
||||
return ompi_coll_tuned_gather_intra_dec_fixed (sbuf, scount, sdtype,
|
||||
@ -578,9 +598,12 @@ int ompi_coll_tuned_scatter_intra_dec_dynamic(const void *sbuf, int scount,
|
||||
} /*end if any com rules to check */
|
||||
|
||||
if (tuned_module->user_forced[SCATTER].algorithm) {
|
||||
return ompi_coll_tuned_scatter_intra_do_forced (sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
root, comm, module);
|
||||
return ompi_coll_tuned_scatter_intra_do_this(sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
root, comm, module,
|
||||
tuned_module->user_forced[SCATTER].algorithm,
|
||||
tuned_module->user_forced[SCATTER].chain_fanout,
|
||||
tuned_module->user_forced[SCATTER].segsize);
|
||||
}
|
||||
|
||||
return ompi_coll_tuned_scatter_intra_dec_fixed (sbuf, scount, sdtype,
|
||||
|
@ -119,47 +119,6 @@ ompi_coll_tuned_gather_intra_check_forced_init(coll_tuned_force_algorithm_mca_pa
|
||||
return (MPI_SUCCESS);
|
||||
}
|
||||
|
||||
int
|
||||
ompi_coll_tuned_gather_intra_do_forced(const void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void* rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
|
||||
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,
|
||||
"coll:tuned:gather_intra_do_forced selected algorithm %d",
|
||||
tuned_module->user_forced[GATHER].algorithm));
|
||||
|
||||
switch (tuned_module->user_forced[GATHER].algorithm) {
|
||||
case (0):
|
||||
return ompi_coll_tuned_gather_intra_dec_fixed(sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
root, comm, module);
|
||||
case (1):
|
||||
return ompi_coll_base_gather_intra_basic_linear(sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
root, comm, module);
|
||||
case (2):
|
||||
return ompi_coll_base_gather_intra_binomial(sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
root, comm, module);
|
||||
case (3):
|
||||
return ompi_coll_base_gather_intra_linear_sync(sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
root, comm, module,
|
||||
tuned_module->user_forced[GATHER].segsize);
|
||||
} /* switch */
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,
|
||||
"coll:tuned:gather_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
|
||||
tuned_module->user_forced[GATHER].algorithm,
|
||||
ompi_coll_tuned_forced_max_algorithms[GATHER]));
|
||||
return (MPI_ERR_ARG);
|
||||
}
|
||||
|
||||
int
|
||||
ompi_coll_tuned_gather_intra_do_this(const void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
|
@ -142,50 +142,6 @@ int ompi_coll_tuned_reduce_intra_check_forced_init (coll_tuned_force_algorithm_m
|
||||
return (MPI_SUCCESS);
|
||||
}
|
||||
|
||||
|
||||
int ompi_coll_tuned_reduce_intra_do_forced(const void *sbuf, void* rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op, int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
|
||||
|
||||
const int segsize = tuned_module->user_forced[REDUCE].segsize;
|
||||
const int chain_fanout = tuned_module->user_forced[REDUCE].chain_fanout;
|
||||
const int max_requests = tuned_module->user_forced[REDUCE].max_requests;
|
||||
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_forced selected algorithm %d",
|
||||
tuned_module->user_forced[REDUCE].algorithm));
|
||||
|
||||
|
||||
switch (tuned_module->user_forced[REDUCE].algorithm) {
|
||||
case (0): return ompi_coll_tuned_reduce_intra_dec_fixed(sbuf, rbuf, count, dtype,
|
||||
op, root, comm, module);
|
||||
case (1): return ompi_coll_base_reduce_intra_basic_linear(sbuf, rbuf, count, dtype,
|
||||
op, root, comm, module);
|
||||
case (2): return ompi_coll_base_reduce_intra_chain(sbuf, rbuf, count, dtype,
|
||||
op, root, comm, module,
|
||||
segsize, chain_fanout, max_requests);
|
||||
case (3): return ompi_coll_base_reduce_intra_pipeline(sbuf, rbuf, count, dtype,
|
||||
op, root, comm, module,
|
||||
segsize, max_requests);
|
||||
case (4): return ompi_coll_base_reduce_intra_binary(sbuf, rbuf, count, dtype,
|
||||
op, root, comm, module,
|
||||
segsize, max_requests);
|
||||
case (5): return ompi_coll_base_reduce_intra_binomial(sbuf, rbuf, count, dtype,
|
||||
op, root, comm, module,
|
||||
segsize, max_requests);
|
||||
case (6): return ompi_coll_base_reduce_intra_in_order_binary(sbuf, rbuf, count, dtype,
|
||||
op, root, comm, module,
|
||||
segsize, max_requests);
|
||||
} /* switch */
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
|
||||
tuned_module->user_forced[REDUCE].algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCE]));
|
||||
return (MPI_ERR_ARG);
|
||||
}
|
||||
|
||||
|
||||
int ompi_coll_tuned_reduce_intra_do_this(const void *sbuf, void* rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op, int root,
|
||||
|
@ -119,35 +119,6 @@ int ompi_coll_tuned_reduce_scatter_intra_check_forced_init (coll_tuned_force_alg
|
||||
return (MPI_SUCCESS);
|
||||
}
|
||||
|
||||
|
||||
int ompi_coll_tuned_reduce_scatter_intra_do_forced(const void *sbuf, void* rbuf,
|
||||
const int *rcounts,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
|
||||
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_scatter_intra_do_forced selected algorithm %d",
|
||||
tuned_module->user_forced[REDUCESCATTER].algorithm));
|
||||
|
||||
switch (tuned_module->user_forced[REDUCESCATTER].algorithm) {
|
||||
case (0): return ompi_coll_tuned_reduce_scatter_intra_dec_fixed(sbuf, rbuf, rcounts,
|
||||
dtype, op, comm, module);
|
||||
case (1): return ompi_coll_base_reduce_scatter_intra_nonoverlapping(sbuf, rbuf, rcounts,
|
||||
dtype, op, comm, module);
|
||||
case (2): return ompi_coll_base_reduce_scatter_intra_basic_recursivehalving(sbuf, rbuf, rcounts,
|
||||
dtype, op, comm, module);
|
||||
case (3): return ompi_coll_base_reduce_scatter_intra_ring(sbuf, rbuf, rcounts,
|
||||
dtype, op, comm, module);
|
||||
} /* switch */
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_scatter_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
|
||||
tuned_module->user_forced[REDUCESCATTER].algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCESCATTER]));
|
||||
return (MPI_ERR_ARG);
|
||||
}
|
||||
|
||||
|
||||
int ompi_coll_tuned_reduce_scatter_intra_do_this(const void *sbuf, void* rbuf,
|
||||
const int *rcounts,
|
||||
struct ompi_datatype_t *dtype,
|
||||
|
@ -117,41 +117,6 @@ ompi_coll_tuned_scatter_intra_check_forced_init(coll_tuned_force_algorithm_mca_p
|
||||
return (MPI_SUCCESS);
|
||||
}
|
||||
|
||||
int
|
||||
ompi_coll_tuned_scatter_intra_do_forced(const void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void* rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
|
||||
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,
|
||||
"coll:tuned:scatter_intra_do_forced selected algorithm %d",
|
||||
tuned_module->user_forced[SCATTER].algorithm));
|
||||
|
||||
switch (tuned_module->user_forced[SCATTER].algorithm) {
|
||||
case (0):
|
||||
return ompi_coll_tuned_scatter_intra_dec_fixed(sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
root, comm, module);
|
||||
case (1):
|
||||
return ompi_coll_base_scatter_intra_basic_linear(sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
root, comm, module);
|
||||
case (2):
|
||||
return ompi_coll_base_scatter_intra_binomial(sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
root, comm, module);
|
||||
} /* switch */
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,
|
||||
"coll:tuned:scatter_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
|
||||
tuned_module->user_forced[SCATTER].algorithm, ompi_coll_tuned_forced_max_algorithms[SCATTER]));
|
||||
return MPI_ERR_ARG;
|
||||
}
|
||||
|
||||
int
|
||||
ompi_coll_tuned_scatter_intra_do_this(const void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user