Allow forced algorithms (where the user or *test* suite knows better) to
go through the dynamic decision rule interface. (Forced algorithms are set with MCA params.) Fixed some silly verbose output that had the wrong function name in it, etc. Updates to the fixed decision rules. This commit was SVN r7940.
Этот коммит содержится в:
родитель
aa5b61e4f1
Коммит
fe03e068f2
@ -208,6 +208,9 @@ int mca_coll_tuned_allreduce_intra_do_forced(void *sbuf, void *rbuf, int count,
|
||||
struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:allreduce_intra_do_forced selected algorithm %d",
|
||||
mca_coll_tuned_allreduce_forced_choice));
|
||||
|
||||
switch (mca_coll_tuned_allreduce_forced_choice) {
|
||||
case (0): return mca_coll_tuned_allreduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, comm);
|
||||
case (1): return mca_coll_tuned_allreduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, comm);
|
||||
|
@ -53,7 +53,12 @@ mca_coll_tuned_allreduce_intra_dec_dynamic (void *sbuf, void *rbuf, int count,
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
return OMPI_ERR_NOT_IMPLEMENTED;
|
||||
if (mca_coll_tuned_allreduce_forced_choice) {
|
||||
return mca_coll_tuned_allreduce_intra_do_forced (sbuf, rbuf, count, dtype, op, comm);
|
||||
}
|
||||
else {
|
||||
return mca_coll_tuned_allreduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, comm);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -487,6 +487,8 @@ int mca_coll_tuned_alltoall_intra_do_forced(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:alltoall_intra_do_forced selected algorithm %d", mca_coll_tuned_alltoall_forced_choice));
|
||||
|
||||
switch (mca_coll_tuned_alltoall_forced_choice) {
|
||||
case (0): return mca_coll_tuned_alltoall_intra_dec_fixed (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
|
||||
case (1): return mca_coll_tuned_alltoall_intra_basic_linear (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
|
||||
|
@ -54,7 +54,12 @@ int mca_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount,
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
return OMPI_ERR_NOT_IMPLEMENTED;
|
||||
if (mca_coll_tuned_alltoall_forced_choice) {
|
||||
return mca_coll_tuned_alltoall_intra_do_forced (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
|
||||
}
|
||||
else {
|
||||
return mca_coll_tuned_alltoall_intra_dec_fixed (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -74,9 +74,7 @@ int mca_coll_tuned_alltoall_intra_dec_fixed(void *sbuf, int scount,
|
||||
return mca_coll_tuned_alltoall_intra_bruck (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
|
||||
}
|
||||
else if (dsize <= 131072) {
|
||||
/* not implemented yet.. need to find a 'nice' way to use the basic linear version without duplicating code */
|
||||
/* return mca_coll_tuned_alltoall_intra_linear (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); */
|
||||
return mca_coll_tuned_alltoall_intra_pairwise (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
|
||||
return mca_coll_tuned_alltoall_intra_basic_linear (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
|
||||
}
|
||||
else {
|
||||
return mca_coll_tuned_alltoall_intra_pairwise (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
|
||||
|
@ -42,7 +42,7 @@ int mca_coll_tuned_barrier_intra_doublering(struct ompi_communicator_t *comm)
|
||||
rank = ompi_comm_rank(comm);
|
||||
size = ompi_comm_size(comm);
|
||||
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_bcast_intra_doublering rank %d", rank));
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_barrier_intra_doublering rank %d", rank));
|
||||
|
||||
left = ((rank-1)%size);
|
||||
right = ((rank+1)%size);
|
||||
@ -100,7 +100,7 @@ int mca_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t *c
|
||||
|
||||
rank = ompi_comm_rank(comm);
|
||||
size = ompi_comm_size(comm);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_bcast_intra_recursivedoubling rank %d", rank));
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_barrier_intra_recursivedoubling rank %d", rank));
|
||||
|
||||
/* do nearest power of 2 less than size calc */
|
||||
adjsize = 1;
|
||||
@ -175,7 +175,7 @@ int mca_coll_tuned_barrier_intra_bruck(struct ompi_communicator_t *comm)
|
||||
|
||||
rank = ompi_comm_rank(comm);
|
||||
size = ompi_comm_size(comm);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_bcast_intra_bruck rank %d", rank));
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_barrier_intra_bruck rank %d", rank));
|
||||
|
||||
/* exchange data with rank-2^k and rank+2^k */
|
||||
for (distance = 1; distance < size; distance <<= 1) {
|
||||
@ -202,7 +202,7 @@ int mca_coll_tuned_barrier_intra_two_procs(struct ompi_communicator_t *comm)
|
||||
int err=0;
|
||||
|
||||
rank = ompi_comm_rank(comm);
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_bcast_intra_two_procs rank %d", rank));
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"mca_coll_tuned_barrier_intra_two_procs rank %d", rank));
|
||||
|
||||
if (0==rank) {
|
||||
err = coll_tuned_sendrecv (NULL, 0, MPI_BYTE, 1, MCA_COLL_BASE_TAG_BARRIER,
|
||||
@ -252,9 +252,11 @@ int mca_coll_tuned_barrier_intra_query ( )
|
||||
|
||||
int mca_coll_tuned_barrier_intra_do_forced(struct ompi_communicator_t *comm)
|
||||
{
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:barrier_intra_do_forced selected algorithm %d", mca_coll_tuned_barrier_forced_choice));
|
||||
|
||||
switch (mca_coll_tuned_barrier_forced_choice) {
|
||||
case (0): return mca_coll_tuned_barrier_intra_dec_fixed (comm);
|
||||
/* case (1): return mca_coll_tuned_barrier_intra_basic_linear (comm); */
|
||||
/* case (1): return mca_coll_tuned_barrier_intra_basic_linear (comm); */
|
||||
case (2): return mca_coll_tuned_barrier_intra_doublering (comm);
|
||||
case (3): return mca_coll_tuned_barrier_intra_recursivedoubling (comm);
|
||||
case (4): return mca_coll_tuned_barrier_intra_bruck (comm);
|
||||
|
@ -49,8 +49,13 @@ int mca_coll_tuned_barrier_intra_dec_dynamic(struct ompi_communicator_t *comm)
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
if (mca_coll_tuned_barrier_forced_choice) {
|
||||
return mca_coll_tuned_barrier_intra_do_forced (comm);
|
||||
}
|
||||
else {
|
||||
return mca_coll_tuned_barrier_intra_dec_fixed (comm);
|
||||
}
|
||||
|
||||
return OMPI_ERR_NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
|
||||
|
@ -820,6 +820,8 @@ int mca_coll_tuned_bcast_intra_do_forced(void *buf, int count,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:bcast_intra_do_forced algorithm %d", mca_coll_tuned_bcast_forced_choice));
|
||||
|
||||
switch (mca_coll_tuned_bcast_forced_choice) {
|
||||
case (0): return mca_coll_tuned_bcast_intra_dec_fixed (buf, count, dtype, root, comm);
|
||||
case (1): return mca_coll_tuned_bcast_intra_basic_linear (buf, count, dtype, root, comm);
|
||||
|
@ -29,11 +29,11 @@
|
||||
#include "coll_tuned.h"
|
||||
|
||||
/*
|
||||
* bcast_intra_dec
|
||||
* bcast_intra_dec
|
||||
*
|
||||
* Function: - selects broadcast algorithm to use
|
||||
* Accepts: - same arguments as MPI_Bcast()
|
||||
* Returns: - MPI_SUCCESS or error code (passed from the bcast implementation)
|
||||
* Function: - selects broadcast algorithm to use
|
||||
* Accepts: - same arguments as MPI_Bcast()
|
||||
* Returns: - MPI_SUCCESS or error code (passed from the bcast implementation)
|
||||
*/
|
||||
int mca_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count,
|
||||
struct ompi_datatype_t *datatype, int root,
|
||||
@ -51,7 +51,13 @@ int mca_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count,
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
return OMPI_ERR_NOT_IMPLEMENTED;
|
||||
if (mca_coll_tuned_bcast_forced_choice) {
|
||||
return mca_coll_tuned_bcast_intra_do_forced (buff, count, datatype, root, comm);
|
||||
}
|
||||
else {
|
||||
return mca_coll_tuned_bcast_intra_dec_fixed (buff, count, datatype, root, comm);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -102,26 +102,26 @@ static const mca_coll_base_module_1_0_0_t intra_dynamic = {
|
||||
NULL,
|
||||
/* mca_coll_tuned_allgatherv_intra_dec_dynamic, */
|
||||
NULL,
|
||||
/* mca_coll_tuned_allreduce_intra_dec_dynamic, */
|
||||
NULL,
|
||||
/* mca_coll_tuned_alltoall_intra_dec_dynamic, */
|
||||
NULL,
|
||||
mca_coll_tuned_allreduce_intra_dec_dynamic,
|
||||
/* NULL, */
|
||||
mca_coll_tuned_alltoall_intra_dec_dynamic,
|
||||
/* NULL, */
|
||||
/* mca_coll_tuned_alltoallv_intra_dec_dynamic, */
|
||||
NULL,
|
||||
/* mca_coll_tuned_alltoallw_intra_dec_dynamic, */
|
||||
NULL,
|
||||
/* mca_coll_tuned_barrier_intra_dec_dynamic, */
|
||||
NULL,
|
||||
/* mca_coll_tuned_bcast_intra_dec_dynamic, */
|
||||
NULL,
|
||||
mca_coll_tuned_barrier_intra_dec_dynamic,
|
||||
/* NULL, */
|
||||
mca_coll_tuned_bcast_intra_dec_dynamic,
|
||||
/* NULL, */
|
||||
/* mca_coll_tuned_exscan_intra_dec_dynamic, */
|
||||
NULL,
|
||||
/* mca_coll_tuned_gather_intra_dec_dynamic, */
|
||||
NULL,
|
||||
/* mca_coll_tuned_gatherv_intra_dec_dynamic, */
|
||||
NULL,
|
||||
/* mca_coll_tuned_reduce_intra_dec_dynamic, */
|
||||
NULL,
|
||||
mca_coll_tuned_reduce_intra_dec_dynamic,
|
||||
/* NULL, */
|
||||
/* mca_coll_tuned_reduce_scatter_intra_dec_dynamic, */
|
||||
NULL,
|
||||
/* mca_coll_tuned_scan_intra_dec_dynamic, */
|
||||
|
@ -518,6 +518,8 @@ int mca_coll_tuned_reduce_intra_do_forced(void *sbuf, void* rbuf, int count,
|
||||
struct ompi_op_t *op, int root,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:reduce_intra_do_forced selected algorithm %d", mca_coll_tuned_reduce_forced_choice));
|
||||
|
||||
switch (mca_coll_tuned_reduce_forced_choice) {
|
||||
case (0): return mca_coll_tuned_reduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, root, comm);
|
||||
case (1): return mca_coll_tuned_reduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, root, comm);
|
||||
|
@ -54,6 +54,12 @@ int mca_coll_tuned_reduce_intra_dec_dynamic( void *sendbuf, void *recvbuf,
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
return OMPI_ERR_NOT_IMPLEMENTED;
|
||||
if (mca_coll_tuned_reduce_forced_choice) {
|
||||
return mca_coll_tuned_reduce_intra_do_forced (sendbuf, recvbuf, count, datatype, op, root, comm);
|
||||
}
|
||||
else {
|
||||
return mca_coll_tuned_reduce_intra_dec_fixed (sendbuf, recvbuf, count, datatype, op, root, comm);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user