Correct the bcast problem where we always did a bcast with a segsize of 0.
Activate the reduce decision function. Other small updates (mostly TABs to spaces). This commit was SVN r12161.
Этот коммит содержится в:
родитель
50649dd6a9
Коммит
be27ee6fa0
@ -106,7 +106,6 @@ ompi_coll_tuned_allreduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
|
||||
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_basic_linear rank %d", rank));
|
||||
|
||||
/* Reduce to 0 and broadcast. */
|
||||
@ -228,5 +227,3 @@ switch (algorithm) {
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
@ -735,7 +735,6 @@ ompi_coll_tuned_bcast_intra_basic_linear (void *buff, int count,
|
||||
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"ompi_coll_tuned_bcast_intra_basic_linear rank %d root %d", rank, root));
|
||||
|
||||
|
||||
/* Non-root receive the data. */
|
||||
|
||||
if (rank != root) {
|
||||
|
@ -83,10 +83,8 @@ ompi_coll_tuned_allreduce_intra_dec_dynamic (void *sbuf, void *rbuf, int count,
|
||||
if (comm->c_coll_selected_data->user_forced[ALLREDUCE].algorithm) {
|
||||
return ompi_coll_tuned_allreduce_intra_do_forced (sbuf, rbuf, count, dtype, op, comm);
|
||||
}
|
||||
else {
|
||||
return ompi_coll_tuned_allreduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, comm);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* alltoall_intra_dec
|
||||
@ -130,10 +128,8 @@ int ompi_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount,
|
||||
if (comm->c_coll_selected_data->user_forced[ALLTOALL].algorithm) {
|
||||
return ompi_coll_tuned_alltoall_intra_do_forced (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
|
||||
}
|
||||
else {
|
||||
return ompi_coll_tuned_alltoall_intra_dec_fixed (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* barrier_intra_dec
|
||||
@ -165,12 +161,9 @@ int ompi_coll_tuned_barrier_intra_dec_dynamic(struct ompi_communicator_t *comm)
|
||||
if (comm->c_coll_selected_data->user_forced[BARRIER].algorithm) {
|
||||
return ompi_coll_tuned_barrier_intra_do_forced (comm);
|
||||
}
|
||||
else {
|
||||
return ompi_coll_tuned_barrier_intra_dec_fixed (comm);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* bcast_intra_dec
|
||||
*
|
||||
@ -208,12 +201,9 @@ int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count,
|
||||
if (comm->c_coll_selected_data->user_forced[BCAST].algorithm) {
|
||||
return ompi_coll_tuned_bcast_intra_do_forced (buff, count, datatype, root, comm);
|
||||
}
|
||||
else {
|
||||
return ompi_coll_tuned_bcast_intra_dec_fixed (buff, count, datatype, root, comm);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* reduce_intra_dec
|
||||
*
|
||||
@ -252,9 +242,6 @@ int ompi_coll_tuned_reduce_intra_dec_dynamic( void *sendbuf, void *recvbuf,
|
||||
if (comm->c_coll_selected_data->user_forced[REDUCE].algorithm) {
|
||||
return ompi_coll_tuned_reduce_intra_do_forced (sendbuf, recvbuf, count, datatype, op, root, comm);
|
||||
}
|
||||
else {
|
||||
return ompi_coll_tuned_reduce_intra_dec_fixed (sendbuf, recvbuf, count, datatype, op, root, comm);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -41,17 +41,9 @@ ompi_coll_tuned_allreduce_intra_dec_fixed (void *sbuf, void *rbuf, int count,
|
||||
struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
/* int size; */
|
||||
/* int contig; */
|
||||
/* int dsize; */
|
||||
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_allreduce_intra_dec_fixed"));
|
||||
|
||||
/* size = ompi_comm_size(comm); */
|
||||
|
||||
return (ompi_coll_tuned_allreduce_intra_nonoverlapping (sbuf, rbuf, count, dtype, op, comm));
|
||||
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
@ -68,11 +60,8 @@ int ompi_coll_tuned_alltoall_intra_dec_fixed(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
int comsize;
|
||||
int rank;
|
||||
int err;
|
||||
unsigned long dsize;
|
||||
unsigned long total_dsize;
|
||||
int comsize, rank, err;
|
||||
size_t dsize, total_dsize;
|
||||
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_alltoall_intra_dec_fixed"));
|
||||
|
||||
@ -91,18 +80,16 @@ int ompi_coll_tuned_alltoall_intra_dec_fixed(void *sbuf, int scount,
|
||||
return (err);
|
||||
}
|
||||
|
||||
total_dsize = dsize * scount * (unsigned long)comsize; /* needed for decision */
|
||||
total_dsize = dsize * scount * comsize; /* needed for decision */
|
||||
|
||||
if (comsize >= 12 && total_dsize <= 768) {
|
||||
return ompi_coll_tuned_alltoall_intra_bruck (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
|
||||
}
|
||||
else if (total_dsize <= 131072) {
|
||||
if (total_dsize <= 131072) {
|
||||
return ompi_coll_tuned_alltoall_intra_basic_linear (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
|
||||
}
|
||||
else {
|
||||
return ompi_coll_tuned_alltoall_intra_pairwise (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
@ -122,7 +109,6 @@ int ompi_coll_tuned_barrier_intra_dec_fixed(struct ompi_communicator_t *comm)
|
||||
|
||||
if (2==comsize)
|
||||
return ompi_coll_tuned_barrier_intra_two_procs(comm);
|
||||
else
|
||||
/* return ompi_coll_tuned_barrier_intra_doublering(comm); */
|
||||
return ompi_coll_tuned_barrier_intra_recursivedoubling(comm);
|
||||
/* return ompi_coll_tuned_barrier_intra_bruck(comm); */
|
||||
@ -142,13 +128,9 @@ int ompi_coll_tuned_bcast_intra_dec_fixed(void *buff, int count,
|
||||
struct ompi_datatype_t *datatype, int root,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
int comsize;
|
||||
int rank;
|
||||
int err;
|
||||
unsigned long msgsize;
|
||||
unsigned long dsize;
|
||||
int comsize, rank, err;
|
||||
int segsize = 0;
|
||||
|
||||
size_t msgsize, dsize;
|
||||
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"ompi_coll_tuned_bcast_intra_dec_fixed"));
|
||||
|
||||
@ -166,36 +148,31 @@ int ompi_coll_tuned_bcast_intra_dec_fixed(void *buff, int count,
|
||||
|
||||
/* this is based on gige measurements */
|
||||
|
||||
if ((comsize < 4)) {
|
||||
segsize = 0;
|
||||
if (comsize < 4) {
|
||||
return ompi_coll_tuned_bcast_intra_basic_linear (buff, count, datatype, root, comm);
|
||||
}
|
||||
else if (comsize == 4) {
|
||||
if (comsize == 4) {
|
||||
if (msgsize < 524288) segsize = 0;
|
||||
else msgsize = 16384;
|
||||
else segsize = 16384;
|
||||
return ompi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, segsize);
|
||||
}
|
||||
else if (comsize > 4 && comsize <= 8 && msgsize < 4096) {
|
||||
segsize = 0;
|
||||
if (comsize <= 8 && msgsize < 4096) {
|
||||
return ompi_coll_tuned_bcast_intra_basic_linear (buff, count, datatype, root, comm);
|
||||
}
|
||||
else if (comsize > 8 && msgsize >= 32768 && msgsize < 524288) {
|
||||
if (comsize > 8 && msgsize >= 32768 && msgsize < 524288) {
|
||||
segsize = 16384;
|
||||
return ompi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, segsize);
|
||||
}
|
||||
else if (comsize > 4 && msgsize >= 524288) {
|
||||
if (msgsize >= 524288) {
|
||||
segsize = 16384;
|
||||
return ompi_coll_tuned_bcast_intra_pipeline (buff, count, datatype, root, comm, segsize);
|
||||
}
|
||||
else {
|
||||
segsize = 0;
|
||||
/* once tested can swap this back in */
|
||||
/* return ompi_coll_tuned_bcast_intra_bmtree (buff, count, datatype, root, comm, segsize); */
|
||||
return ompi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, segsize);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* reduce_intra_dec
|
||||
*
|
||||
@ -209,15 +186,8 @@ int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf,
|
||||
struct ompi_op_t* op, int root,
|
||||
struct ompi_communicator_t* comm)
|
||||
{
|
||||
int comsize;
|
||||
int rank;
|
||||
int err;
|
||||
/* int contig; */
|
||||
unsigned long msgsize;
|
||||
unsigned long dsize;
|
||||
int segsize = 0;
|
||||
/* int fanout = 0; */
|
||||
|
||||
int comsize, rank, err, segsize = 0, fanout = 0;
|
||||
size_t msgsize, dsize;
|
||||
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_reduce_intra_dec_fixed"));
|
||||
|
||||
@ -231,35 +201,29 @@ int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf,
|
||||
return (err);
|
||||
}
|
||||
|
||||
msgsize = dsize * (unsigned long)count; /* needed for decision */
|
||||
msgsize = dsize * count; /* needed for decision */
|
||||
|
||||
return ompi_coll_tuned_reduce_intra_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm);
|
||||
#ifdef coconuts
|
||||
/* for small messages use linear algorithm */
|
||||
if (msgsize <= 4096) {
|
||||
segsize = 0;
|
||||
fanout = size-1;
|
||||
fanout = comsize - 1;
|
||||
/* when linear implemented or taken from basic put here, right now using chain as a linear system */
|
||||
/* it is implemented and I shouldn't be calling a chain with a fanout bigger than MAXTREEFANOUT from topo.h! */
|
||||
return ompi_coll_tuned_reduce_intra_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm);
|
||||
/* return ompi_coll_tuned_reduce_intra_chain (sendbuf, recvbuf, count, datatype, op, root, comm, segsize, fanout); */
|
||||
} else if (msgsize <= 65536 ) {
|
||||
}
|
||||
if (msgsize < 524288) {
|
||||
if (msgsize <= 65536 ) {
|
||||
segsize = 32768;
|
||||
fanout = 8;
|
||||
return ompi_coll_tuned_reduce_intra_chain (sendbuf, recvbuf, count, datatype, op, root, comm, segsize, fanout);
|
||||
} else if (msgsize < 524288) {
|
||||
} else {
|
||||
segsize = 1024;
|
||||
fanout = size/2;
|
||||
fanout = comsize/2;
|
||||
}
|
||||
/* later swap this for a binary tree */
|
||||
/* fanout = 2; */
|
||||
return ompi_coll_tuned_reduce_intra_chain (sendbuf, recvbuf, count, datatype, op, root, comm, segsize, fanout);
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
}
|
||||
segsize = 1024;
|
||||
return ompi_coll_tuned_reduce_intra_pipeline (sendbuf, recvbuf, count, datatype, op, root, comm, segsize);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -221,7 +221,6 @@ int ompi_coll_tuned_free_msg_rules_in_com_rule (ompi_coll_com_rule_t* com_p)
|
||||
|
||||
} /* if we have msg rules to free as well */
|
||||
|
||||
|
||||
return (rc);
|
||||
}
|
||||
|
||||
|
@ -61,8 +61,6 @@ int ompi_coll_tuned_forced_getvalues (coll_tuned_force_algorithm_mca_param_indic
|
||||
int ompi_coll_tuned_forced_getvalues_barrier (coll_tuned_force_algorithm_mca_param_indices_t mca_params,
|
||||
coll_tuned_force_algorithm_params_t *forced_values);
|
||||
|
||||
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
@ -480,7 +480,6 @@ mca_param_indices->chain_fanout_param_index = mca_base_param_reg_int(&mca_coll_t
|
||||
false, false,
|
||||
ompi_coll_tuned_init_chain_fanout, /* get system wide default */
|
||||
NULL);
|
||||
|
||||
return (MPI_SUCCESS);
|
||||
}
|
||||
|
||||
@ -506,7 +505,6 @@ switch (comm->c_coll_selected_data->user_forced[REDUCE].algorithm) {
|
||||
comm->c_coll_selected_data->user_forced[REDUCE].algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCE]));
|
||||
return (MPI_ERR_ARG);
|
||||
} /* switch */
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -531,6 +529,5 @@ switch (algorithm) {
|
||||
algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCE]));
|
||||
return (MPI_ERR_ARG);
|
||||
} /* switch */
|
||||
|
||||
}
|
||||
|
||||
|
@ -65,8 +65,6 @@ int ompi_coll_tuned_topo_destroy_chain( ompi_coll_chain_t** chain );
|
||||
int ompi_coll_tuned_topo_dump_tree (ompi_coll_tree_t* tree, int rank);
|
||||
int ompi_coll_tuned_topo_dump_chain (ompi_coll_chain_t* chain, int rank);
|
||||
|
||||
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
@ -98,3 +98,4 @@ ompi_status_public_t tmpstatus;
|
||||
OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred\n",__FILE__,line,err));
|
||||
return (err);
|
||||
}
|
||||
|
||||
|
@ -53,11 +53,9 @@ static inline int ompi_coll_tuned_sendrecv( void* sendbuf, int scount, ompi_data
|
||||
if ((dest==myid)&&(source==myid)) {
|
||||
return (int) ompi_ddt_sndrcv(sendbuf, (int32_t) scount, sdatatype, recvbuf, (int32_t) rcount, rdatatype);
|
||||
}
|
||||
else {
|
||||
return ompi_coll_tuned_sendrecv_actual (sendbuf, scount, sdatatype, dest, stag, recvbuf, rcount, rdatatype,
|
||||
source, rtag, comm, status);
|
||||
}
|
||||
}
|
||||
|
||||
int ompi_coll_tuned_sendrecv_actual_localcompleted( void* sendbuf, int scount, ompi_datatype_t* sdatatype,
|
||||
int dest, int stag,
|
||||
@ -79,11 +77,9 @@ static inline int ompi_coll_tuned_sendrecv_localcompleted( void* sendbuf, int sc
|
||||
if ((dest==myid)&&(source==myid)) {
|
||||
return (int) ompi_ddt_sndrcv(sendbuf, (int32_t) scount, sdatatype, recvbuf, (int32_t) rcount, rdatatype);
|
||||
}
|
||||
else {
|
||||
return ompi_coll_tuned_sendrecv_actual_localcompleted (sendbuf, scount, sdatatype, dest, stag, recvbuf, rcount, rdatatype,
|
||||
source, rtag, comm, status);
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user