Modifying type of binomial tree used for binomial reduce:
switching:

        0                 0
      / \ \             / \ \
     1   \ \    -->    4   \ \
    /     \ \         /     \ \
   3       2 \       3       2 \
              4                 1

(duh). The first form is the bmtree suitable for bcast, but the latter is better for reduce. Updating default decision function accordingly. This commit was SVN r15422.
Этот коммит содержится в:
родитель
685dd6f47b
Коммит
1b66a52c50
@ -351,19 +351,15 @@ int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf,
|
||||
if ((communicator_size < 8) && (message_size < 512)){
|
||||
/* Linear_0K */
|
||||
return ompi_coll_tuned_reduce_intra_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm);
|
||||
} else if ((communicator_size < 8) && (message_size < 20480)) {
|
||||
} else if (((communicator_size < 8) && (message_size < 20480)) ||
|
||||
(message_size < 2048) || (count <= 1)) {
|
||||
/* Binomial_0K */
|
||||
segsize = 0;
|
||||
return ompi_coll_tuned_reduce_intra_binomial(sendbuf, recvbuf, count, datatype, op, root, comm, segsize, max_requests);
|
||||
} else if ((message_size < 2048) ||
|
||||
(count <= 1)){
|
||||
/* Binary_0K */
|
||||
segsize = 0;
|
||||
return ompi_coll_tuned_reduce_intra_binary(sendbuf, recvbuf, count, datatype, op, root, comm, segsize, max_requests);
|
||||
} else if (communicator_size > (a1 * message_size + b1)) {
|
||||
/* Binary_1K */
|
||||
/* Binomial_1K */
|
||||
segsize = 1024;
|
||||
return ompi_coll_tuned_reduce_intra_binary(sendbuf, recvbuf, count, datatype, op, root, comm, segsize, max_requests);
|
||||
return ompi_coll_tuned_reduce_intra_binomial(sendbuf, recvbuf, count, datatype, op, root, comm, segsize, max_requests);
|
||||
} else if (communicator_size > (a2 * message_size + b2)) {
|
||||
/* Pipeline_1K */
|
||||
segsize = 1024;
|
||||
|
@ -437,7 +437,7 @@ int ompi_coll_tuned_reduce_intra_binomial( void *sendbuf, void *recvbuf,
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_binomial rank %d ss %5d",
|
||||
ompi_comm_rank(comm), segsize));
|
||||
|
||||
COLL_TUNED_UPDATE_BMTREE( comm, root );
|
||||
COLL_TUNED_UPDATE_IN_ORDER_BMTREE( comm, root );
|
||||
|
||||
/**
|
||||
* Determine number of segments and number of elements
|
||||
@ -448,7 +448,7 @@ int ompi_coll_tuned_reduce_intra_binomial( void *sendbuf, void *recvbuf,
|
||||
|
||||
return ompi_coll_tuned_reduce_generic( sendbuf, recvbuf, count, datatype,
|
||||
op, root, comm,
|
||||
comm->c_coll_selected_data->cached_bmtree,
|
||||
comm->c_coll_selected_data->cached_in_order_bmtree,
|
||||
segcount, max_outstanding_reqs );
|
||||
}
|
||||
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user