1
1

In the case where we detect a value of 0 in the recvcount array,
fall back to the simpler algorithms.  This is not the optimal
solution, but it works.

This commit was SVN r19702.
Этот коммит содержится в:
Rolf vandeVaart 2008-10-07 19:44:51 +00:00
родитель b7560c52be
Коммит 13e8975f83
2 изменённых файлов: 29 добавлений и 8 удалений

Просмотреть файл

@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -53,6 +54,10 @@
* halving is used to be nice to the app memory wise. There are much
* better algorithms for large messages with commutative operations,
* so this should be investigated further.
*
* NOTE: We default to a simple reduce/scatterv if one of the rcounts
* is zero. This is because the existing algorithms do not currently
* support a count of zero in the array.
*/
int
mca_coll_basic_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts,
@ -66,6 +71,7 @@ mca_coll_basic_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts,
int *disps = NULL;
char *recv_buf = NULL, *recv_buf_free = NULL;
char *result_buf = NULL, *result_buf_free = NULL;
bool zerocounts = false;
/* Initialize */
rank = ompi_comm_rank(comm);
@ -78,8 +84,14 @@ mca_coll_basic_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts,
disps[0] = 0;
for (i = 0; i < (size - 1); ++i) {
disps[i + 1] = disps[i] + rcounts[i];
if (0 == rcounts[i]) {
zerocounts = true;
}
}
count = disps[size - 1] + rcounts[size - 1];
if (0 == rcounts[size - 1]) {
zerocounts = true;
}
/* short cut the trivial case */
if (0 == count) {
@ -98,7 +110,7 @@ mca_coll_basic_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts,
}
if ((op->o_flags & OMPI_OP_FLAGS_COMMUTE) &&
(buf_size < COMMUTATIVE_LONG_MSG)) {
(buf_size < COMMUTATIVE_LONG_MSG) && (!zerocounts)) {
int tmp_size = 1, remain = 0, tmp_rank;
/* temporary receive buffer. See coll_basic_reduce.c for details on sizing */

Просмотреть файл

@ -450,7 +450,9 @@ int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf,
* Accepts: - same arguments as MPI_Reduce_scatter()
* Returns: - MPI_SUCCESS or error code (passed from
* the reduce scatter implementation)
*
* Note: If we detect zero-valued counts in the rcounts array, we
* fall back to the nonoverlapping algorithm because the other
* algorithms do not currently handle them.
*/
int ompi_coll_tuned_reduce_scatter_intra_dec_fixed( void *sbuf, void *rbuf,
int *rcounts,
@ -465,20 +467,27 @@ int ompi_coll_tuned_reduce_scatter_intra_dec_fixed( void *sbuf, void *rbuf,
const double b = 8.0;
const size_t small_message_size = 12 * 1024;
const size_t large_message_size = 256 * 1024;
bool zerocounts = false;
OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_reduce_scatter_intra_dec_fixed"));
if( !ompi_op_is_commute(op) ) {
comm_size = ompi_comm_size(comm);
/* We need data size for decision function */
ompi_ddt_type_size(dtype, &dsize);
total_message_size = 0;
for (i = 0; i < comm_size; i++) {
total_message_size += rcounts[i];
if (0 == rcounts[i]) {
zerocounts = true;
}
}
if( !ompi_op_is_commute(op) || (zerocounts)) {
return ompi_coll_tuned_reduce_scatter_intra_nonoverlapping (sbuf, rbuf, rcounts,
dtype, op,
comm, module);
}
comm_size = ompi_comm_size(comm);
/* We need data size for decision function */
ompi_ddt_type_size(dtype, &dsize);
total_message_size = 0;
for (i = 0; i < comm_size; i++) { total_message_size += rcounts[i]; }
total_message_size *= dsize;
/* compute the nearest power of 2 */