2004-01-12 00:26:55 +03:00
|
|
|
/*
|
2005-11-05 22:57:48 +03:00
|
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
|
|
* University Research and Technology
|
|
|
|
* Corporation. All rights reserved.
|
2006-08-24 20:38:08 +04:00
|
|
|
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
2005-11-05 22:57:48 +03:00
|
|
|
* of Tennessee Research Foundation. All rights
|
|
|
|
* reserved.
|
2004-11-28 23:09:25 +03:00
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
|
|
* University of Stuttgart. All rights reserved.
|
2005-03-24 15:43:37 +03:00
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
2004-11-22 04:38:40 +03:00
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
2004-01-12 00:26:55 +03:00
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
|
2004-06-07 19:33:53 +04:00
|
|
|
#include "ompi_config.h"
|
2004-02-15 00:26:30 +03:00
|
|
|
#include "coll_basic.h"
|
2004-01-12 00:26:55 +03:00
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <errno.h>
|
|
|
|
|
|
|
|
#include "mpi.h"
|
2006-02-12 04:33:29 +03:00
|
|
|
#include "ompi/constants.h"
|
2005-09-13 00:21:53 +04:00
|
|
|
#include "ompi/mca/coll/coll.h"
|
|
|
|
#include "ompi/mca/coll/base/coll_tags.h"
|
2007-01-29 22:29:35 +03:00
|
|
|
#include "ompi/datatype/datatype.h"
|
2004-01-12 00:26:55 +03:00
|
|
|
#include "coll_basic.h"
|
2005-09-13 00:21:53 +04:00
|
|
|
#include "ompi/op/op.h"
|
2004-01-12 00:26:55 +03:00
|
|
|
|
2007-01-29 22:29:35 +03:00
|
|
|
#define COMMUTATIVE_LONG_MSG 8 * 1024 * 1024
|
2004-01-12 00:26:55 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* reduce_scatter
|
|
|
|
*
|
|
|
|
* Function: - reduce then scatter
|
|
|
|
* Accepts: - same as MPI_Reduce_scatter()
|
|
|
|
* Returns: - MPI_SUCCESS or error code
|
2007-01-29 22:29:35 +03:00
|
|
|
*
|
|
|
|
* Algorithm:
|
|
|
|
* Cummutative, reasonable sized messages
|
|
|
|
* recursive halving algorithm
|
|
|
|
* Others:
|
|
|
|
* reduce and scatterv (needs to be cleaned
|
|
|
|
* up at some point)
|
|
|
|
*
|
|
|
|
* NOTE: that the recursive halving algorithm should be faster than
|
|
|
|
* the reduce/scatter for all message sizes. However, the memory
|
|
|
|
* usage for the recusive halving is msg_size + 2 * comm_size greater
|
|
|
|
* for the recursive halving, so I've limited where the recursive
|
|
|
|
* halving is used to be nice to the app memory wise. There are much
|
|
|
|
* better algorithms for large messages with cummutative operations,
|
|
|
|
* so this should be investigated further.
|
2004-01-12 00:26:55 +03:00
|
|
|
*/
|
2005-08-10 14:51:42 +04:00
|
|
|
int
|
|
|
|
mca_coll_basic_reduce_scatter_intra(void *sbuf, void *rbuf, int *rcounts,
|
2005-08-10 21:53:43 +04:00
|
|
|
struct ompi_datatype_t *dtype,
|
|
|
|
struct ompi_op_t *op,
|
2007-08-19 07:37:49 +04:00
|
|
|
struct ompi_communicator_t *comm,
|
2008-07-29 02:40:57 +04:00
|
|
|
mca_coll_base_module_t *module)
|
2004-01-12 00:26:55 +03:00
|
|
|
{
|
2007-01-29 22:29:35 +03:00
|
|
|
int i, rank, size, count, err = OMPI_SUCCESS;
|
|
|
|
ptrdiff_t true_lb, true_extent, lb, extent, buf_size;
|
2005-08-10 14:51:42 +04:00
|
|
|
int *disps = NULL;
|
2007-01-29 22:29:35 +03:00
|
|
|
char *recv_buf = NULL, *recv_buf_free = NULL;
|
|
|
|
char *result_buf = NULL, *result_buf_free = NULL;
|
2004-01-12 00:26:55 +03:00
|
|
|
|
2005-08-10 14:51:42 +04:00
|
|
|
/* Initialize */
|
|
|
|
rank = ompi_comm_rank(comm);
|
|
|
|
size = ompi_comm_size(comm);
|
2004-06-29 04:02:25 +04:00
|
|
|
|
2007-01-29 22:29:35 +03:00
|
|
|
/* Find displacements and the like */
|
|
|
|
disps = (int*) malloc(sizeof(int) * size);
|
|
|
|
if (NULL == disps) return OMPI_ERR_OUT_OF_RESOURCE;
|
2004-06-29 04:02:25 +04:00
|
|
|
|
2007-01-29 22:29:35 +03:00
|
|
|
disps[0] = 0;
|
|
|
|
for (i = 0; i < (size - 1); ++i) {
|
|
|
|
disps[i + 1] = disps[i] + rcounts[i];
|
2004-01-12 00:26:55 +03:00
|
|
|
}
|
2007-01-29 22:29:35 +03:00
|
|
|
count = disps[size - 1] + rcounts[size - 1];
|
2004-01-12 00:26:55 +03:00
|
|
|
|
2007-01-29 22:29:35 +03:00
|
|
|
/* short cut the trivial case */
|
|
|
|
if (0 == count) {
|
|
|
|
free(disps);
|
|
|
|
return OMPI_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* get datatype information */
|
|
|
|
ompi_ddt_get_extent(dtype, &lb, &extent);
|
|
|
|
ompi_ddt_get_true_extent(dtype, &true_lb, &true_extent);
|
|
|
|
buf_size = true_extent + (count - 1) * extent;
|
2005-08-10 21:53:43 +04:00
|
|
|
|
2007-01-29 22:29:35 +03:00
|
|
|
/* Handle MPI_IN_PLACE */
|
|
|
|
if (MPI_IN_PLACE == sbuf) {
|
|
|
|
sbuf = rbuf;
|
|
|
|
}
|
2005-08-10 21:53:43 +04:00
|
|
|
|
2007-01-29 22:29:35 +03:00
|
|
|
if ((op->o_flags & OMPI_OP_FLAGS_COMMUTE) &&
|
|
|
|
(buf_size < COMMUTATIVE_LONG_MSG)) {
|
|
|
|
int tmp_size = 1, remain = 0, tmp_rank;
|
2005-08-10 21:53:43 +04:00
|
|
|
|
2007-01-29 22:29:35 +03:00
|
|
|
/* temporary receive buffer. See coll_basic_reduce.c for details on sizing */
|
|
|
|
recv_buf_free = (char*) malloc(buf_size);
|
|
|
|
recv_buf = recv_buf_free - lb;
|
|
|
|
if (NULL == recv_buf_free) {
|
|
|
|
err = OMPI_ERR_OUT_OF_RESOURCE;
|
|
|
|
goto cleanup;
|
2005-08-10 21:53:43 +04:00
|
|
|
}
|
|
|
|
|
2007-01-29 22:29:35 +03:00
|
|
|
/* allocate temporary buffer for results */
|
|
|
|
result_buf_free = (char*) malloc(buf_size);
|
|
|
|
result_buf = result_buf_free - lb;
|
|
|
|
|
|
|
|
/* copy local buffer into the temporary results */
|
|
|
|
err = ompi_ddt_sndrcv(sbuf, count, dtype, result_buf, count, dtype);
|
|
|
|
if (OMPI_SUCCESS != err) goto cleanup;
|
|
|
|
|
|
|
|
/* figure out power of two mapping: grow until larger than
|
|
|
|
comm size, then go back one, to get the largest power of
|
|
|
|
two less than comm size */
|
|
|
|
while (tmp_size <= size) tmp_size <<= 1;
|
|
|
|
tmp_size >>= 1;
|
|
|
|
remain = size - tmp_size;
|
|
|
|
|
|
|
|
/* If comm size is not a power of two, have the first "remain"
|
|
|
|
procs with an even rank send to rank + 1, leaving a power of
|
|
|
|
two procs to do the rest of the algorithm */
|
|
|
|
if (rank < 2 * remain) {
|
|
|
|
if ((rank & 1) == 0) {
|
|
|
|
err = MCA_PML_CALL(send(result_buf, count, dtype, rank + 1,
|
|
|
|
MCA_COLL_BASE_TAG_REDUCE_SCATTER,
|
|
|
|
MCA_PML_BASE_SEND_STANDARD,
|
|
|
|
comm));
|
|
|
|
if (OMPI_SUCCESS != err) goto cleanup;
|
|
|
|
|
|
|
|
/* we don't participate from here on out */
|
|
|
|
tmp_rank = -1;
|
|
|
|
} else {
|
|
|
|
err = MCA_PML_CALL(recv(recv_buf, count, dtype, rank - 1,
|
|
|
|
MCA_COLL_BASE_TAG_REDUCE_SCATTER,
|
|
|
|
comm, MPI_STATUS_IGNORE));
|
|
|
|
|
|
|
|
/* integrate their results into our temp results */
|
|
|
|
ompi_op_reduce(op, recv_buf, result_buf, count, dtype);
|
|
|
|
|
|
|
|
/* adjust rank to be the bottom "remain" ranks */
|
|
|
|
tmp_rank = rank / 2;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* just need to adjust rank to show that the bottom "even
|
|
|
|
remain" ranks dropped out */
|
|
|
|
tmp_rank = rank - remain;
|
2005-08-10 21:53:43 +04:00
|
|
|
}
|
2004-01-12 00:26:55 +03:00
|
|
|
|
2007-01-29 22:29:35 +03:00
|
|
|
/* For ranks not kicked out by the above code, perform the
|
|
|
|
recursive halving */
|
|
|
|
if (tmp_rank >= 0) {
|
|
|
|
int *tmp_disps = NULL, *tmp_rcounts = NULL;
|
|
|
|
int mask, send_index, recv_index, last_index;
|
2005-09-15 23:33:54 +04:00
|
|
|
|
2007-01-29 22:29:35 +03:00
|
|
|
/* recalculate disps and rcounts to account for the
|
|
|
|
special "remainder" processes that are no longer doing
|
|
|
|
anything */
|
|
|
|
tmp_rcounts = (int*) malloc(tmp_size * sizeof(int));
|
|
|
|
if (NULL == tmp_rcounts) {
|
|
|
|
err = OMPI_ERR_OUT_OF_RESOURCE;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
tmp_disps = (int*) malloc(tmp_size * sizeof(int));
|
|
|
|
if (NULL == tmp_disps) {
|
|
|
|
free(tmp_rcounts);
|
|
|
|
err = OMPI_ERR_OUT_OF_RESOURCE;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
2005-09-15 23:33:54 +04:00
|
|
|
|
2007-01-29 22:29:35 +03:00
|
|
|
for (i = 0 ; i < tmp_size ; ++i) {
|
|
|
|
if (i < remain) {
|
|
|
|
/* need to include old neighbor as well */
|
|
|
|
tmp_rcounts[i] = rcounts[i * 2 + 1] + rcounts[i * 2];
|
|
|
|
} else {
|
|
|
|
tmp_rcounts[i] = rcounts[i + remain];
|
|
|
|
}
|
|
|
|
}
|
2004-01-12 00:26:55 +03:00
|
|
|
|
2007-01-29 22:29:35 +03:00
|
|
|
tmp_disps[0] = 0;
|
|
|
|
for (i = 0; i < tmp_size - 1; ++i) {
|
|
|
|
tmp_disps[i + 1] = tmp_disps[i] + tmp_rcounts[i];
|
|
|
|
}
|
2004-01-12 00:26:55 +03:00
|
|
|
|
2007-01-29 22:29:35 +03:00
|
|
|
/* do the recursive halving communication. Don't use the
|
|
|
|
dimension information on the communicator because I
|
|
|
|
think the information is invalidated by our "shrinking"
|
|
|
|
of the communicator */
|
|
|
|
mask = tmp_size >> 1;
|
|
|
|
send_index = recv_index = 0;
|
|
|
|
last_index = tmp_size;
|
|
|
|
while (mask > 0) {
|
|
|
|
int tmp_peer, peer, send_count, recv_count;
|
|
|
|
struct ompi_request_t *request;
|
2004-01-12 00:26:55 +03:00
|
|
|
|
2007-01-29 22:29:35 +03:00
|
|
|
tmp_peer = tmp_rank ^ mask;
|
|
|
|
peer = (tmp_peer < remain) ? tmp_peer * 2 + 1 : tmp_peer + remain;
|
2004-06-29 04:02:25 +04:00
|
|
|
|
2007-01-29 22:29:35 +03:00
|
|
|
/* figure out if we're sending, receiving, or both */
|
|
|
|
send_count = recv_count = 0;
|
|
|
|
if (tmp_rank < tmp_peer) {
|
|
|
|
send_index = recv_index + mask;
|
|
|
|
for (i = send_index ; i < last_index ; ++i) {
|
|
|
|
send_count += tmp_rcounts[i];
|
|
|
|
}
|
|
|
|
for (i = recv_index ; i < send_index ; ++i) {
|
|
|
|
recv_count += tmp_rcounts[i];
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
recv_index = send_index + mask;
|
|
|
|
for (i = send_index ; i < recv_index ; ++i) {
|
|
|
|
send_count += tmp_rcounts[i];
|
|
|
|
}
|
|
|
|
for (i = recv_index ; i < last_index ; ++i) {
|
|
|
|
recv_count += tmp_rcounts[i];
|
|
|
|
}
|
|
|
|
}
|
2004-06-29 04:02:25 +04:00
|
|
|
|
2007-01-29 22:29:35 +03:00
|
|
|
/* actual data transfer. Send from result_buf,
|
|
|
|
receive into recv_buf */
|
|
|
|
if (send_count > 0 && recv_count != 0) {
|
|
|
|
err = MCA_PML_CALL(irecv(recv_buf + tmp_disps[recv_index] * extent,
|
|
|
|
recv_count, dtype, peer,
|
|
|
|
MCA_COLL_BASE_TAG_REDUCE_SCATTER,
|
|
|
|
comm, &request));
|
|
|
|
if (OMPI_SUCCESS != err) {
|
|
|
|
free(tmp_rcounts);
|
|
|
|
free(tmp_disps);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (recv_count > 0 && send_count != 0) {
|
|
|
|
err = MCA_PML_CALL(send(result_buf + tmp_disps[send_index] * extent,
|
|
|
|
send_count, dtype, peer,
|
|
|
|
MCA_COLL_BASE_TAG_REDUCE_SCATTER,
|
|
|
|
MCA_PML_BASE_SEND_STANDARD,
|
|
|
|
comm));
|
|
|
|
if (OMPI_SUCCESS != err) {
|
|
|
|
free(tmp_rcounts);
|
|
|
|
free(tmp_disps);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (send_count > 0 && recv_count != 0) {
|
|
|
|
err = ompi_request_wait(&request, MPI_STATUS_IGNORE);
|
|
|
|
if (OMPI_SUCCESS != err) {
|
|
|
|
free(tmp_rcounts);
|
|
|
|
free(tmp_disps);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* if we received something on this step, push it into
|
|
|
|
the results buffer */
|
|
|
|
if (recv_count > 0) {
|
|
|
|
ompi_op_reduce(op,
|
|
|
|
recv_buf + tmp_disps[recv_index] * extent,
|
|
|
|
result_buf + tmp_disps[recv_index] * extent,
|
|
|
|
recv_count, dtype);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* update for next iteration */
|
|
|
|
send_index = recv_index;
|
|
|
|
last_index = recv_index + mask;
|
|
|
|
mask >>= 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* copy local results from results buffer into real receive buffer */
|
|
|
|
if (0 != rcounts[rank]) {
|
|
|
|
err = ompi_ddt_sndrcv(result_buf + disps[rank] * extent,
|
|
|
|
rcounts[rank], dtype,
|
|
|
|
rbuf, rcounts[rank], dtype);
|
|
|
|
if (OMPI_SUCCESS != err) {
|
|
|
|
free(tmp_rcounts);
|
|
|
|
free(tmp_disps);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
free(tmp_rcounts);
|
|
|
|
free(tmp_disps);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Now fix up the non-power of two case, by having the odd
|
|
|
|
procs send the even procs the proper results */
|
|
|
|
if (rank < 2 * remain) {
|
|
|
|
if ((rank & 1) == 0) {
|
|
|
|
if (rcounts[rank]) {
|
|
|
|
err = MCA_PML_CALL(recv(rbuf, rcounts[rank], dtype, rank + 1,
|
|
|
|
MCA_COLL_BASE_TAG_REDUCE_SCATTER,
|
|
|
|
comm, MPI_STATUS_IGNORE));
|
|
|
|
if (OMPI_SUCCESS != err) goto cleanup;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (rcounts[rank - 1]) {
|
|
|
|
err = MCA_PML_CALL(send(result_buf + disps[rank - 1] * extent,
|
|
|
|
rcounts[rank - 1], dtype, rank - 1,
|
|
|
|
MCA_COLL_BASE_TAG_REDUCE_SCATTER,
|
|
|
|
MCA_PML_BASE_SEND_STANDARD,
|
|
|
|
comm));
|
|
|
|
if (OMPI_SUCCESS != err) goto cleanup;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
} else {
|
|
|
|
if (0 == rank) {
|
|
|
|
/* temporary receive buffer. See coll_basic_reduce.c for
|
|
|
|
details on sizing */
|
|
|
|
recv_buf_free = (char*) malloc(buf_size);
|
|
|
|
recv_buf = recv_buf_free - lb;
|
|
|
|
if (NULL == recv_buf_free) {
|
|
|
|
err = OMPI_ERR_OUT_OF_RESOURCE;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* reduction */
|
|
|
|
err =
|
|
|
|
comm->c_coll.coll_reduce(sbuf, recv_buf, count, dtype, op, 0,
|
2007-10-04 20:29:24 +04:00
|
|
|
comm, comm->c_coll.coll_reduce_module);
|
2007-01-29 22:29:35 +03:00
|
|
|
|
|
|
|
/* scatter */
|
|
|
|
if (MPI_SUCCESS == err) {
|
|
|
|
err = comm->c_coll.coll_scatterv(recv_buf, rcounts, disps, dtype,
|
|
|
|
rbuf, rcounts[rank], dtype, 0,
|
2007-10-04 20:29:24 +04:00
|
|
|
comm, comm->c_coll.coll_scatterv_module);
|
2007-01-29 22:29:35 +03:00
|
|
|
}
|
2005-08-10 14:51:42 +04:00
|
|
|
}
|
2004-01-12 00:26:55 +03:00
|
|
|
|
2007-01-29 22:29:35 +03:00
|
|
|
cleanup:
|
|
|
|
if (NULL != disps) free(disps);
|
|
|
|
if (NULL != recv_buf_free) free(recv_buf_free);
|
|
|
|
if (NULL != result_buf_free) free(result_buf_free);
|
|
|
|
|
2005-08-10 14:51:42 +04:00
|
|
|
return err;
|
2004-06-29 04:02:25 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* reduce_scatter_inter
|
|
|
|
*
|
|
|
|
* Function: - reduce/scatter operation
|
|
|
|
* Accepts: - same arguments as MPI_Reduce_scatter()
|
|
|
|
* Returns: - MPI_SUCCESS or error code
|
|
|
|
*/
|
2005-08-10 14:51:42 +04:00
|
|
|
int
|
|
|
|
mca_coll_basic_reduce_scatter_inter(void *sbuf, void *rbuf, int *rcounts,
|
2005-08-10 21:53:43 +04:00
|
|
|
struct ompi_datatype_t *dtype,
|
|
|
|
struct ompi_op_t *op,
|
2007-08-19 07:37:49 +04:00
|
|
|
struct ompi_communicator_t *comm,
|
2008-07-29 02:40:57 +04:00
|
|
|
mca_coll_base_module_t *module)
|
2004-06-29 04:02:25 +04:00
|
|
|
{
|
2006-10-18 00:20:58 +04:00
|
|
|
int err, i, rank, root = 0, rsize;
|
2004-08-04 02:12:57 +04:00
|
|
|
int totalcounts, tcount;
|
2006-10-18 00:20:58 +04:00
|
|
|
ptrdiff_t lb, extent;
|
2005-08-10 14:51:42 +04:00
|
|
|
char *tmpbuf = NULL, *tmpbuf2 = NULL, *tbuf = NULL;
|
2004-08-04 02:12:57 +04:00
|
|
|
ompi_request_t *req;
|
2007-08-19 07:37:49 +04:00
|
|
|
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
|
|
|
|
ompi_request_t **reqs = basic_module->mccb_reqs;
|
2004-08-04 02:12:57 +04:00
|
|
|
|
2005-08-10 14:51:42 +04:00
|
|
|
rank = ompi_comm_rank(comm);
|
|
|
|
rsize = ompi_comm_remote_size(comm);
|
2004-08-04 02:12:57 +04:00
|
|
|
|
|
|
|
/* According to MPI-2, the total sum of elements transfered has to
|
2005-08-10 14:51:42 +04:00
|
|
|
* be identical in both groups. Thus, it is enough to calculate
|
|
|
|
* that locally.
|
|
|
|
*/
|
|
|
|
for (totalcounts = 0, i = 0; i < rsize; i++) {
|
2005-08-10 21:53:43 +04:00
|
|
|
totalcounts += rcounts[i];
|
2004-08-04 02:12:57 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* determine result of the remote group, you cannot
|
2005-08-10 14:51:42 +04:00
|
|
|
* use coll_reduce for inter-communicators, since than
|
|
|
|
* you would need to determine an order between the
|
|
|
|
* two groups (e.g. which group is providing the data
|
|
|
|
* and which one enters coll_reduce with providing
|
|
|
|
* MPI_PROC_NULL as root argument etc.) Here,
|
|
|
|
* we execute the data exchange for both groups
|
|
|
|
* simultaniously. */
|
2004-08-04 02:12:57 +04:00
|
|
|
/*****************************************************************/
|
2005-08-10 14:51:42 +04:00
|
|
|
if (rank == root) {
|
2005-08-10 21:53:43 +04:00
|
|
|
err = ompi_ddt_get_extent(dtype, &lb, &extent);
|
|
|
|
if (OMPI_SUCCESS != err) {
|
|
|
|
return OMPI_ERROR;
|
|
|
|
}
|
|
|
|
|
|
|
|
tmpbuf = (char *) malloc(totalcounts * extent);
|
|
|
|
tmpbuf2 = (char *) malloc(totalcounts * extent);
|
|
|
|
if (NULL == tmpbuf || NULL == tmpbuf2) {
|
|
|
|
return OMPI_ERR_OUT_OF_RESOURCE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Do a send-recv between the two root procs. to avoid deadlock */
|
|
|
|
err = MCA_PML_CALL(isend(sbuf, totalcounts, dtype, 0,
|
|
|
|
MCA_COLL_BASE_TAG_REDUCE_SCATTER,
|
|
|
|
MCA_PML_BASE_SEND_STANDARD, comm, &req));
|
|
|
|
if (OMPI_SUCCESS != err) {
|
|
|
|
goto exit;
|
|
|
|
}
|
|
|
|
|
|
|
|
err = MCA_PML_CALL(recv(tmpbuf2, totalcounts, dtype, 0,
|
|
|
|
MCA_COLL_BASE_TAG_REDUCE_SCATTER, comm,
|
|
|
|
MPI_STATUS_IGNORE));
|
|
|
|
if (OMPI_SUCCESS != err) {
|
|
|
|
goto exit;
|
|
|
|
}
|
|
|
|
|
|
|
|
err = ompi_request_wait_all(1, &req, MPI_STATUS_IGNORE);
|
|
|
|
if (OMPI_SUCCESS != err) {
|
|
|
|
goto exit;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Loop receiving and calling reduction function (C or Fortran)
|
|
|
|
* The result of this reduction operations is then in
|
|
|
|
* tmpbuf2.
|
|
|
|
*/
|
|
|
|
for (i = 1; i < rsize; i++) {
|
|
|
|
err = MCA_PML_CALL(recv(tmpbuf, totalcounts, dtype, i,
|
|
|
|
MCA_COLL_BASE_TAG_REDUCE_SCATTER, comm,
|
|
|
|
MPI_STATUS_IGNORE));
|
|
|
|
if (MPI_SUCCESS != err) {
|
|
|
|
goto exit;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Perform the reduction */
|
|
|
|
ompi_op_reduce(op, tmpbuf, tmpbuf2, totalcounts, dtype);
|
|
|
|
}
|
2005-08-10 14:51:42 +04:00
|
|
|
} else {
|
2005-08-10 21:53:43 +04:00
|
|
|
/* If not root, send data to the root. */
|
|
|
|
err = MCA_PML_CALL(send(sbuf, totalcounts, dtype, root,
|
|
|
|
MCA_COLL_BASE_TAG_REDUCE_SCATTER,
|
|
|
|
MCA_PML_BASE_SEND_STANDARD, comm));
|
|
|
|
if (OMPI_SUCCESS != err) {
|
|
|
|
goto exit;
|
|
|
|
}
|
2004-08-04 02:12:57 +04:00
|
|
|
}
|
2005-08-10 14:51:42 +04:00
|
|
|
|
2004-08-04 02:12:57 +04:00
|
|
|
|
|
|
|
/* now we have on one process the result of the remote group. To distribute
|
2005-08-10 14:51:42 +04:00
|
|
|
* the data to all processes in the local group, we exchange the data between
|
|
|
|
* the two root processes. They then send it to every other process in the
|
|
|
|
* remote group.
|
|
|
|
*/
|
2004-08-04 02:12:57 +04:00
|
|
|
/***************************************************************************/
|
2005-08-10 14:51:42 +04:00
|
|
|
if (rank == root) {
|
2005-08-10 21:53:43 +04:00
|
|
|
/* sendrecv between the two roots */
|
|
|
|
err = MCA_PML_CALL(irecv(tmpbuf, totalcounts, dtype, 0,
|
|
|
|
MCA_COLL_BASE_TAG_REDUCE_SCATTER,
|
|
|
|
comm, &req));
|
|
|
|
if (OMPI_SUCCESS != err) {
|
|
|
|
goto exit;
|
|
|
|
}
|
|
|
|
|
|
|
|
err = MCA_PML_CALL(send(tmpbuf2, totalcounts, dtype, 0,
|
|
|
|
MCA_COLL_BASE_TAG_REDUCE_SCATTER,
|
|
|
|
MCA_PML_BASE_SEND_STANDARD, comm));
|
|
|
|
if (OMPI_SUCCESS != err) {
|
|
|
|
goto exit;
|
|
|
|
}
|
|
|
|
|
|
|
|
err = ompi_request_wait_all(1, &req, MPI_STATUS_IGNORE);
|
|
|
|
if (OMPI_SUCCESS != err) {
|
|
|
|
goto exit;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* distribute the data to other processes in remote group.
|
|
|
|
* Note that we start from 1 (not from zero), since zero
|
|
|
|
* has already the correct data AND we avoid a potential
|
|
|
|
* deadlock here.
|
|
|
|
*/
|
|
|
|
err = MCA_PML_CALL(irecv(rbuf, rcounts[rank], dtype, root,
|
|
|
|
MCA_COLL_BASE_TAG_REDUCE_SCATTER,
|
|
|
|
comm, &req));
|
|
|
|
|
|
|
|
tcount = 0;
|
|
|
|
for (i = 0; i < rsize; i++) {
|
|
|
|
tbuf = (char *) tmpbuf + tcount * extent;
|
|
|
|
err = MCA_PML_CALL(isend(tbuf, rcounts[i], dtype, i,
|
|
|
|
MCA_COLL_BASE_TAG_REDUCE_SCATTER,
|
|
|
|
MCA_PML_BASE_SEND_STANDARD, comm,
|
|
|
|
reqs++));
|
|
|
|
if (OMPI_SUCCESS != err) {
|
|
|
|
goto exit;
|
|
|
|
}
|
|
|
|
tcount += rcounts[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
err =
|
|
|
|
ompi_request_wait_all(rsize,
|
2007-08-19 07:37:49 +04:00
|
|
|
basic_module->mccb_reqs,
|
2005-08-10 21:53:43 +04:00
|
|
|
MPI_STATUSES_IGNORE);
|
|
|
|
if (OMPI_SUCCESS != err) {
|
|
|
|
goto exit;
|
|
|
|
}
|
|
|
|
|
|
|
|
err = ompi_request_wait_all(1, &req, MPI_STATUS_IGNORE);
|
|
|
|
if (OMPI_SUCCESS != err) {
|
|
|
|
goto exit;
|
|
|
|
}
|
2005-08-10 14:51:42 +04:00
|
|
|
} else {
|
2005-08-10 21:53:43 +04:00
|
|
|
err = MCA_PML_CALL(recv(rbuf, rcounts[rank], dtype, root,
|
|
|
|
MCA_COLL_BASE_TAG_REDUCE_SCATTER,
|
|
|
|
comm, MPI_STATUS_IGNORE));
|
2004-08-04 02:12:57 +04:00
|
|
|
}
|
|
|
|
|
2005-08-10 14:51:42 +04:00
|
|
|
exit:
|
|
|
|
if (NULL != tmpbuf) {
|
2005-08-10 21:53:43 +04:00
|
|
|
free(tmpbuf);
|
2004-08-04 02:12:57 +04:00
|
|
|
}
|
|
|
|
|
2005-08-10 14:51:42 +04:00
|
|
|
if (NULL != tmpbuf2) {
|
2005-08-10 21:53:43 +04:00
|
|
|
free(tmpbuf2);
|
2004-08-04 02:12:57 +04:00
|
|
|
}
|
|
|
|
|
2004-08-04 21:08:29 +04:00
|
|
|
return err;
|
2004-01-12 00:26:55 +03:00
|
|
|
}
|