
Implements the butterfly algorithm for MPI_Reduce_scatter. The algorithm can be used with both commutative and non-commutative operations, and for both power-of-two and non-power-of-two numbers of processes. Signed-off-by: Mikhail Kurnosov <mkurnosov@gmail.com>
96 строки
3.6 KiB
C
96 строки
3.6 KiB
C
/*
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
* University Research and Technology
|
|
* Corporation. All rights reserved.
|
|
* Copyright (c) 2004-2016 The University of Tennessee and The University
|
|
* of Tennessee Research Foundation. All rights
|
|
* reserved.
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
* University of Stuttgart. All rights reserved.
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
* All rights reserved.
|
|
* Copyright (c) 2014-2017 Research Organization for Information Science
|
|
* and Technology (RIST). All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
#include "ompi_config.h"
|
|
|
|
#include "mpi.h"
|
|
#include "ompi/constants.h"
|
|
#include "ompi/datatype/ompi_datatype.h"
|
|
#include "ompi/communicator/communicator.h"
|
|
#include "ompi/mca/coll/base/coll_tags.h"
|
|
#include "ompi/mca/coll/base/coll_base_functions.h"
|
|
#include "ompi/mca/pml/pml.h"
|
|
#include "coll_base_util.h"
|
|
|
|
int ompi_coll_base_sendrecv_actual( const void* sendbuf, size_t scount,
|
|
ompi_datatype_t* sdatatype,
|
|
int dest, int stag,
|
|
void* recvbuf, size_t rcount,
|
|
ompi_datatype_t* rdatatype,
|
|
int source, int rtag,
|
|
struct ompi_communicator_t* comm,
|
|
ompi_status_public_t* status )
|
|
|
|
{ /* post receive first, then send, then wait... should be fast (I hope) */
|
|
int err, line = 0;
|
|
size_t rtypesize, stypesize;
|
|
ompi_request_t *req;
|
|
ompi_status_public_t rstatus;
|
|
|
|
/* post new irecv */
|
|
ompi_datatype_type_size(rdatatype, &rtypesize);
|
|
err = MCA_PML_CALL(irecv( recvbuf, rcount, rdatatype, source, rtag,
|
|
comm, &req));
|
|
if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }
|
|
|
|
/* send data to children */
|
|
ompi_datatype_type_size(sdatatype, &stypesize);
|
|
err = MCA_PML_CALL(send( sendbuf, scount, sdatatype, dest, stag,
|
|
MCA_PML_BASE_SEND_STANDARD, comm));
|
|
if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }
|
|
|
|
err = ompi_request_wait( &req, &rstatus);
|
|
if (err != MPI_SUCCESS) { line = __LINE__; goto error_handler; }
|
|
|
|
if (MPI_STATUS_IGNORE != status) {
|
|
*status = rstatus;
|
|
}
|
|
|
|
return (MPI_SUCCESS);
|
|
|
|
error_handler:
|
|
/* Error discovered during the posting of the irecv or send,
|
|
* and no status is available.
|
|
*/
|
|
OPAL_OUTPUT ((ompi_coll_base_framework.framework_output, "%s:%d: Error %d occurred\n",
|
|
__FILE__, line, err));
|
|
(void)line; // silence compiler warning
|
|
if (MPI_STATUS_IGNORE != status) {
|
|
status->MPI_ERROR = err;
|
|
}
|
|
return (err);
|
|
}
|
|
|
|
/*
 * ompi_mirror_perm: Returns the mirror permutation (bit reversal) of the
 *                   nbits low-order bits of x [*].
 *
 * The whole word is reversed with a sequence of mask-and-swap steps
 * (adjacent bits, then pairs, nibbles, bytes and half-words); the reversed
 * low-order bits then sit at the top of the word and are shifted back down.
 * The masks assume a 32-bit unsigned int.
 *
 * x      value whose low-order bits are mirrored
 * nbits  number of low-order bits to mirror; values <= 0 yield 0 and
 *        values larger than the width of unsigned int are clamped to it
 *
 * Example: ompi_mirror_perm(0x6, 3) == 0x3   (110b -> 011b)
 *
 * [*] Warren Jr., Henry S. Hacker's Delight (2ed). 2013.
 *     Chapter 7. Rearranging Bits and Bytes.
 */
unsigned int ompi_mirror_perm(unsigned int x, int nbits)
{
    const int width = (int)(sizeof(x) * CHAR_BIT);

    /* Shifting by the full width (nbits == 0) or by a negative amount is
     * undefined behaviour in C; the mirror of zero bits is simply 0. */
    if (nbits <= 0) {
        return 0;
    }
    if (nbits > width) {
        nbits = width;
    }

    x = (((x & 0xaaaaaaaa) >> 1) | ((x & 0x55555555) << 1));
    x = (((x & 0xcccccccc) >> 2) | ((x & 0x33333333) << 2));
    x = (((x & 0xf0f0f0f0) >> 4) | ((x & 0x0f0f0f0f) << 4));
    x = (((x & 0xff00ff00) >> 8) | ((x & 0x00ff00ff) << 8));
    x = ((x >> 16) | (x << 16));

    return x >> (width - nbits);
}
|