Redirect most of the basic module functions to base.
Этот коммит содержится в:
родитель
211f05fb09
Коммит
67d01bd8cd
@ -593,12 +593,12 @@ int ompi_coll_base_allgatherv_intra_two_procs(void *sbuf, int scount,
|
||||
*/
|
||||
int
|
||||
ompi_coll_base_allgatherv_intra_basic_default(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int *rcounts,
|
||||
int *disps,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int *rcounts,
|
||||
int *disps,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
int i, size, rank, err;
|
||||
MPI_Aint extent, lb;
|
||||
|
@ -35,11 +35,11 @@
|
||||
#include "coll_base_util.h"
|
||||
|
||||
/* MPI_IN_PLACE all to all algorithm. TODO: implement a better one. */
|
||||
static int
|
||||
int
|
||||
mca_coll_base_alltoall_intra_basic_inplace(void *rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
|
||||
int i, j, size, rank, err=MPI_SUCCESS;
|
||||
@ -567,11 +567,11 @@ int ompi_coll_base_alltoall_intra_two_procs(void *sbuf, int scount,
|
||||
/* copied function (with appropriate renaming) starts here */
|
||||
|
||||
int ompi_coll_base_alltoall_intra_basic_linear(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void* rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void* rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
int i, rank, size, err, nreqs;
|
||||
char *psnd, *prcv;
|
||||
@ -593,7 +593,6 @@ int ompi_coll_base_alltoall_intra_basic_linear(void *sbuf, int scount,
|
||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
||||
"ompi_coll_base_alltoall_intra_basic_linear rank %d", rank));
|
||||
|
||||
|
||||
err = ompi_datatype_get_extent(sdtype, &lb, &sndinc);
|
||||
if (OMPI_SUCCESS != err) {
|
||||
return err;
|
||||
|
@ -36,7 +36,7 @@
|
||||
#include "coll_base_topo.h"
|
||||
#include "coll_base_util.h"
|
||||
|
||||
static int
|
||||
int
|
||||
mca_coll_base_alltoallv_intra_basic_inplace(void *rbuf, const int *rcounts, const int *rdisps,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
|
@ -321,8 +321,8 @@ int ompi_coll_base_barrier_intra_two_procs(struct ompi_communicator_t *comm,
|
||||
|
||||
/* copied function (with appropriate renaming) starts here */
|
||||
|
||||
static int ompi_coll_base_barrier_intra_basic_linear(struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
int ompi_coll_base_barrier_intra_basic_linear(struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
int i, err, rank, size;
|
||||
|
||||
@ -363,15 +363,14 @@ static int ompi_coll_base_barrier_intra_basic_linear(struct ompi_communicator_t
|
||||
ompi_request_wait_all( size-1, requests+1, MPI_STATUSES_IGNORE );
|
||||
|
||||
for (i = 1; i < size; ++i) {
|
||||
err = MCA_PML_CALL(isend(NULL, 0, MPI_BYTE, i,
|
||||
MCA_COLL_BASE_TAG_BARRIER,
|
||||
MCA_PML_BASE_SEND_STANDARD, comm,
|
||||
&(requests[i])));
|
||||
err = MCA_PML_CALL(send(NULL, 0, MPI_BYTE, i,
|
||||
MCA_COLL_BASE_TAG_BARRIER,
|
||||
MCA_PML_BASE_SEND_STANDARD, comm));
|
||||
if (MPI_SUCCESS != err) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
ompi_request_wait_all( size-1, requests+1, MPI_STATUSES_IGNORE );
|
||||
|
||||
free( requests );
|
||||
}
|
||||
|
||||
|
@ -631,10 +631,10 @@ ompi_coll_base_bcast_intra_split_bintree ( void* buffer,
|
||||
* Returns: - MPI_SUCCESS or error code
|
||||
*/
|
||||
int
|
||||
ompi_coll_base_bcast_intra_basic_linear (void *buff, int count,
|
||||
struct ompi_datatype_t *datatype, int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
ompi_coll_base_bcast_intra_basic_linear(void *buff, int count,
|
||||
struct ompi_datatype_t *datatype, int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
int i, size, rank, err;
|
||||
mca_coll_base_comm_t *data = module->base_data;
|
||||
|
@ -104,10 +104,18 @@ int ompi_coll_base_alltoall_intra_bruck(ALLTOALL_ARGS);
|
||||
int ompi_coll_base_alltoall_intra_basic_linear(ALLTOALL_ARGS);
|
||||
int ompi_coll_base_alltoall_intra_linear_sync(ALLTOALL_ARGS, int max_requests);
|
||||
int ompi_coll_base_alltoall_intra_two_procs(ALLTOALL_ARGS);
|
||||
int mca_coll_base_alltoall_intra_basic_inplace(void *rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module); /* special version for INPLACE */
|
||||
|
||||
/* AlltoAllV */
|
||||
int ompi_coll_base_alltoallv_intra_pairwise(ALLTOALLV_ARGS);
|
||||
int ompi_coll_base_alltoallv_intra_basic_linear(ALLTOALLV_ARGS);
|
||||
int mca_coll_base_alltoallv_intra_basic_inplace(void *rbuf, const int *rcounts, const int *rdisps,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module); /* special version for INPLACE */
|
||||
|
||||
/* AlltoAllW */
|
||||
|
||||
@ -116,8 +124,8 @@ int ompi_coll_base_barrier_intra_doublering(BARRIER_ARGS);
|
||||
int ompi_coll_base_barrier_intra_recursivedoubling(BARRIER_ARGS);
|
||||
int ompi_coll_base_barrier_intra_bruck(BARRIER_ARGS);
|
||||
int ompi_coll_base_barrier_intra_two_procs(BARRIER_ARGS);
|
||||
int ompi_coll_base_barrier_intra_linear(BARRIER_ARGS);
|
||||
int ompi_coll_base_barrier_intra_tree(BARRIER_ARGS);
|
||||
int ompi_coll_base_barrier_intra_basic_linear(BARRIER_ARGS);
|
||||
|
||||
/* Bcast */
|
||||
int ompi_coll_base_bcast_intra_basic_linear(BCAST_ARGS);
|
||||
|
@ -606,16 +606,16 @@ ompi_coll_base_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
|
||||
{
|
||||
int i, rank, err, size;
|
||||
ptrdiff_t true_lb, true_extent, lb, extent;
|
||||
char *free_buffer = NULL, *pml_buffer = NULL;
|
||||
char *inplace_temp = NULL, *inbuf;
|
||||
char *free_buffer = NULL;
|
||||
char *pml_buffer = NULL;
|
||||
char *inplace_temp = NULL;
|
||||
char *inbuf;
|
||||
|
||||
/* Initialize */
|
||||
|
||||
rank = ompi_comm_rank(comm);
|
||||
size = ompi_comm_size(comm);
|
||||
|
||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:reduce_intra_basic_linear rank %d", rank));
|
||||
|
||||
/* If not root, send data to the root. */
|
||||
|
||||
if (rank != root) {
|
||||
@ -625,16 +625,136 @@ ompi_coll_base_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
|
||||
return err;
|
||||
}
|
||||
|
||||
/* see discussion in ompi_coll_basic_reduce_lin_intra about
|
||||
extent and true extent */
|
||||
/* for reducing buffer allocation lengths.... */
|
||||
/* Root receives and reduces messages. Allocate buffer to receive
|
||||
* messages. This comment applies to all collectives in this basic
|
||||
* module where we allocate a temporary buffer. For the next few
|
||||
* lines of code, it's tremendously complicated how we decided that
|
||||
* this was the Right Thing to do. Sit back and enjoy. And prepare
|
||||
* to have your mind warped. :-)
|
||||
*
|
||||
* Recall some definitions (I always get these backwards, so I'm
|
||||
* going to put them here):
|
||||
*
|
||||
* extent: the length from the lower bound to the upper bound -- may
|
||||
* be considerably larger than the buffer required to hold the data
|
||||
* (or smaller! But it's easiest to think about when it's larger).
|
||||
*
|
||||
* true extent: the exact number of bytes required to hold the data
|
||||
* in the layout pattern in the datatype.
|
||||
*
|
||||
* For example, consider the following buffer (just talking about
|
||||
* true_lb, extent, and true extent -- extrapolate for true_ub:
|
||||
*
|
||||
* A B C
|
||||
* --------------------------------------------------------
|
||||
* | | |
|
||||
* --------------------------------------------------------
|
||||
*
|
||||
* There are multiple cases:
|
||||
*
|
||||
* 1. A is what we give to MPI_Send (and friends), and A is where
|
||||
* the data starts, and C is where the data ends. In this case:
|
||||
*
|
||||
* - extent: C-A
|
||||
* - true extent: C-A
|
||||
* - true_lb: 0
|
||||
*
|
||||
* A C
|
||||
* --------------------------------------------------------
|
||||
* | |
|
||||
* --------------------------------------------------------
|
||||
* <=======================extent=========================>
|
||||
* <======================true extent=====================>
|
||||
*
|
||||
* 2. A is what we give to MPI_Send (and friends), B is where the
|
||||
* data starts, and C is where the data ends. In this case:
|
||||
*
|
||||
* - extent: C-A
|
||||
* - true extent: C-B
|
||||
* - true_lb: positive
|
||||
*
|
||||
* A B C
|
||||
* --------------------------------------------------------
|
||||
* | | User buffer |
|
||||
* --------------------------------------------------------
|
||||
* <=======================extent=========================>
|
||||
* <===============true extent=============>
|
||||
*
|
||||
* 3. B is what we give to MPI_Send (and friends), A is where the
|
||||
* data starts, and C is where the data ends. In this case:
|
||||
*
|
||||
* - extent: C-A
|
||||
* - true extent: C-A
|
||||
* - true_lb: negative
|
||||
*
|
||||
* A B C
|
||||
* --------------------------------------------------------
|
||||
* | | User buffer |
|
||||
* --------------------------------------------------------
|
||||
* <=======================extent=========================>
|
||||
* <======================true extent=====================>
|
||||
*
|
||||
* 4. MPI_BOTTOM is what we give to MPI_Send (and friends), B is
|
||||
* where the data starts, and C is where the data ends. In this
|
||||
* case:
|
||||
*
|
||||
* - extent: C-MPI_BOTTOM
|
||||
* - true extent: C-B
|
||||
* - true_lb: [potentially very large] positive
|
||||
*
|
||||
* MPI_BOTTOM B C
|
||||
* --------------------------------------------------------
|
||||
* | | User buffer |
|
||||
* --------------------------------------------------------
|
||||
* <=======================extent=========================>
|
||||
* <===============true extent=============>
|
||||
*
|
||||
* So in all cases, for a temporary buffer, all we need to malloc()
|
||||
* is a buffer of size true_extent. We therefore need to know two
|
||||
* pointer values: what value to give to MPI_Send (and friends) and
|
||||
* what value to give to free(), because they might not be the same.
|
||||
*
|
||||
* Clearly, what we give to free() is exactly what was returned from
|
||||
* malloc(). That part is easy. :-)
|
||||
*
|
||||
* What we give to MPI_Send (and friends) is a bit more complicated.
|
||||
* Let's take the 4 cases from above:
|
||||
*
|
||||
* 1. If A is what we give to MPI_Send and A is where the data
|
||||
* starts, then clearly we give to MPI_Send what we got back from
|
||||
* malloc().
|
||||
*
|
||||
* 2. If B is what we get back from malloc, but we give A to
|
||||
* MPI_Send, then the buffer range [A,B) represents "dead space"
|
||||
* -- no data will be put there. So it's safe to give B-true_lb to
|
||||
* MPI_Send. More specifically, the true_lb is positive, so B-true_lb is
|
||||
* actually A.
|
||||
*
|
||||
* 3. If A is what we get back from malloc, and B is what we give to
|
||||
* MPI_Send, then the true_lb is negative, so A-true_lb will actually equal
|
||||
* B.
|
||||
*
|
||||
* 4. Although this seems like the weirdest case, it's actually
|
||||
* quite similar to case #2 -- the pointer we give to MPI_Send is
|
||||
* smaller than the pointer we got back from malloc().
|
||||
*
|
||||
* Hence, in all cases, we give (return_from_malloc - true_lb) to MPI_Send.
|
||||
*
|
||||
* This works fine and dandy if we only have (count==1), which we
|
||||
* rarely do. ;-) So we really need to allocate (true_extent +
|
||||
* ((count - 1) * extent)) to get enough space for the rest. This may
|
||||
* be more than is necessary, but it's ok.
|
||||
*
|
||||
* Simple, no? :-)
|
||||
*
|
||||
*/
|
||||
|
||||
ompi_datatype_get_extent(dtype, &lb, &extent);
|
||||
ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent);
|
||||
|
||||
if (MPI_IN_PLACE == sbuf) {
|
||||
sbuf = rbuf;
|
||||
inplace_temp = (char*)malloc(true_extent + (ptrdiff_t)(count - 1) * extent);
|
||||
inplace_temp = (char*)malloc(true_extent + (count - 1) * extent);
|
||||
if (NULL == inplace_temp) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
@ -642,10 +762,12 @@ ompi_coll_base_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
|
||||
}
|
||||
|
||||
if (size > 1) {
|
||||
free_buffer = (char*)malloc(true_extent + (ptrdiff_t)(count - 1) * extent);
|
||||
free_buffer = (char*)malloc(true_extent + (count - 1) * extent);
|
||||
if (NULL == free_buffer) {
|
||||
err = OMPI_ERR_OUT_OF_RESOURCE;
|
||||
goto exit;
|
||||
if (NULL != inplace_temp) {
|
||||
free(inplace_temp);
|
||||
}
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
pml_buffer = free_buffer - true_lb;
|
||||
}
|
||||
@ -653,15 +775,17 @@ ompi_coll_base_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
|
||||
/* Initialize the receive buffer. */
|
||||
|
||||
if (rank == (size - 1)) {
|
||||
err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)rbuf,
|
||||
(char*)sbuf);
|
||||
err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)rbuf, (char*)sbuf);
|
||||
} else {
|
||||
err = MCA_PML_CALL(recv(rbuf, count, dtype, size - 1,
|
||||
MCA_COLL_BASE_TAG_REDUCE, comm,
|
||||
MPI_STATUS_IGNORE));
|
||||
}
|
||||
if (MPI_SUCCESS != err) {
|
||||
goto exit;
|
||||
if (NULL != free_buffer) {
|
||||
free(free_buffer);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Loop receiving and calling reduction function (C or Fortran). */
|
||||
@ -674,25 +798,22 @@ ompi_coll_base_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
|
||||
MCA_COLL_BASE_TAG_REDUCE, comm,
|
||||
MPI_STATUS_IGNORE));
|
||||
if (MPI_SUCCESS != err) {
|
||||
goto exit;
|
||||
if (NULL != free_buffer) {
|
||||
free(free_buffer);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
inbuf = pml_buffer;
|
||||
}
|
||||
|
||||
/* Perform the reduction */
|
||||
|
||||
ompi_op_reduce(op, inbuf, rbuf, count, dtype);
|
||||
}
|
||||
|
||||
if (NULL != inplace_temp) {
|
||||
err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)sbuf,
|
||||
inplace_temp);
|
||||
} else {
|
||||
err = MPI_SUCCESS;
|
||||
}
|
||||
|
||||
exit:
|
||||
if (NULL != inplace_temp) {
|
||||
err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)sbuf, inplace_temp);
|
||||
free(inplace_temp);
|
||||
}
|
||||
if (NULL != free_buffer) {
|
||||
@ -700,7 +821,8 @@ ompi_coll_base_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
|
||||
}
|
||||
|
||||
/* All done */
|
||||
return err;
|
||||
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* copied function (with appropriate renaming) ends here */
|
||||
|
@ -194,12 +194,12 @@ ompi_coll_base_scatter_intra_binomial(void *sbuf, int scount,
|
||||
*/
|
||||
int
|
||||
ompi_coll_base_scatter_intra_basic_linear(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
int i, rank, size, err;
|
||||
ptrdiff_t lb, incr;
|
||||
|
@ -3,7 +3,7 @@
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2015 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -31,6 +31,7 @@
|
||||
#include "ompi/mca/coll/coll.h"
|
||||
#include "ompi/request/request.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
#include "ompi/mca/coll/base/coll_base_functions.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
@ -52,12 +53,6 @@ BEGIN_C_DECLS
|
||||
int mca_coll_basic_module_enable(mca_coll_base_module_t *module,
|
||||
struct ompi_communicator_t *comm);
|
||||
|
||||
int mca_coll_basic_allgather_intra(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
int mca_coll_basic_allgather_inter(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount,
|
||||
@ -65,13 +60,6 @@ BEGIN_C_DECLS
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
int mca_coll_basic_allgatherv_intra(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int *rcounts,
|
||||
int *disps,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
int mca_coll_basic_allgatherv_inter(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int *rcounts,
|
||||
@ -91,12 +79,6 @@ BEGIN_C_DECLS
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
int mca_coll_basic_alltoall_intra(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
int mca_coll_basic_alltoall_inter(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount,
|
||||
@ -104,14 +86,6 @@ BEGIN_C_DECLS
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
int mca_coll_basic_alltoallv_intra(void *sbuf, int *scounts,
|
||||
int *sdisps,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int *rcounts,
|
||||
int *rdisps,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
int mca_coll_basic_alltoallv_inter(void *sbuf, int *scounts,
|
||||
int *sdisps,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
@ -138,21 +112,12 @@ BEGIN_C_DECLS
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
int mca_coll_basic_barrier_intra_lin(struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
int mca_coll_basic_barrier_inter_lin(struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
int mca_coll_basic_barrier_intra_log(struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
int mca_coll_basic_bcast_lin_intra(void *buff, int count,
|
||||
struct ompi_datatype_t *datatype,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
int mca_coll_basic_bcast_lin_inter(void *buff, int count,
|
||||
struct ompi_datatype_t *datatype,
|
||||
int root,
|
||||
@ -183,13 +148,6 @@ BEGIN_C_DECLS
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
int mca_coll_basic_gather_intra(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
int mca_coll_basic_gather_inter(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount,
|
||||
@ -214,12 +172,6 @@ BEGIN_C_DECLS
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
int mca_coll_basic_reduce_lin_intra(void *sbuf, void *rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
int mca_coll_basic_reduce_lin_inter(void *sbuf, void *rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
@ -279,13 +231,6 @@ BEGIN_C_DECLS
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
int mca_coll_basic_scatter_intra(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
int mca_coll_basic_scatter_inter(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount,
|
||||
|
@ -32,50 +32,6 @@
|
||||
#include "coll_basic.h"
|
||||
|
||||
|
||||
/*
|
||||
* allgather_intra
|
||||
*
|
||||
* Function: - allgather using other MPI collectives
|
||||
* Accepts: - same as MPI_Allgather()
|
||||
* Returns: - MPI_SUCCESS or error code
|
||||
*/
|
||||
int
|
||||
mca_coll_basic_allgather_intra(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype, void *rbuf,
|
||||
int rcount, struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
int err;
|
||||
ptrdiff_t lb, extent;
|
||||
|
||||
/* Handle MPI_IN_PLACE (see explanantion in reduce.c for how to
|
||||
allocate temp buffer) -- note that rank 0 can use IN_PLACE
|
||||
natively, and we can just alias the right position in rbuf
|
||||
as sbuf and avoid using a temporary buffer if gather is
|
||||
implemented correctly */
|
||||
if (MPI_IN_PLACE == sbuf && 0 != ompi_comm_rank(comm)) {
|
||||
ompi_datatype_get_extent(rdtype, &lb, &extent);
|
||||
sbuf = ((char*) rbuf) + (ompi_comm_rank(comm) * extent * rcount);
|
||||
sdtype = rdtype;
|
||||
scount = rcount;
|
||||
}
|
||||
|
||||
/* Gather and broadcast. */
|
||||
|
||||
err = comm->c_coll.coll_gather(sbuf, scount, sdtype, rbuf, rcount,
|
||||
rdtype, 0, comm, comm->c_coll.coll_gather_module);
|
||||
if (MPI_SUCCESS == err) {
|
||||
err = comm->c_coll.coll_bcast(rbuf, rcount * ompi_comm_size(comm),
|
||||
rdtype, 0, comm, comm->c_coll.coll_bcast_module);
|
||||
}
|
||||
|
||||
/* All done */
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* allgather_inter
|
||||
*
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2015 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -28,87 +28,6 @@
|
||||
#include "coll_basic.h"
|
||||
|
||||
|
||||
/*
|
||||
* allgatherv_intra
|
||||
*
|
||||
* Function: - allgatherv using other MPI collectives
|
||||
* Accepts: - same as MPI_Allgatherv()
|
||||
* Returns: - MPI_SUCCESS or error code
|
||||
*/
|
||||
int
|
||||
mca_coll_basic_allgatherv_intra(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int *rcounts, int *disps,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
int i, size, rank ;
|
||||
int err;
|
||||
MPI_Aint extent;
|
||||
MPI_Aint lb;
|
||||
char *send_buf = NULL;
|
||||
struct ompi_datatype_t *newtype, *send_type;
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
/*
|
||||
* We don't have a root process defined. Arbitrarily assign root
|
||||
* to process with rank 0 (OMPI convention)
|
||||
*/
|
||||
|
||||
if (MPI_IN_PLACE == sbuf) {
|
||||
ompi_datatype_get_extent(rdtype, &lb, &extent);
|
||||
send_type = rdtype;
|
||||
send_buf = (char*)rbuf;
|
||||
for (i = 0; i < rank; ++i) {
|
||||
send_buf += (rcounts[i] * extent);
|
||||
}
|
||||
} else {
|
||||
send_buf = (char*)sbuf;
|
||||
send_type = sdtype;
|
||||
}
|
||||
|
||||
err = comm->c_coll.coll_gatherv(send_buf,
|
||||
rcounts[rank], send_type,rbuf,
|
||||
rcounts, disps, rdtype, 0,
|
||||
comm, comm->c_coll.coll_gatherv_module);
|
||||
|
||||
if (MPI_SUCCESS != err) {
|
||||
return err;
|
||||
}
|
||||
/*
|
||||
* we now have all the data in the root's rbuf. Need to
|
||||
* broadcast the data out to the other processes
|
||||
*
|
||||
* Need to define a datatype that captures the different vectors
|
||||
* from each process. MPI_TYPE_INDEXED with params
|
||||
* size,rcount,displs,rdtype,newtype
|
||||
* should do the trick.
|
||||
* Use underlying ddt functions to create, and commit the
|
||||
* new datatype on each process, then broadcast and destroy the
|
||||
* datatype.
|
||||
*/
|
||||
|
||||
err = ompi_datatype_create_indexed(size,rcounts,disps,rdtype,&newtype);
|
||||
if (MPI_SUCCESS != err) {
|
||||
return err;
|
||||
}
|
||||
|
||||
err = ompi_datatype_commit(&newtype);
|
||||
if(MPI_SUCCESS != err) {
|
||||
return err;
|
||||
}
|
||||
|
||||
err = comm->c_coll.coll_bcast( rbuf, 1 ,newtype,0,comm,
|
||||
comm->c_coll.coll_bcast_module);
|
||||
|
||||
ompi_datatype_destroy (&newtype);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* allgatherv_inter
|
||||
*
|
||||
|
@ -3,7 +3,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2015 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -32,224 +32,6 @@
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
|
||||
|
||||
static int
|
||||
mca_coll_basic_alltoall_intra_inplace(void *rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
|
||||
int i, j, size, rank, err=MPI_SUCCESS;
|
||||
MPI_Request *preq;
|
||||
char *tmp_buffer;
|
||||
size_t max_size;
|
||||
ptrdiff_t ext;
|
||||
|
||||
/* Initialize. */
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
/* If only one process, we're done. */
|
||||
if (1 == size) {
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* Find the largest receive amount */
|
||||
ompi_datatype_type_extent (rdtype, &ext);
|
||||
max_size = ext * rcount;
|
||||
|
||||
/* Allocate a temporary buffer */
|
||||
tmp_buffer = calloc (max_size, 1);
|
||||
if (NULL == tmp_buffer) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* in-place alltoall slow algorithm (but works) */
|
||||
for (i = 0 ; i < size ; ++i) {
|
||||
for (j = i+1 ; j < size ; ++j) {
|
||||
/* Initiate all send/recv to/from others. */
|
||||
preq = basic_module->mccb_reqs;
|
||||
|
||||
if (i == rank) {
|
||||
/* Copy the data into the temporary buffer */
|
||||
err = ompi_datatype_copy_content_same_ddt (rdtype, rcount, tmp_buffer,
|
||||
(char *) rbuf + j * max_size);
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
/* Exchange data with the peer */
|
||||
err = MCA_PML_CALL(irecv ((char *) rbuf + max_size * j, rcount, rdtype,
|
||||
j, MCA_COLL_BASE_TAG_ALLTOALL, comm, preq++));
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
err = MCA_PML_CALL(isend ((char *) tmp_buffer, rcount, rdtype,
|
||||
j, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD,
|
||||
comm, preq++));
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
} else if (j == rank) {
|
||||
/* Copy the data into the temporary buffer */
|
||||
err = ompi_datatype_copy_content_same_ddt (rdtype, rcount, tmp_buffer,
|
||||
(char *) rbuf + i * max_size);
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
/* Exchange data with the peer */
|
||||
err = MCA_PML_CALL(irecv ((char *) rbuf + max_size * i, rcount, rdtype,
|
||||
i, MCA_COLL_BASE_TAG_ALLTOALL, comm, preq++));
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
err = MCA_PML_CALL(isend ((char *) tmp_buffer, rcount, rdtype,
|
||||
i, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD,
|
||||
comm, preq++));
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Wait for the requests to complete */
|
||||
err = ompi_request_wait_all (2, basic_module->mccb_reqs, MPI_STATUSES_IGNORE);
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
/* Free the requests. */
|
||||
mca_coll_basic_free_reqs(basic_module->mccb_reqs, 2);
|
||||
}
|
||||
}
|
||||
|
||||
error_hndl:
|
||||
/* Free the temporary buffer */
|
||||
free (tmp_buffer);
|
||||
|
||||
/* All done */
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* alltoall_intra
|
||||
*
|
||||
* Function: - MPI_Alltoall
|
||||
* Accepts: - same as MPI_Alltoall()
|
||||
* Returns: - MPI_SUCCESS or an MPI error code
|
||||
*/
|
||||
int
|
||||
mca_coll_basic_alltoall_intra(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
int i;
|
||||
int rank;
|
||||
int size;
|
||||
int err;
|
||||
int nreqs;
|
||||
char *psnd;
|
||||
char *prcv;
|
||||
MPI_Aint lb;
|
||||
MPI_Aint sndinc;
|
||||
MPI_Aint rcvinc;
|
||||
|
||||
ompi_request_t **req;
|
||||
ompi_request_t **sreq;
|
||||
ompi_request_t **rreq;
|
||||
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
|
||||
|
||||
/* Initialize. */
|
||||
if (MPI_IN_PLACE == sbuf) {
|
||||
return mca_coll_basic_alltoall_intra_inplace (rbuf, rcount, rdtype,
|
||||
comm, module);
|
||||
}
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
err = ompi_datatype_get_extent(sdtype, &lb, &sndinc);
|
||||
if (OMPI_SUCCESS != err) {
|
||||
return err;
|
||||
}
|
||||
sndinc *= scount;
|
||||
|
||||
err = ompi_datatype_get_extent(rdtype, &lb, &rcvinc);
|
||||
if (OMPI_SUCCESS != err) {
|
||||
return err;
|
||||
}
|
||||
rcvinc *= rcount;
|
||||
|
||||
/* simple optimization */
|
||||
|
||||
psnd = ((char *) sbuf) + (rank * sndinc);
|
||||
prcv = ((char *) rbuf) + (rank * rcvinc);
|
||||
|
||||
err = ompi_datatype_sndrcv(psnd, scount, sdtype, prcv, rcount, rdtype);
|
||||
if (MPI_SUCCESS != err) {
|
||||
return err;
|
||||
}
|
||||
|
||||
/* If only one process, we're done. */
|
||||
|
||||
if (1 == size) {
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* Initiate all send/recv to/from others. */
|
||||
|
||||
req = rreq = basic_module->mccb_reqs;
|
||||
sreq = rreq + size - 1;
|
||||
|
||||
prcv = (char *) rbuf;
|
||||
psnd = (char *) sbuf;
|
||||
|
||||
/* Post all receives first -- a simple optimization */
|
||||
|
||||
for (nreqs = 0, i = (rank + 1) % size; i != rank; i = (i + 1) % size, ++rreq, ++nreqs) {
|
||||
err =
|
||||
MCA_PML_CALL(irecv_init
|
||||
(prcv + (i * rcvinc), rcount, rdtype, i,
|
||||
MCA_COLL_BASE_TAG_ALLTOALL, comm, rreq));
|
||||
if (MPI_SUCCESS != err) {
|
||||
mca_coll_basic_free_reqs(req, nreqs);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
/* Now post all sends */
|
||||
|
||||
for (nreqs = 0, i = (rank + 1) % size; i != rank; i = (i + 1) % size, ++sreq, ++nreqs) {
|
||||
err =
|
||||
MCA_PML_CALL(isend_init
|
||||
(psnd + (i * sndinc), scount, sdtype, i,
|
||||
MCA_COLL_BASE_TAG_ALLTOALL,
|
||||
MCA_PML_BASE_SEND_STANDARD, comm, sreq));
|
||||
if (MPI_SUCCESS != err) {
|
||||
mca_coll_basic_free_reqs(req, nreqs);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
nreqs = (size - 1) * 2;
|
||||
/* Start your engines. This will never return an error. */
|
||||
|
||||
MCA_PML_CALL(start(nreqs, req));
|
||||
|
||||
/* Wait for them all. If there's an error, note that we don't
|
||||
* care what the error was -- just that there *was* an error. The
|
||||
* PML will finish all requests, even if one or more of them fail.
|
||||
* i.e., by the end of this call, all the requests are free-able.
|
||||
* So free them anyway -- even if there was an error, and return
|
||||
* the error after we free everything. */
|
||||
|
||||
err = ompi_request_wait_all(nreqs, req, MPI_STATUSES_IGNORE);
|
||||
|
||||
/* Free the reqs */
|
||||
|
||||
mca_coll_basic_free_reqs(req, nreqs);
|
||||
|
||||
/* All done */
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* alltoall_inter
|
||||
*
|
||||
|
@ -3,7 +3,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2015 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -33,226 +33,6 @@
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
|
||||
|
||||
static int
|
||||
mca_coll_basic_alltoallv_intra_inplace(void *rbuf, const int *rcounts, const int *rdisps,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
|
||||
int i, j, size, rank, err=MPI_SUCCESS;
|
||||
MPI_Request *preq;
|
||||
char *tmp_buffer;
|
||||
size_t max_size;
|
||||
ptrdiff_t ext;
|
||||
|
||||
/* Initialize. */
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
/* If only one process, we're done. */
|
||||
if (1 == size) {
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* Find the largest receive amount */
|
||||
ompi_datatype_type_extent (rdtype, &ext);
|
||||
for (i = 0, max_size = 0 ; i < size ; ++i) {
|
||||
size_t size = ext * rcounts[i];
|
||||
|
||||
max_size = size > max_size ? size : max_size;
|
||||
}
|
||||
|
||||
/* Allocate a temporary buffer */
|
||||
tmp_buffer = calloc (max_size, 1);
|
||||
if (NULL == tmp_buffer) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* in-place alltoallv slow algorithm (but works) */
|
||||
for (i = 0 ; i < size ; ++i) {
|
||||
for (j = i+1 ; j < size ; ++j) {
|
||||
/* Initiate all send/recv to/from others. */
|
||||
preq = basic_module->mccb_reqs;
|
||||
|
||||
if (i == rank && rcounts[j]) {
|
||||
/* Copy the data into the temporary buffer */
|
||||
err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[j],
|
||||
tmp_buffer, (char *) rbuf + rdisps[j] * ext);
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
/* Exchange data with the peer */
|
||||
err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[j] * ext, rcounts[j], rdtype,
|
||||
j, MCA_COLL_BASE_TAG_ALLTOALLV, comm, preq++));
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
err = MCA_PML_CALL(isend ((void *) tmp_buffer, rcounts[j], rdtype,
|
||||
j, MCA_COLL_BASE_TAG_ALLTOALLV, MCA_PML_BASE_SEND_STANDARD,
|
||||
comm, preq++));
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
} else if (j == rank && rcounts[i]) {
|
||||
/* Copy the data into the temporary buffer */
|
||||
err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[i],
|
||||
tmp_buffer, (char *) rbuf + rdisps[i] * ext);
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
/* Exchange data with the peer */
|
||||
err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[i] * ext, rcounts[i], rdtype,
|
||||
i, MCA_COLL_BASE_TAG_ALLTOALLV, comm, preq++));
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
err = MCA_PML_CALL(isend ((void *) tmp_buffer, rcounts[i], rdtype,
|
||||
i, MCA_COLL_BASE_TAG_ALLTOALLV, MCA_PML_BASE_SEND_STANDARD,
|
||||
comm, preq++));
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Wait for the requests to complete */
|
||||
err = ompi_request_wait_all (2, basic_module->mccb_reqs, MPI_STATUSES_IGNORE);
|
||||
if (MPI_SUCCESS != err) { goto error_hndl; }
|
||||
|
||||
/* Free the requests. */
|
||||
mca_coll_basic_free_reqs(basic_module->mccb_reqs, 2);
|
||||
}
|
||||
}
|
||||
|
||||
error_hndl:
|
||||
/* Free the temporary buffer */
|
||||
free (tmp_buffer);
|
||||
|
||||
/* All done */
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* alltoallv_intra
|
||||
*
|
||||
* Function: - MPI_Alltoallv
|
||||
* Accepts: - same as MPI_Alltoallv()
|
||||
* Returns: - MPI_SUCCESS or an MPI error code
|
||||
*/
|
||||
int
|
||||
mca_coll_basic_alltoallv_intra(void *sbuf, int *scounts, int *sdisps,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int *rcounts, int *rdisps,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
int i;
|
||||
int size;
|
||||
int rank;
|
||||
int err;
|
||||
char *psnd;
|
||||
char *prcv;
|
||||
int nreqs;
|
||||
MPI_Aint sndextent;
|
||||
MPI_Aint rcvextent;
|
||||
MPI_Request *preq;
|
||||
|
||||
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
|
||||
|
||||
/* Initialize. */
|
||||
if (MPI_IN_PLACE == sbuf) {
|
||||
return mca_coll_basic_alltoallv_intra_inplace (rbuf, rcounts, rdisps,
|
||||
rdtype, comm, module);
|
||||
}
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
ompi_datatype_type_extent(sdtype, &sndextent);
|
||||
ompi_datatype_type_extent(rdtype, &rcvextent);
|
||||
|
||||
/* simple optimization */
|
||||
|
||||
psnd = ((char *) sbuf) + (sdisps[rank] * sndextent);
|
||||
prcv = ((char *) rbuf) + (rdisps[rank] * rcvextent);
|
||||
|
||||
if (0 != scounts[rank]) {
|
||||
err = ompi_datatype_sndrcv(psnd, scounts[rank], sdtype,
|
||||
prcv, rcounts[rank], rdtype);
|
||||
if (MPI_SUCCESS != err) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
/* If only one process, we're done. */
|
||||
|
||||
if (1 == size) {
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* Initiate all send/recv to/from others. */
|
||||
|
||||
nreqs = 0;
|
||||
preq = basic_module->mccb_reqs;
|
||||
|
||||
/* Post all receives first -- a simple optimization */
|
||||
|
||||
for (i = 0; i < size; ++i) {
|
||||
if (i == rank || 0 == rcounts[i]) {
|
||||
continue;
|
||||
}
|
||||
|
||||
prcv = ((char *) rbuf) + (rdisps[i] * rcvextent);
|
||||
err = MCA_PML_CALL(irecv_init(prcv, rcounts[i], rdtype,
|
||||
i, MCA_COLL_BASE_TAG_ALLTOALLV, comm,
|
||||
preq++));
|
||||
++nreqs;
|
||||
if (MPI_SUCCESS != err) {
|
||||
mca_coll_basic_free_reqs(basic_module->mccb_reqs, nreqs);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
/* Now post all sends */
|
||||
|
||||
for (i = 0; i < size; ++i) {
|
||||
if (i == rank || 0 == scounts[i]) {
|
||||
continue;
|
||||
}
|
||||
|
||||
psnd = ((char *) sbuf) + (sdisps[i] * sndextent);
|
||||
err = MCA_PML_CALL(isend_init(psnd, scounts[i], sdtype,
|
||||
i, MCA_COLL_BASE_TAG_ALLTOALLV,
|
||||
MCA_PML_BASE_SEND_STANDARD, comm,
|
||||
preq++));
|
||||
++nreqs;
|
||||
if (MPI_SUCCESS != err) {
|
||||
mca_coll_basic_free_reqs(basic_module->mccb_reqs, nreqs);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
/* Start your engines. This will never return an error. */
|
||||
|
||||
MCA_PML_CALL(start(nreqs, basic_module->mccb_reqs));
|
||||
|
||||
/* Wait for them all. If there's an error, note that we don't care
|
||||
* what the error was -- just that there *was* an error. The PML
|
||||
* will finish all requests, even if one or more of them fail.
|
||||
* i.e., by the end of this call, all the requests are free-able.
|
||||
* So free them anyway -- even if there was an error, and return the
|
||||
* error after we free everything. */
|
||||
|
||||
err = ompi_request_wait_all(nreqs, basic_module->mccb_reqs,
|
||||
MPI_STATUSES_IGNORE);
|
||||
|
||||
/* Free the requests. */
|
||||
|
||||
mca_coll_basic_free_reqs(basic_module->mccb_reqs, nreqs);
|
||||
|
||||
/* All done */
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* alltoallv_inter
|
||||
*
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2015 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -30,72 +30,6 @@
|
||||
#include "coll_basic.h"
|
||||
|
||||
|
||||
/*
|
||||
* barrier_intra_lin
|
||||
*
|
||||
* Function: - barrier using O(N) algorithm
|
||||
* Accepts: - same as MPI_Barrier()
|
||||
* Returns: - MPI_SUCCESS or error code
|
||||
*/
|
||||
int
|
||||
mca_coll_basic_barrier_intra_lin(struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
int i;
|
||||
int err;
|
||||
int size = ompi_comm_size(comm);
|
||||
int rank = ompi_comm_rank(comm);
|
||||
|
||||
/* All non-root send & receive zero-length message. */
|
||||
|
||||
if (rank > 0) {
|
||||
err =
|
||||
MCA_PML_CALL(send
|
||||
(NULL, 0, MPI_BYTE, 0, MCA_COLL_BASE_TAG_BARRIER,
|
||||
MCA_PML_BASE_SEND_STANDARD, comm));
|
||||
if (MPI_SUCCESS != err) {
|
||||
return err;
|
||||
}
|
||||
|
||||
err =
|
||||
MCA_PML_CALL(recv
|
||||
(NULL, 0, MPI_BYTE, 0, MCA_COLL_BASE_TAG_BARRIER,
|
||||
comm, MPI_STATUS_IGNORE));
|
||||
if (MPI_SUCCESS != err) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
/* The root collects and broadcasts the messages. */
|
||||
|
||||
else {
|
||||
for (i = 1; i < size; ++i) {
|
||||
err = MCA_PML_CALL(recv(NULL, 0, MPI_BYTE, MPI_ANY_SOURCE,
|
||||
MCA_COLL_BASE_TAG_BARRIER,
|
||||
comm, MPI_STATUS_IGNORE));
|
||||
if (MPI_SUCCESS != err) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 1; i < size; ++i) {
|
||||
err =
|
||||
MCA_PML_CALL(send
|
||||
(NULL, 0, MPI_BYTE, i,
|
||||
MCA_COLL_BASE_TAG_BARRIER,
|
||||
MCA_PML_BASE_SEND_STANDARD, comm));
|
||||
if (MPI_SUCCESS != err) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* All done */
|
||||
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* barrier_intra_log
|
||||
*
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2015 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -30,78 +30,6 @@
|
||||
#include "opal/util/bit_ops.h"
|
||||
|
||||
|
||||
/*
|
||||
* bcast_lin_intra
|
||||
*
|
||||
* Function: - broadcast using O(N) algorithm
|
||||
* Accepts: - same arguments as MPI_Bcast()
|
||||
* Returns: - MPI_SUCCESS or error code
|
||||
*/
|
||||
int
|
||||
mca_coll_basic_bcast_lin_intra(void *buff, int count,
|
||||
struct ompi_datatype_t *datatype, int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
int i;
|
||||
int size;
|
||||
int rank;
|
||||
int err;
|
||||
ompi_request_t **preq;
|
||||
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
|
||||
ompi_request_t **reqs = basic_module->mccb_reqs;
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
/* Non-root receive the data. */
|
||||
|
||||
if (rank != root) {
|
||||
return MCA_PML_CALL(recv(buff, count, datatype, root,
|
||||
MCA_COLL_BASE_TAG_BCAST, comm,
|
||||
MPI_STATUS_IGNORE));
|
||||
}
|
||||
|
||||
/* Root sends data to all others. */
|
||||
|
||||
for (i = 0, preq = reqs; i < size; ++i) {
|
||||
if (i == rank) {
|
||||
continue;
|
||||
}
|
||||
|
||||
err = MCA_PML_CALL(isend_init(buff, count, datatype, i,
|
||||
MCA_COLL_BASE_TAG_BCAST,
|
||||
MCA_PML_BASE_SEND_STANDARD,
|
||||
comm, preq++));
|
||||
if (MPI_SUCCESS != err) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
--i;
|
||||
|
||||
/* Start your engines. This will never return an error. */
|
||||
|
||||
MCA_PML_CALL(start(i, reqs));
|
||||
|
||||
/* Wait for them all. If there's an error, note that we don't
|
||||
* care what the error was -- just that there *was* an error. The
|
||||
* PML will finish all requests, even if one or more of them fail.
|
||||
* i.e., by the end of this call, all the requests are free-able.
|
||||
* So free them anyway -- even if there was an error, and return
|
||||
* the error after we free everything. */
|
||||
|
||||
err = ompi_request_wait_all(i, reqs, MPI_STATUSES_IGNORE);
|
||||
|
||||
/* Free the reqs */
|
||||
|
||||
mca_coll_basic_free_reqs(reqs, i);
|
||||
|
||||
/* All done */
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* bcast_log_intra
|
||||
*
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2015 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -27,68 +27,6 @@
|
||||
#include "ompi/mca/coll/base/coll_tags.h"
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
|
||||
/*
|
||||
* gather_intra
|
||||
*
|
||||
* Function: - basic gather operation
|
||||
* Accepts: - same arguments as MPI_Gather()
|
||||
* Returns: - MPI_SUCCESS or error code
|
||||
*/
|
||||
int
|
||||
mca_coll_basic_gather_intra(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
int root, struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
int i;
|
||||
int err;
|
||||
int rank;
|
||||
int size;
|
||||
char *ptmp;
|
||||
MPI_Aint incr;
|
||||
MPI_Aint extent;
|
||||
MPI_Aint lb;
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
/* Everyone but root sends data and returns. */
|
||||
|
||||
if (rank != root) {
|
||||
return MCA_PML_CALL(send(sbuf, scount, sdtype, root,
|
||||
MCA_COLL_BASE_TAG_GATHER,
|
||||
MCA_PML_BASE_SEND_STANDARD, comm));
|
||||
}
|
||||
|
||||
/* I am the root, loop receiving the data. */
|
||||
|
||||
ompi_datatype_get_extent(rdtype, &lb, &extent);
|
||||
incr = extent * rcount;
|
||||
for (i = 0, ptmp = (char *) rbuf; i < size; ++i, ptmp += incr) {
|
||||
if (i == rank) {
|
||||
if (MPI_IN_PLACE != sbuf) {
|
||||
err = ompi_datatype_sndrcv(sbuf, scount, sdtype,
|
||||
ptmp, rcount, rdtype);
|
||||
} else {
|
||||
err = MPI_SUCCESS;
|
||||
}
|
||||
} else {
|
||||
err = MCA_PML_CALL(recv(ptmp, rcount, rdtype, i,
|
||||
MCA_COLL_BASE_TAG_GATHER,
|
||||
comm, MPI_STATUS_IGNORE));
|
||||
}
|
||||
if (MPI_SUCCESS != err) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
/* All done */
|
||||
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* gather_inter
|
||||
|
@ -129,40 +129,40 @@ mca_coll_basic_comm_query(struct ompi_communicator_t *comm,
|
||||
basic_module->super.coll_scatter = mca_coll_basic_scatter_inter;
|
||||
basic_module->super.coll_scatterv = mca_coll_basic_scatterv_inter;
|
||||
} else if (ompi_comm_size(comm) <= mca_coll_basic_crossover) {
|
||||
basic_module->super.coll_allgather = mca_coll_basic_allgather_intra;
|
||||
basic_module->super.coll_allgatherv = mca_coll_basic_allgatherv_intra;
|
||||
basic_module->super.coll_allgather = ompi_coll_base_allgather_intra_basic_linear;
|
||||
basic_module->super.coll_allgatherv = ompi_coll_base_allgatherv_intra_basic_default;
|
||||
basic_module->super.coll_allreduce = mca_coll_basic_allreduce_intra;
|
||||
basic_module->super.coll_alltoall = mca_coll_basic_alltoall_intra;
|
||||
basic_module->super.coll_alltoallv = mca_coll_basic_alltoallv_intra;
|
||||
basic_module->super.coll_alltoall = ompi_coll_base_alltoall_intra_basic_linear;
|
||||
basic_module->super.coll_alltoallv = ompi_coll_base_alltoallv_intra_basic_linear;
|
||||
basic_module->super.coll_alltoallw = mca_coll_basic_alltoallw_intra;
|
||||
basic_module->super.coll_barrier = mca_coll_basic_barrier_intra_lin;
|
||||
basic_module->super.coll_bcast = mca_coll_basic_bcast_lin_intra;
|
||||
basic_module->super.coll_barrier = ompi_coll_base_barrier_intra_basic_linear;
|
||||
basic_module->super.coll_bcast = ompi_coll_base_bcast_intra_basic_linear;
|
||||
basic_module->super.coll_exscan = mca_coll_basic_exscan_intra;
|
||||
basic_module->super.coll_gather = mca_coll_basic_gather_intra;
|
||||
basic_module->super.coll_gather = ompi_coll_base_gather_intra_basic_linear;
|
||||
basic_module->super.coll_gatherv = mca_coll_basic_gatherv_intra;
|
||||
basic_module->super.coll_reduce = mca_coll_basic_reduce_lin_intra;
|
||||
basic_module->super.coll_reduce = ompi_coll_base_reduce_intra_basic_linear;
|
||||
basic_module->super.coll_reduce_scatter_block = mca_coll_basic_reduce_scatter_block_intra;
|
||||
basic_module->super.coll_reduce_scatter = mca_coll_basic_reduce_scatter_intra;
|
||||
basic_module->super.coll_scan = mca_coll_basic_scan_intra;
|
||||
basic_module->super.coll_scatter = mca_coll_basic_scatter_intra;
|
||||
basic_module->super.coll_scatter = ompi_coll_base_scatter_intra_basic_linear;
|
||||
basic_module->super.coll_scatterv = mca_coll_basic_scatterv_intra;
|
||||
} else {
|
||||
basic_module->super.coll_allgather = mca_coll_basic_allgather_intra;
|
||||
basic_module->super.coll_allgatherv = mca_coll_basic_allgatherv_intra;
|
||||
basic_module->super.coll_allgather = ompi_coll_base_allgather_intra_basic_linear;
|
||||
basic_module->super.coll_allgatherv = ompi_coll_base_allgatherv_intra_basic_default;
|
||||
basic_module->super.coll_allreduce = mca_coll_basic_allreduce_intra;
|
||||
basic_module->super.coll_alltoall = mca_coll_basic_alltoall_intra;
|
||||
basic_module->super.coll_alltoallv = mca_coll_basic_alltoallv_intra;
|
||||
basic_module->super.coll_alltoall = ompi_coll_base_alltoall_intra_basic_linear;
|
||||
basic_module->super.coll_alltoallv = ompi_coll_base_alltoallv_intra_basic_linear;
|
||||
basic_module->super.coll_alltoallw = mca_coll_basic_alltoallw_intra;
|
||||
basic_module->super.coll_barrier = mca_coll_basic_barrier_intra_log;
|
||||
basic_module->super.coll_bcast = mca_coll_basic_bcast_log_intra;
|
||||
basic_module->super.coll_exscan = mca_coll_basic_exscan_intra;
|
||||
basic_module->super.coll_gather = mca_coll_basic_gather_intra;
|
||||
basic_module->super.coll_gather = ompi_coll_base_gather_intra_basic_linear;
|
||||
basic_module->super.coll_gatherv = mca_coll_basic_gatherv_intra;
|
||||
basic_module->super.coll_reduce = mca_coll_basic_reduce_log_intra;
|
||||
basic_module->super.coll_reduce_scatter_block = mca_coll_basic_reduce_scatter_block_intra;
|
||||
basic_module->super.coll_reduce_scatter = mca_coll_basic_reduce_scatter_intra;
|
||||
basic_module->super.coll_scan = mca_coll_basic_scan_intra;
|
||||
basic_module->super.coll_scatter = mca_coll_basic_scatter_intra;
|
||||
basic_module->super.coll_scatter = ompi_coll_base_scatter_intra_basic_linear;
|
||||
basic_module->super.coll_scatterv = mca_coll_basic_scatterv_intra;
|
||||
}
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2014 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2015 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -28,241 +28,6 @@
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
#include "ompi/op/op.h"
|
||||
|
||||
/*
|
||||
* reduce_lin_intra
|
||||
*
|
||||
* Function: - reduction using O(N) algorithm
|
||||
* Accepts: - same as MPI_Reduce()
|
||||
* Returns: - MPI_SUCCESS or error code
|
||||
*/
|
||||
int
|
||||
mca_coll_basic_reduce_lin_intra(void *sbuf, void *rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
int root, struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
int i, rank, err, size;
|
||||
ptrdiff_t true_lb, true_extent, lb, extent;
|
||||
char *free_buffer = NULL;
|
||||
char *pml_buffer = NULL;
|
||||
char *inplace_temp = NULL;
|
||||
char *inbuf;
|
||||
|
||||
/* Initialize */
|
||||
|
||||
rank = ompi_comm_rank(comm);
|
||||
size = ompi_comm_size(comm);
|
||||
|
||||
/* If not root, send data to the root. */
|
||||
|
||||
if (rank != root) {
|
||||
err = MCA_PML_CALL(send(sbuf, count, dtype, root,
|
||||
MCA_COLL_BASE_TAG_REDUCE,
|
||||
MCA_PML_BASE_SEND_STANDARD, comm));
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Root receives and reduces messages. Allocate buffer to receive
|
||||
* messages. This comment applies to all collectives in this basic
|
||||
* module where we allocate a temporary buffer. For the next few
|
||||
* lines of code, it's tremendously complicated how we decided that
|
||||
* this was the Right Thing to do. Sit back and enjoy. And prepare
|
||||
* to have your mind warped. :-)
|
||||
*
|
||||
* Recall some definitions (I always get these backwards, so I'm
|
||||
* going to put them here):
|
||||
*
|
||||
* extent: the length from the lower bound to the upper bound -- may
|
||||
* be considerably larger than the buffer required to hold the data
|
||||
* (or smaller! But it's easiest to think about when it's larger).
|
||||
*
|
||||
* true extent: the exact number of bytes required to hold the data
|
||||
* in the layout pattern in the datatype.
|
||||
*
|
||||
* For example, consider the following buffer (just talking about
|
||||
* true_lb, extent, and true extent -- extrapolate for true_ub:
|
||||
*
|
||||
* A B C
|
||||
* --------------------------------------------------------
|
||||
* | | |
|
||||
* --------------------------------------------------------
|
||||
*
|
||||
* There are multiple cases:
|
||||
*
|
||||
* 1. A is what we give to MPI_Send (and friends), and A is where
|
||||
* the data starts, and C is where the data ends. In this case:
|
||||
*
|
||||
* - extent: C-A
|
||||
* - true extent: C-A
|
||||
* - true_lb: 0
|
||||
*
|
||||
* A C
|
||||
* --------------------------------------------------------
|
||||
* | |
|
||||
* --------------------------------------------------------
|
||||
* <=======================extent=========================>
|
||||
* <======================true extent=====================>
|
||||
*
|
||||
* 2. A is what we give to MPI_Send (and friends), B is where the
|
||||
* data starts, and C is where the data ends. In this case:
|
||||
*
|
||||
* - extent: C-A
|
||||
* - true extent: C-B
|
||||
* - true_lb: positive
|
||||
*
|
||||
* A B C
|
||||
* --------------------------------------------------------
|
||||
* | | User buffer |
|
||||
* --------------------------------------------------------
|
||||
* <=======================extent=========================>
|
||||
* <===============true extent=============>
|
||||
*
|
||||
* 3. B is what we give to MPI_Send (and friends), A is where the
|
||||
* data starts, and C is where the data ends. In this case:
|
||||
*
|
||||
* - extent: C-A
|
||||
* - true extent: C-A
|
||||
* - true_lb: negative
|
||||
*
|
||||
* A B C
|
||||
* --------------------------------------------------------
|
||||
* | | User buffer |
|
||||
* --------------------------------------------------------
|
||||
* <=======================extent=========================>
|
||||
* <======================true extent=====================>
|
||||
*
|
||||
* 4. MPI_BOTTOM is what we give to MPI_Send (and friends), B is
|
||||
* where the data starts, and C is where the data ends. In this
|
||||
* case:
|
||||
*
|
||||
* - extent: C-MPI_BOTTOM
|
||||
* - true extent: C-B
|
||||
* - true_lb: [potentially very large] positive
|
||||
*
|
||||
* MPI_BOTTOM B C
|
||||
* --------------------------------------------------------
|
||||
* | | User buffer |
|
||||
* --------------------------------------------------------
|
||||
* <=======================extent=========================>
|
||||
* <===============true extent=============>
|
||||
*
|
||||
* So in all cases, for a temporary buffer, all we need to malloc()
|
||||
* is a buffer of size true_extent. We therefore need to know two
|
||||
* pointer values: what value to give to MPI_Send (and friends) and
|
||||
* what value to give to free(), because they might not be the same.
|
||||
*
|
||||
* Clearly, what we give to free() is exactly what was returned from
|
||||
* malloc(). That part is easy. :-)
|
||||
*
|
||||
* What we give to MPI_Send (and friends) is a bit more complicated.
|
||||
* Let's take the 4 cases from above:
|
||||
*
|
||||
* 1. If A is what we give to MPI_Send and A is where the data
|
||||
* starts, then clearly we give to MPI_Send what we got back from
|
||||
* malloc().
|
||||
*
|
||||
* 2. If B is what we get back from malloc, but we give A to
|
||||
* MPI_Send, then the buffer range [A,B) represents "dead space"
|
||||
* -- no data will be put there. So it's safe to give B-true_lb to
|
||||
* MPI_Send. More specifically, the true_lb is positive, so B-true_lb is
|
||||
* actually A.
|
||||
*
|
||||
* 3. If A is what we get back from malloc, and B is what we give to
|
||||
* MPI_Send, then the true_lb is negative, so A-true_lb will actually equal
|
||||
* B.
|
||||
*
|
||||
* 4. Although this seems like the weirdest case, it's actually
|
||||
* quite similar to case #2 -- the pointer we give to MPI_Send is
|
||||
* smaller than the pointer we got back from malloc().
|
||||
*
|
||||
* Hence, in all cases, we give (return_from_malloc - true_lb) to MPI_Send.
|
||||
*
|
||||
* This works fine and dandy if we only have (count==1), which we
|
||||
* rarely do. ;-) So we really need to allocate (true_extent +
|
||||
* ((count - 1) * extent)) to get enough space for the rest. This may
|
||||
* be more than is necessary, but it's ok.
|
||||
*
|
||||
* Simple, no? :-)
|
||||
*
|
||||
*/
|
||||
|
||||
ompi_datatype_get_extent(dtype, &lb, &extent);
|
||||
ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent);
|
||||
|
||||
if (MPI_IN_PLACE == sbuf) {
|
||||
sbuf = rbuf;
|
||||
inplace_temp = (char*)malloc(true_extent + (count - 1) * extent);
|
||||
if (NULL == inplace_temp) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
rbuf = inplace_temp - true_lb;
|
||||
}
|
||||
|
||||
if (size > 1) {
|
||||
free_buffer = (char*)malloc(true_extent + (count - 1) * extent);
|
||||
if (NULL == free_buffer) {
|
||||
if (NULL != inplace_temp) {
|
||||
free(inplace_temp);
|
||||
}
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
pml_buffer = free_buffer - true_lb;
|
||||
}
|
||||
|
||||
/* Initialize the receive buffer. */
|
||||
|
||||
if (rank == (size - 1)) {
|
||||
err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)rbuf, (char*)sbuf);
|
||||
} else {
|
||||
err = MCA_PML_CALL(recv(rbuf, count, dtype, size - 1,
|
||||
MCA_COLL_BASE_TAG_REDUCE, comm,
|
||||
MPI_STATUS_IGNORE));
|
||||
}
|
||||
if (MPI_SUCCESS != err) {
|
||||
if (NULL != free_buffer) {
|
||||
free(free_buffer);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Loop receiving and calling reduction function (C or Fortran). */
|
||||
|
||||
for (i = size - 2; i >= 0; --i) {
|
||||
if (rank == i) {
|
||||
inbuf = (char*)sbuf;
|
||||
} else {
|
||||
err = MCA_PML_CALL(recv(pml_buffer, count, dtype, i,
|
||||
MCA_COLL_BASE_TAG_REDUCE, comm,
|
||||
MPI_STATUS_IGNORE));
|
||||
if (MPI_SUCCESS != err) {
|
||||
if (NULL != free_buffer) {
|
||||
free(free_buffer);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
inbuf = pml_buffer;
|
||||
}
|
||||
|
||||
/* Perform the reduction */
|
||||
|
||||
ompi_op_reduce(op, inbuf, rbuf, count, dtype);
|
||||
}
|
||||
|
||||
if (NULL != inplace_temp) {
|
||||
err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)sbuf, inplace_temp);
|
||||
free(inplace_temp);
|
||||
}
|
||||
if (NULL != free_buffer) {
|
||||
free(free_buffer);
|
||||
}
|
||||
|
||||
/* All done */
|
||||
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* reduce_log_intra
|
||||
@ -339,8 +104,8 @@ mca_coll_basic_reduce_log_intra(void *sbuf, void *rbuf, int count,
|
||||
* operations. */
|
||||
|
||||
if (!ompi_op_is_commute(op)) {
|
||||
return mca_coll_basic_reduce_lin_intra(sbuf, rbuf, count, dtype,
|
||||
op, root, comm, module);
|
||||
return ompi_coll_base_reduce_intra_basic_linear(sbuf, rbuf, count, dtype,
|
||||
op, root, comm, module);
|
||||
}
|
||||
|
||||
/* Some variables */
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2015 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -28,73 +28,6 @@
|
||||
#include "coll_basic.h"
|
||||
|
||||
|
||||
/*
|
||||
* scatter_intra
|
||||
*
|
||||
* Function: - scatter operation
|
||||
* Accepts: - same arguments as MPI_Scatter()
|
||||
* Returns: - MPI_SUCCESS or error code
|
||||
*/
|
||||
int
|
||||
mca_coll_basic_scatter_intra(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
int root, struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
int i, rank, size, err;
|
||||
char *ptmp;
|
||||
ptrdiff_t lb, incr;
|
||||
|
||||
/* Initialize */
|
||||
|
||||
rank = ompi_comm_rank(comm);
|
||||
size = ompi_comm_size(comm);
|
||||
|
||||
/* If not root, receive data. */
|
||||
|
||||
if (rank != root) {
|
||||
err = MCA_PML_CALL(recv(rbuf, rcount, rdtype, root,
|
||||
MCA_COLL_BASE_TAG_SCATTER,
|
||||
comm, MPI_STATUS_IGNORE));
|
||||
return err;
|
||||
}
|
||||
|
||||
/* I am the root, loop sending data. */
|
||||
|
||||
err = ompi_datatype_get_extent(sdtype, &lb, &incr);
|
||||
if (OMPI_SUCCESS != err) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
incr *= scount;
|
||||
for (i = 0, ptmp = (char *) sbuf; i < size; ++i, ptmp += incr) {
|
||||
|
||||
/* simple optimization */
|
||||
|
||||
if (i == rank) {
|
||||
if (MPI_IN_PLACE != rbuf) {
|
||||
err =
|
||||
ompi_datatype_sndrcv(ptmp, scount, sdtype, rbuf, rcount,
|
||||
rdtype);
|
||||
}
|
||||
} else {
|
||||
err = MCA_PML_CALL(send(ptmp, scount, sdtype, i,
|
||||
MCA_COLL_BASE_TAG_SCATTER,
|
||||
MCA_PML_BASE_SEND_STANDARD, comm));
|
||||
}
|
||||
if (MPI_SUCCESS != err) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
/* All done */
|
||||
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* scatter_inter
|
||||
*
|
||||
|
@ -98,7 +98,7 @@ int ompi_coll_tuned_barrier_intra_do_forced(struct ompi_communicator_t *comm,
|
||||
|
||||
switch (tuned_module->user_forced[BARRIER].algorithm) {
|
||||
case (0): return ompi_coll_tuned_barrier_intra_dec_fixed(comm, module);
|
||||
case (1): return ompi_coll_base_barrier_intra_linear(comm, module);
|
||||
case (1): return ompi_coll_base_barrier_intra_basic_linear(comm, module);
|
||||
case (2): return ompi_coll_base_barrier_intra_doublering(comm, module);
|
||||
case (3): return ompi_coll_base_barrier_intra_recursivedoubling(comm, module);
|
||||
case (4): return ompi_coll_base_barrier_intra_bruck(comm, module);
|
||||
@ -122,7 +122,7 @@ int ompi_coll_tuned_barrier_intra_do_this (struct ompi_communicator_t *comm,
|
||||
|
||||
switch (algorithm) {
|
||||
case (0): return ompi_coll_tuned_barrier_intra_dec_fixed(comm, module);
|
||||
case (1): return ompi_coll_base_barrier_intra_linear(comm, module);
|
||||
case (1): return ompi_coll_base_barrier_intra_basic_linear(comm, module);
|
||||
case (2): return ompi_coll_base_barrier_intra_doublering(comm, module);
|
||||
case (3): return ompi_coll_base_barrier_intra_recursivedoubling(comm, module);
|
||||
case (4): return ompi_coll_base_barrier_intra_bruck(comm, module);
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user