From 67d01bd8cdf10f1bfe38b29a5c887bfe38aa4705 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Sun, 15 Feb 2015 17:59:18 -0500 Subject: [PATCH] Redirect most of the basic module functions to base. --- ompi/mca/coll/base/coll_base_allgatherv.c | 12 +- ompi/mca/coll/base/coll_base_alltoall.c | 19 +- ompi/mca/coll/base/coll_base_alltoallv.c | 2 +- ompi/mca/coll/base/coll_base_barrier.c | 13 +- ompi/mca/coll/base/coll_base_bcast.c | 8 +- ompi/mca/coll/base/coll_base_functions.h | 10 +- ompi/mca/coll/base/coll_base_reduce.c | 170 ++++++++++-- ompi/mca/coll/base/coll_base_scatter.c | 12 +- ompi/mca/coll/basic/coll_basic.h | 59 +---- ompi/mca/coll/basic/coll_basic_allgather.c | 44 ---- ompi/mca/coll/basic/coll_basic_allgatherv.c | 83 +----- ompi/mca/coll/basic/coll_basic_alltoall.c | 220 +--------------- ompi/mca/coll/basic/coll_basic_alltoallv.c | 222 +--------------- ompi/mca/coll/basic/coll_basic_barrier.c | 68 +---- ompi/mca/coll/basic/coll_basic_bcast.c | 74 +----- ompi/mca/coll/basic/coll_basic_gather.c | 64 +---- ompi/mca/coll/basic/coll_basic_module.c | 30 +-- ompi/mca/coll/basic/coll_basic_reduce.c | 241 +----------------- ompi/mca/coll/basic/coll_basic_scatter.c | 69 +---- .../coll/tuned/coll_tuned_barrier_decision.c | 4 +- 20 files changed, 216 insertions(+), 1208 deletions(-) diff --git a/ompi/mca/coll/base/coll_base_allgatherv.c b/ompi/mca/coll/base/coll_base_allgatherv.c index b884dc8591..c290ed3daa 100644 --- a/ompi/mca/coll/base/coll_base_allgatherv.c +++ b/ompi/mca/coll/base/coll_base_allgatherv.c @@ -593,12 +593,12 @@ int ompi_coll_base_allgatherv_intra_two_procs(void *sbuf, int scount, */ int ompi_coll_base_allgatherv_intra_basic_default(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, - int *disps, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + struct ompi_datatype_t *sdtype, + void *rbuf, int *rcounts, + int *disps, + struct ompi_datatype_t *rdtype, + 
struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) { int i, size, rank, err; MPI_Aint extent, lb; diff --git a/ompi/mca/coll/base/coll_base_alltoall.c b/ompi/mca/coll/base/coll_base_alltoall.c index fe71c5345f..d9e36ba9e9 100644 --- a/ompi/mca/coll/base/coll_base_alltoall.c +++ b/ompi/mca/coll/base/coll_base_alltoall.c @@ -35,11 +35,11 @@ #include "coll_base_util.h" /* MPI_IN_PLACE all to all algorithm. TODO: implement a better one. */ -static int +int mca_coll_base_alltoall_intra_basic_inplace(void *rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) { mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module; int i, j, size, rank, err=MPI_SUCCESS; @@ -567,11 +567,11 @@ int ompi_coll_base_alltoall_intra_two_procs(void *sbuf, int scount, /* copied function (with appropriate renaming) starts here */ int ompi_coll_base_alltoall_intra_basic_linear(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + struct ompi_datatype_t *sdtype, + void* rbuf, int rcount, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) { int i, rank, size, err, nreqs; char *psnd, *prcv; @@ -593,7 +593,6 @@ int ompi_coll_base_alltoall_intra_basic_linear(void *sbuf, int scount, OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "ompi_coll_base_alltoall_intra_basic_linear rank %d", rank)); - err = ompi_datatype_get_extent(sdtype, &lb, &sndinc); if (OMPI_SUCCESS != err) { return err; diff --git a/ompi/mca/coll/base/coll_base_alltoallv.c b/ompi/mca/coll/base/coll_base_alltoallv.c index 19f71674dd..b10a2b8120 100644 --- a/ompi/mca/coll/base/coll_base_alltoallv.c +++ b/ompi/mca/coll/base/coll_base_alltoallv.c @@ 
-36,7 +36,7 @@ #include "coll_base_topo.h" #include "coll_base_util.h" -static int +int mca_coll_base_alltoallv_intra_basic_inplace(void *rbuf, const int *rcounts, const int *rdisps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, diff --git a/ompi/mca/coll/base/coll_base_barrier.c b/ompi/mca/coll/base/coll_base_barrier.c index 15b3f4883d..1896f23e22 100644 --- a/ompi/mca/coll/base/coll_base_barrier.c +++ b/ompi/mca/coll/base/coll_base_barrier.c @@ -321,8 +321,8 @@ int ompi_coll_base_barrier_intra_two_procs(struct ompi_communicator_t *comm, /* copied function (with appropriate renaming) starts here */ -static int ompi_coll_base_barrier_intra_basic_linear(struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) +int ompi_coll_base_barrier_intra_basic_linear(struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) { int i, err, rank, size; @@ -363,15 +363,14 @@ static int ompi_coll_base_barrier_intra_basic_linear(struct ompi_communicator_t ompi_request_wait_all( size-1, requests+1, MPI_STATUSES_IGNORE ); for (i = 1; i < size; ++i) { - err = MCA_PML_CALL(isend(NULL, 0, MPI_BYTE, i, - MCA_COLL_BASE_TAG_BARRIER, - MCA_PML_BASE_SEND_STANDARD, comm, - &(requests[i]))); + err = MCA_PML_CALL(send(NULL, 0, MPI_BYTE, i, + MCA_COLL_BASE_TAG_BARRIER, + MCA_PML_BASE_SEND_STANDARD, comm)); if (MPI_SUCCESS != err) { return err; } } - ompi_request_wait_all( size-1, requests+1, MPI_STATUSES_IGNORE ); + free( requests ); } diff --git a/ompi/mca/coll/base/coll_base_bcast.c b/ompi/mca/coll/base/coll_base_bcast.c index 8f7fe1b3e4..6ce03884d5 100644 --- a/ompi/mca/coll/base/coll_base_bcast.c +++ b/ompi/mca/coll/base/coll_base_bcast.c @@ -631,10 +631,10 @@ ompi_coll_base_bcast_intra_split_bintree ( void* buffer, * Returns: - MPI_SUCCESS or error code */ int -ompi_coll_base_bcast_intra_basic_linear (void *buff, int count, - struct ompi_datatype_t *datatype, int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) 
+ompi_coll_base_bcast_intra_basic_linear(void *buff, int count, + struct ompi_datatype_t *datatype, int root, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) { int i, size, rank, err; mca_coll_base_comm_t *data = module->base_data; diff --git a/ompi/mca/coll/base/coll_base_functions.h b/ompi/mca/coll/base/coll_base_functions.h index 5291f8725e..6770d3d914 100644 --- a/ompi/mca/coll/base/coll_base_functions.h +++ b/ompi/mca/coll/base/coll_base_functions.h @@ -104,10 +104,18 @@ int ompi_coll_base_alltoall_intra_bruck(ALLTOALL_ARGS); int ompi_coll_base_alltoall_intra_basic_linear(ALLTOALL_ARGS); int ompi_coll_base_alltoall_intra_linear_sync(ALLTOALL_ARGS, int max_requests); int ompi_coll_base_alltoall_intra_two_procs(ALLTOALL_ARGS); +int mca_coll_base_alltoall_intra_basic_inplace(void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); /* special version for INPLACE */ /* AlltoAllV */ int ompi_coll_base_alltoallv_intra_pairwise(ALLTOALLV_ARGS); int ompi_coll_base_alltoallv_intra_basic_linear(ALLTOALLV_ARGS); +int mca_coll_base_alltoallv_intra_basic_inplace(void *rbuf, const int *rcounts, const int *rdisps, + struct ompi_datatype_t *rdtype, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module); /* special version for INPLACE */ /* AlltoAllW */ @@ -116,8 +124,8 @@ int ompi_coll_base_barrier_intra_doublering(BARRIER_ARGS); int ompi_coll_base_barrier_intra_recursivedoubling(BARRIER_ARGS); int ompi_coll_base_barrier_intra_bruck(BARRIER_ARGS); int ompi_coll_base_barrier_intra_two_procs(BARRIER_ARGS); -int ompi_coll_base_barrier_intra_linear(BARRIER_ARGS); int ompi_coll_base_barrier_intra_tree(BARRIER_ARGS); +int ompi_coll_base_barrier_intra_basic_linear(BARRIER_ARGS); /* Bcast */ int ompi_coll_base_bcast_intra_basic_linear(BCAST_ARGS); diff --git a/ompi/mca/coll/base/coll_base_reduce.c b/ompi/mca/coll/base/coll_base_reduce.c index f6752579c9..644ff66f76 100644 --- 
a/ompi/mca/coll/base/coll_base_reduce.c +++ b/ompi/mca/coll/base/coll_base_reduce.c @@ -606,16 +606,16 @@ ompi_coll_base_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count, { int i, rank, err, size; ptrdiff_t true_lb, true_extent, lb, extent; - char *free_buffer = NULL, *pml_buffer = NULL; - char *inplace_temp = NULL, *inbuf; + char *free_buffer = NULL; + char *pml_buffer = NULL; + char *inplace_temp = NULL; + char *inbuf; /* Initialize */ rank = ompi_comm_rank(comm); size = ompi_comm_size(comm); - OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:reduce_intra_basic_linear rank %d", rank)); - /* If not root, send data to the root. */ if (rank != root) { @@ -625,16 +625,136 @@ ompi_coll_base_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count, return err; } - /* see discussion in ompi_coll_basic_reduce_lin_intra about - extent and true extent */ - /* for reducing buffer allocation lengths.... */ + /* Root receives and reduces messages. Allocate buffer to receive + * messages. This comment applies to all collectives in this basic + * module where we allocate a temporary buffer. For the next few + * lines of code, it's tremendously complicated how we decided that + * this was the Right Thing to do. Sit back and enjoy. And prepare + * to have your mind warped. :-) + * + * Recall some definitions (I always get these backwards, so I'm + * going to put them here): + * + * extent: the length from the lower bound to the upper bound -- may + * be considerably larger than the buffer required to hold the data + * (or smaller! But it's easiest to think about when it's larger). + * + * true extent: the exact number of bytes required to hold the data + * in the layout pattern in the datatype. 
+ * + * For example, consider the following buffer (just talking about + * true_lb, extent, and true extent -- extrapolate for true_ub): + * + * A B C + * -------------------------------------------------------- + * | | | + * -------------------------------------------------------- + * + * There are multiple cases: + * + * 1. A is what we give to MPI_Send (and friends), and A is where + * the data starts, and C is where the data ends. In this case: + * + * - extent: C-A + * - true extent: C-A + * - true_lb: 0 + * + * A C + * -------------------------------------------------------- + * | | + * -------------------------------------------------------- + * <=======================extent=========================> + * <======================true extent=====================> + * + * 2. A is what we give to MPI_Send (and friends), B is where the + * data starts, and C is where the data ends. In this case: + * + * - extent: C-A + * - true extent: C-B + * - true_lb: positive + * + * A B C + * -------------------------------------------------------- + * | | User buffer | + * -------------------------------------------------------- + * <=======================extent=========================> + * <===============true extent=============> + * + * 3. B is what we give to MPI_Send (and friends), A is where the + * data starts, and C is where the data ends. In this case: + * + * - extent: C-A + * - true extent: C-A + * - true_lb: negative + * + * A B C + * -------------------------------------------------------- + * | | User buffer | + * -------------------------------------------------------- + * <=======================extent=========================> + * <======================true extent=====================> + * + * 4. MPI_BOTTOM is what we give to MPI_Send (and friends), B is + * where the data starts, and C is where the data ends. 
In this + * case: + * + * - extent: C-MPI_BOTTOM + * - true extent: C-B + * - true_lb: [potentially very large] positive + * + * MPI_BOTTOM B C + * -------------------------------------------------------- + * | | User buffer | + * -------------------------------------------------------- + * <=======================extent=========================> + * <===============true extent=============> + * + * So in all cases, for a temporary buffer, all we need to malloc() + * is a buffer of size true_extent. We therefore need to know two + * pointer values: what value to give to MPI_Send (and friends) and + * what value to give to free(), because they might not be the same. + * + * Clearly, what we give to free() is exactly what was returned from + * malloc(). That part is easy. :-) + * + * What we give to MPI_Send (and friends) is a bit more complicated. + * Let's take the 4 cases from above: + * + * 1. If A is what we give to MPI_Send and A is where the data + * starts, then clearly we give to MPI_Send what we got back from + * malloc(). + * + * 2. If B is what we get back from malloc, but we give A to + * MPI_Send, then the buffer range [A,B) represents "dead space" + * -- no data will be put there. So it's safe to give B-true_lb to + * MPI_Send. More specifically, the true_lb is positive, so B-true_lb is + * actually A. + * + * 3. If A is what we get back from malloc, and B is what we give to + * MPI_Send, then the true_lb is negative, so A-true_lb will actually equal + * B. + * + * 4. Although this seems like the weirdest case, it's actually + * quite similar to case #2 -- the pointer we give to MPI_Send is + * smaller than the pointer we got back from malloc(). + * + * Hence, in all cases, we give (return_from_malloc - true_lb) to MPI_Send. + * + * This works fine and dandy if we only have (count==1), which we + * rarely do. ;-) So we really need to allocate (true_extent + + * ((count - 1) * extent)) to get enough space for the rest. 
This may + * be more than is necessary, but it's ok. + * + * Simple, no? :-) + * + */ ompi_datatype_get_extent(dtype, &lb, &extent); ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); if (MPI_IN_PLACE == sbuf) { sbuf = rbuf; - inplace_temp = (char*)malloc(true_extent + (ptrdiff_t)(count - 1) * extent); + inplace_temp = (char*)malloc(true_extent + (count - 1) * extent); if (NULL == inplace_temp) { return OMPI_ERR_OUT_OF_RESOURCE; } @@ -642,10 +762,12 @@ ompi_coll_base_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count, } if (size > 1) { - free_buffer = (char*)malloc(true_extent + (ptrdiff_t)(count - 1) * extent); + free_buffer = (char*)malloc(true_extent + (count - 1) * extent); if (NULL == free_buffer) { - err = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; + if (NULL != inplace_temp) { + free(inplace_temp); + } + return OMPI_ERR_OUT_OF_RESOURCE; } pml_buffer = free_buffer - true_lb; } @@ -653,15 +775,17 @@ ompi_coll_base_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count, /* Initialize the receive buffer. */ if (rank == (size - 1)) { - err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)rbuf, - (char*)sbuf); + err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)rbuf, (char*)sbuf); } else { err = MCA_PML_CALL(recv(rbuf, count, dtype, size - 1, MCA_COLL_BASE_TAG_REDUCE, comm, MPI_STATUS_IGNORE)); } if (MPI_SUCCESS != err) { - goto exit; + if (NULL != free_buffer) { + free(free_buffer); + } + return err; } /* Loop receiving and calling reduction function (C or Fortran). 
*/ @@ -674,25 +798,22 @@ ompi_coll_base_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count, MCA_COLL_BASE_TAG_REDUCE, comm, MPI_STATUS_IGNORE)); if (MPI_SUCCESS != err) { - goto exit; + if (NULL != free_buffer) { + free(free_buffer); + } + return err; } inbuf = pml_buffer; } /* Perform the reduction */ + ompi_op_reduce(op, inbuf, rbuf, count, dtype); } if (NULL != inplace_temp) { - err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)sbuf, - inplace_temp); - } else { - err = MPI_SUCCESS; - } - - exit: - if (NULL != inplace_temp) { + err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)sbuf, inplace_temp); free(inplace_temp); } if (NULL != free_buffer) { @@ -700,7 +821,8 @@ ompi_coll_base_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count, } /* All done */ - return err; + + return MPI_SUCCESS; } /* copied function (with appropriate renaming) ends here */ diff --git a/ompi/mca/coll/base/coll_base_scatter.c b/ompi/mca/coll/base/coll_base_scatter.c index e832f4064e..bd4b140078 100644 --- a/ompi/mca/coll/base/coll_base_scatter.c +++ b/ompi/mca/coll/base/coll_base_scatter.c @@ -194,12 +194,12 @@ ompi_coll_base_scatter_intra_binomial(void *sbuf, int scount, */ int ompi_coll_base_scatter_intra_basic_linear(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, int rcount, - struct ompi_datatype_t *rdtype, - int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + struct ompi_datatype_t *sdtype, + void *rbuf, int rcount, + struct ompi_datatype_t *rdtype, + int root, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) { int i, rank, size, err; ptrdiff_t lb, incr; diff --git a/ompi/mca/coll/basic/coll_basic.h b/ompi/mca/coll/basic/coll_basic.h index ca0b6d558f..6549cd42eb 100644 --- a/ompi/mca/coll/basic/coll_basic.h +++ b/ompi/mca/coll/basic/coll_basic.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology 
* Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -31,6 +31,7 @@ #include "ompi/mca/coll/coll.h" #include "ompi/request/request.h" #include "ompi/communicator/communicator.h" +#include "ompi/mca/coll/base/coll_base_functions.h" BEGIN_C_DECLS @@ -52,12 +53,6 @@ BEGIN_C_DECLS int mca_coll_basic_module_enable(mca_coll_base_module_t *module, struct ompi_communicator_t *comm); - int mca_coll_basic_allgather_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); int mca_coll_basic_allgather_inter(void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, @@ -65,13 +60,6 @@ BEGIN_C_DECLS struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_allgatherv_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, - int *disps, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); int mca_coll_basic_allgatherv_inter(void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int *rcounts, @@ -91,12 +79,6 @@ BEGIN_C_DECLS struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_alltoall_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); int mca_coll_basic_alltoall_inter(void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, @@ -104,14 +86,6 @@ BEGIN_C_DECLS struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int 
mca_coll_basic_alltoallv_intra(void *sbuf, int *scounts, - int *sdisps, - struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, - int *rdisps, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); int mca_coll_basic_alltoallv_inter(void *sbuf, int *scounts, int *sdisps, struct ompi_datatype_t *sdtype, @@ -138,21 +112,12 @@ BEGIN_C_DECLS struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_barrier_intra_lin(struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); - int mca_coll_basic_barrier_inter_lin(struct ompi_communicator_t *comm, mca_coll_base_module_t *module); int mca_coll_basic_barrier_intra_log(struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_bcast_lin_intra(void *buff, int count, - struct ompi_datatype_t *datatype, - int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); - int mca_coll_basic_bcast_lin_inter(void *buff, int count, struct ompi_datatype_t *datatype, int root, @@ -183,13 +148,6 @@ BEGIN_C_DECLS struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_gather_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, int rcount, - struct ompi_datatype_t *rdtype, - int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); int mca_coll_basic_gather_inter(void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, @@ -214,12 +172,6 @@ BEGIN_C_DECLS struct ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_reduce_lin_intra(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); int mca_coll_basic_reduce_lin_inter(void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, @@ -279,13 +231,6 @@ BEGIN_C_DECLS struct 
ompi_communicator_t *comm, mca_coll_base_module_t *module); - int mca_coll_basic_scatter_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, int rcount, - struct ompi_datatype_t *rdtype, - int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module); int mca_coll_basic_scatter_inter(void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, diff --git a/ompi/mca/coll/basic/coll_basic_allgather.c b/ompi/mca/coll/basic/coll_basic_allgather.c index 06d58cfe69..6a187cc4d8 100644 --- a/ompi/mca/coll/basic/coll_basic_allgather.c +++ b/ompi/mca/coll/basic/coll_basic_allgather.c @@ -32,50 +32,6 @@ #include "coll_basic.h" -/* - * allgather_intra - * - * Function: - allgather using other MPI collections - * Accepts: - same as MPI_Allgather() - * Returns: - MPI_SUCCESS or error code - */ -int -mca_coll_basic_allgather_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, void *rbuf, - int rcount, struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - int err; - ptrdiff_t lb, extent; - - /* Handle MPI_IN_PLACE (see explanantion in reduce.c for how to - allocate temp buffer) -- note that rank 0 can use IN_PLACE - natively, and we can just alias the right position in rbuf - as sbuf and avoid using a temporary buffer if gather is - implemented correctly */ - if (MPI_IN_PLACE == sbuf && 0 != ompi_comm_rank(comm)) { - ompi_datatype_get_extent(rdtype, &lb, &extent); - sbuf = ((char*) rbuf) + (ompi_comm_rank(comm) * extent * rcount); - sdtype = rdtype; - scount = rcount; - } - - /* Gather and broadcast. 
*/ - - err = comm->c_coll.coll_gather(sbuf, scount, sdtype, rbuf, rcount, - rdtype, 0, comm, comm->c_coll.coll_gather_module); - if (MPI_SUCCESS == err) { - err = comm->c_coll.coll_bcast(rbuf, rcount * ompi_comm_size(comm), - rdtype, 0, comm, comm->c_coll.coll_bcast_module); - } - - /* All done */ - - return err; -} - - /* * allgather_inter * diff --git a/ompi/mca/coll/basic/coll_basic_allgatherv.c b/ompi/mca/coll/basic/coll_basic_allgatherv.c index c3054bbadd..329eaf8ad8 100644 --- a/ompi/mca/coll/basic/coll_basic_allgatherv.c +++ b/ompi/mca/coll/basic/coll_basic_allgatherv.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -28,87 +28,6 @@ #include "coll_basic.h" -/* - * allgatherv_intra - * - * Function: - allgatherv using other MPI collectives - * Accepts: - same as MPI_Allgatherv() - * Returns: - MPI_SUCCESS or error code - */ -int -mca_coll_basic_allgatherv_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *disps, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - int i, size, rank ; - int err; - MPI_Aint extent; - MPI_Aint lb; - char *send_buf = NULL; - struct ompi_datatype_t *newtype, *send_type; - - size = ompi_comm_size(comm); - rank = ompi_comm_rank(comm); - /* - * We don't have a root process defined. 
Arbitrarily assign root - * to process with rank 0 (OMPI convention) - */ - - if (MPI_IN_PLACE == sbuf) { - ompi_datatype_get_extent(rdtype, &lb, &extent); - send_type = rdtype; - send_buf = (char*)rbuf; - for (i = 0; i < rank; ++i) { - send_buf += (rcounts[i] * extent); - } - } else { - send_buf = (char*)sbuf; - send_type = sdtype; - } - - err = comm->c_coll.coll_gatherv(send_buf, - rcounts[rank], send_type,rbuf, - rcounts, disps, rdtype, 0, - comm, comm->c_coll.coll_gatherv_module); - - if (MPI_SUCCESS != err) { - return err; - } - /* - * we now have all the data in the root's rbuf. Need to - * broadcast the data out to the other processes - * - * Need to define a datatype that captures the different vectors - * from each process. MPI_TYPE_INDEXED with params - * size,rcount,displs,rdtype,newtype - * should do the trick. - * Use underlying ddt functions to create, and commit the - * new datatype on each process, then broadcast and destroy the - * datatype. - */ - - err = ompi_datatype_create_indexed(size,rcounts,disps,rdtype,&newtype); - if (MPI_SUCCESS != err) { - return err; - } - - err = ompi_datatype_commit(&newtype); - if(MPI_SUCCESS != err) { - return err; - } - - err = comm->c_coll.coll_bcast( rbuf, 1 ,newtype,0,comm, - comm->c_coll.coll_bcast_module); - - ompi_datatype_destroy (&newtype); - - return err; -} - - /* * allgatherv_inter * diff --git a/ompi/mca/coll/basic/coll_basic_alltoall.c b/ompi/mca/coll/basic/coll_basic_alltoall.c index 5fedab92ce..6f46594b40 100644 --- a/ompi/mca/coll/basic/coll_basic_alltoall.c +++ b/ompi/mca/coll/basic/coll_basic_alltoall.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -32,224 +32,6 @@ #include "ompi/mca/pml/pml.h" -static int -mca_coll_basic_alltoall_intra_inplace(void *rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module; - int i, j, size, rank, err=MPI_SUCCESS; - MPI_Request *preq; - char *tmp_buffer; - size_t max_size; - ptrdiff_t ext; - - /* Initialize. */ - - size = ompi_comm_size(comm); - rank = ompi_comm_rank(comm); - - /* If only one process, we're done. */ - if (1 == size) { - return MPI_SUCCESS; - } - - /* Find the largest receive amount */ - ompi_datatype_type_extent (rdtype, &ext); - max_size = ext * rcount; - - /* Allocate a temporary buffer */ - tmp_buffer = calloc (max_size, 1); - if (NULL == tmp_buffer) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* in-place alltoall slow algorithm (but works) */ - for (i = 0 ; i < size ; ++i) { - for (j = i+1 ; j < size ; ++j) { - /* Initiate all send/recv to/from others. 
*/ - preq = basic_module->mccb_reqs; - - if (i == rank) { - /* Copy the data into the temporary buffer */ - err = ompi_datatype_copy_content_same_ddt (rdtype, rcount, tmp_buffer, - (char *) rbuf + j * max_size); - if (MPI_SUCCESS != err) { goto error_hndl; } - - /* Exchange data with the peer */ - err = MCA_PML_CALL(irecv ((char *) rbuf + max_size * j, rcount, rdtype, - j, MCA_COLL_BASE_TAG_ALLTOALL, comm, preq++)); - if (MPI_SUCCESS != err) { goto error_hndl; } - - err = MCA_PML_CALL(isend ((char *) tmp_buffer, rcount, rdtype, - j, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, - comm, preq++)); - if (MPI_SUCCESS != err) { goto error_hndl; } - } else if (j == rank) { - /* Copy the data into the temporary buffer */ - err = ompi_datatype_copy_content_same_ddt (rdtype, rcount, tmp_buffer, - (char *) rbuf + i * max_size); - if (MPI_SUCCESS != err) { goto error_hndl; } - - /* Exchange data with the peer */ - err = MCA_PML_CALL(irecv ((char *) rbuf + max_size * i, rcount, rdtype, - i, MCA_COLL_BASE_TAG_ALLTOALL, comm, preq++)); - if (MPI_SUCCESS != err) { goto error_hndl; } - - err = MCA_PML_CALL(isend ((char *) tmp_buffer, rcount, rdtype, - i, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, - comm, preq++)); - if (MPI_SUCCESS != err) { goto error_hndl; } - } else { - continue; - } - - /* Wait for the requests to complete */ - err = ompi_request_wait_all (2, basic_module->mccb_reqs, MPI_STATUSES_IGNORE); - if (MPI_SUCCESS != err) { goto error_hndl; } - - /* Free the requests. 
*/ - mca_coll_basic_free_reqs(basic_module->mccb_reqs, 2); - } - } - - error_hndl: - /* Free the temporary buffer */ - free (tmp_buffer); - - /* All done */ - - return err; -} - -/* - * alltoall_intra - * - * Function: - MPI_Alltoall - * Accepts: - same as MPI_Alltoall() - * Returns: - MPI_SUCCESS or an MPI error code - */ -int -mca_coll_basic_alltoall_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - int i; - int rank; - int size; - int err; - int nreqs; - char *psnd; - char *prcv; - MPI_Aint lb; - MPI_Aint sndinc; - MPI_Aint rcvinc; - - ompi_request_t **req; - ompi_request_t **sreq; - ompi_request_t **rreq; - mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module; - - /* Initialize. */ - if (MPI_IN_PLACE == sbuf) { - return mca_coll_basic_alltoall_intra_inplace (rbuf, rcount, rdtype, - comm, module); - } - - size = ompi_comm_size(comm); - rank = ompi_comm_rank(comm); - - err = ompi_datatype_get_extent(sdtype, &lb, &sndinc); - if (OMPI_SUCCESS != err) { - return err; - } - sndinc *= scount; - - err = ompi_datatype_get_extent(rdtype, &lb, &rcvinc); - if (OMPI_SUCCESS != err) { - return err; - } - rcvinc *= rcount; - - /* simple optimization */ - - psnd = ((char *) sbuf) + (rank * sndinc); - prcv = ((char *) rbuf) + (rank * rcvinc); - - err = ompi_datatype_sndrcv(psnd, scount, sdtype, prcv, rcount, rdtype); - if (MPI_SUCCESS != err) { - return err; - } - - /* If only one process, we're done. */ - - if (1 == size) { - return MPI_SUCCESS; - } - - /* Initiate all send/recv to/from others. 
*/ - - req = rreq = basic_module->mccb_reqs; - sreq = rreq + size - 1; - - prcv = (char *) rbuf; - psnd = (char *) sbuf; - - /* Post all receives first -- a simple optimization */ - - for (nreqs = 0, i = (rank + 1) % size; i != rank; i = (i + 1) % size, ++rreq, ++nreqs) { - err = - MCA_PML_CALL(irecv_init - (prcv + (i * rcvinc), rcount, rdtype, i, - MCA_COLL_BASE_TAG_ALLTOALL, comm, rreq)); - if (MPI_SUCCESS != err) { - mca_coll_basic_free_reqs(req, nreqs); - return err; - } - } - - /* Now post all sends */ - - for (nreqs = 0, i = (rank + 1) % size; i != rank; i = (i + 1) % size, ++sreq, ++nreqs) { - err = - MCA_PML_CALL(isend_init - (psnd + (i * sndinc), scount, sdtype, i, - MCA_COLL_BASE_TAG_ALLTOALL, - MCA_PML_BASE_SEND_STANDARD, comm, sreq)); - if (MPI_SUCCESS != err) { - mca_coll_basic_free_reqs(req, nreqs); - return err; - } - } - - nreqs = (size - 1) * 2; - /* Start your engines. This will never return an error. */ - - MCA_PML_CALL(start(nreqs, req)); - - /* Wait for them all. If there's an error, note that we don't - * care what the error was -- just that there *was* an error. The - * PML will finish all requests, even if one or more of them fail. - * i.e., by the end of this call, all the requests are free-able. - * So free them anyway -- even if there was an error, and return - * the error after we free everything. */ - - err = ompi_request_wait_all(nreqs, req, MPI_STATUSES_IGNORE); - - /* Free the reqs */ - - mca_coll_basic_free_reqs(req, nreqs); - - /* All done */ - - return err; -} - - /* * alltoall_inter * diff --git a/ompi/mca/coll/basic/coll_basic_alltoallv.c b/ompi/mca/coll/basic/coll_basic_alltoallv.c index 4c9ffebf51..77245cb4e4 100644 --- a/ompi/mca/coll/basic/coll_basic_alltoallv.c +++ b/ompi/mca/coll/basic/coll_basic_alltoallv.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -33,226 +33,6 @@ #include "ompi/mca/pml/pml.h" -static int -mca_coll_basic_alltoallv_intra_inplace(void *rbuf, const int *rcounts, const int *rdisps, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module; - int i, j, size, rank, err=MPI_SUCCESS; - MPI_Request *preq; - char *tmp_buffer; - size_t max_size; - ptrdiff_t ext; - - /* Initialize. */ - - size = ompi_comm_size(comm); - rank = ompi_comm_rank(comm); - - /* If only one process, we're done. */ - if (1 == size) { - return MPI_SUCCESS; - } - - /* Find the largest receive amount */ - ompi_datatype_type_extent (rdtype, &ext); - for (i = 0, max_size = 0 ; i < size ; ++i) { - size_t size = ext * rcounts[i]; - - max_size = size > max_size ? size : max_size; - } - - /* Allocate a temporary buffer */ - tmp_buffer = calloc (max_size, 1); - if (NULL == tmp_buffer) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* in-place alltoallv slow algorithm (but works) */ - for (i = 0 ; i < size ; ++i) { - for (j = i+1 ; j < size ; ++j) { - /* Initiate all send/recv to/from others. 
*/ - preq = basic_module->mccb_reqs; - - if (i == rank && rcounts[j]) { - /* Copy the data into the temporary buffer */ - err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[j], - tmp_buffer, (char *) rbuf + rdisps[j] * ext); - if (MPI_SUCCESS != err) { goto error_hndl; } - - /* Exchange data with the peer */ - err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[j] * ext, rcounts[j], rdtype, - j, MCA_COLL_BASE_TAG_ALLTOALLV, comm, preq++)); - if (MPI_SUCCESS != err) { goto error_hndl; } - - err = MCA_PML_CALL(isend ((void *) tmp_buffer, rcounts[j], rdtype, - j, MCA_COLL_BASE_TAG_ALLTOALLV, MCA_PML_BASE_SEND_STANDARD, - comm, preq++)); - if (MPI_SUCCESS != err) { goto error_hndl; } - } else if (j == rank && rcounts[i]) { - /* Copy the data into the temporary buffer */ - err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[i], - tmp_buffer, (char *) rbuf + rdisps[i] * ext); - if (MPI_SUCCESS != err) { goto error_hndl; } - - /* Exchange data with the peer */ - err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[i] * ext, rcounts[i], rdtype, - i, MCA_COLL_BASE_TAG_ALLTOALLV, comm, preq++)); - if (MPI_SUCCESS != err) { goto error_hndl; } - - err = MCA_PML_CALL(isend ((void *) tmp_buffer, rcounts[i], rdtype, - i, MCA_COLL_BASE_TAG_ALLTOALLV, MCA_PML_BASE_SEND_STANDARD, - comm, preq++)); - if (MPI_SUCCESS != err) { goto error_hndl; } - } else { - continue; - } - - /* Wait for the requests to complete */ - err = ompi_request_wait_all (2, basic_module->mccb_reqs, MPI_STATUSES_IGNORE); - if (MPI_SUCCESS != err) { goto error_hndl; } - - /* Free the requests. 
*/ - mca_coll_basic_free_reqs(basic_module->mccb_reqs, 2); - } - } - - error_hndl: - /* Free the temporary buffer */ - free (tmp_buffer); - - /* All done */ - - return err; -} - -/* - * alltoallv_intra - * - * Function: - MPI_Alltoallv - * Accepts: - same as MPI_Alltoallv() - * Returns: - MPI_SUCCESS or an MPI error code - */ -int -mca_coll_basic_alltoallv_intra(void *sbuf, int *scounts, int *sdisps, - struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, int *rdisps, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - int i; - int size; - int rank; - int err; - char *psnd; - char *prcv; - int nreqs; - MPI_Aint sndextent; - MPI_Aint rcvextent; - MPI_Request *preq; - - mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module; - - /* Initialize. */ - if (MPI_IN_PLACE == sbuf) { - return mca_coll_basic_alltoallv_intra_inplace (rbuf, rcounts, rdisps, - rdtype, comm, module); - } - - size = ompi_comm_size(comm); - rank = ompi_comm_rank(comm); - - ompi_datatype_type_extent(sdtype, &sndextent); - ompi_datatype_type_extent(rdtype, &rcvextent); - - /* simple optimization */ - - psnd = ((char *) sbuf) + (sdisps[rank] * sndextent); - prcv = ((char *) rbuf) + (rdisps[rank] * rcvextent); - - if (0 != scounts[rank]) { - err = ompi_datatype_sndrcv(psnd, scounts[rank], sdtype, - prcv, rcounts[rank], rdtype); - if (MPI_SUCCESS != err) { - return err; - } - } - - /* If only one process, we're done. */ - - if (1 == size) { - return MPI_SUCCESS; - } - - /* Initiate all send/recv to/from others. 
*/ - - nreqs = 0; - preq = basic_module->mccb_reqs; - - /* Post all receives first -- a simple optimization */ - - for (i = 0; i < size; ++i) { - if (i == rank || 0 == rcounts[i]) { - continue; - } - - prcv = ((char *) rbuf) + (rdisps[i] * rcvextent); - err = MCA_PML_CALL(irecv_init(prcv, rcounts[i], rdtype, - i, MCA_COLL_BASE_TAG_ALLTOALLV, comm, - preq++)); - ++nreqs; - if (MPI_SUCCESS != err) { - mca_coll_basic_free_reqs(basic_module->mccb_reqs, nreqs); - return err; - } - } - - /* Now post all sends */ - - for (i = 0; i < size; ++i) { - if (i == rank || 0 == scounts[i]) { - continue; - } - - psnd = ((char *) sbuf) + (sdisps[i] * sndextent); - err = MCA_PML_CALL(isend_init(psnd, scounts[i], sdtype, - i, MCA_COLL_BASE_TAG_ALLTOALLV, - MCA_PML_BASE_SEND_STANDARD, comm, - preq++)); - ++nreqs; - if (MPI_SUCCESS != err) { - mca_coll_basic_free_reqs(basic_module->mccb_reqs, nreqs); - return err; - } - } - - /* Start your engines. This will never return an error. */ - - MCA_PML_CALL(start(nreqs, basic_module->mccb_reqs)); - - /* Wait for them all. If there's an error, note that we don't care - * what the error was -- just that there *was* an error. The PML - * will finish all requests, even if one or more of them fail. - * i.e., by the end of this call, all the requests are free-able. - * So free them anyway -- even if there was an error, and return the - * error after we free everything. */ - - err = ompi_request_wait_all(nreqs, basic_module->mccb_reqs, - MPI_STATUSES_IGNORE); - - /* Free the requests. 
*/ - - mca_coll_basic_free_reqs(basic_module->mccb_reqs, nreqs); - - /* All done */ - - return err; -} - - /* * alltoallv_inter * diff --git a/ompi/mca/coll/basic/coll_basic_barrier.c b/ompi/mca/coll/basic/coll_basic_barrier.c index 8bce69e345..2c9568a2a6 100644 --- a/ompi/mca/coll/basic/coll_basic_barrier.c +++ b/ompi/mca/coll/basic/coll_basic_barrier.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -30,72 +30,6 @@ #include "coll_basic.h" -/* - * barrier_intra_lin - * - * Function: - barrier using O(N) algorithm - * Accepts: - same as MPI_Barrier() - * Returns: - MPI_SUCCESS or error code - */ -int -mca_coll_basic_barrier_intra_lin(struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - int i; - int err; - int size = ompi_comm_size(comm); - int rank = ompi_comm_rank(comm); - - /* All non-root send & receive zero-length message. */ - - if (rank > 0) { - err = - MCA_PML_CALL(send - (NULL, 0, MPI_BYTE, 0, MCA_COLL_BASE_TAG_BARRIER, - MCA_PML_BASE_SEND_STANDARD, comm)); - if (MPI_SUCCESS != err) { - return err; - } - - err = - MCA_PML_CALL(recv - (NULL, 0, MPI_BYTE, 0, MCA_COLL_BASE_TAG_BARRIER, - comm, MPI_STATUS_IGNORE)); - if (MPI_SUCCESS != err) { - return err; - } - } - - /* The root collects and broadcasts the messages. 
*/ - - else { - for (i = 1; i < size; ++i) { - err = MCA_PML_CALL(recv(NULL, 0, MPI_BYTE, MPI_ANY_SOURCE, - MCA_COLL_BASE_TAG_BARRIER, - comm, MPI_STATUS_IGNORE)); - if (MPI_SUCCESS != err) { - return err; - } - } - - for (i = 1; i < size; ++i) { - err = - MCA_PML_CALL(send - (NULL, 0, MPI_BYTE, i, - MCA_COLL_BASE_TAG_BARRIER, - MCA_PML_BASE_SEND_STANDARD, comm)); - if (MPI_SUCCESS != err) { - return err; - } - } - } - - /* All done */ - - return MPI_SUCCESS; -} - - /* * barrier_intra_log * diff --git a/ompi/mca/coll/basic/coll_basic_bcast.c b/ompi/mca/coll/basic/coll_basic_bcast.c index 43e3ed4e6f..f03bf1251d 100644 --- a/ompi/mca/coll/basic/coll_basic_bcast.c +++ b/ompi/mca/coll/basic/coll_basic_bcast.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -30,78 +30,6 @@ #include "opal/util/bit_ops.h" -/* - * bcast_lin_intra - * - * Function: - broadcast using O(N) algorithm - * Accepts: - same arguments as MPI_Bcast() - * Returns: - MPI_SUCCESS or error code - */ -int -mca_coll_basic_bcast_lin_intra(void *buff, int count, - struct ompi_datatype_t *datatype, int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - int i; - int size; - int rank; - int err; - ompi_request_t **preq; - mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module; - ompi_request_t **reqs = basic_module->mccb_reqs; - - size = ompi_comm_size(comm); - rank = ompi_comm_rank(comm); - - /* Non-root receive the data. 
*/ - - if (rank != root) { - return MCA_PML_CALL(recv(buff, count, datatype, root, - MCA_COLL_BASE_TAG_BCAST, comm, - MPI_STATUS_IGNORE)); - } - - /* Root sends data to all others. */ - - for (i = 0, preq = reqs; i < size; ++i) { - if (i == rank) { - continue; - } - - err = MCA_PML_CALL(isend_init(buff, count, datatype, i, - MCA_COLL_BASE_TAG_BCAST, - MCA_PML_BASE_SEND_STANDARD, - comm, preq++)); - if (MPI_SUCCESS != err) { - return err; - } - } - --i; - - /* Start your engines. This will never return an error. */ - - MCA_PML_CALL(start(i, reqs)); - - /* Wait for them all. If there's an error, note that we don't - * care what the error was -- just that there *was* an error. The - * PML will finish all requests, even if one or more of them fail. - * i.e., by the end of this call, all the requests are free-able. - * So free them anyway -- even if there was an error, and return - * the error after we free everything. */ - - err = ompi_request_wait_all(i, reqs, MPI_STATUSES_IGNORE); - - /* Free the reqs */ - - mca_coll_basic_free_reqs(reqs, i); - - /* All done */ - - return err; -} - - /* * bcast_log_intra * diff --git a/ompi/mca/coll/basic/coll_basic_gather.c b/ompi/mca/coll/basic/coll_basic_gather.c index cccd7ce0b7..74353fa3d5 100644 --- a/ompi/mca/coll/basic/coll_basic_gather.c +++ b/ompi/mca/coll/basic/coll_basic_gather.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -27,68 +27,6 @@ #include "ompi/mca/coll/base/coll_tags.h" #include "ompi/mca/pml/pml.h" -/* - * gather_intra - * - * Function: - basic gather operation - * Accepts: - same arguments as MPI_Gather() - * Returns: - MPI_SUCCESS or error code - */ -int -mca_coll_basic_gather_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, int rcount, - struct ompi_datatype_t *rdtype, - int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - int i; - int err; - int rank; - int size; - char *ptmp; - MPI_Aint incr; - MPI_Aint extent; - MPI_Aint lb; - - size = ompi_comm_size(comm); - rank = ompi_comm_rank(comm); - - /* Everyone but root sends data and returns. */ - - if (rank != root) { - return MCA_PML_CALL(send(sbuf, scount, sdtype, root, - MCA_COLL_BASE_TAG_GATHER, - MCA_PML_BASE_SEND_STANDARD, comm)); - } - - /* I am the root, loop receiving the data. */ - - ompi_datatype_get_extent(rdtype, &lb, &extent); - incr = extent * rcount; - for (i = 0, ptmp = (char *) rbuf; i < size; ++i, ptmp += incr) { - if (i == rank) { - if (MPI_IN_PLACE != sbuf) { - err = ompi_datatype_sndrcv(sbuf, scount, sdtype, - ptmp, rcount, rdtype); - } else { - err = MPI_SUCCESS; - } - } else { - err = MCA_PML_CALL(recv(ptmp, rcount, rdtype, i, - MCA_COLL_BASE_TAG_GATHER, - comm, MPI_STATUS_IGNORE)); - } - if (MPI_SUCCESS != err) { - return err; - } - } - - /* All done */ - - return MPI_SUCCESS; -} - /* * gather_inter diff --git a/ompi/mca/coll/basic/coll_basic_module.c b/ompi/mca/coll/basic/coll_basic_module.c index 80fb7b584e..c557eccc35 100644 --- a/ompi/mca/coll/basic/coll_basic_module.c +++ b/ompi/mca/coll/basic/coll_basic_module.c @@ -129,40 +129,40 @@ mca_coll_basic_comm_query(struct ompi_communicator_t *comm, basic_module->super.coll_scatter = mca_coll_basic_scatter_inter; basic_module->super.coll_scatterv = mca_coll_basic_scatterv_inter; } else if (ompi_comm_size(comm) <= 
mca_coll_basic_crossover) { - basic_module->super.coll_allgather = mca_coll_basic_allgather_intra; - basic_module->super.coll_allgatherv = mca_coll_basic_allgatherv_intra; + basic_module->super.coll_allgather = ompi_coll_base_allgather_intra_basic_linear; + basic_module->super.coll_allgatherv = ompi_coll_base_allgatherv_intra_basic_default; basic_module->super.coll_allreduce = mca_coll_basic_allreduce_intra; - basic_module->super.coll_alltoall = mca_coll_basic_alltoall_intra; - basic_module->super.coll_alltoallv = mca_coll_basic_alltoallv_intra; + basic_module->super.coll_alltoall = ompi_coll_base_alltoall_intra_basic_linear; + basic_module->super.coll_alltoallv = ompi_coll_base_alltoallv_intra_basic_linear; basic_module->super.coll_alltoallw = mca_coll_basic_alltoallw_intra; - basic_module->super.coll_barrier = mca_coll_basic_barrier_intra_lin; - basic_module->super.coll_bcast = mca_coll_basic_bcast_lin_intra; + basic_module->super.coll_barrier = ompi_coll_base_barrier_intra_basic_linear; + basic_module->super.coll_bcast = ompi_coll_base_bcast_intra_basic_linear; basic_module->super.coll_exscan = mca_coll_basic_exscan_intra; - basic_module->super.coll_gather = mca_coll_basic_gather_intra; + basic_module->super.coll_gather = ompi_coll_base_gather_intra_basic_linear; basic_module->super.coll_gatherv = mca_coll_basic_gatherv_intra; - basic_module->super.coll_reduce = mca_coll_basic_reduce_lin_intra; + basic_module->super.coll_reduce = ompi_coll_base_reduce_intra_basic_linear; basic_module->super.coll_reduce_scatter_block = mca_coll_basic_reduce_scatter_block_intra; basic_module->super.coll_reduce_scatter = mca_coll_basic_reduce_scatter_intra; basic_module->super.coll_scan = mca_coll_basic_scan_intra; - basic_module->super.coll_scatter = mca_coll_basic_scatter_intra; + basic_module->super.coll_scatter = ompi_coll_base_scatter_intra_basic_linear; basic_module->super.coll_scatterv = mca_coll_basic_scatterv_intra; } else { - basic_module->super.coll_allgather = 
mca_coll_basic_allgather_intra; - basic_module->super.coll_allgatherv = mca_coll_basic_allgatherv_intra; + basic_module->super.coll_allgather = ompi_coll_base_allgather_intra_basic_linear; + basic_module->super.coll_allgatherv = ompi_coll_base_allgatherv_intra_basic_default; basic_module->super.coll_allreduce = mca_coll_basic_allreduce_intra; - basic_module->super.coll_alltoall = mca_coll_basic_alltoall_intra; - basic_module->super.coll_alltoallv = mca_coll_basic_alltoallv_intra; + basic_module->super.coll_alltoall = ompi_coll_base_alltoall_intra_basic_linear; + basic_module->super.coll_alltoallv = ompi_coll_base_alltoallv_intra_basic_linear; basic_module->super.coll_alltoallw = mca_coll_basic_alltoallw_intra; basic_module->super.coll_barrier = mca_coll_basic_barrier_intra_log; basic_module->super.coll_bcast = mca_coll_basic_bcast_log_intra; basic_module->super.coll_exscan = mca_coll_basic_exscan_intra; - basic_module->super.coll_gather = mca_coll_basic_gather_intra; + basic_module->super.coll_gather = ompi_coll_base_gather_intra_basic_linear; basic_module->super.coll_gatherv = mca_coll_basic_gatherv_intra; basic_module->super.coll_reduce = mca_coll_basic_reduce_log_intra; basic_module->super.coll_reduce_scatter_block = mca_coll_basic_reduce_scatter_block_intra; basic_module->super.coll_reduce_scatter = mca_coll_basic_reduce_scatter_intra; basic_module->super.coll_scan = mca_coll_basic_scan_intra; - basic_module->super.coll_scatter = mca_coll_basic_scatter_intra; + basic_module->super.coll_scatter = ompi_coll_base_scatter_intra_basic_linear; basic_module->super.coll_scatterv = mca_coll_basic_scatterv_intra; } diff --git a/ompi/mca/coll/basic/coll_basic_reduce.c b/ompi/mca/coll/basic/coll_basic_reduce.c index 0e3810cf36..a8b8a7bd94 100644 --- a/ompi/mca/coll/basic/coll_basic_reduce.c +++ b/ompi/mca/coll/basic/coll_basic_reduce.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * 
Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -28,241 +28,6 @@ #include "ompi/mca/pml/pml.h" #include "ompi/op/op.h" -/* - * reduce_lin_intra - * - * Function: - reduction using O(N) algorithm - * Accepts: - same as MPI_Reduce() - * Returns: - MPI_SUCCESS or error code - */ -int -mca_coll_basic_reduce_lin_intra(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - int i, rank, err, size; - ptrdiff_t true_lb, true_extent, lb, extent; - char *free_buffer = NULL; - char *pml_buffer = NULL; - char *inplace_temp = NULL; - char *inbuf; - - /* Initialize */ - - rank = ompi_comm_rank(comm); - size = ompi_comm_size(comm); - - /* If not root, send data to the root. */ - - if (rank != root) { - err = MCA_PML_CALL(send(sbuf, count, dtype, root, - MCA_COLL_BASE_TAG_REDUCE, - MCA_PML_BASE_SEND_STANDARD, comm)); - return err; - } - - /* Root receives and reduces messages. Allocate buffer to receive - * messages. This comment applies to all collectives in this basic - * module where we allocate a temporary buffer. For the next few - * lines of code, it's tremendously complicated how we decided that - * this was the Right Thing to do. Sit back and enjoy. And prepare - * to have your mind warped. :-) - * - * Recall some definitions (I always get these backwards, so I'm - * going to put them here): - * - * extent: the length from the lower bound to the upper bound -- may - * be considerably larger than the buffer required to hold the data - * (or smaller! But it's easiest to think about when it's larger). 
- * - * true extent: the exact number of bytes required to hold the data - * in the layout pattern in the datatype. - * - * For example, consider the following buffer (just talking about - * true_lb, extent, and true extent -- extrapolate for true_ub: - * - * A B C - * -------------------------------------------------------- - * | | | - * -------------------------------------------------------- - * - * There are multiple cases: - * - * 1. A is what we give to MPI_Send (and friends), and A is where - * the data starts, and C is where the data ends. In this case: - * - * - extent: C-A - * - true extent: C-A - * - true_lb: 0 - * - * A C - * -------------------------------------------------------- - * | | - * -------------------------------------------------------- - * <=======================extent=========================> - * <======================true extent=====================> - * - * 2. A is what we give to MPI_Send (and friends), B is where the - * data starts, and C is where the data ends. In this case: - * - * - extent: C-A - * - true extent: C-B - * - true_lb: positive - * - * A B C - * -------------------------------------------------------- - * | | User buffer | - * -------------------------------------------------------- - * <=======================extent=========================> - * <===============true extent=============> - * - * 3. B is what we give to MPI_Send (and friends), A is where the - * data starts, and C is where the data ends. In this case: - * - * - extent: C-A - * - true extent: C-A - * - true_lb: negative - * - * A B C - * -------------------------------------------------------- - * | | User buffer | - * -------------------------------------------------------- - * <=======================extent=========================> - * <======================true extent=====================> - * - * 4. MPI_BOTTOM is what we give to MPI_Send (and friends), B is - * where the data starts, and C is where the data ends. 
In this - * case: - * - * - extent: C-MPI_BOTTOM - * - true extent: C-B - * - true_lb: [potentially very large] positive - * - * MPI_BOTTOM B C - * -------------------------------------------------------- - * | | User buffer | - * -------------------------------------------------------- - * <=======================extent=========================> - * <===============true extent=============> - * - * So in all cases, for a temporary buffer, all we need to malloc() - * is a buffer of size true_extent. We therefore need to know two - * pointer values: what value to give to MPI_Send (and friends) and - * what value to give to free(), because they might not be the same. - * - * Clearly, what we give to free() is exactly what was returned from - * malloc(). That part is easy. :-) - * - * What we give to MPI_Send (and friends) is a bit more complicated. - * Let's take the 4 cases from above: - * - * 1. If A is what we give to MPI_Send and A is where the data - * starts, then clearly we give to MPI_Send what we got back from - * malloc(). - * - * 2. If B is what we get back from malloc, but we give A to - * MPI_Send, then the buffer range [A,B) represents "dead space" - * -- no data will be put there. So it's safe to give B-true_lb to - * MPI_Send. More specifically, the true_lb is positive, so B-true_lb is - * actually A. - * - * 3. If A is what we get back from malloc, and B is what we give to - * MPI_Send, then the true_lb is negative, so A-true_lb will actually equal - * B. - * - * 4. Although this seems like the weirdest case, it's actually - * quite similar to case #2 -- the pointer we give to MPI_Send is - * smaller than the pointer we got back from malloc(). - * - * Hence, in all cases, we give (return_from_malloc - true_lb) to MPI_Send. - * - * This works fine and dandy if we only have (count==1), which we - * rarely do. ;-) So we really need to allocate (true_extent + - * ((count - 1) * extent)) to get enough space for the rest. 
This may - * be more than is necessary, but it's ok. - * - * Simple, no? :-) - * - */ - - ompi_datatype_get_extent(dtype, &lb, &extent); - ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent); - - if (MPI_IN_PLACE == sbuf) { - sbuf = rbuf; - inplace_temp = (char*)malloc(true_extent + (count - 1) * extent); - if (NULL == inplace_temp) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - rbuf = inplace_temp - true_lb; - } - - if (size > 1) { - free_buffer = (char*)malloc(true_extent + (count - 1) * extent); - if (NULL == free_buffer) { - if (NULL != inplace_temp) { - free(inplace_temp); - } - return OMPI_ERR_OUT_OF_RESOURCE; - } - pml_buffer = free_buffer - true_lb; - } - - /* Initialize the receive buffer. */ - - if (rank == (size - 1)) { - err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)rbuf, (char*)sbuf); - } else { - err = MCA_PML_CALL(recv(rbuf, count, dtype, size - 1, - MCA_COLL_BASE_TAG_REDUCE, comm, - MPI_STATUS_IGNORE)); - } - if (MPI_SUCCESS != err) { - if (NULL != free_buffer) { - free(free_buffer); - } - return err; - } - - /* Loop receiving and calling reduction function (C or Fortran). */ - - for (i = size - 2; i >= 0; --i) { - if (rank == i) { - inbuf = (char*)sbuf; - } else { - err = MCA_PML_CALL(recv(pml_buffer, count, dtype, i, - MCA_COLL_BASE_TAG_REDUCE, comm, - MPI_STATUS_IGNORE)); - if (MPI_SUCCESS != err) { - if (NULL != free_buffer) { - free(free_buffer); - } - return err; - } - - inbuf = pml_buffer; - } - - /* Perform the reduction */ - - ompi_op_reduce(op, inbuf, rbuf, count, dtype); - } - - if (NULL != inplace_temp) { - err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)sbuf, inplace_temp); - free(inplace_temp); - } - if (NULL != free_buffer) { - free(free_buffer); - } - - /* All done */ - - return MPI_SUCCESS; -} - /* * reduce_log_intra @@ -339,8 +104,8 @@ mca_coll_basic_reduce_log_intra(void *sbuf, void *rbuf, int count, * operations. 
*/ if (!ompi_op_is_commute(op)) { - return mca_coll_basic_reduce_lin_intra(sbuf, rbuf, count, dtype, - op, root, comm, module); + return ompi_coll_base_reduce_intra_basic_linear(sbuf, rbuf, count, dtype, + op, root, comm, module); } /* Some variables */ diff --git a/ompi/mca/coll/basic/coll_basic_scatter.c b/ompi/mca/coll/basic/coll_basic_scatter.c index 46d9a20263..74aa9e8ed6 100644 --- a/ompi/mca/coll/basic/coll_basic_scatter.c +++ b/ompi/mca/coll/basic/coll_basic_scatter.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -28,73 +28,6 @@ #include "coll_basic.h" -/* - * scatter_intra - * - * Function: - scatter operation - * Accepts: - same arguments as MPI_Scatter() - * Returns: - MPI_SUCCESS or error code - */ -int -mca_coll_basic_scatter_intra(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, int rcount, - struct ompi_datatype_t *rdtype, - int root, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - int i, rank, size, err; - char *ptmp; - ptrdiff_t lb, incr; - - /* Initialize */ - - rank = ompi_comm_rank(comm); - size = ompi_comm_size(comm); - - /* If not root, receive data. */ - - if (rank != root) { - err = MCA_PML_CALL(recv(rbuf, rcount, rdtype, root, - MCA_COLL_BASE_TAG_SCATTER, - comm, MPI_STATUS_IGNORE)); - return err; - } - - /* I am the root, loop sending data. 
*/ - - err = ompi_datatype_get_extent(sdtype, &lb, &incr); - if (OMPI_SUCCESS != err) { - return OMPI_ERROR; - } - - incr *= scount; - for (i = 0, ptmp = (char *) sbuf; i < size; ++i, ptmp += incr) { - - /* simple optimization */ - - if (i == rank) { - if (MPI_IN_PLACE != rbuf) { - err = - ompi_datatype_sndrcv(ptmp, scount, sdtype, rbuf, rcount, - rdtype); - } - } else { - err = MCA_PML_CALL(send(ptmp, scount, sdtype, i, - MCA_COLL_BASE_TAG_SCATTER, - MCA_PML_BASE_SEND_STANDARD, comm)); - } - if (MPI_SUCCESS != err) { - return err; - } - } - - /* All done */ - - return MPI_SUCCESS; -} - - /* * scatter_inter * diff --git a/ompi/mca/coll/tuned/coll_tuned_barrier_decision.c b/ompi/mca/coll/tuned/coll_tuned_barrier_decision.c index 6e05857ebc..c46d14bf40 100644 --- a/ompi/mca/coll/tuned/coll_tuned_barrier_decision.c +++ b/ompi/mca/coll/tuned/coll_tuned_barrier_decision.c @@ -98,7 +98,7 @@ int ompi_coll_tuned_barrier_intra_do_forced(struct ompi_communicator_t *comm, switch (tuned_module->user_forced[BARRIER].algorithm) { case (0): return ompi_coll_tuned_barrier_intra_dec_fixed(comm, module); - case (1): return ompi_coll_base_barrier_intra_linear(comm, module); + case (1): return ompi_coll_base_barrier_intra_basic_linear(comm, module); case (2): return ompi_coll_base_barrier_intra_doublering(comm, module); case (3): return ompi_coll_base_barrier_intra_recursivedoubling(comm, module); case (4): return ompi_coll_base_barrier_intra_bruck(comm, module); @@ -122,7 +122,7 @@ int ompi_coll_tuned_barrier_intra_do_this (struct ompi_communicator_t *comm, switch (algorithm) { case (0): return ompi_coll_tuned_barrier_intra_dec_fixed(comm, module); - case (1): return ompi_coll_base_barrier_intra_linear(comm, module); + case (1): return ompi_coll_base_barrier_intra_basic_linear(comm, module); case (2): return ompi_coll_base_barrier_intra_doublering(comm, module); case (3): return ompi_coll_base_barrier_intra_recursivedoubling(comm, module); case (4): return 
ompi_coll_base_barrier_intra_bruck(comm, module);